#!/usr/bin/env bash
set -eo pipefail # Strict mode: abort on errors and failed pipelines
# ----------------------------------
# Global configuration (for maintainability)
# ----------------------------------
ANALYSIS_DIR="/home/username/analysis" # Single place to manage paths
S3_DB_PATH="s3.db"                     # Task database file
LOG_DIR="log"
TMP_DB="db.tmp"                        # Staging copy so the live db file is never edited in place
LOCK_TIMEOUT=10                        # File-lock timeout in seconds
# ----------------------------------
# Helper functions
# ----------------------------------
# Initialize the environment (resolve the script's own directory)
init_environment() {
    WORK_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
    cd "${WORK_DIR}" || exit 1
}
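# Anchoring on BASH_SOURCE means relative paths such as "${LOG_DIR}" and
# "${TMP_DB}" resolve against the script's directory, not the caller's CWD.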
# Atomically update the task database (guarded by a file lock)
update_task_db() {
    local batch_id db_path="${1:-${S3_DB_PATH}}"
    [[ -f "${db_path}" ]] || touch "${db_path}"
    (
        flock -w "${LOCK_TIMEOUT}" 200 || exit 1
        # Stage a copy first so existing entries survive the replacement below
        cp "${db_path}" "${TMP_DB}"
        # Use pgrep to find running batch jobs; take the last argument of each
        # command line and strip its directory to get the batch id
        while read -r batch_id; do
            if ! grep -q "^${LOG_DIR}/${batch_id}.log" "${db_path}"; then
                echo "${LOG_DIR}/${batch_id}.log" >> "${TMP_DB}"
            fi
        done < <(pgrep -f "run_batch_analysis.*todo/torun" | xargs -I{} -- ps -p {} -o args= | awk '{print $NF}' | sed 's@.*/@@')
        # Atomically replace the original database file
        mv "${TMP_DB}" "${db_path}"
    ) 200>"${db_path}.lock"
}
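# Any other writer must take the same lock to stay safe; a hypothetical
# manual append would look like:
#   ( flock -w "${LOCK_TIMEOUT}" 200 || exit 1
#     echo "log/extra_batch.log" >> "${S3_DB_PATH}" ) 200>"${S3_DB_PATH}.lock"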
# Duration calculation, portable across macOS (BSD date) and Linux (GNU date)
calculate_duration() {
    local start_sec end_sec diff_sec
    # Try BSD date first; fall back to GNU date when -j/-f are unsupported
    start_sec=$(date -j -f "%Y-%m-%d %H:%M:%S" "$1" "+%s" 2>/dev/null || date -d "$1" "+%s")
    end_sec=$(date -j -f "%Y-%m-%d %H:%M:%S" "$2" "+%s" 2>/dev/null || date -d "$2" "+%s")
    diff_sec=$((end_sec - start_sec))
    printf "%02d:%02d\n" $((diff_sec/3600)) $(( (diff_sec%3600)/60 ))
}
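# Example (hypothetical timestamps; format must match "%Y-%m-%d %H:%M:%S"):
#   calculate_duration "2024-05-01 08:00:00" "2024-05-01 09:45:00"  # -> 01:45 (HH:MM elapsed)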
# ----------------------------------
# Main monitoring loop
# ----------------------------------
main() {
    init_environment
    local line log_path
    local stages=('split' 'whole-set' 'merge_sample' 'post_sample' 'merge_region' 'post_region' 'publish' 'Done')
    while :; do
        update_task_db
        # Read line by line through a dedicated file descriptor
        while IFS= read -r -u9 line; do
            # Skip comments and lines already flagged as errors
            [[ "${line}" =~ ^# || "${line,,}" == *error* ]] && continue
            log_path="${ANALYSIS_DIR}/${line}"
            [[ -f "${log_path}" ]] || continue
            # Collect the key facts in an associative array (requires Bash 4+)
            declare -A log_info=(
                [key_lines]="$(grep -m1 'All tasks completed' "${log_path}")"
                [fcid]="$(grep -oE '[^/]*[0-9]+' <<< "${log_path}")"
            )
            # Delegate the detailed handling to a helper function; pass values
            # by key, since "${log_info[@]}" expands in no guaranteed order
            process_log_line "${log_path}" "${log_info[key_lines]}" "${log_info[fcid]}"
        done 9< <(grep -iv -e 'done' -e '^#' "${S3_DB_PATH}")
        sleep 30
        clear
    done
}
# Process a single log line (kept modular)
process_log_line() {
    local log_path="$1" key_lines="$2" fcid="$3"
    # Reporting logic goes here...
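    # --- Minimal illustrative sketch, not the original reporting code ---
    # Assumes the log mentions the stage keywords; `stages` is visible here
    # through bash dynamic scoping because main() declares it as a local.
    local stage_re stage
    stage_re=$(IFS='|'; echo "${stages[*]}")
    stage=$(grep -oE "${stage_re}" "${log_path}" | tail -n1 || true)
    printf '%s\t%s\t%s\n' "${fcid}" "${stage:-pending}" "${key_lines:-running}"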
}
# ----------------------------------
# Entry point
# ----------------------------------
main "$@"