Linux脚本监控

#!/usr/bin/env bash
#
# Batch-analysis monitor: discovers running batch jobs, records their log
# files in a small flat-file task database, and periodically scans those
# logs for progress.
#
# Required env: none. Paths are configured in the block below.
set -euo pipefail  # strict mode: fail on errors, unset vars, pipeline failures

# ----------------------------------
# Global configuration (one place, for maintainability)
# ----------------------------------
readonly ANALYSIS_DIR="/home/username/analysis"  # root of analysis outputs
readonly S3_DB_PATH="s3.db"                      # task database (one log path per line)
readonly LOG_DIR="log"                           # per-batch log dir, relative to ANALYSIS_DIR
readonly TMP_DB="db.tmp"                         # scratch copy; never edit the db in place
readonly LOCK_TIMEOUT=10                         # seconds to wait for the file lock

# ----------------------------------
# Functions
# ----------------------------------

#######################################
# Resolve the script's own directory and cd into it.
# Globals:  WORK_DIR (written)
#######################################
init_environment() {
  WORK_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
  cd "${WORK_DIR}" || exit 1
}

#######################################
# Atomically merge newly discovered batch logs into the task database.
# The whole read-modify-write cycle runs under an exclusive flock so
# concurrent invocations cannot interleave.
# Arguments: $1 - db path (defaults to S3_DB_PATH; original referenced an
#                 undefined "db", now wired to the config variable)
#######################################
update_task_db() {
  local batch_id db_path="${1:-${S3_DB_PATH}}"
  [[ -f "${db_path}" ]] || touch "${db_path}"

  (
    flock -w "${LOCK_TIMEOUT}" 200 || exit 1

    # Seed the scratch file from the current db so existing entries survive.
    # (The original overwrote the db with ONLY the newly seen batches, and
    # failed outright when no new batch appeared because TMP_DB never existed.)
    cp -- "${db_path}" "${TMP_DB}"

    # pgrep -fa prints "<pid> <full cmdline>"; the batch id is the basename
    # of the last argument of each matching process.
    while read -r batch_id; do
      [[ -n "${batch_id}" ]] || continue
      if ! grep -q "^${LOG_DIR}/${batch_id}.log" "${TMP_DB}"; then
        echo "${LOG_DIR}/${batch_id}.log" >> "${TMP_DB}"
      fi
    done < <(pgrep -fa "run_batch_analysis.*todo/torun" | awk '{print $NF}' | sed 's@.*/@@' || true)

    # Atomic rename: readers always see a complete, consistent file.
    mv -- "${TMP_DB}" "${db_path}"
  ) 200>"${db_path}.lock"
}

#######################################
# Elapsed time between two "%Y-%m-%d %H:%M:%S" timestamps, as HH:MM.
# Tries BSD date (-j -f) first and falls back to GNU date (-d), so the
# function works on both macOS and Linux.
# Arguments: $1 - start timestamp, $2 - end timestamp
# Outputs:   "HH:MM" on stdout
#######################################
calculate_duration() {
  local start_sec end_sec diff_sec
  start_sec=$(date -j -f "%Y-%m-%d %H:%M:%S" "$1" "+%s" 2>/dev/null || date -d "$1" "+%s")
  end_sec=$(date -j -f "%Y-%m-%d %H:%M:%S" "$2" "+%s" 2>/dev/null || date -d "$2" "+%s")
  diff_sec=$((end_sec - start_sec))
  printf "%02d:%02d\n" $((diff_sec / 3600)) $(((diff_sec % 3600) / 60))
}

# ----------------------------------
# Main monitoring loop
# ----------------------------------
main() {
  init_environment
  # Pipeline stages in order; "Done" is terminal.
  # NOTE(review): not yet consumed by the visible logic — reserved for
  # process_log_line's reporting implementation.
  local stages=('split' 'whole-set' 'merge_sample' 'post_sample' 'merge_region' 'post_region' 'publish' 'Done')

  local line log_path key_line fcid
  while :; do
    update_task_db "${S3_DB_PATH}"

    # fd 9 keeps stdin free for anything process_log_line might read.
    while IFS= read -r -u9 line; do
      # Skip comment lines and entries already flagged as errors.
      [[ "${line}" =~ ^# || "${line,,}" == *error* ]] && continue
      log_path="${ANALYSIS_DIR}/${line}"
      [[ -f "${log_path}" ]] || continue

      # "|| true": a log without the marker must not abort the monitor
      # under set -e; the original's bare substitution would have.
      key_line=$(grep -m1 'All tasks completed' "${log_path}" || true)
      fcid=$(grep -oE '[^/]*[0-9]+' <<< "${log_path}" || true)

      # Pass values explicitly: the original expanded an associative array
      # ("${log_info[@]}"), whose element order is unspecified in Bash.
      process_log_line "${log_path}" "${key_line}" "${fcid}"
    done 9< <(grep -iv -e 'done' -e '^#' "${S3_DB_PATH}" || true)

    sleep 30
    clear
  done
}

#######################################
# Handle one log entry (reporting logic to be implemented).
# Arguments: $1 - log path
#            $2 - completion-marker line from the log (may be empty)
#            $3 - flowcell id extracted from the path (may be empty)
#######################################
process_log_line() {
  local log_path="$1" key_line="$2" fcid="$3"
  # TODO: implement stage reporting via calculate_duration and stages[].
  :
}

# ----------------------------------
# Entry point
# ----------------------------------
main "$@"

July 17, 2023 · 2 min · 291 words