diff --git a/sources/bin/check_health.sh b/sources/bin/check_health.sh index 525d948..cf0054f 100644 --- a/sources/bin/check_health.sh +++ b/sources/bin/check_health.sh @@ -37,7 +37,7 @@ # ------------------------- CONFIGURATION starts here ------------------------- # define the version (YYYY-MM-DD) -typeset -r SCRIPT_VERSION="2018-05-20" +typeset -r SCRIPT_VERSION="2018-05-29" # location of parent directory containing KSH functions/HC plugins typeset -r FPATH_PARENT="/opt/hc/lib" # location of custom HC configuration files @@ -58,9 +58,10 @@ typeset -r HOST_NAME="$(hostname)" typeset -r OS_NAME="$(uname -s)" typeset -r LOCK_DIR="${TMP_DIR}/.${SCRIPT_NAME}.lock" typeset -r HC_MSG_FILE="${TMP_DIR}/.${SCRIPT_NAME}.hc.msg.$$" # plugin messages files -typeset -r LOG_SEP="|" # single character only -typeset -r MSG_SEP="%" # single character only -typeset -r MAGIC_QUOTE="!_!" # magic quote +typeset -r LOG_SEP="|" # single character only +typeset -r MSG_SEP="%" # single character only +typeset -t NUM_LOG_FIELDS=6 # current number of fields in $HC_LOG + 1 +typeset -r MAGIC_QUOTE="!_!" # magic quote typeset -r LOG_DIR="/var/opt/hc" typeset -r LOG_FILE="${LOG_DIR}/${SCRIPT_NAME}.log" typeset -r ARCHIVE_DIR="${LOG_DIR}/archive" @@ -91,6 +92,7 @@ typeset LINUX_RELEASE="" typeset ARCHIVE_RC=0 typeset DISABLE_RC=0 typeset ENABLE_RC=0 +typeset FIX_FC=0 typeset RUN_RC=0 typeset RUN_CONFIG_FILE="" typeset RUN_TIME_OUT=0 @@ -384,6 +386,12 @@ then ARG_VERBOSE=0 ARG_LOG=0 fi +# --fix-logs +if (( ARG_ACTION == 12 )) && [[ -n "${ARG_HC}" ]] +then + print -u2 "ERROR: you can only use '--fix-logs' in combination with '--with-history'" + exit 1 +fi # --timeout if (( ARG_TIME_OUT > 0 )) then @@ -393,17 +401,17 @@ then if (( ARG_TIME_OUT < HC_MIN_TIME_OUT )) then print -u2 "ERROR: you cannot specify a value for '--timeout' smaller than ${HC_MIN_TIME_OUT} (see \$HC_MIN_TIME_OUT})" - exit 1 + exit 1 fi if (( ARG_TIME_OUT < HC_TIME_OUT )) then - print -u2 "ERROR: you cannot specify a value for '--timeout' smaller than ${HC_TIME_OUT} (see ${CONFIG_FILE})" - exit 1 + print -u2 "ERROR: you cannot specify a value for '--timeout' smaller than ${HC_TIME_OUT} (see ${CONFIG_FILE})" + exit 1 fi HC_TIME_OUT=${ARG_TIME_OUT} else print -u2 "ERROR: you can only specify a value for '--timeout' in combination with '--run'" - exit 1 + exit 1 fi fi @@ -504,8 +512,8 @@ cat << EOT Execute/report simple health checks (HC) on UNIX hosts. Syntax: ${SCRIPT_DIR}/${SCRIPT_NAME} [--help] | [--help-terse] | [--version] | - [--list=] | [--list-core] | [--fix-symlinks] | [--show-stats] | (--disable-all | enable-all) | - (--check-host | ((--archive | --check | --enable | --disable | --run [--timeout=] | --show) --hc= [--config-file=] [hc-args="])) + [--list=] | [--list-core] | [--fix-symlinks] | [--show-stats] | (--disable-all | enable-all) | [--fix-logs [--with-history]] | + (--check-host | ((--archive | --check | --enable | --disable | --run [--timeout=] | --show) --hc= [--config-file=] [hc-args="])) [--display=] ([--debug] [--debug-level=]) [--no-monitor] [--no-log] [--no-lock] [--flip-rc] [--notify=] [--mail-to=] [--sms-to= --sms-provider=] [--report= ( ([--last] | [--today]) | ([--reverse] [--id= [--detail]] [--with-history]) ) ] @@ -529,6 +537,7 @@ Parameters: --display : display HC results in a formatted way. Default is STDOUT (see --list-core for available formats) --enable : enable HC(s). --enable-all : enable all HCs. +--fix-logs : fix rogue log entries (can be used with --with-history) --fix-symlinks : update symbolic links for the KSH autoloader. --flip-rc : exit the health checker with the RC (return code) of the HC plugin instead of its own RC (will be discarded) This option may only be specified when executing a single HC plugin @@ -680,16 +689,34 @@ CMD_LINE="$*" [[ -z "${CMD_LINE}" ]] && display_usage && exit 0 for CMD_PARAMETER in ${CMD_LINE} do + # ARG_ACTION is a toggle, do not allow double toggles case ${CMD_PARAMETER} in -archive|--archive) - ARG_ACTION=10 + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else" + exit 1 + else + ARG_ACTION=10 + fi + ARG_LOCK=1 ;; -check|--check) ARG_ACTION=1 ;; -c|-check-host|--check-host) + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=4 + fi ARG_CHECK_HOST=1 - ARG_ACTION=4 ;; -config-file=*) ARG_CONFIG_FILE="${CMD_PARAMETER#-config-file=}" @@ -712,10 +739,22 @@ do ARG_DETAIL=1 ;; -d|-disable|--disable) - ARG_ACTION=2 + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=2 + fi ;; -disable-all|--disable-all) - ARG_ACTION=6 + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=6 + fi ;; -display|--display) # STDOUT as default @@ -728,10 +767,22 @@ do ARG_DISPLAY="${CMD_PARAMETER#--display=}" ;; -e|-enable|--enable) - ARG_ACTION=3 + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=3 + fi ;; -enable-all|--enable-all) - ARG_ACTION=7 + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=7 + fi ;; -f|-fix-symlinks|--fix-symlinks) read_config @@ -742,6 +793,16 @@ do fix_symlinks exit 0 ;; + -fix-logs|--fix-logs) + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=12 + fi + ARG_LOCK=1 + ;; -flip-rc|--flip-rc) ARG_FLIP_RC=1 ;; @@ -770,15 +831,33 @@ do ARG_LAST=1 ;; -list|--list) - ARG_ACTION=9 + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=9 + fi ;; -list=*) ARG_LIST="${CMD_PARAMETER#-list=}" - ARG_ACTION=9 + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=9 + fi ;; --list=*) ARG_LIST="${CMD_PARAMETER#--list=}" - ARG_ACTION=9 + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=9 + fi ;; -list-hc|--list-hc|-list-all|--list-all) print -u2 "WARN: deprecated option. Use --list | --list=" @@ -816,34 +895,69 @@ do ARG_MONITOR=0 ;; -report|--report) # compatability support <2017-12-15 + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=8 + fi # STDOUT as default ARG_REPORT="std" ARG_LOG=0; ARG_VERBOSE=0 - ARG_ACTION=8 ;; -report=*) + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=8 + fi ARG_REPORT="${CMD_PARAMETER#-report=}" ARG_LOG=0; ARG_VERBOSE=0 - ARG_ACTION=8 ;; --report=*) + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=8 + fi ARG_REPORT="${CMD_PARAMETER#--report=}" ARG_LOG=0; ARG_VERBOSE=0 - ARG_ACTION=8 ;; -reverse|--reverse) ARG_REVERSE=1 ;; -r|-run|--run) - ARG_ACTION=4 + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=4 + fi ;; -s|-show|--show) - ARG_ACTION=5 - ARG_LOG=0 - ARG_VERBOSE=0 + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=5 + fi + ARG_LOG=0; ARG_VERBOSE=0 ;; -show-stats|--show-stats) - ARG_ACTION=11 + if (( ARG_ACTION > 0 )) + then + print -u2 "ERROR: you cannot request two actions at the same time" + exit 1 + else + ARG_ACTION=11 + fi ;; -sms-provider=*) ARG_SMS_PROVIDER="${CMD_PARAMETER#-sms-provider=}" @@ -921,8 +1035,8 @@ fi log "*** start of ${SCRIPT_NAME} [${CMD_LINE}] ***" (( ARG_LOG != 0 )) && log "logging takes places in ${LOG_FILE}" -# check/create lock file & write PID file (only for --run) -(( ARG_ACTION == 4 )) && check_lock_dir +# check/create lock file & write PID file (only for --run/--archive/--fix-logs) +(( ARG_ACTION == 4 || ARG_ACTION == 11 || ARG_ACTION == 12 )) && check_lock_dir # general HC log HC_LOG="${LOG_DIR}/hc.log" @@ -1048,7 +1162,7 @@ case ${ARG_ACTION} in HC_TIME_OUT=60 fi fi - + # run HC with or without monitor if (( ARG_MONITOR == 0 )) then @@ -1185,7 +1299,7 @@ case ${ARG_ACTION} in case ${ARCHIVE_RC} in 0) log "no archiving needed for ${ARG_HC}" - ;; + ;; 1) log "successfully archived log entries for ${ARG_HC}" ;; @@ -1197,7 +1311,24 @@ case ${ARG_ACTION} in ;; 11) # show HC event statistics show_statistics - ;; + ;; + 12) + # fix rogue log entries + fix_logs + FIX_RC=$? + case ${FIX_RC} in + 0) + : # feedback via fix_logs() + ;; + 1) + log "successfully fixed log entries" + ;; + 2) + log "failed to fix log entries [RC=${FIX_RC}]" + EXIT_CODE=1 + ;; + esac + ;; esac # finish up work diff --git a/sources/lib/core/include_core.sh b/sources/lib/core/include_core.sh index e130e0b..eb3686b 100644 --- a/sources/lib/core/include_core.sh +++ b/sources/lib/core/include_core.sh @@ -28,12 +28,13 @@ # DOES: archive log entries for a given HC # EXPECTS: HC name [string] # RETURNS: 0=no archiving needed; 1=archiving OK; 2=archiving NOK -# REQUIRES: n/a +# REQUIRES: ${HC_LOG} function archive_hc { (( ARG_DEBUG != 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}" typeset HC_NAME="${1}" typeset ARCHIVE_FILE="" +typeset ARCHIVE_RC=0 typeset YEAR_MONTH="" typeset LOG_COUNT=0 typeset ARCHIVE_RC=0 @@ -46,22 +47,23 @@ trap "rm -f ${TMP1_FILE} ${TMP2_FILE} ${SAVE_LOG_FILE} >/dev/null 2>&1; return 1 # isolate messages from HC, find unique %Y-%m combinations grep ".*${LOG_SEP}${HC_NAME}${LOG_SEP}" ${HC_LOG} 2>/dev/null |\ - cut -f1 -d"${LOG_SEP}" | cut -f1 -d' ' | cut -f1-2 -d'-' | sort -u |\ + cut -f1 -d"${LOG_SEP}" 2>/dev/null | cut -f1 -d' ' 2>/dev/null |\ + cut -f1-2 -d'-' 2>/dev/null | sort -u 2>/dev/null |\ while read YEAR_MONTH do # find all messages for that YEAR-MONTH combination grep "${YEAR_MONTH}.*${LOG_SEP}${HC_NAME}${LOG_SEP}" ${HC_LOG} >${TMP1_FILE} - LOG_COUNT=$(wc -l ${TMP1_FILE} | cut -f1 -d' ') + LOG_COUNT=$(wc -l ${TMP1_FILE} 2>/dev/null | cut -f1 -d' ' 2>/dev/null) log "# of entries in ${YEAR_MONTH} to archive: ${LOG_COUNT}" # combine existing archived messages and resort ARCHIVE_FILE="${ARCHIVE_DIR}/hc.${YEAR_MONTH}.log" - cat ${ARCHIVE_FILE} ${TMP1_FILE} 2>/dev/null | sort -u >${TMP2_FILE} + cat ${ARCHIVE_FILE} ${TMP1_FILE} 2>/dev/null | sort -u >${TMP2_FILE} 2>/dev/null mv ${TMP2_FILE} ${ARCHIVE_FILE} 2>/dev/null || { warn "failed to move archive file, aborting" return 2 } - LOG_COUNT=$(wc -l ${ARCHIVE_FILE} | cut -f1 -d' ') + LOG_COUNT=$(wc -l ${ARCHIVE_FILE} 2>/dev/null | cut -f1 -d' ' 2>/dev/null) log "# entries in ${ARCHIVE_FILE} now: ${LOG_COUNT}" # remove archived messages from the $HC_LOG (but create a backup first!) @@ -76,7 +78,7 @@ do warn "failed to move HC log file, aborting" return 2 } - LOG_COUNT=$(wc -l ${HC_LOG} | cut -f1 -d' ') + LOG_COUNT=$(wc -l ${HC_LOG} 2>/dev/null | cut -f1 -d' ' 2>/dev/null ) log "# entries in ${HC_LOG} now: ${LOG_COUNT}" ARCHIVE_RC=1 else @@ -92,6 +94,29 @@ rm -f ${TMP1_FILE} ${TMP2_FILE} ${SAVE_HC_LOG} >/dev/null 2>&1 return ${ARCHIVE_RC} } +# ----------------------------------------------------------------------------- +# @(#) FUNCTION: count_log_errors() +# DOES: check hc log file(s) for rogue entries. Log entries may get scrambled +# if the append operation in handle_hc() does not happen fully atomically. +# This means that log entries are written without line separator (same line) +# There is no proper way to avoid this without an extra file locking utility +# EXPECTS: path to log file to check +# OUTPUTS: number of errors [number] +# RETURNS: 0 +# REQUIRES: n/a +function count_log_errors +{ +(( ARG_DEBUG != 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}" +typeset LOG_STASH="${1}" +typeset ERROR_COUNT=0 + +ERROR_COUNT=$(cat ${LOG_STASH} 2>/dev/null | awk -F"${LOG_SEP}" 'BEGIN { num = 0 } { if (NF>'"${NUM_LOG_FIELDS}"') { num++ }} END { print num }' 2>/dev/null) + +print ${ERROR_COUNT} + +return 0 +} + # ----------------------------------------------------------------------------- # @(#) FUNCTION: debug() # DOES: handle debug messages @@ -110,7 +135,6 @@ done return 0 } - # ----------------------------------------------------------------------------- # @(#) FUNCTION: die() # DOES: handle fatal errors and exit script @@ -528,10 +552,6 @@ if (( DO_REPORT_STD == 0 )) && (( ARG_DETAIL != 0 )) then die "you cannot specify '--detail' without '--report'" fi -if (( DO_REPORT_STD == 0 )) && (( ARG_HISTORY != 0 )) -then - die "you cannot specify '--with-history' without '--report'" -fi if (( DO_REPORT_STD == 0 )) && [[ -n "${ARG_FAIL_ID}" ]] then die "you cannot specify '--id' without '--report'" @@ -604,6 +624,167 @@ done return 0 } +# ----------------------------------------------------------------------------- +# @(#) FUNCTION: fix_logs() +# DOES: fix hc log file(s) with rogue entries +# EXPECTS: n/a +# REQUIRES: n/a +# RETURNS: 0=no fix needed; 1=fix OK; 2=fix NOK +# NOTE: this routine rewrites the HC log(s). Since we cannot use file locking, +# some log entries may be lost if the HC is accessing the HC log during +# the rewrite operation!! +function fix_logs +{ +(( ARG_DEBUG != 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}" +typeset FIX_FILE="" +typeset FIX_RC=0 +typeset LOG_STASH="" +typeset ERROR_COUNT=0 +typeset STASH_COUNT=0 +typeset TMP_COUNT=0 +typeset SAVE_TMP_FILE="${TMP_DIR}/.$0.save.log.$$" +typeset TMP_FILE="${TMP_DIR}/.$0.tmp.log.$$" + +if (( ARG_HISTORY != 0 )) +then + set +f # file globbing must be on + LOG_STASH="${HC_LOG} ${ARCHIVE_DIR}/hc.*.log" +else + LOG_STASH="${HC_LOG}" +fi + +# set local trap for clean-up +trap "[[ -f ${TMP_FILE} ]] && rm -f ${TMP_FILE} >/dev/null 2>&1; return 1" 1 2 3 15 + +# check and rewrite log file(s) +find ${LOG_STASH} -type f -print 2>/dev/null | while read FIX_FILE +do + log "fixing log file ${FIX_FILE} ..." + + # count before rewrite + STASH_COUNT=$(wc -l ${FIX_FILE} 2>/dev/null | cut -f1 -d' ' 2>/dev/null) + + # does it have errors? + ERROR_COUNT=$(count_log_errors ${FIX_FILE}) + + # rewrite if needed + if (( ERROR_COUNT > 0 )) + then + >${TMP_FILE} 2>/dev/null + cat ${FIX_FILE} 2>/dev/null | awk -F"${LOG_SEP}" -v OFS="${LOG_SEP}" ' + + BEGIN { max_log_fields = '"${NUM_LOG_FIELDS}"' + max_fields = (max_log_fields - 1) * 2 + glue_field = max_log_fields - 1 + } + + # Fix log lines that were smashed together because of unatomic appends + # This can lead to 4 distinct cases that we need to rewrite based on + # whether a FAIL_ID is present in each part of the log line. + # Following examples are based on a log file with 5 standard fields: + # case 1: NO (FAIL_ID) + NO (FAIL_ID) -> 9 fields + # case 2: NO (FAIL_ID) + YES (FAIL_ID) -> 10 fields + # case 3: YES (FAIL_ID) + NO (FAIL_ID) -> 10 fields + # case 4: YES (FAIL_ID) + YES (FAIL_ID) -> 11 fields + + { + if (NF > max_log_fields) { + # rogue line that needs rewriting + if (NF < max_fields) { + # case 1 + for (i=1;i${TMP_FILE} 2>/dev/null + + # count after rewrite + TMP_COUNT=$(wc -l ${TMP_FILE} 2>/dev/null | cut -f1 -d' ' 2>/dev/null) + + # bail out when we do not have enough records + if (( TMP_COUNT <= STASH_COUNT )) + then + warn "found inconsistent record count (${TMP_COUNT}<${STASH_COUNT}), aborting" + return 2 + fi + + # swap log file (but create a backup first!) + cp -p ${FIX_FILE} ${SAVE_TMP_FILE} 2>/dev/null + if (( $? == 0 )) + then + mv ${TMP_FILE} ${FIX_FILE} 2>/dev/null + if (( $? > 0 )) + then + warn "failed to move/update log file, rolling back" + mv ${SAVE_TMP_FILE} ${FIX_FILE} 2>/dev/null + return 2 + fi + FIX_RC=1 + else + warn "failed to create a backup of original log file, aborting" + return 2 + fi + + # clean up temporary file(s) + rm -f ${SAVE_TMP_FILE} ${TMP_FILE} >/dev/null 2>&1 + else + log "no fixing needed for ${FIX_FILE}" + fi + + ERROR_COUNT=0 +done + +return ${FIX_RC} +} + # ----------------------------------------------------------------------------- # @(#) FUNCTION: handle_hc() # DOES: handle HC results diff --git a/sources/lib/core/report_std.sh b/sources/lib/core/report_std.sh index d1de76e..3423270 100644 --- a/sources/lib/core/report_std.sh +++ b/sources/lib/core/report_std.sh @@ -20,7 +20,7 @@ # DOES: report HC events on STDOUT # EXPECTS: n/a # RETURNS: 0 -# REQUIRES: init_hc(), list_hc(), $EVENTS_DIR, $HC_LOG +# REQUIRES: count_log_errors(), init_hc(), list_hc(), $EVENTS_DIR, $HC_LOG # # ----------------------------------------------------------------------------- # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! @@ -30,7 +30,7 @@ function report_std { # ------------------------- CONFIGURATION starts here ------------------------- -typeset _VERSION="2018-04-29" # YYYY-MM-DD +typeset _VERSION="2018-05-27" # YYYY-MM-DD typeset _SUPPORTED_PLATFORMS="AIX,HP-UX,Linux" # uname -s match # ------------------------- CONFIGURATION ends here --------------------------- @@ -40,11 +40,14 @@ init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}" typeset _DIR_PREFIX="" typeset _FAIL_COUNT=0 +typeset _ERROR_COUNT=0 +typeset _ERROR_TOTAL_COUNT=0 typeset _HC_LAST="" typeset _HC_LAST_TIME="" typeset _HC_LAST_STC=0 typeset _HC_LAST_FAIL_ID="-" typeset _ID_NEEDLE="" +typeset _CHECK_FILE="" typeset _LOG_STASH="" typeset _REPORT_LINE="" typeset _SORT_CMD="" @@ -80,13 +83,13 @@ then # use of cat is not useless here, makes sure END {} gets executed even # if $_LOG STASH contains non-existing files (because of * wildcard) cat ${_LOG_STASH} 2>/dev/null | awk -F "${LOG_SEP}" -v needle_time="${_HC_LAST_TIME}" -v needle_hc="${_HC_LAST}" \ - ' + ' BEGIN { last_stc = 0 last_fail_id = "-" } { - if ($1 ~ needle_time && $2 ~ needle_hc) { + if (($1 ~ needle_time && $2 ~ needle_hc) && NF <= '"${NUM_LOG_FIELDS}"') { last_event_stc = $3 last_stc = last_stc + last_event_stc last_event_fail_id = $5 @@ -103,7 +106,7 @@ then "${_HC_LAST}" "${_HC_LAST_TIME}" "${_HC_LAST_FAIL_ID}" "${_HC_LAST_STC}" done # disclaimer - print "Note: this report only shows the overall combined status of all events of each HC within exactly" + print "NOTE: this report only shows the overall combined status of all events of each HC within exactly" print " the *same* time stamp (seconds precise). It may therefore fail to report certain FAIL IDs." print " Use '--report' to get the exact list of failure events." # other reports @@ -136,12 +139,12 @@ else printf "%120s\n" | tr ' ' - # print failed events - # not a useless use of cat here + # not a useless use of cat here # (sort baulks if $_LOG STASH contains non-existing files (because of * wildcard)) cat ${_LOG_STASH} 2>/dev/null | ${_SORT_CMD} 2>/dev/null | awk -F"${LOG_SEP}" -v id_needle="${_ID_NEEDLE}" \ ' { - if ($5 ~ id_needle) { + if ($5 ~ id_needle && NF <= '"${NUM_LOG_FIELDS}"') { printf ("| %-20s | %-14s | %-30s | %-s\n", $1, $5, $2, $4) } } @@ -149,7 +152,7 @@ else printf "\n%-s\n" "SUMMARY: ${_FAIL_COUNT} failed HC event(s) found." else # print failed events (we may have multiple events for 1 FAIL ID) - # not a useless use of cat here + # not a useless use of cat here # (sort baulks if $_LOG STASH contains non-existing files (because of * wildcard)) cat ${_LOG_STASH} 2>/dev/null | ${_SORT_CMD} 2>/dev/null | awk -F"${LOG_SEP}" -v id_needle="${_ID_NEEDLE}" \ ' BEGIN { @@ -157,14 +160,14 @@ else dashes = sprintf("%36s",""); gsub (/ /, "-", dashes); } { - if ($5 ~ id_needle) { + if ($5 ~ id_needle && NF <= '"${NUM_LOG_FIELDS}"') { printf ("%36sMSG #%03d%36s", dashes, event_count, dashes) printf ("\nTime : %-s\nHC : %-s\nDetail : %-s\n", $1, $2, $4) event_count++ } } ' 2>/dev/null - + _DIR_PREFIX="$(expr substr ${ARG_FAIL_ID} 1 4)-$(expr substr ${ARG_FAIL_ID} 5 2)" printf "%37sSTDOUT%37s\n" | tr ' ' -; # display non-empty STDOUT file(s) @@ -191,6 +194,19 @@ else fi fi +# check consistency of log(s) +find ${_LOG_STASH} -type f -print 2>/dev/null | while read _CHECK_FILE +do + _ERROR_COUNT=$(count_log_errors ${_CHECK_FILE}) + if (( _ERROR_COUNT > 0 )) + then + print "NOTE: found ${_ERROR_COUNT} rogue entr(y|ies) in log file ${_CHECK_FILE}" + _ERROR_TOTAL_COUNT=$(( _ERROR_TOTAL_COUNT + _ERROR_COUNT )) + fi + _ERROR_COUNT=0 +done +(( _ERROR_TOTAL_COUNT > 0 )) && print "NOTE: fix log errors with ${SCRIPT_NAME} --fix-logs [--with-history]" + return 0 }