diff --git a/sources/bin/check_health.sh b/sources/bin/check_health.sh index 5b5ad00..2458d10 100644 --- a/sources/bin/check_health.sh +++ b/sources/bin/check_health.sh @@ -37,7 +37,7 @@ # ------------------------- CONFIGURATION starts here ------------------------- # define the version (YYYY-MM-DD) -typeset -r SCRIPT_VERSION="2018-10-28" +typeset -r SCRIPT_VERSION="2018-10-31" # location of parent directory containing KSH functions/HC plugins typeset -r FPATH_PARENT="/opt/hc/lib" # location of custom HC configuration files @@ -480,7 +480,8 @@ function check_user typeset WHOAMI="" # avoid sub-shell for mksh/pdksh -WHOAMI=$(IFS='()'; set -- "$(id)"; print $2) +# shellcheck disable=SC2046 +WHOAMI=$(IFS='()'; set -- $(id); print $2) if [[ "${WHOAMI}" != "${EXEC_USER}" ]] then print -u2 "ERROR: must be run as user '${EXEC_USER}'" @@ -1152,6 +1153,13 @@ case ${ARG_ACTION} in # execute plug-in(s) print "${ARG_HC}" | tr ',' '\n' | grep -v '^$' | while read -r HC_RUN do + # re-initialize messages stash (log of failed checks) + HC_MSG_VAR="" + : >${HC_MSG_FILE} 2>/dev/null + if (( $? > 0 )) + then + die "unable to reset the \${HC_MSG_FILE} file" + fi # check for HC (function) exists_hc "${HC_RUN}" if (( $? == 0 )) @@ -1169,7 +1177,7 @@ case ${ARG_ACTION} in stat_hc "${HC_RUN}" if (( $? == 0 )) then - # callback for display_init with extra code 'DISABLED' + # call for display_init with extra code 'DISABLED' if (( DO_DISPLAY_INIT == 1 )) then display_init "${HC_RUN}" "" "DISABLED" @@ -1183,7 +1191,15 @@ case ${ARG_ACTION} in HC_STDOUT_LOG="${TMP_DIR}/${HC_RUN}.stdout.log.$$" HC_STDERR_LOG="${TMP_DIR}/${HC_RUN}.stderr.log.$$" : >${HC_STDOUT_LOG} 2>/dev/null + if (( $? > 0 )) + then + die "unable to reset the \${HC_STDOUT_LOG} file" + fi : >${HC_STDERR_LOG} 2>/dev/null + if (( $? 
> 0 )) + then + die "unable to reset the \${HC_STDERR_LOG} file" + fi # --check-host handling: alternative configuration file, mangle ARG_CONFIG_FILE & HC_TIME_OUT if (( ARG_CHECK_HOST == 1 )) @@ -1211,14 +1227,15 @@ case ${ARG_ACTION} in then log "executed HC: ${HC_RUN} [RC=${RUN_RC}]" else - # callback for display_init with extra code 'ERROR' + # call for display_init with extra code 'ERROR' if (( DO_DISPLAY_INIT == 1 )) then - display_init "${HC_RUN}" "" "ERROR" + # only do call if we have an empty messages stash + # (otherwise handle_hc() will call display_init()) + [[ -s "${HC_MSG_FILE}" ]] || display_init "${HC_RUN}" "" "ERROR" else warn "failed to execute HC: ${HC_RUN} [RC=${RUN_RC}]" fi - continue fi else # set trap on SIGUSR1 @@ -1246,27 +1263,29 @@ case ${ARG_ACTION} in # process return codes if (( RUN_RC != 0 )) then - # callback for display_init with extra code 'ERROR' + # call for display_init with extra code 'ERROR' if (( DO_DISPLAY_INIT == 1 )) then - display_init "${HC_RUN}" "" "ERROR" + # only do call if we have an empty messages stash + # (otherwise handle_hc() will call display_init()) + [[ -s "${HC_MSG_FILE}" ]] || display_init "${HC_RUN}" "" "ERROR" else warn "failed to execute HC: ${HC_RUN} [RC=${RUN_RC}]" fi - continue else if (( CHILD_ERROR == 0 )) then log "executed HC: ${HC_RUN} [RC=${RUN_RC}]" else - # callback for display_init with extra code 'ERROR' + # call for display_init with extra code 'ERROR' if (( DO_DISPLAY_INIT == 1 )) then - display_init "${HC_RUN}" "" "ERROR" + # only do call if we have an empty messages stash + # (otherwise handle_hc() will call display_init()) + [[ -s "${HC_MSG_FILE}" ]] || display_init "${HC_RUN}" "" "ERROR" else warn "failed to execute HC as background process" fi - continue fi fi fi diff --git a/sources/lib/core/include_core.sh b/sources/lib/core/include_core.sh index 3c49922..f027d85 100644 --- a/sources/lib/core/include_core.sh +++ b/sources/lib/core/include_core.sh @@ -97,8 +97,9 @@ return 
${ARCHIVE_RC} # ----------------------------------------------------------------------------- # @(#) FUNCTION: count_log_errors() -# DOES: check hc log file(s) for rogue entries. Log entries may get scrambled -# if the append operation in handle_hc() does not happen fully atomically. +# DOES: check hc log file(s) for rogue entries (=lines with NF>$NUM_LOG_FIELDS +# or empty lines). Log entries may get scrambled if the append operation +# in handle_hc() does not happen fully atomically. # This means that log entries are written without line separator (same line) # There is no proper way to avoid this without an extra file locking utility # EXPECTS: path to log file to check @@ -111,7 +112,14 @@ function count_log_errors typeset LOG_STASH="${1}" typeset ERROR_COUNT=0 -ERROR_COUNT=$(cat ${LOG_STASH} 2>/dev/null | awk -F"${LOG_SEP}" 'BEGIN { num = 0 } { if (NF>'"${NUM_LOG_FIELDS}"') { num++ }} END { print num }' 2>/dev/null) +ERROR_COUNT=$(cat ${LOG_STASH} 2>/dev/null | awk -F"${LOG_SEP}" ' + BEGIN { num = 0 } + { + if (NF>'"${NUM_LOG_FIELDS}"' || $0 == "") { + num++; + } + } + END { print num }' 2>/dev/null) print ${ERROR_COUNT} @@ -641,6 +649,7 @@ function fix_logs typeset FIX_FILE="" typeset FIX_RC=0 typeset LOG_STASH="" +typeset EMPTY_COUNT=0 typeset ERROR_COUNT=0 typeset STASH_COUNT=0 typeset TMP_COUNT=0 @@ -670,6 +679,9 @@ do # does it have errors? 
ERROR_COUNT=$(count_log_errors ${FIX_FILE}) + # we count the empty lines (again) + EMPTY_COUNT=$(grep -c -E -e '^$' ${FIX_FILE} 2>/dev/null) + # rewrite if needed if (( ERROR_COUNT > 0 )) then @@ -743,17 +755,21 @@ do } } printf ("\n") + } else if ($0 == "") { + # skip empty line + next; } else { # correct log line, no rewrite needed print $0 } }' >${TMP_FILE} 2>/dev/null - # count after rewrite + # count after rewrite (include empty lines again in the count) TMP_COUNT=$(wc -l ${TMP_FILE} 2>/dev/null | cut -f1 -d' ' 2>/dev/null) + TMP_COUNT=$(( TMP_COUNT + EMPTY_COUNT )) # bail out when we do not have enough records - if (( TMP_COUNT <= STASH_COUNT )) + if (( TMP_COUNT < STASH_COUNT )) then warn "found inconsistent record count (${TMP_COUNT}<${STASH_COUNT}), aborting" return 2 @@ -1722,12 +1738,15 @@ awk -F"${LOG_SEP}" '{ END { for (hc in total_count) { - printf ("\t%s:\n", hc) - printf ("\t\t# entries: %s\n", total_count[hc]) - printf ("\t\t# STC==0 : %s\n", ok_count[hc]) - printf ("\t\t# STC<>0 : %s\n", nok_count[hc]) - printf ("\t\tfirst : %s\n", first_entry[hc]) - printf ("\t\tlast : %s\n", last_entry[hc]) + # empty hc variable means count of empty lines in log file + if (hc != "") { + printf ("\t%s:\n", hc) + printf ("\t\t# entries: %s\n", total_count[hc]) + printf ("\t\t# STC==0 : %s\n", ok_count[hc]) + printf ("\t\t# STC<>0 : %s\n", nok_count[hc]) + printf ("\t\tfirst : %s\n", first_entry[hc]) + printf ("\t\tlast : %s\n", last_entry[hc]) + } } } ' ${HC_LOG} 2>/dev/null @@ -1761,12 +1780,15 @@ do END { for (hc in total_count) { - printf ("\t%s:\n", hc) - printf ("\t\t# entries: %s\n", total_count[hc]) - printf ("\t\t# STC==0 : %s\n", ok_count[hc]) - printf ("\t\t# STC<>0 : %s\n", nok_count[hc]) - printf ("\t\tfirst : %s\n", first_entry[hc]) - printf ("\t\tlast : %s\n", last_entry[hc]) + # empty hc variable means count of empty lines in log file + if (hc != "") { + printf ("\t%s:\n", hc) + printf ("\t\t# entries: %s\n", total_count[hc]) + printf ("\t\t# STC==0 
: %s\n", ok_count[hc]) + printf ("\t\t# STC<>0 : %s\n", nok_count[hc]) + printf ("\t\tfirst : %s\n", first_entry[hc]) + printf ("\t\tlast : %s\n", last_entry[hc]) + } } } ' ${_ARCHIVE_FILE} 2>/dev/null diff --git a/sources/lib/platform/hp-ux/check_hpux_drd_status.sh b/sources/lib/platform/hp-ux/check_hpux_drd_status.sh index 846c60f..ca6d95b 100644 --- a/sources/lib/platform/hp-ux/check_hpux_drd_status.sh +++ b/sources/lib/platform/hp-ux/check_hpux_drd_status.sh @@ -28,6 +28,7 @@ # @(#) 2018-05-20: added dump_logs() [Patrick Van der Veken] # @(#) 2018-10-18: changed boot status [Patrick Van der Veken] # @(#) 2018-10-28: fixed (linter) errors [Patrick Van der Veken] +# @(#) 2018-10-31: better result check for DRD output [Patrick Van der Veken] # ----------------------------------------------------------------------------- # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! #****************************************************************************** @@ -38,7 +39,7 @@ function check_hpux_drd_status # ------------------------- CONFIGURATION starts here ------------------------- typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" typeset _DRD_BIN="/opt/drd/bin/drd" -typeset _VERSION="2018-10-28" # YYYY-MM-DD +typeset _VERSION="2018-10-31" # YYYY-MM-DD typeset _SUPPORTED_PLATFORMS="HP-UX" # uname -s match # ------------------------- CONFIGURATION ends here --------------------------- @@ -120,13 +121,15 @@ else # drd outputs on STDERR ${_DRD_BIN} status >${HC_STDOUT_LOG} 2>&1 _RC=$? 
+ # check for result in output since _RC is not reliable + grep -q -E -e "succeeded" ${HC_STDOUT_LOG} 2>/dev/null || _RC=1 fi # check drd status -if (( _RC == 0 )) && (( $(grep -c -E -e ".*Information succeeded.*" ${HC_STDOUT_LOG} 2>/dev/null) > 0 )) +if (( _RC == 0 )) then - # convert NOW to epoch (pass date values as unquoted parameters) - _NOW_EPOCH=$(data_date2epoch "$(date '+%Y')" "$(date '+%m')" "$(date '+%d')" "$(date '+%H')" "$(date '+%M')" "$(date '+%S')") + # convert NOW to epoch (pass date values as quoted parameters) + _NOW_EPOCH=$(data_date2epoch "$(date '+%Y' 2>/dev/null)" "$(date '+%m' 2>/dev/null)" "$(date '+%d' 2>/dev/null)" "$(date '+%H' 2>/dev/null)" "$(date '+%M' 2>/dev/null)" "$(date '+%S' 2>/dev/null)") # get devices _ORIGINAL_DISK=$(data_strip_space "$(grep "Original Disk:" ${HC_STDOUT_LOG} 2>/dev/null | cut -f2 -d':')")