* Fix in check_hpux_drd_status()

* Fixed a problem in the --run workflow whereby some log_hc's were not being reported
* Added handling of empty lines to fix_logs() and count_log_errors()
2018-10-31 07:30:34 +01:00 · 2018-10-31 07:30:34 +01:00 · 393c000deb
commit 393c000deb
parent bed83253f9
3 changed files with 77 additions and 33 deletions
--- a/sources/bin/check_health.sh
+++ b/sources/bin/check_health.sh
@ -37,7 +37,7 @@
 # ------------------------- CONFIGURATION starts here -------------------------
 # define the version (YYYY-MM-DD)
-typeset -r SCRIPT_VERSION="2018-10-28"
+typeset -r SCRIPT_VERSION="2018-10-31"
 # location of parent directory containing KSH functions/HC plugins
 typeset -r FPATH_PARENT="/opt/hc/lib"
 # location of custom HC configuration files
@ -480,7 +480,8 @@ function check_user
 typeset WHOAMI=""
 # avoid sub-shell for mksh/pdksh
-WHOAMI=$(IFS='()'; set -- "$(id)"; print $2)
+# shellcheck disable=SC2046
 WHOAMI=$(IFS='()'; set -- $(id); print $2)
 if [[ "${WHOAMI}" != "${EXEC_USER}" ]]
 then
    print -u2 "ERROR: must be run as user '${EXEC_USER}'"
@ -1152,6 +1153,13 @@ case ${ARG_ACTION} in
        # execute plug-in(s)
        print "${ARG_HC}" | tr ',' '\n' | grep -v '^$' | while read -r HC_RUN
        do
            # re-initialize messages stash (log of failed checks)
            HC_MSG_VAR=""
            : >${HC_MSG_FILE} 2>/dev/null
            if (( $? > 0 ))
            then
                die "unable to reset the \${HC_MSG_FILE} file"
            fi
            # check for HC (function)
            exists_hc "${HC_RUN}"
            if (( $? == 0 ))
@ -1169,7 +1177,7 @@ case ${ARG_ACTION} in
            stat_hc "${HC_RUN}"
            if (( $? == 0 ))
            then
-                # callback for display_init with extra code 'DISABLED'
+                # call for display_init with extra code 'DISABLED'
                if (( DO_DISPLAY_INIT == 1 ))
                then
                    display_init "${HC_RUN}" "" "DISABLED"
@ -1183,7 +1191,15 @@ case ${ARG_ACTION} in
            HC_STDOUT_LOG="${TMP_DIR}/${HC_RUN}.stdout.log.$$"
            HC_STDERR_LOG="${TMP_DIR}/${HC_RUN}.stderr.log.$$"
            : >${HC_STDOUT_LOG} 2>/dev/null
            if (( $? > 0 ))
            then
                die "unable to reset the \${HC_STDOUT_LOG} file"
            fi
            : >${HC_STDERR_LOG} 2>/dev/null
            if (( $? > 0 ))
            then
                die "unable to reset the \${HC_STDERR_LOG} file"
            fi
            # --check-host handling: alternative configuration file, mangle ARG_CONFIG_FILE & HC_TIME_OUT
            if (( ARG_CHECK_HOST == 1 ))
@ -1211,14 +1227,15 @@ case ${ARG_ACTION} in
                then
                    log "executed HC: ${HC_RUN} [RC=${RUN_RC}]"
                else
-                    # callback for display_init with extra code 'ERROR'
+                    # call for display_init with extra code 'ERROR'
                    if (( DO_DISPLAY_INIT == 1 ))
                    then
-                        display_init "${HC_RUN}" "" "ERROR"
+                        # only do call if we have an empty messages stash
                        # (otherwise handle_hc() will call display_init())
                        [[ -s "${HC_MSG_FILE}" ]] || display_init "${HC_RUN}" "" "ERROR"
                    else
                        warn "failed to execute HC: ${HC_RUN} [RC=${RUN_RC}]"
                    fi
                    continue
                fi
            else
                # set trap on SIGUSR1
@ -1246,27 +1263,29 @@ case ${ARG_ACTION} in
                # process return codes
                if (( RUN_RC != 0 ))
                then
-                    # callback for display_init with extra code 'ERROR'
+                    # call for display_init with extra code 'ERROR'
                    if (( DO_DISPLAY_INIT == 1 ))
                    then
-                        display_init "${HC_RUN}" "" "ERROR"
+                        # only do call if we have an empty messages stash
                        # (otherwise handle_hc() will call display_init())
                        [[ -s "${HC_MSG_FILE}" ]] || display_init "${HC_RUN}" "" "ERROR"
                    else
                        warn "failed to execute HC: ${HC_RUN} [RC=${RUN_RC}]"
                    fi
                    continue
                else
                    if (( CHILD_ERROR == 0 ))
                    then
                        log "executed HC: ${HC_RUN} [RC=${RUN_RC}]"
                    else
-                        # callback for display_init with extra code 'ERROR'
+                        # call for display_init with extra code 'ERROR'
                        if (( DO_DISPLAY_INIT == 1 ))
                        then
-                            display_init "${HC_RUN}" "" "ERROR"
+                            # only do call if we have an empty messages stash
                            # (otherwise handle_hc() will call display_init())
                            [[ -s "${HC_MSG_FILE}" ]] || display_init "${HC_RUN}" "" "ERROR"
                        else
                            warn "failed to execute HC as background process"
                        fi
                        continue
                    fi
                fi
            fi
--- a/sources/lib/core/include_core.sh
+++ b/sources/lib/core/include_core.sh
@ -97,8 +97,9 @@ return ${ARCHIVE_RC}
 # -----------------------------------------------------------------------------
 # @(#) FUNCTION: count_log_errors()
-# DOES: check hc log file(s) for rogue entries. Log entries may get scrambled
+# DOES: check hc log file(s) for rogue entries (=lines with NF>$NUM_LOG_FIELDS
-#       if the append operation in handle_hc() does not happen fully atomically.
+#       or empty lines). Log entries may get scrambled if the append operation
 #       in handle_hc() does not happen fully atomically.
 #       This means that log entries are written without line separator (same line)
 #       There is no proper way to avoid this without an extra file locking utility
 # EXPECTS: path to log file to check
@ -111,7 +112,14 @@ function count_log_errors
 typeset LOG_STASH="${1}"
 typeset ERROR_COUNT=0
-ERROR_COUNT=$(cat ${LOG_STASH} 2>/dev/null | awk -F"${LOG_SEP}" 'BEGIN { num = 0 } { if (NF>'"${NUM_LOG_FIELDS}"') { num++ }} END { print num }' 2>/dev/null)
+ERROR_COUNT=$(cat ${LOG_STASH} 2>/dev/null | awk -F"${LOG_SEP}" '
    BEGIN { num = 0 }
    {
        if (NF>'"${NUM_LOG_FIELDS}"' || $0 == "") {
            num++;
        }
    }
    END { print num }' 2>/dev/null)
 print ${ERROR_COUNT}
@ -641,6 +649,7 @@ function fix_logs
 typeset FIX_FILE=""
 typeset FIX_RC=0
 typeset LOG_STASH=""
 typeset EMPTY_COUNT=0
 typeset ERROR_COUNT=0
 typeset STASH_COUNT=0
 typeset TMP_COUNT=0
@ -670,6 +679,9 @@ do
    # does it have errors?
    ERROR_COUNT=$(count_log_errors ${FIX_FILE})
    # we count the empty lines (again)
    EMPTY_COUNT=$(grep -c -E -e '^$' ${FIX_FILE} 2>/dev/null)
    # rewrite if needed
    if (( ERROR_COUNT > 0 ))
    then
@ -743,17 +755,21 @@ do
                        }
                    }
                    printf ("\n")
                } else if ($0 == "") {
                    # skip empty line
                    next;
                } else {
                    # correct log line, no rewrite needed
                    print $0
                }
            }' >${TMP_FILE} 2>/dev/null
-        # count after rewrite
+        # count after rewrite (include empty lines again in the count)
        TMP_COUNT=$(wc -l ${TMP_FILE} 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
        TMP_COUNT=$(( TMP_COUNT + EMPTY_COUNT ))
        # bail out when we do not have enough records
-        if (( TMP_COUNT <= STASH_COUNT ))
+        if (( TMP_COUNT < STASH_COUNT ))
        then
            warn "found inconsistent record count (${TMP_COUNT}<${STASH_COUNT}), aborting"
            return 2
@ -1722,6 +1738,8 @@ awk -F"${LOG_SEP}" '{
                 END {
                    for (hc in total_count) {
                        # empty hc variable means count of empty lines in log file
                        if (hc != "") {
                            printf ("\t%s:\n", hc)
                            printf ("\t\t# entries: %s\n", total_count[hc])
                            printf ("\t\t# STC==0 : %s\n", ok_count[hc])
@ -1730,6 +1748,7 @@ awk -F"${LOG_SEP}" '{
                            printf ("\t\tlast     : %s\n", last_entry[hc])
                        }
                    }
                }
                ' ${HC_LOG} 2>/dev/null
 # archived events
@ -1761,6 +1780,8 @@ do
                    END {
                        for (hc in total_count) {
                            # empty hc variable means count of empty lines in log file
                            if (hc != "") {
                                printf ("\t%s:\n", hc)
                                printf ("\t\t# entries: %s\n", total_count[hc])
                                printf ("\t\t# STC==0 : %s\n", ok_count[hc])
@ -1769,6 +1790,7 @@ do
                                printf ("\t\tlast     : %s\n", last_entry[hc])
                            }
                        }
                    }
                    ' ${_ARCHIVE_FILE} 2>/dev/null
 done
--- a/sources/lib/platform/hp-ux/check_hpux_drd_status.sh
+++ b/sources/lib/platform/hp-ux/check_hpux_drd_status.sh
@ -28,6 +28,7 @@
 # @(#) 2018-05-20: added dump_logs() [Patrick Van der Veken]
 # @(#) 2018-10-18: changed boot status [Patrick Van der Veken]
 # @(#) 2018-10-28: fixed (linter) errors [Patrick Van der Veken]
 # @(#) 2018-10-31: better result check for DRD output [Patrick Van der Veken]
 # -----------------------------------------------------------------------------
 # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
 #******************************************************************************
@ -38,7 +39,7 @@ function check_hpux_drd_status
 # ------------------------- CONFIGURATION starts here -------------------------
 typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
 typeset _DRD_BIN="/opt/drd/bin/drd"
-typeset _VERSION="2018-10-28"                           # YYYY-MM-DD
+typeset _VERSION="2018-10-31"                           # YYYY-MM-DD
 typeset _SUPPORTED_PLATFORMS="HP-UX"                    # uname -s match
 # ------------------------- CONFIGURATION ends here ---------------------------
@ -120,13 +121,15 @@ else
    # drd outputs on STDERR
    ${_DRD_BIN} status >${HC_STDOUT_LOG} 2>&1
    _RC=$?
    # check for result in output since _RC is not reliable
    grep -q -E -e "succeeded" ${HC_STDOUT_LOG} 2>/dev/null || _RC=1
 fi
 # check drd status
-if (( _RC == 0 )) && (( $(grep -c -E -e ".*Information succeeded.*" ${HC_STDOUT_LOG} 2>/dev/null) > 0 ))
+if (( _RC == 0 ))
 then
-    # convert NOW to epoch (pass date values as unquoted parameters)
+    # convert NOW to epoch (pass date values as quoted parameters)
-    _NOW_EPOCH=$(data_date2epoch "$(date '+%Y')" "$(date '+%m')" "$(date '+%d')" "$(date '+%H')" "$(date '+%M')" "$(date '+%S')")
+    _NOW_EPOCH=$(data_date2epoch "$(date '+%Y' 2>/dev/null)" "$(date '+%m' 2>/dev/null)" "$(date '+%d' 2>/dev/null)" "$(date '+%H' 2>/dev/null)" "$(date '+%M' 2>/dev/null)" "$(date '+%S' 2>/dev/null)")
    # get devices
    _ORIGINAL_DISK=$(data_strip_space "$(grep "Original Disk:" ${HC_STDOUT_LOG} 2>/dev/null | cut -f2 -d':')")