Add --list-details to replace --list, and make --list display only a terse list of health checks. This makes --list a lot more performant.

This commit is contained in:
Patrick Van der Veken 2020-03-12 21:50:47 +01:00
parent 2481032360
commit c262342448
2 changed files with 130 additions and 91 deletions

View File

@ -38,7 +38,7 @@
# ------------------------- CONFIGURATION starts here ------------------------- # ------------------------- CONFIGURATION starts here -------------------------
# define the version (YYYY-MM-DD) # define the version (YYYY-MM-DD)
typeset -r SCRIPT_VERSION="2019-10-24" typeset -r SCRIPT_VERSION="2020-03-12"
# location of parent directory containing KSH functions/HC plugins # location of parent directory containing KSH functions/HC plugins
typeset -r FPATH_PARENT="/opt/hc/lib" typeset -r FPATH_PARENT="/opt/hc/lib"
# location of custom HC configuration files # location of custom HC configuration files
@ -134,6 +134,7 @@ typeset ARG_HC_ARGS="" # no extra arguments to HC plug-in by default
typeset ARG_HISTORY=0 # include historical events is off by default typeset ARG_HISTORY=0 # include historical events is off by default
typeset ARG_LAST=0 # report last events typeset ARG_LAST=0 # report last events
typeset ARG_LIST="" # list all by default typeset ARG_LIST="" # list all by default
typeset ARG_LIST_DETAILS=0 # list with full details is off by default
typeset ARG_LOCK=1 # lock for concurrent script executions is on by default typeset ARG_LOCK=1 # lock for concurrent script executions is on by default
typeset ARG_LOG=1 # logging is on by default typeset ARG_LOG=1 # logging is on by default
typeset ARG_NO_FIX=0 # fix/healing is not disabled by default typeset ARG_NO_FIX=0 # fix/healing is not disabled by default
@ -571,7 +572,7 @@ cat << EOT
Execute/report simple health checks (HC) on UNIX hosts. Execute/report simple health checks (HC) on UNIX hosts.
Syntax: ${SCRIPT_DIR}/${SCRIPT_NAME} [--help] | [--help-terse] | [--version] | Syntax: ${SCRIPT_DIR}/${SCRIPT_NAME} [--help] | [--help-terse] | [--version] |
[--list=<needle>] | [--list-core] | [--list-include] | [--fix-symlinks] | [--show-stats] | (--archive-all | --disable-all | --enable-all) | [--fix-logs [--with-history]] | [--list=<needle>] | [--list-details] | [--list-core] | [--list-include] | [--fix-symlinks] | [--show-stats] | (--archive-all | --disable-all | --enable-all) | [--fix-logs [--with-history]] |
(--check-host | ((--archive | --check | --enable | --disable | --run [--timeout=<secs>] | --show) --hc=<list_of_checks> [--config-file=<configuration_file>] [hc-args="<arg1,arg2=val,arg3">])) (--check-host | ((--archive | --check | --enable | --disable | --run [--timeout=<secs>] | --show) --hc=<list_of_checks> [--config-file=<configuration_file>] [hc-args="<arg1,arg2=val,arg3">]))
[--display=<method>] ([--debug] [--debug-level=<level>]) [--log-healthy] [--no-fix] [--no-log] [--no-lock] [--no-monitor] [[--flip-rc] [--with-rc=<count|max|sum>]]] [--display=<method>] ([--debug] [--debug-level=<level>]) [--log-healthy] [--no-fix] [--no-log] [--no-lock] [--no-monitor] [[--flip-rc] [--with-rc=<count|max|sum>]]]
[--notify=<method_list>] [--mail-to=<address_list>] [--sms-to=<sms_rcpt> --sms-provider=<name>] [--notify=<method_list>] [--mail-to=<address_list>] [--sms-to=<sms_rcpt> --sms-provider=<name>]
@ -606,12 +607,15 @@ Parameters:
in double quotes (example: --hc_args="arg1,arg2=value,arg3"). in double quotes (example: --hc_args="arg1,arg2=value,arg3").
--id : value of a FAIL ID (must be specified as uninterrupted sequence of numbers) --id : value of a FAIL ID (must be specified as uninterrupted sequence of numbers)
--last : show the last (failed) events for each HC and their combined STC value --last : show the last (failed) events for each HC and their combined STC value
--list : show the available health checks. Use <needle> to search with wildcards. Following details are shown: --list : show the available health checks in a terse manner. Use --list-details for a more extensive list.
--list-details : show the available health checks with following details included:
- health check (plugin) name - health check (plugin) name
- state of the HC plugin (disabled/enabled) - state of the HC plugin (disabled/enabled)
- version of the HC plugin - version of the HC plugin
- whether the HC plugin requires a configuration file in ${CONFIG_DIR} - whether the HC plugin requires a configuration file in ${CONFIG_DIR}
- whether the HC plugin is scheduled by cron - whether the HC plugin is scheduled by cron
- whether the plugin contains a facility for --log-healthy and/or whether it is enabled
- whether the plugin contains fix/healing logic (see --no-fix)
--list-core : show the available core plugins (mail,SMS,...) --list-core : show the available core plugins (mail,SMS,...)
--list-include : show the available includes/libraries --list-include : show the available includes/libraries
--log-healthy : log/show also passed health checks. By default this is off when the plugin support this feature. --log-healthy : log/show also passed health checks. By default this is off when the plugin support this feature.
@ -954,6 +958,10 @@ do
ARG_ACTION=9 ARG_ACTION=9
fi fi
;; ;;
-list-details|--list-details)
ARG_LIST_DETAILS=1
ARG_ACTION=9
;;
-list-hc|--list-hc|-list-all|--list-all) -list-hc|--list-hc|-list-all|--list-all)
print -u2 "WARN: deprecated option. Use --list | --list=<needle>" print -u2 "WARN: deprecated option. Use --list | --list=<needle>"
exit 0 exit 0

View File

@ -30,7 +30,7 @@
# RETURNS: 0 # RETURNS: 0
function version_include_core function version_include_core
{ {
typeset _VERSION="2019-11-03" # YYYY-MM-DD typeset _VERSION="2020-03-12" # YYYY-MM-DD
print "INFO: $0: ${_VERSION#version_*}" print "INFO: $0: ${_VERSION#version_*}"
@ -1594,10 +1594,18 @@ fi
# print header # print header
if [[ "${FACTION}" != "list" ]] if [[ "${FACTION}" != "list" ]]
then then
# shellcheck disable=SC1117 if (( ARG_LIST_DETAILS > 0 ))
printf "%-40s\t%-8s\t%s\t\t%s\t%s\t%s\t%s\n" "Health Check" "State" "Version" "Config?" "Sched?" "H+?" "Fix?" then
# shellcheck disable=SC2183,SC1117 # shellcheck disable=SC1117
printf "%110s\n" | tr ' ' - printf "%-40s\t%-8s\t%s\t\t%s\t%s\t%s\t%s\n" "Health Check" "State" "Version" "Config?" "Sched?" "H+?" "Fix?"
# shellcheck disable=SC2183,SC1117
printf "%110s\n" | tr ' ' -
else
# shellcheck disable=SC1117
printf "%-40s\t%-8s\n" "Health Check" "State"
# shellcheck disable=SC2183,SC1117
printf "%60s\n" | tr ' ' -
fi
fi fi
print "${FPATH}" | tr ':' '\n' 2>/dev/null | grep -v "core$" 2>/dev/null | sort 2>/dev/null |\ print "${FPATH}" | tr ':' '\n' 2>/dev/null | grep -v "core$" 2>/dev/null | sort 2>/dev/null |\
while read -r FDIR while read -r FDIR
@ -1607,68 +1615,10 @@ do
# cache script contents in memory # cache script contents in memory
FSCRIPT=$(<${FFILE}) FSCRIPT=$(<${FFILE})
# --list (basic)
# find function name but skip helper functions in the plug-in file (function _name) # find function name but skip helper functions in the plug-in file (function _name)
FNAME=$(print -R "${FSCRIPT}" | grep -E -e "^function[[:space:]]+[^_]" 2>/dev/null) FNAME=$(print -R "${FSCRIPT}" | grep -E -e "^function[[:space:]]+[^_]" 2>/dev/null)
# look for version string (cut off comments but don't use [:space:] in tr)
FVERSION=$(print -R "${FSCRIPT}" | grep '^typeset _VERSION=' 2>/dev/null |\
awk 'match($0,/[0-9]+-[0-9]+-[0-9]+/){print substr($0, RSTART, RLENGTH)}' 2>/dev/null)
# look for configuration file string
HAS_FCONFIG=$(print -R "${FSCRIPT}" | grep -c '^typeset _CONFIG_FILE=' 2>/dev/null)
if (( HAS_FCONFIG > 0 ))
then
FCONFIG="Yes"
# *.conf.dist first
if [[ -r ${CONFIG_DIR}/${FNAME#function *}.conf.dist ]]
then
# check for log_healthy parameter (config file)
HAS_FHEALTHY=$(_CONFIG_FILE="${CONFIG_DIR}/${FNAME#function *}.conf.dist" data_get_lvalue_from_config 'log_healthy')
case "${HAS_FHEALTHY}" in
no|NO|No)
FHEALTHY="No"
;;
yes|YES|Yes)
FHEALTHY="Yes"
;;
*)
FHEALTHY="N/S"
;;
esac
else
FHEALTHY="N/S"
fi
# *.conf next
if [[ -r ${CONFIG_DIR}/${FNAME#function *}.conf ]]
then
# check for log_healthy parameter (config file)
HAS_FHEALTHY=$(_CONFIG_FILE="${CONFIG_DIR}/${FNAME#function *}.conf" data_get_lvalue_from_config 'log_healthy')
case "${HAS_FHEALTHY}" in
no|NO|No)
FHEALTHY="No"
;;
yes|YES|Yes)
FHEALTHY="Yes"
;;
*)
FHEALTHY="N/S"
;;
esac
fi
# check for log_healthy support through --hc-args (plugin)
elif (( $(print -R "${FSCRIPT}" | grep -c -E -e "_LOG_HEALTHY" 2>/dev/null) > 0 ))
then
FCONFIG="No"
FHEALTHY="S"
else
FCONFIG="No"
FHEALTHY="N/S"
fi
# check fix
if (( $(print -R "${FSCRIPT}" | grep -c -E -e "_HC_CAN_FIX=1" 2>/dev/null) > 0 ))
then
FFIX="Yes"
else
FFIX="No"
fi
# check state # check state
DISABLE_FFILE="$(print ${FFILE##*/} | sed 's/\.sh$//')" DISABLE_FFILE="$(print ${FFILE##*/} | sed 's/\.sh$//')"
if [[ -f "${STATE_PERM_DIR}/${DISABLE_FFILE}.disabled" ]] if [[ -f "${STATE_PERM_DIR}/${DISABLE_FFILE}.disabled" ]]
@ -1677,30 +1627,104 @@ do
else else
FSTATE="enabled" FSTATE="enabled"
fi fi
# reset state when unlinked
[[ -h ${FFILE%%.*} ]] || FSTATE="unlinked" # --list-details
# check scheduling if (( ARG_LIST_DETAILS > 0 ))
is_scheduled "${FNAME#function *}"
# shellcheck disable=SC2181
if (( $? == 0 ))
then then
FSCHEDULED="No" # look for version string (cut off comments but don't use [:space:] in tr)
else FVERSION=$(print -R "${FSCRIPT}" | grep '^typeset _VERSION=' 2>/dev/null |\
FSCHEDULED="Yes" awk 'match($0,/[0-9]+-[0-9]+-[0-9]+/){print substr($0, RSTART, RLENGTH)}' 2>/dev/null)
# look for configuration file string
HAS_FCONFIG=$(print -R "${FSCRIPT}" | grep -c '^typeset _CONFIG_FILE=' 2>/dev/null)
if (( HAS_FCONFIG > 0 ))
then
FCONFIG="Yes"
# *.conf.dist first
if [[ -r ${CONFIG_DIR}/${FNAME#function *}.conf.dist ]]
then
# check for log_healthy parameter (config file)
HAS_FHEALTHY=$(_CONFIG_FILE="${CONFIG_DIR}/${FNAME#function *}.conf.dist" data_get_lvalue_from_config 'log_healthy')
case "${HAS_FHEALTHY}" in
no|NO|No)
FHEALTHY="No"
;;
yes|YES|Yes)
FHEALTHY="Yes"
;;
*)
FHEALTHY="N/S"
;;
esac
else
FHEALTHY="N/S"
fi
# *.conf next
if [[ -r ${CONFIG_DIR}/${FNAME#function *}.conf ]]
then
# check for log_healthy parameter (config file)
HAS_FHEALTHY=$(_CONFIG_FILE="${CONFIG_DIR}/${FNAME#function *}.conf" data_get_lvalue_from_config 'log_healthy')
case "${HAS_FHEALTHY}" in
no|NO|No)
FHEALTHY="No"
;;
yes|YES|Yes)
FHEALTHY="Yes"
;;
*)
FHEALTHY="N/S"
;;
esac
fi
# check for log_healthy support through --hc-args (plugin)
elif (( $(print -R "${FSCRIPT}" | grep -c -E -e "_LOG_HEALTHY" 2>/dev/null) > 0 ))
then
FCONFIG="No"
FHEALTHY="S"
else
FCONFIG="No"
FHEALTHY="N/S"
fi
# check fix
if (( $(print -R "${FSCRIPT}" | grep -c -E -e "_HC_CAN_FIX=1" 2>/dev/null) > 0 ))
then
FFIX="Yes"
else
FFIX="No"
fi
# reset state when unlinked
[[ -h ${FFILE%%.*} ]] || FSTATE="unlinked"
# check scheduling
is_scheduled "${FNAME#function *}"
# shellcheck disable=SC2181
if (( $? == 0 ))
then
FSCHEDULED="No"
else
FSCHEDULED="Yes"
fi
fi fi
# show results # show results
if [[ "${FACTION}" != "list" ]] if [[ "${FACTION}" != "list" ]]
then then
# shellcheck disable=SC1117 if (( ARG_LIST_DETAILS > 0 ))
printf "%-40s\t%-8s\t%s\t%s\t%s\t%s\t%s\n" \ then
"${FNAME#function *}" \ # shellcheck disable=SC1117
"${FSTATE}" \ printf "%-40s\t%-8s\t%s\t%s\t%s\t%s\t%s\n" \
"${FVERSION#typeset _VERSION=*}" \ "${FNAME#function *}" \
"${FCONFIG}" \ "${FSTATE}" \
"${FSCHEDULED}" \ "${FVERSION#typeset _VERSION=*}" \
"${FHEALTHY}" \ "${FCONFIG}" \
"${FFIX}" "${FSCHEDULED}" \
"${FHEALTHY}" \
"${FFIX}"
else
# shellcheck disable=SC1117
printf "%-40s\t%-8s\n" \
"${FNAME#function *}" \
"${FSTATE}"
fi
else else
# shellcheck disable=SC1117 # shellcheck disable=SC1117
printf "%s\n" "${FNAME#function *}" printf "%s\n" "${FNAME#function *}"
@ -1735,11 +1759,18 @@ fi
# legend # legend
if [[ "${FACTION}" != "list" ]] if [[ "${FACTION}" != "list" ]]
then then
print if (( ARG_LIST_DETAILS > 0 ))
print "Config?: plugin has a default configuration file (Yes/No)" then
print "Sched? : plugin is scheduled through cron (Yes/No)" print
print "H+? : plugin can choose whether to log/show passed health checks (Yes/No/Supported/Not supported)" print "Config?: plugin has a default configuration file (Yes/No)"
print "Fix? : plugin contains fix/healing logic (Yes/No) -- not used by default!" print "Sched? : plugin is scheduled through cron (Yes/No)"
print "H+? : plugin can choose whether to log/show passed health checks (Yes/No/Supported/Not supported)"
print "Fix? : plugin contains fix/healing logic (Yes/No) -- not used by default!"
else
print
print "Tip: use --list-details to see a list of health checks with more details"
fi
fi fi
return 0 return 0