diff --git a/build/hpux/hc_hpux_platform/hc_hpux_platform.psf b/build/hpux/hc_hpux_platform/hc_hpux_platform.psf
index f1d9665..bd10eb0 100644
--- a/build/hpux/hc_hpux_platform/hc_hpux_platform.psf
+++ b/build/hpux/hc_hpux_platform/hc_hpux_platform.psf
@@ -63,6 +63,7 @@ This is the OS/platform plugin package"
         file -m 755 check_hpux_autopath.sh
         file -m 755 check_hpux_cdsf_cluster.sh
         file -m 755 check_hpux_cron_status.sh
+        file -m 755 check_hpux_defunct_processes.sh
         file -m 755 check_hpux_drd_status.sh
         file -m 755 check_hpux_file_age.sh
         file -m 755 check_hpux_file_change.sh
@@ -106,6 +107,7 @@ This is the OS/platform plugin package"
         directory ../../../etc/opt/hc/=/etc/opt/hc
         file -m 644 check_hpux_autofs.conf.dist
         file -m 644 check_hpux_cron_status.conf.dist
+        file -m 644 check_hpux_defunct_processes.conf.dist
         file -m 644 check_hpux_drd_status.conf.dist
         file -m 644 check_hpux_file_age.conf.dist
         file -m 644 check_hpux_file_change.conf.dist
diff --git a/etc/opt/hc/check_hpux_defunct_processes.conf.dist b/etc/opt/hc/check_hpux_defunct_processes.conf.dist
new file mode 100644
index 0000000..37dfe8f
--- /dev/null
+++ b/etc/opt/hc/check_hpux_defunct_processes.conf.dist
@@ -0,0 +1,27 @@
+#******************************************************************************
+# @(#) check_hpux_defunct_processes.conf
+#******************************************************************************
+# This is a configuration file for the check_hpux_defunct_processes HC plugin.
+# All lines starting with a '#' are comment lines.
+# [default: indicates hardcoded script values if no value is defined here]
+#******************************************************************************
+
+# specify whether to also log passed health checks
+# (warning: this may rapidly grow the HC log)
+# [default: no]
+log_healthy="yes"
+
+# specify above which threshold of defunct processes to generate a HC fail event
+# If group_by_ppid is set to 'yes' then the value of 'process_threshold' will
+# apply for each PPID, otherwise it will apply to all defunct processes together
+# [default: 10]
+process_threshold=10
+
+# specify whether to group by parent ID or not.
+# [default: yes]
+group_by_ppid="yes"
+
+
+#******************************************************************************
+# End of FILE
+#******************************************************************************
diff --git a/opt/hc/lib/platform/hp-ux/check_hpux_defunct_processes.sh b/opt/hc/lib/platform/hp-ux/check_hpux_defunct_processes.sh
new file mode 100644
index 0000000..003bf25
--- /dev/null
+++ b/opt/hc/lib/platform/hp-ux/check_hpux_defunct_processes.sh
@@ -0,0 +1,214 @@
+#!/usr/bin/env ksh
+#******************************************************************************
+# @(#) check_hpux_defunct_processes.sh
+#******************************************************************************
+# @(#) Copyright (C) 2021 by KUDOS BVBA (info@kudos.be). All rights reserved.
+#
+# This program is a free software; you can redistribute it and/or modify
+# it under the same terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
+#******************************************************************************
+#
+# DOCUMENTATION (MAIN)
+# -----------------------------------------------------------------------------
+# @(#) MAIN: check_hpux_defunct_processes
+# DOES: see _show_usage()
+# EXPECTS: n/a
+# REQUIRES: data_comma2space(), data_get_lvalue_from_config(),
+#           data_is_numeric(), init_hc(), log_hc()
+#
+# @(#) HISTORY:
+# @(#) 2021-04-07: initial version [Patrick Van der Veken]
+# -----------------------------------------------------------------------------
+# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
+#******************************************************************************
+
+# -----------------------------------------------------------------------------
+function check_hpux_defunct_processes
+{
+# ------------------------- CONFIGURATION starts here -------------------------
+typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
+typeset _VERSION="2021-04-07" # YYYY-MM-DD
+typeset _SUPPORTED_PLATFORMS="HP-UX" # uname -s match
+# ------------------------- CONFIGURATION ends here ---------------------------
+
+# set defaults
+(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
+init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
+typeset _ARGS=$(data_comma2space "$*")
+typeset _ARG=""
+typeset _MSG=""
+typeset _STC=0
+typeset _CFG_GROUP_BY_PPID=""
+typeset _GROUP_BY_PPID=""
+typeset _CFG_PROCESS_THRESHOLD=""
+typeset _PROCESS_THRESHOLD=""
+typeset _CFG_HEALTHY=""
+typeset _LOG_HEALTHY=0
+typeset _DEFUNCT_PROCS=""
+typeset _NUM_DEFUNCT_PROCS=""
+typeset _PPID=""
+
+# handle arguments (originally comma-separated)
+for _ARG in ${_ARGS}
+do
+    case "${_ARG}" in
+        help)
+            _show_usage "${0}" "${_VERSION}" "${_CONFIG_FILE}" && return 0
+            ;;
+    esac
+done
+
+# handle configuration file
+[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
+if [[ ! -r ${_CONFIG_FILE} ]]
+then
+    warn "unable to read configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+# read required configuration values
+_CFG_PROCESS_THRESHOLD=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'process_threshold')
+if [[ -z "${_CFG_PROCESS_THRESHOLD}" ]]
+then
+    # default
+    _PROCESS_THRESHOLD=10
+    log "setting value for parameter process_threshold to its default (10)"
+else
+    data_is_numeric "${_CFG_PROCESS_THRESHOLD}"
+    # shellcheck disable=SC2181
+    if (( $? > 0 ))
+    then
+        warn "value for parameter process_threshold in configuration file ${_CONFIG_FILE} is invalid"
+        return 1
+    else
+        _PROCESS_THRESHOLD=${_CFG_PROCESS_THRESHOLD}
+        log "setting value for parameter process_threshold (${_PROCESS_THRESHOLD})"
+    fi
+fi
+_CFG_GROUP_BY_PPID=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'group_by_ppid')
+case "${_CFG_GROUP_BY_PPID}" in
+    no|NO|No)
+        _GROUP_BY_PPID=0
+        log "setting value for parameter group_by_ppid (No)"
+        ;;
+    *)
+        # default
+        _GROUP_BY_PPID=1
+        log "setting value for parameter group_by_ppid to its default (Yes)"
+        ;;
+esac
+_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
+case "${_CFG_HEALTHY}" in
+    yes|YES|Yes)
+        _LOG_HEALTHY=1
+        ;;
+    *)
+        # do not override hc_arg
+        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
+        ;;
+esac
+
+# log_healthy (ARG_LOG_HEALTHY, when set, wins over the configuration file)
+(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
+if (( _LOG_HEALTHY > 0 ))
+then
+    if (( ARG_LOG > 0 ))
+    then
+        log "logging/showing passed health checks"
+    else
+        log "showing passed health checks (but not logging)"
+    fi
+else
+    log "not logging/showing passed health checks"
+fi
+
+# collect defunct processes (UNIX95=1 enables the XPG4 'ps -o' syntax on HP-UX)
+# shellcheck disable=SC2009
+_DEFUNCT_PROCS=$(UNIX95=1 ps -eo ppid,pid,comm,etime 2>"${HC_STDERR_LOG}" | tee -a "${HC_STDOUT_LOG}" 2>/dev/null | grep '[d]efunct' 2>/dev/null)
+
+# check defunct processes
+if [[ -z "${_DEFUNCT_PROCS}" ]]
+then
+    _MSG="no defunct process(es) detected"
+    _STC=0
+    if (( _LOG_HEALTHY > 0 ))
+    then
+        log_hc "$0" ${_STC} "${_MSG}"
+    fi
+    return 0
+else
+    if (( _GROUP_BY_PPID > 0 ))
+    then
+        # evaluate the threshold per PPID
+        print -R "${_DEFUNCT_PROCS}" | awk '
+
+            {
+                # count PIDs per PPID
+                counts[$1]++;
+            }
+
+            END {
+                for (i in counts) print i ":" counts[i]
+            }' 2>/dev/null | while IFS=":" read -r _PPID _NUM_DEFUNCT_PROCS
+        do
+            (( ARG_DEBUG > 0 )) && debug "awk found PPID: ${_PPID} with # procs: ${_NUM_DEFUNCT_PROCS}"
+            if (( _NUM_DEFUNCT_PROCS <= _PROCESS_THRESHOLD ))
+            then
+                _MSG="defunct process(es) detected for PPID (${_PPID}) but are still under threshold (${_NUM_DEFUNCT_PROCS}<=${_PROCESS_THRESHOLD})"
+                _STC=0
+            else
+                _MSG="defunct process(es) detected for PPID (${_PPID}) and are over threshold (${_NUM_DEFUNCT_PROCS}>${_PROCESS_THRESHOLD})"
+                _STC=1
+            fi
+            if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+            then
+                log_hc "$0" ${_STC} "${_MSG}" "${_NUM_DEFUNCT_PROCS}" "${_PROCESS_THRESHOLD}"
+            fi
+        done
+    else
+        # evaluate the threshold against all defunct processes together
+        _NUM_DEFUNCT_PROCS=$(print -R "${_DEFUNCT_PROCS}" | wc -l 2>/dev/null)
+        if (( _NUM_DEFUNCT_PROCS <= _PROCESS_THRESHOLD ))
+        then
+            _MSG="defunct process(es) detected but are still under threshold (${_NUM_DEFUNCT_PROCS}<=${_PROCESS_THRESHOLD})"
+            _STC=0
+        else
+            _MSG="defunct process(es) detected and are over threshold (${_NUM_DEFUNCT_PROCS}>${_PROCESS_THRESHOLD})"
+            _STC=1
+        fi
+        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+        then
+            log_hc "$0" ${_STC} "${_MSG}" "${_NUM_DEFUNCT_PROCS}" "${_PROCESS_THRESHOLD}"
+        fi
+    fi
+fi
+
+return 0
+}
+
+# -----------------------------------------------------------------------------
+function _show_usage
+{
+cat <<- EOT
+NAME : $1
+VERSION : $2
+CONFIG : $3 with parameters:
+         log_healthy=<yes|no>
+         process_threshold=<#_of_processes>
+         group_by_ppid=<yes|no>
+PURPOSE : Checks whether there are (too many) defunct processes on the host.
+LOG HEALTHY : Supported
+
+EOT
+
+return 0
+}
+
+#******************************************************************************
+# END of script
+#******************************************************************************
diff --git a/opt/hc/lib/platform/hp-ux/check_hpux_ioscan.sh b/opt/hc/lib/platform/hp-ux/check_hpux_ioscan.sh
index 9a46807..8910411 100755
--- a/opt/hc/lib/platform/hp-ux/check_hpux_ioscan.sh
+++ b/opt/hc/lib/platform/hp-ux/check_hpux_ioscan.sh
@@ -32,6 +32,7 @@
 # @(#) 2018-10-28: fixed (linter) errors [Patrick Van der Veken]
 # @(#) 2019-01-24: arguments fix [Patrick Van der Veken]
 # @(#) 2019-03-09: added support for --log-healthy [Patrick Van der Veken]
+# @(#) 2021-04-07: quotes & shellcheck fixes [Patrick Van der Veken]
 # -----------------------------------------------------------------------------
 # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
 #******************************************************************************
@@ -43,13 +44,13 @@ function check_hpux_ioscan
 typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
 typeset _IOSCAN_BIN="/usr/sbin/ioscan"
 typeset _IOSCAN_OPTS="-Fn"
-typeset _VERSION="2019-03-09" # YYYY-MM-DD
+typeset _VERSION="2021-04-07" # YYYY-MM-DD
 typeset _SUPPORTED_PLATFORMS="HP-UX" # uname -s match
 # ------------------------- CONFIGURATION ends here ---------------------------
 
 # set defaults
-(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
-init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
+(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
+init_hc "${0}" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
 typeset _ARGS=$(data_comma2space "$*")
 typeset _ARG=""
 typeset _MSG=""
@@ -71,7 +72,7 @@ for _ARG in ${_ARGS}
 do
     case "${_ARG}" in
         help)
-            _show_usage $0 ${_VERSION} ${_CONFIG_FILE} && return 0
+            _show_usage "${0}" "${_VERSION}" "${_CONFIG_FILE}" && return 0
             ;;
     esac
 done
@@ -154,7 +155,8 @@ else
         _IOSCAN_OPTS="${_IOSCAN_OPTS}u"
     fi
     log "executing ioscan with options: ${_IOSCAN_OPTS}"
-    ${_IOSCAN_BIN} ${_IOSCAN_OPTS} >>${HC_STDOUT_LOG} 2>>${HC_STDERR_LOG}
+    ${_IOSCAN_BIN} ${_IOSCAN_OPTS} >>"${HC_STDOUT_LOG}" 2>>"${HC_STDERR_LOG}"
+    # shellcheck disable=SC2181
     if (( $? > 0 ))
     then
         _MSG="unable to run command: {${_IOSCAN_BIN}}"
@@ -166,13 +168,13 @@ else
 fi
 
 # check for requested device classes
-grep -E -e ".*:.*:.*:.*:.*:.*:.*:.*:${_IOSCAN_CLASSES}:.*" ${HC_STDOUT_LOG} 2>/dev/null |\
-    while read _IOSCAN_LINE
+grep -E -e ".*:.*:.*:.*:.*:.*:.*:.*:${_IOSCAN_CLASSES}:.*" "${HC_STDOUT_LOG}" 2>/dev/null |\
+    while read -r _IOSCAN_LINE
 do
     # possible states are: CLAIMED, UNCLAIMED, DIFF_HW, NO_HW, ERROR, SCAN
-    _HW_CLASS="$(print ${_IOSCAN_LINE} | cut -f9 -d':')"
-    _HW_PATH="$(print ${_IOSCAN_LINE} | cut -f11 -d':')"
-    _HW_STATE="$(print ${_IOSCAN_LINE} | cut -f16 -d':')"
+    _HW_CLASS=$(print "${_IOSCAN_LINE}" | cut -f9 -d':')
+    _HW_PATH=$(print "${_IOSCAN_LINE}" | cut -f11 -d':')
+    _HW_STATE=$(print "${_IOSCAN_LINE}" | cut -f16 -d':')
 
     case "${_HW_STATE}" in
         NO_HW)
@@ -214,7 +216,6 @@ function _show_usage
 cat <<- EOT
 NAME : $1
 VERSION : $2
-CONFIG : $3 with:
 CONFIG : $3 with parameters:
          log_healthy=
          ioscan_classes=