* Add new plugin: check_hpux_defunct_processes
* Small fixes
This commit is contained in:
parent
eec674c972
commit
c8cc6bd89f
@ -63,6 +63,7 @@ This is the OS/platform plugin package"
|
|||||||
file -m 755 check_hpux_autopath.sh
|
file -m 755 check_hpux_autopath.sh
|
||||||
file -m 755 check_hpux_cdsf_cluster.sh
|
file -m 755 check_hpux_cdsf_cluster.sh
|
||||||
file -m 755 check_hpux_cron_status.sh
|
file -m 755 check_hpux_cron_status.sh
|
||||||
|
file -m 755 check_hpux_defunct_processes.sh
|
||||||
file -m 755 check_hpux_drd_status.sh
|
file -m 755 check_hpux_drd_status.sh
|
||||||
file -m 755 check_hpux_file_age.sh
|
file -m 755 check_hpux_file_age.sh
|
||||||
file -m 755 check_hpux_file_change.sh
|
file -m 755 check_hpux_file_change.sh
|
||||||
@ -106,6 +107,7 @@ This is the OS/platform plugin package"
|
|||||||
directory ../../../etc/opt/hc/=/etc/opt/hc
|
directory ../../../etc/opt/hc/=/etc/opt/hc
|
||||||
file -m 644 check_hpux_autofs.conf.dist
|
file -m 644 check_hpux_autofs.conf.dist
|
||||||
file -m 644 check_hpux_cron_status.conf.dist
|
file -m 644 check_hpux_cron_status.conf.dist
|
||||||
|
file -m 644 check_hpux_defunct_processes.conf.dist
|
||||||
file -m 644 check_hpux_drd_status.conf.dist
|
file -m 644 check_hpux_drd_status.conf.dist
|
||||||
file -m 644 check_hpux_file_age.conf.dist
|
file -m 644 check_hpux_file_age.conf.dist
|
||||||
file -m 644 check_hpux_file_change.conf.dist
|
file -m 644 check_hpux_file_change.conf.dist
|
||||||
|
27
etc/opt/hc/check_hpux_defunct_processes.conf.dist
Normal file
27
etc/opt/hc/check_hpux_defunct_processes.conf.dist
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
#******************************************************************************
|
||||||
|
# @(#) check_hpux_defunct_processes.conf
|
||||||
|
#******************************************************************************
|
||||||
|
# This is a configuration file for the check_hpux_defunct_processes HC plugin.
|
||||||
|
# All lines starting with a '#' are comment lines.
|
||||||
|
# [default: indicates hardcoded script values if no value is defined here]
|
||||||
|
#******************************************************************************
|
||||||
|
|
||||||
|
# specify whether to also log passed health checks
|
||||||
|
# (warning: this may rapidly grow the HC log)
|
||||||
|
# [default: no]
|
||||||
|
log_healthy="yes"
|
||||||
|
|
||||||
|
# specify the number of defunct processes above which a HC fail event is generated.
|
||||||
|
# If group_by_ppid is set to 'yes' then the value of 'process_threshold' will
|
||||||
|
# apply for each PPID, otherwise it will apply to all defunct processes together
|
||||||
|
# [default: 10]
|
||||||
|
process_threshold=10
|
||||||
|
|
||||||
|
# specify whether to group by parent ID or not.
|
||||||
|
# [default: yes]
|
||||||
|
group_by_ppid="yes"
|
||||||
|
|
||||||
|
|
||||||
|
#******************************************************************************
|
||||||
|
# End of FILE
|
||||||
|
#******************************************************************************
|
212
opt/hc/lib/platform/hp-ux/check_hpux_defunct_processes.sh
Normal file
212
opt/hc/lib/platform/hp-ux/check_hpux_defunct_processes.sh
Normal file
@ -0,0 +1,212 @@
|
|||||||
|
#!/usr/bin/env ksh
|
||||||
|
#******************************************************************************
|
||||||
|
# @(#) check_hpux_defunct_processes.sh
|
||||||
|
#******************************************************************************
|
||||||
|
# @(#) Copyright (C) 2021 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||||
|
#******************************************************************************
|
||||||
|
#
|
||||||
|
# DOCUMENTATION (MAIN)
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# @(#) MAIN: check_hpux_defunct_processes
|
||||||
|
# DOES: see _show_usage()
|
||||||
|
# EXPECTS: n/a
|
||||||
|
# REQUIRES: data_comma2space(), data_get_lvalue_from_config(), data_is_numeric(),
#           init_hc(), log(), log_hc(), warn()
|
||||||
|
#
|
||||||
|
# @(#) HISTORY:
|
||||||
|
# @(#) 2021-04-07: initial version [Patrick Van der Veken]
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||||
|
#******************************************************************************
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
function check_hpux_defunct_processes
{
# Health check: collects the defunct processes reported by ps and raises a HC
# failure when their number exceeds 'process_threshold'. The threshold is
# applied per parent PID when group_by_ppid is enabled, otherwise to all
# defunct processes together.
# EXPECTS: optional comma-separated plugin arguments ('help' shows the usage)
# RETURNS: 0 when the check was performed (or the usage was shown);
#          1 when the configuration file cannot be read or when the
#          configured process_threshold is not numeric
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2021-04-07"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="HP-UX"                    # uname -s match
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_GROUP_BY_PPID=""
typeset _GROUP_BY_PPID=""
typeset _CFG_PROCESS_THRESHOLD=""
typeset _PROCESS_THRESHOLD=""
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _DEFUNCT_PROCS=""
typeset _NUM_DEFUNCT_PROCS=""
typeset _PPID=""

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "${0}" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi

# read required configuration values
_CFG_PROCESS_THRESHOLD=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'process_threshold')
if [[ -z "${_CFG_PROCESS_THRESHOLD}" ]]
then
    # default
    _PROCESS_THRESHOLD=10
    log "setting value for parameter process_threshold to its default (10)"
else
    # reject anything that is not a number before it is used in arithmetic
    if ! data_is_numeric "${_CFG_PROCESS_THRESHOLD}"
    then
        warn "value for parameter process_threshold in configuration file ${_CONFIG_FILE} is invalid"
        return 1
    fi
    _PROCESS_THRESHOLD=${_CFG_PROCESS_THRESHOLD}
    log "setting value for parameter process_threshold (${_PROCESS_THRESHOLD})"
fi

_CFG_GROUP_BY_PPID=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'group_by_ppid')
case "${_CFG_GROUP_BY_PPID}" in
    no|NO|No)
        _GROUP_BY_PPID=0
        log "setting value for parameter group_by_ppid (No)"
        ;;
    yes|YES|Yes)
        # explicitly configured: do not report it as a fallback to the default
        _GROUP_BY_PPID=1
        log "setting value for parameter group_by_ppid (Yes)"
        ;;
    *)
        # default
        _GROUP_BY_PPID=1
        log "setting value for parameter group_by_ppid to its default (Yes)"
        ;;
esac

_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac

# log_healthy (the command-line setting wins over the configuration file)
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# collect defunct processes; the full ps listing is appended to the HC stdout
# log and ps errors are appended (not truncated) to the HC stderr log.
# '[d]efunct' keeps the grep process itself out of the result.
# shellcheck disable=SC2009
_DEFUNCT_PROCS=$(UNIX95=1 ps -eo ppid,pid,comm,etime 2>>"${HC_STDERR_LOG}" | tee -a "${HC_STDOUT_LOG}" 2>/dev/null | grep '[d]efunct' 2>/dev/null)

# check defunct processes
if [[ -z "${_DEFUNCT_PROCS}" ]]
then
    _MSG="no defunct process(es) detected"
    _STC=0
    if (( _LOG_HEALTHY > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 0
fi

if (( _GROUP_BY_PPID > 0 ))
then
    # per PPID: awk emits one "<ppid>:<count>" line for each parent
    print -R "${_DEFUNCT_PROCS}" | awk '

        {
            # count PIDs per PPID
            counts[$1]++;
        }

        END {
            for (i in counts) print i ":" counts[i]
        }' 2>/dev/null | while IFS=":" read -r _PPID _NUM_DEFUNCT_PROCS
    do
        (( ARG_DEBUG > 0 )) && debug "awk found PPID: ${_PPID} with # procs: ${_NUM_DEFUNCT_PROCS}"
        if (( _NUM_DEFUNCT_PROCS <= _PROCESS_THRESHOLD ))
        then
            _MSG="defunct process(es) detected for PPID (${_PPID}) but are still under threshold (${_NUM_DEFUNCT_PROCS}<=${_PROCESS_THRESHOLD})"
            _STC=0
        else
            _MSG="defunct process(es) detected for PPID (${_PPID}) and are over threshold (${_NUM_DEFUNCT_PROCS}>${_PROCESS_THRESHOLD})"
            _STC=1
        fi
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}" "${_NUM_DEFUNCT_PROCS}" "${_PROCESS_THRESHOLD}"
        fi
    done
else
    # all defunct processes together: one line of ps output per process
    _NUM_DEFUNCT_PROCS=$(print -R "${_DEFUNCT_PROCS}" | wc -l 2>/dev/null)
    if (( _NUM_DEFUNCT_PROCS <= _PROCESS_THRESHOLD ))
    then
        _MSG="defunct process(es) detected but are still under threshold (${_NUM_DEFUNCT_PROCS}<=${_PROCESS_THRESHOLD})"
        _STC=0
    else
        _MSG="defunct process(es) detected and are over threshold (${_NUM_DEFUNCT_PROCS}>${_PROCESS_THRESHOLD})"
        _STC=1
    fi
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}" "${_NUM_DEFUNCT_PROCS}" "${_PROCESS_THRESHOLD}"
    fi
fi

return 0
}
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Prints the help text of the check_hpux_defunct_processes plugin on stdout.
# $1: plugin name, $2: plugin version, $3: path of the configuration file
# (check_hpux_defunct_processes passes these when called with 'help').
# The here-document lists the supported configuration parameters.
# Always returns 0.
function _show_usage
|
||||||
|
{
|
||||||
|
cat <<- EOT
|
||||||
|
NAME : $1
|
||||||
|
VERSION : $2
|
||||||
|
CONFIG : $3 with parameters:
|
||||||
|
log_healthy=<yes|no>
|
||||||
|
process_threshold=<#_of_processes>
|
||||||
|
group_by_ppid=<yes|no>
|
||||||
|
PURPOSE : Checks whether there are (too many) defunct processes on the host.
|
||||||
|
LOG HEALTHY : Supported
|
||||||
|
|
||||||
|
EOT
|
||||||
|
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
#******************************************************************************
|
||||||
|
# END of script
|
||||||
|
#******************************************************************************
|
@ -32,6 +32,7 @@
|
|||||||
# @(#) 2018-10-28: fixed (linter) errors [Patrick Van der Veken]
|
# @(#) 2018-10-28: fixed (linter) errors [Patrick Van der Veken]
|
||||||
# @(#) 2019-01-24: arguments fix [Patrick Van der Veken]
|
# @(#) 2019-01-24: arguments fix [Patrick Van der Veken]
|
||||||
# @(#) 2019-03-09: added support for --log-healthy [Patrick Van der Veken]
|
# @(#) 2019-03-09: added support for --log-healthy [Patrick Van der Veken]
|
||||||
|
# @(#) 2021-04-07: quotes & shellcheck fixes [Patrick Van der Veken]
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||||
#******************************************************************************
|
#******************************************************************************
|
||||||
@ -43,13 +44,13 @@ function check_hpux_ioscan
|
|||||||
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
|
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
|
||||||
typeset _IOSCAN_BIN="/usr/sbin/ioscan"
|
typeset _IOSCAN_BIN="/usr/sbin/ioscan"
|
||||||
typeset _IOSCAN_OPTS="-Fn"
|
typeset _IOSCAN_OPTS="-Fn"
|
||||||
typeset _VERSION="2019-03-09" # YYYY-MM-DD
|
typeset _VERSION="2021-04-07" # YYYY-MM-DD
|
||||||
typeset _SUPPORTED_PLATFORMS="HP-UX" # uname -s match
|
typeset _SUPPORTED_PLATFORMS="HP-UX" # uname -s match
|
||||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||||
|
|
||||||
# set defaults
|
# set defaults
|
||||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
|
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
|
||||||
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
|
init_hc "${0}" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
|
||||||
typeset _ARGS=$(data_comma2space "$*")
|
typeset _ARGS=$(data_comma2space "$*")
|
||||||
typeset _ARG=""
|
typeset _ARG=""
|
||||||
typeset _MSG=""
|
typeset _MSG=""
|
||||||
@ -71,7 +72,7 @@ for _ARG in ${_ARGS}
|
|||||||
do
|
do
|
||||||
case "${_ARG}" in
|
case "${_ARG}" in
|
||||||
help)
|
help)
|
||||||
_show_usage $0 ${_VERSION} ${_CONFIG_FILE} && return 0
|
_show_usage "${0}" "${_VERSION}" "${_CONFIG_FILE}" && return 0
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
@ -154,7 +155,8 @@ else
|
|||||||
_IOSCAN_OPTS="${_IOSCAN_OPTS}u"
|
_IOSCAN_OPTS="${_IOSCAN_OPTS}u"
|
||||||
fi
|
fi
|
||||||
log "executing ioscan with options: ${_IOSCAN_OPTS}"
|
log "executing ioscan with options: ${_IOSCAN_OPTS}"
|
||||||
${_IOSCAN_BIN} ${_IOSCAN_OPTS} >>${HC_STDOUT_LOG} 2>>${HC_STDERR_LOG}
|
${_IOSCAN_BIN} ${_IOSCAN_OPTS} >>"${HC_STDOUT_LOG}" 2>>"${HC_STDERR_LOG}"
|
||||||
|
# shellcheck disable=SC2181
|
||||||
if (( $? > 0 ))
|
if (( $? > 0 ))
|
||||||
then
|
then
|
||||||
_MSG="unable to run command: {${_IOSCAN_BIN}}"
|
_MSG="unable to run command: {${_IOSCAN_BIN}}"
|
||||||
@ -166,13 +168,13 @@ else
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# check for requested device classes
|
# check for requested device classes
|
||||||
grep -E -e ".*:.*:.*:.*:.*:.*:.*:.*:${_IOSCAN_CLASSES}:.*" ${HC_STDOUT_LOG} 2>/dev/null |\
|
grep -E -e ".*:.*:.*:.*:.*:.*:.*:.*:${_IOSCAN_CLASSES}:.*" "${HC_STDOUT_LOG}" 2>/dev/null |\
|
||||||
while read _IOSCAN_LINE
|
while read -r _IOSCAN_LINE
|
||||||
do
|
do
|
||||||
# possible states are: CLAIMED, UNCLAIMED, DIFF_HW, NO_HW, ERROR, SCAN
|
# possible states are: CLAIMED, UNCLAIMED, DIFF_HW, NO_HW, ERROR, SCAN
|
||||||
_HW_CLASS="$(print ${_IOSCAN_LINE} | cut -f9 -d':')"
|
_HW_CLASS=$(print "${_IOSCAN_LINE}" | cut -f9 -d':')
|
||||||
_HW_PATH="$(print ${_IOSCAN_LINE} | cut -f11 -d':')"
|
_HW_PATH=$(print "${_IOSCAN_LINE}" | cut -f11 -d':')
|
||||||
_HW_STATE="$(print ${_IOSCAN_LINE} | cut -f16 -d':')"
|
_HW_STATE=$(print "${_IOSCAN_LINE}" | cut -f16 -d':')
|
||||||
|
|
||||||
case "${_HW_STATE}" in
|
case "${_HW_STATE}" in
|
||||||
NO_HW)
|
NO_HW)
|
||||||
@ -214,7 +216,6 @@ function _show_usage
|
|||||||
cat <<- EOT
|
cat <<- EOT
|
||||||
NAME : $1
|
NAME : $1
|
||||||
VERSION : $2
|
VERSION : $2
|
||||||
CONFIG : $3 with:
|
|
||||||
CONFIG : $3 with parameters:
|
CONFIG : $3 with parameters:
|
||||||
log_healthy=<yes|no>
|
log_healthy=<yes|no>
|
||||||
ioscan_classes=<list_of_device_classes_to_check>
|
ioscan_classes=<list_of_device_classes_to_check>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user