* Added Max processes check to check_linux_process_limits plugin

* Added support for log_healthy in list_hc()
This commit is contained in:
Patrick Van der Veken 2018-07-06 11:19:39 +02:00
parent 3369aa3bef
commit 58bee85277
3 changed files with 172 additions and 78 deletions

View File

@ -12,6 +12,7 @@
log_healthy="no"
# -- user stanzas --
# supported limit checks: Max open files; Max processes
# format:
# user:<user name>;<limit name>;<soft threshold in %>;<hard threshold in %>
# note: <limit name> must exactly match the descriptor in /proc/<PID>/limits
@ -19,6 +20,7 @@ log_healthy="no"
user;postfix;Max open files;;75
# -- process stanzas --
# supported limit checks: Max open files
# format:
# process:<process name>;<limit name>;<soft threshold in %>;<hard threshold in %>
# note: <limit name> must exactly match the descriptor in /proc/<PID>/limits

View File

@ -1438,8 +1438,10 @@ typeset FVERSION=""
typeset FCONFIG=""
typeset FSTATE=""
typeset FFILE=""
typeset HAS_FCONFIG=0
typeset FHEALTHY=""
typeset FSCHEDULED=0
typeset HAS_FCONFIG=0
typeset HAS_FHEALTHY=""
typeset DISABLE_FFILE=""
typeset HC_VERSION=""
@ -1454,8 +1456,8 @@ fi
# print header
if [[ "${FACTION}" != "list" ]]
then
printf "%-30s\t%-8s\t%s\t\t%s\t%s\n" "Health Check" "State" "Version" "Config?" "Sched?"
printf "%80s\n" | tr ' ' -
printf "%-30s\t%-8s\t%s\t\t%s\t%s\t%s\n" "Health Check" "State" "Version" "Config?" "Sched?" "H+?"
printf "%100s\n" | tr ' ' -
fi
print "${FPATH}" | tr ':' '\n' | grep -v "core$" | sort 2>/dev/null | while read -r FDIR
do
@ -1470,8 +1472,27 @@ do
if (( HAS_FCONFIG != 0 ))
then
FCONFIG="Yes"
if [[ -r ${CONFIG_DIR}/${FNAME#function *}.conf ]]
then
# check for log_healthy parameter
HAS_FHEALTHY=$(_CONFIG_FILE="${CONFIG_DIR}/${FNAME#function *}.conf" data_get_lvalue_from_config 'log_healthy')
case "${HAS_FHEALTHY}" in
no|NO|No)
FHEALTHY="No"
;;
yes|YES|Yes)
FHEALTHY="Yes"
;;
*)
FHEALTHY="N/A"
;;
esac
else
FHEALTHY="N/A"
fi
else
FCONFIG="No"
FHEALTHY="N/A"
fi
# check state
DISABLE_FFILE="$(print ${FFILE##*/} | sed 's/\.sh$//')"
@ -1495,12 +1516,13 @@ do
# show results
if [[ "${FACTION}" != "list" ]]
then
printf "%-30s\t%-8s\t%s\t%s\t%s\n" \
printf "%-30s\t%-8s\t%s\t%s\t%s\t%s\n" \
"${FNAME#function *}" \
"${FSTATE}" \
"${FVERSION#typeset _VERSION=*}" \
"${FCONFIG}" \
"${FSCHEDULED}"
"${FSCHEDULED}" \
"${FHEALTHY}"
else
printf "%s\n" "${FNAME#function *}"
fi
@ -1530,6 +1552,12 @@ then
print "current FPATH: ${FPATH}"
fi
# legend
print
print "Config?: plugin has a default configuration files (Yes/No)"
print "Sched? : plugin is scheduled through cron (Yes/No)"
print "H+? : plugin will log/show passed health checks too (Yes/No/Not applicable)"
return 0
}

View File

@ -2,7 +2,7 @@
#******************************************************************************
# @(#) check_linux_process_limits.sh
#******************************************************************************
# @(#) Copyright (C) 2016 by KUDOS BVBA (info@kudos.be). All rights reserved.
# @(#) Copyright (C) 2018 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
@ -46,7 +46,8 @@ typeset _STC=0
typeset _DUMMY=""
typeset _LINE_COUNT=1
typeset _LOG_HEALTHY=0
typeset _OPEN_FILES=0
typeset _MAX_OPEN_FILES=0
typeset _MAX_PROCESSES=0
typeset _PROCESS=""
typeset _PROCESS_LIMIT=""
typeset _PROCESS_SOFT_THRESHOLD=0
@ -129,33 +130,36 @@ do
fi
fi
# collect ps info
(( ARG_DEBUG != 0 )) && debug "collecting information for process class ${_PROCESS}"
_PROCESS_PS=$(_get_psinfo_by_process "${_PROCESS}")
if [[ -z "${_PROCESS_PS}" ]]
then
warn "could not find any matching processes for process ${_PROCESS}"
continue
fi
print "${_PROCESS_PS}" | while read _PROCESS_PS_PID _PROCESS_PS_USER
do
(( ARG_DEBUG != 0 )) && debug "checking process ${_PROCESS_PS_PID}"
# get current values and check thresholds
case "${_PROCESS_LIMIT}" in
"Max open files"|"MAX OPEN FILES"|"max open files")
_OPEN_FILES=$(_get_open_files ${_PROCESS_PS_PID})
# SOFT limit
_check_limit "${_PROCESS_LIMIT}" soft ${_PROCESS_PS_PID} ${_PROCESS_PS_USER} ${_PROCESS} ${_PROCESS_SOFT_THRESHOLD} ${_OPEN_FILES} ${_LOG_HEALTHY}
# HARD limit
_check_limit "${_PROCESS_LIMIT}" hard ${_PROCESS_PS_PID} ${_PROCESS_PS_USER} ${_PROCESS} ${_PROCESS_HARD_THRESHOLD} ${_OPEN_FILES} ${_LOG_HEALTHY}
;;
*)
# no other limits are supported yet ;-)
warn "'${_PROCESS_LIMIT}' is an unsupported limit check"
# which limit to check?
case "${_PROCESS_LIMIT}" in
"Max open files")
# collect ps info
(( ARG_DEBUG != 0 )) && debug "${_PROCESS_LIMIT}: collecting information for process class ${_PROCESS}"
_PROCESS_PS=$(_get_psinfo_by_process "${_PROCESS}")
if [[ -z "${_PROCESS_PS}" ]]
then
warn "${_PROCESS_LIMIT}: could not find any matching processes for process ${_PROCESS}"
continue
;;
esac
done
fi
print "${_PROCESS_PS}" | while read _PROCESS_PS_PID _PROCESS_PS_USER
do
(( ARG_DEBUG != 0 )) && debug "${_PROCESS_LIMIT}: checking process ${_PROCESS_PS_PID}"
# get current values and check thresholds
_MAX_OPEN_FILES=$(_get_open_files ${_PROCESS_PS_PID})
# SOFT limit
_check_limit "${_PROCESS_LIMIT}" soft ${_PROCESS_PS_PID} ${_PROCESS_PS_USER} \
${_PROCESS} ${_PROCESS_SOFT_THRESHOLD} ${_MAX_OPEN_FILES} ${_LOG_HEALTHY}
# HARD limit
_check_limit "${_PROCESS_LIMIT}" hard ${_PROCESS_PS_PID} ${_PROCESS_PS_USER} \
${_PROCESS} ${_PROCESS_HARD_THRESHOLD} ${_MAX_OPEN_FILES} ${_LOG_HEALTHY}
done
;;
*)
# no other limits are supported yet ;-)
warn "'${_PROCESS_LIMIT}' is an unsupported limit check"
continue
;;
esac
_LINE_COUNT=$(( _LINE_COUNT + 1 ))
done
@ -190,33 +194,46 @@ do
fi
fi
# collect ps info
(( ARG_DEBUG != 0 )) && debug "collecting information for user ${_USER}"
_USER_PS=$(_get_psinfo_by_user "${_USER}")
if [[ -z "${_USER_PS}" ]]
then
warn "could not find any matching processes for user ${_USER}"
continue
fi
print "${_USER_PS}" | while read _USER_PS_PID _USER_PS_COMM
do
(( ARG_DEBUG != 0 )) && debug "checking process ${_USER_PS_PID}"
# get current values and check thresholds
case "${_USER_LIMIT}" in
"Max open files"|"MAX OPEN FILES"|"max open files")
_OPEN_FILES=$(_get_open_files ${_USER_PS_PID})
# SOFT limit
_check_limit "${_USER_LIMIT}" soft ${_USER_PS_PID} ${_USER} ${_USER_PS_COMM} ${_USER_SOFT_THRESHOLD} ${_OPEN_FILES} ${_LOG_HEALTHY}
# HARD limit
_check_limit "${_USER_LIMIT}" hard ${_USER_PS_PID} ${_USER} ${_USER_PS_COMM} ${_USER_HARD_THRESHOLD} ${_OPEN_FILES} ${_LOG_HEALTHY}
;;
*)
# no other limits are supported yet ;-)
warn "'${_USER_LIMIT}' is an unsupported limit check"
# which limit to check?
case "${_USER_LIMIT}" in
"Max open files")
# collect ps info
(( ARG_DEBUG != 0 )) && debug "${_USER_LIMIT}: collecting information for user ${_USER}"
_USER_PS=$(_get_psinfo_by_user "${_USER}")
if [[ -z "${_USER_PS}" ]]
then
warn "${_USER_LIMIT}: could not find any matching processes for user ${_USER}"
continue
;;
esac
done
fi
print "${_USER_PS}" | while read _USER_PS_PID _USER_PS_COMM
do
(( ARG_DEBUG != 0 )) && debug "${_USER_LIMIT}: checking process ${_USER_PS_PID}"
# get current values and check thresholds
_MAX_OPEN_FILES=$(_get_open_files ${_USER_PS_PID})
# SOFT limit
_check_limit "${_USER_LIMIT}" soft ${_USER_PS_PID} ${_USER} ${_USER_PS_COMM} \
${_USER_SOFT_THRESHOLD} ${_MAX_OPEN_FILES} ${_LOG_HEALTHY}
# HARD limit
_check_limit "${_USER_LIMIT}" hard ${_USER_PS_PID} ${_USER} ${_USER_PS_COMM} \
${_USER_HARD_THRESHOLD} ${_MAX_OPEN_FILES} ${_LOG_HEALTHY}
done
;;
"Max processes")
(( ARG_DEBUG != 0 )) && debug "${_USER_LIMIT}: collecting information for user ${_USER}"
_MAX_PROCESSES=$(_get_processes ${_USER})
# SOFT limit
_check_limit "${_USER_LIMIT}" soft 0 ${_USER} "" ${_USER_SOFT_THRESHOLD} \
${_MAX_PROCESSES} ${_LOG_HEALTHY}
# HARD limit
_check_limit "${_USER_LIMIT}" hard 0 ${_USER} "" ${_USER_HARD_THRESHOLD} \
${_MAX_PROCESSES} ${_LOG_HEALTHY}
;;
*)
# no other limits are supported yet ;-)
warn "'${_USER_LIMIT}' is an unsupported limit check"
continue
;;
esac
_LINE_COUNT=$(( _LINE_COUNT + 1 ))
done
@ -254,45 +271,79 @@ function _check_limit
{
typeset _LIMIT_NAME="${1}"
typeset _LIMIT_TYPE="${2}"
typeset _LIMIT_PID=${3}
typeset _LIMIT_PID=${3} # can be 0
typeset _LIMIT_USER="${4}"
typeset _LIMIT_PROCESS="${5}"
typeset _LIMIT_PROCESS="${5}" # can be ""
typeset _LIMIT_THRESHOLD=${6}
typeset _CURR_VALUE=${7}
typeset _LOG_HEALTHY=${8}
typeset _LIMIT_COMMAND=""
typeset _LIMIT_ENTRY=""
typeset _LIMIT_FIELD=0
typeset _MSG_BIT=""
(( ARG_DEBUG != 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
# check for empties
(( _LIMIT_PID == 0 )) && _LIMIT_PID="N/A"
[[ -z "${_LIMIT_PROCESS}" ]] && _LIMIT_PROCESS="N/A"
if [[ -n "${_LIMIT_THRESHOLD}" ]]
then
_LIMIT_ENTRY=$(grep -i "${_LIMIT_NAME}" /proc/${_LIMIT_PID}/limits 2>/dev/null)
if [[ -z "${_LIMIT_ENTRY}" ]]
then
warn "unable to gather limits information (${_LIMIT_PID}/${_LIMIT_USER}/${_LIMIT_PROCESS})"
return 1
fi
case "${_LIMIT_TYPE}" in
soft)
_LIMIT_VALUE=$(print "${_LIMIT_ENTRY}" | sed -s "s/${_LIMIT_NAME}//g" 2>/dev/null | awk '{ print $1}' 2>/dev/null)
# get limit value
case "${_LIMIT_NAME}" in
"Max open files")
_LIMIT_ENTRY=$(grep -i "${_LIMIT_NAME}" /proc/${_LIMIT_PID}/limits 2>/dev/null)
if [[ -z "${_LIMIT_ENTRY}" ]]
then
warn "${_LIMIT_TYPE}: unable to gather limits information (${_LIMIT_PID}/${_LIMIT_USER}/${_LIMIT_PROCESS})"
return 1
fi
case "${_LIMIT_TYPE}" in
soft)
_LIMIT_FIELD=1
;;
hard)
_LIMIT_FIELD=2
;;
esac
_LIMIT_VALUE=$(print "${_LIMIT_ENTRY}" | sed -s "s/${_LIMIT_NAME}//g" 2>/dev/null |\
awk -v f="${_LIMIT_FIELD}" '{ print $f}' 2>/dev/null)
_MSG_BIT="${_LIMIT_PID}/${_LIMIT_USER}/${_LIMIT_PROCESS}"
;;
hard)
_LIMIT_VALUE=$(print "${_LIMIT_ENTRY}" | sed -s "s/${_LIMIT_NAME}//g" 2>/dev/null | awk '{ print $2}' 2>/dev/null)
"Max processes")
case "${_LIMIT_TYPE}" in
soft)
_LIMIT_COMMAND="ulimit -a"
;;
hard)
_LIMIT_COMMAND="ulimit -Ha"
;;
esac
_LIMIT_VALUE=$(su - ${_LIMIT_USER} -c "${_LIMIT_COMMAND}" 2>/dev/null |\
grep -i "max user processes" 2>/dev/null | sed -s "s/max user processes//g" 2>/dev/null |\
awk '{ print $2}' 2>/dev/null)
if [[ -z "${_LIMIT_VALUE}" ]]
then
warn "${_LIMIT_TYPE}: unable to gather limits information (${_LIMIT_USER})"
return 1
fi
_MSG_BIT="${_LIMIT_USER}"
;;
esac
# check limit value -> threshold
if [[ "${_LIMIT_VALUE}" = "unlimited" ]]
then
log "limit (${_LIMIT_TYPE} on '${_LIMIT_NAME}' is unlimited (${_LIMIT_PID}/${_LIMIT_USER}/${_LIMIT_PROCESS})"
log "limit (${_LIMIT_TYPE} on '${_LIMIT_NAME}' is unlimited (${_MSG_BIT})"
return 0
else
if (( _CURR_VALUE > (_LIMIT_VALUE * _LIMIT_THRESHOLD / 100) ))
then
_MSG="(${_LIMIT_PID}/${_LIMIT_USER}/${_LIMIT_PROCESS}) limit (${_LIMIT_TYPE}) on '${_LIMIT_NAME}' has been surpassed (${_CURR_VALUE} > ${_LIMIT_VALUE} @${_LIMIT_THRESHOLD}%)"
_MSG="(${_MSG_BIT}) limit (${_LIMIT_TYPE}) on '${_LIMIT_NAME}' has been surpassed (${_CURR_VALUE} > ${_LIMIT_VALUE} @${_LIMIT_THRESHOLD}%)"
log_hc "$0" 1 "${_MSG}" ${_CURR_VALUE} $(( _LIMIT_VALUE * _LIMIT_THRESHOLD / 100 ))
else
if (( _LOG_HEALTHY > 0 ))
then
_MSG="(${_LIMIT_PID}/${_LIMIT_USER}/${_LIMIT_PROCESS}) limit (${_LIMIT_TYPE}) on '${_LIMIT_NAME}' is safe (${_CURR_VALUE} <= ${_LIMIT_VALUE} @${_LIMIT_THRESHOLD}%)"
_MSG="(${_MSG_BIT}) limit (${_LIMIT_TYPE}) on '${_LIMIT_NAME}' is safe (${_CURR_VALUE} <= ${_LIMIT_VALUE} @${_LIMIT_THRESHOLD}%)"
log_hc "$0" 0 "${_MSG}" ${_CURR_VALUE} $(( _LIMIT_VALUE * _LIMIT_THRESHOLD / 100 ))
fi
fi
@ -314,6 +365,16 @@ ls -f /proc/${1}/fd/ 2>/dev/null | wc -l 2>/dev/null
return 0
}
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# @(#) FUNCTION: _get_processes()
# DOES: prints the number of scheduling entities (threads) that currently
#       belong to a given user, for comparison against the 'Max processes'
#       limit
# EXPECTS: $1 - user name (or numeric UID) [string]
# OUTPUTS: the count on STDOUT; 0 when ps reports nothing (unknown user,
#          ps failure)
# RETURNS: 0 (always)
# REQUIRES: ps (procps flavour supporting -L, -U and --no-headers), wc
function _get_processes
{
(( ARG_DEBUG != 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}

# On Linux the 'max user processes' limit (RLIMIT_NPROC) is accounted per
# thread of the real user ID, not per process. -L makes ps print one line per
# thread so that the count matches what the kernel compares to the limit.
# The user name is quoted so that an empty or odd value cannot be word-split
# into extra ps arguments.
ps -L -U "${1}" --no-headers 2>/dev/null | wc -l 2>/dev/null

return 0
}
# -----------------------------------------------------------------------------
function _show_usage
{
@ -325,7 +386,10 @@ CONFIG : $3 with:
and formatted stanzas:
user:<user_name>:<limit_name>:<soft_limit_threshold_%>:<hard_limit_threshold_%>
process:<process_name>:<limit_name>:<soft_limit_threshold_%>:<hard_limit_threshold_%>
PURPOSE : Checks the value(s) of the process limits from /proc/*/limits
PURPOSE : Checks the value(s) of the process limits from /proc/*/limits or ulimit
Currenty following checks are supported:
* Max open files (/proc/*/limits)
* Max processes (ulimit)
EOT