Added support for OpenVZ 7. Now using /proc/user_beancounters instead of the 'vzubc' tool. Added possibility to exclude UBC counters

This commit is contained in:
Patrick Van der Veken 2020-04-10 18:43:01 +02:00
parent 8b6b2e9c31
commit a72fbd1576
2 changed files with 142 additions and 51 deletions

View File

@ -11,6 +11,11 @@
# [default: no]
log_healthy="no"
# specify UBC names to exclude from being checked (comma-separated list)
# [default: none]
# Examples: numfile,numpty
exclude_counters=""
# specify containers for which to check the UBC
# Format:
# [ct:<ct_id>]

View File

@ -19,11 +19,14 @@
# @(#) MAIN: check_linux_vz_ct_counters
# DOES: see _show_usage()
# EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), data_is_numeric(), data_strip_space(),
# dump_logs(), init_hc(), log_hc(), warn()
# REQUIRES: data_comma2space(), data_is_numeric(), dump_logs(), init_hc(),
# log_hc(), warn()
#
# @(#) HISTORY:
# @(#) 2019-02-08: initial version [Patrick Van der Veken]
# @(#) 2020-04-10: added support for OpenVZ 7. Now using /proc/user_beancounters
# @(#) instead of the 'vzubc' tool. Added possibility to exclude
# @(#) UBC counters [Patrick Van der Veken]
# -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#******************************************************************************
@ -33,9 +36,10 @@ function check_linux_vz_ct_counters
{
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VZUBC_BIN="/usr/sbin/vzubc"
typeset _VZUBC_OPTS="-q -i -r"
typeset _VERSION="2019-02-08" # YYYY-MM-DD
typeset _VZCTL_BIN="/usr/sbin/vzctl"
typeset _PRLCTL_BIN="/bin/prlctl"
typeset _UBC_FILE="/proc/user_beancounters"
typeset _VERSION="2020-04-10" # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
# ------------------------- CONFIGURATION ends here ---------------------------
@ -46,15 +50,20 @@ typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_EXCLUDE_COUNTERS=""
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CT_ID=""
typeset _UBC_OUTPUT=""
typeset _HAS_VZ6=0
typeset _UBC_DATA=""
typeset _UBC_CT_DATA=""
typeset _UBC_NAME=""
typeset _UBC_FAIL=""
typeset _UBC_CURR_FAIL=""
typeset _UBC_PREV_FAIL=""
typeset _UBC_HELD=""
typeset _UBC_MAX_HELD=""
typeset _RC=0
typeset _UBC_STATE_FILE_STUB="${STATE_PERM_DIR}/vzct.failtcnt"
typeset _UBC_STATE_FILE=""
# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
@ -74,6 +83,11 @@ then
return 1
fi
# read configuration values
_CFG_EXCLUDE_COUNTERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'exclude_counters')
if [[ -n "${_CFG_EXCLUDE_COUNTERS}" ]]
then
log "excluding following counters from check: ${_CFG_EXCLUDE_COUNTERS}"
fi
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
yes|YES|Yes)
@ -99,63 +113,133 @@ else
log "not logging/showing passed health checks"
fi
# check openvz
if [[ ! -x ${_VZUBC_BIN} || -z "${_VZUBC_BIN}" ]]
# check openvz (6.x or 7.x)
if [[ ! -x ${_PRLCTL_BIN} || -z "${_PRLCTL_BIN}" ]]
then
if [[ ! -x ${_VZCTL_BIN} || -z "${_VZCTL_BIN}" ]]
then
warn "OpenVZ is not installed here"
return 1
else
log "OpenVZ 6.x is installed here"
_HAS_VZ6=1
fi
else
log "OpenVZ 7.x is installed here"
fi
if [[ ! -r "${_UBC_FILE}" ]]
then
warn "missing user beancounters file at ${_UBC_FILE}"
return 1
fi
# get bean counters
_UBC_DATA=$(cat ${_UBC_FILE} 2>>${HC_STDERR_LOG})
if (( $? > 0 )) || [[ -z "${_UBC_DATA}" ]]
then
warn "unable to get UBC data from ${_UBC_FILE}"
# dump debug info
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
return 1
fi
# check configuration values
grep -E -e '^ct:' ${_CONFIG_FILE} 2>/dev/null | cut -f2 -d':' 2>/dev/null |\
while read -r _CT_ID
do
# OpenVZ 6.x has only numeric CT IDs
if (( _HAS_VZ6 > 0 ))
then
data_is_numeric "${_CT_ID}"
if (( $? > 0 ))
then
warn "${_CT_ID} appears to be an incorrect value for CT ID"
warn "${_CT_ID} does not appear to be a correct OpenVZ 6 CT ID"
continue
fi
fi
# get bean counters
_UBC_OUTPUT=$(${_VZUBC_BIN} ${_VZUBC_OPTS} ${_CT_ID} 2>>${HC_STDERR_LOG})
if (( $? > 0 )) || [[ -z "${_UBC_OUTPUT}" ]]
# parse UBC data for CT ID
# Emits one "<name>:<held>:<maxheld>:<failcnt>" line per counter that belongs
# to container ${_CT_ID}, skipping 'dummy' entries and any counter named in
# ${_CFG_EXCLUDE_COUNTERS}.
# Layout of the beancounters data as handled here:
#   - a 7-field line that is not the 'uid' header opens a container section:
#     "<ct_id>: <name> <held> <maxheld> <barrier> <limit> <failcnt>"
#     This line ALSO carries the first counter of the container, so it must be
#     reported as well (fields are shifted by one compared to the other lines).
#   - the following 6-field lines hold the remaining counters of that section:
#     "<name> <held> <maxheld> <barrier> <limit> <failcnt>"
_UBC_CT_DATA=$(print "${_UBC_DATA}" | awk -v ct_id="${_CT_ID}" -v exclude_counters="${_CFG_EXCLUDE_COUNTERS}" '
    BEGIN {
        found_ct = 0;
    }
    {
        name = "";
        if (NF == 7 && $1 !~ /uid/) {
            # container start line: strip the trailing colon from the CT ID
            # and remember whether this is the section we are looking for
            gsub (/:/, "", $1);
            found_ct = ($1 == ct_id) ? 1 : 0;
            # first counter of the section sits on this same line
            name = $2; held = $3; max_held = $4; fail_cnt = $7;
        } else if (NF == 6) {
            # regular counter line inside a container section
            name = $1; held = $2; max_held = $3; fail_cnt = $6;
        }
        if (found_ct > 0 && name != "" && name !~ /dummy/ && !match (exclude_counters, name)) {
            printf ("%s:%d:%d:%d\n", name, held, max_held, fail_cnt);
        }
    }
    ' 2>/dev/null)
# check UBC data
if [[ -n "${_UBC_CT_DATA}" ]]
then
warn "unable to run command {${_VZUBC_BIN}}. Container ${_CT_ID} does not exist?"
# dump debug info
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
_RC=1
continue
fi
# check values (data lines start with a space)
print "${_UBC_OUTPUT}" | grep "^ " 2>/dev/null | while read -r _UBC_LINE
print "${_UBC_CT_DATA}" | while IFS=":" read -r _UBC_NAME _UBC_HELD _UBC_MAX_HELD _UBC_CURR_FAIL
do
_UBC_NAME=$(data_strip_space "$(print ${_UBC_LINE} | cut -f1 -d'|' 2>/dev/null)")
_UBC_FAIL=$(data_strip_space "$(print ${_UBC_LINE} | cut -f6 -d'|' 2>/dev/null)")
_UBC_HELD=$(data_strip_space "$(print ${_UBC_LINE} | cut -f2 -d'|' 2>/dev/null | awk '{print $1}')")
_UBC_MAX_HELD=$(data_strip_space "$(print ${_UBC_LINE} | cut -f3 -d'|' 2>/dev/null | awk '{print $1}')")
if [[ -z "${_UBC_FAIL}" ]] || [[ "${_UBC_FAIL}" = '-' ]]
if [[ -z "${_UBC_NAME}" ]] || [[ -z "${_UBC_CURR_FAIL}" ]]
then
warn "unable to parse UBC name and/or fail count values for CT ID ${_CT_ID}"
continue
fi
data_is_numeric "${_UBC_CURR_FAIL}"
if (( $? > 0 ))
then
warn "${_UBC_CURR_FAIL} does not appear to a numeric fail count for CT ID ${_CT_ID}"
continue
fi
# get previous fail count value
_UBC_STATE_FILE="${_UBC_STATE_FILE_STUB}-${_UBC_NAME}_${_CT_ID}"
if [[ -s "${_UBC_STATE_FILE}" ]]
then
_UBC_PREV_FAIL=$(<${_UBC_STATE_FILE} 2>/dev/null)
else
_UBC_PREV_FAIL=0
fi
if (( _UBC_CURR_FAIL > _UBC_PREV_FAIL ))
then
_MSG="${_UBC_NAME} for CT ${_CT_ID} increased with $(( _UBC_CURR_FAIL - _UBC_PREV_FAIL )) [HELD:${_UBC_HELD}/MAX_HELD:${_UBC_MAX_HELD}]"
_STC=1
else
_MSG="${_UBC_NAME} for CT ${_CT_ID} is unchanged [HELD:${_UBC_HELD}/MAX_HELD:${_UBC_MAX_HELD}]"
_STC=0
else
_MSG="${_UBC_NAME} for CT ${_CT_ID} increased with ${_UBC_FAIL} [HELD:${_UBC_HELD}/MAX_HELD:${_UBC_MAX_HELD}]"
_STC=1
fi
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
then
log_hc "$0" ${_STC} "${_MSG}" "${_UBC_HELD}" "${_UBC_MAX_HELD}"
fi
# write current fail count value
if (( ARG_LOG > 0 ))
then
print "${_UBC_CURR_FAIL}" >${_UBC_STATE_FILE}
fi
done
else
warn "unable to find UBC data for CT ID ${_CT_ID}"
continue
fi
done
# add vzubc output to stdout log
print "==== ${_VZUBC_BIN} ${_VZUBC_OPTS} ${_CT_ID} ====" >>${HC_STDOUT_LOG}
# add UBC output to stdout log
print "==== ${_UBC_FILE} ====" >>${HC_STDOUT_LOG}
print "${_UBC_OUTPUT}" >>${HC_STDOUT_LOG}
done
return ${_RC}
return 0
}
# -----------------------------------------------------------------------------
@ -164,9 +248,11 @@ function _show_usage
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with formatted stanzas:
CONFIG : $3 with parameters:
exclude_counters=<ubc_name>,<ubc_name>,...
with formatted stanzas:
ct:<ct_id>
PURPOSE : Checks whether UBC (User Bean Counters) for an OpenVZ containers have
PURPOSE : Checks whether UBC (User Bean Counters) for OpenVZ containers have
increased (failures)
LOG HEALTHY : Supported