* Improved discovery routine & added log_healthy in check_hpux_ignite_backup()

* Improved config file comments
This commit is contained in:
Patrick Van der Veken 2019-01-31 13:39:06 +01:00
parent 3fdb464eb2
commit ada5fbb1e2
25 changed files with 129 additions and 64 deletions

View File

@ -12,7 +12,7 @@ state_file=discovered.file_change
# specify if file names need to be checked/flagged for meta characters [yes|no]
# [default: yes]
do_meta_check=yes
do_meta_check="yes"
# specify files or directories to check (directories are automatically expanded)
# format:
@ -20,9 +20,10 @@ do_meta_check=yes
# excl:<full path> (excludes a path)
# note: shell wild cards in the file paths are not supported! Exclusions have
# a higher priority than inclusions.
incl:file1
incl:dir1
excl:dir1/file2
# Examples:
#incl:file1
#incl:dir1
#excl:dir1/file2
#******************************************************************************

View File

@ -8,7 +8,7 @@
# check clone age? (yes|no)
# [default: yes]
check_clone=yes
check_clone="yes"
# maximum days before the drd clone gets flagged as 'too old'
# [default: 30]
@ -16,7 +16,7 @@ clone_age=30
# check sync age? (yes|no)
# [default: yes]
check_sync=yes
check_sync="yes"
# maximum days before the drd sync gets flagged as 'too old'
# [default: 30]

View File

@ -12,7 +12,7 @@ state_file=discovered.file_change
# specify if file names need to be checked/flagged for meta characters [yes|no]
# [default: yes]
do_meta_check=yes
do_meta_check="yes"
# specify files or directories to check (directories are automatically expanded)
# format:
@ -20,9 +20,10 @@ do_meta_check=yes
# excl:<full path> (excludes a path)
# note: shell wild cards in the file paths are not supported! Exclusions have
# a higher priority than inclusions.
incl:file1
incl:dir1
excl:dir1/file2
# Examples:
#incl:file1
#incl:dir1
#excl:dir1/file2
#******************************************************************************

View File

@ -8,7 +8,7 @@
# specify whether non-mounted FS should be alerted on (yes|no)
# [default: yes]
ignore_missing_fs=yes
ignore_missing_fs="yes"
# specify mount points and their options (use the mount options as displayed
# by the 'mount' command; order of the options is not important)

View File

@ -9,15 +9,15 @@
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy=yes
log_healthy="yes"
# specify whether to perform inode usage check
# [default: yes]
check_inodes_usage=yes
check_inodes_usage="yes"
# specify whether to perform space usage check
# [default: yes]
check_space_usage=yes
check_space_usage="yes"
# specify the maximum % of inodes usage a filesystem may reach (general threshold)
# [default: 90]
@ -31,6 +31,7 @@ max_space_usage=90
# for a given filesystem, the general threshold will be used (see above). When
# defining a threshold of 0 (zero), the check for the given filesystem
# will be skipped (this allows for exclusion of filesystems)
# Format:
# [fs:<mount_point>:<max_inode_usage_%>:<max_space_usage_%>]
# Examples:
# check myfs1 with a custom inodes + space usage threshold

View File

@ -10,6 +10,7 @@
# [parid;runtime_status;boot_status]
# runtime_status should be 'on' or 'off'
# boot_status should be 'auto' or 'manual'
# Examples:
#1;on;auto

View File

@ -6,6 +6,11 @@
# [default: indicates hardcoded script values if no value is defined here]
#******************************************************************************
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy="yes"
# maximum days before backup gets flagged as 'too old'
# [default: 14]
backup_age=14

View File

@ -12,11 +12,11 @@ ioscan_classes="ctl,diag,disk,ext_bus,fc,fcp,i2o,ipmi,lan,lvm,olar,vxvm"
# specify whether ioscan needs to be run in kernel-cached mode or not (yes|no)
# [default: yes]
kernel_mode=yes
kernel_mode="yes"
# specify whether ioscan needs to report in Agile view or not (yes|no)
# [default: yes]
agile_view=yes
agile_view="yes"
#******************************************************************************

View File

@ -8,8 +8,9 @@
# specify kernel parameters and their values or expression (as reported by kctune)
# [param:<param_name>:<param_value>]
param:nproc:5000
param:filecache_max:50%
# Examples:
#param:nproc:5000
#param:filecache_max:50%
#******************************************************************************

View File

@ -13,7 +13,8 @@ max_kcusage=90
# specify individual kernel parameters and their maximum usage threshold (percentage)
# definitions here will override max_kcusage for these parameters
# [param:<param_name>:<param_threshold>]
param:nproc:70
# Examples:
# param:nproc:70
# specify parameters that should be excluded from the check
# comma-separated list (as shown by kctune/kcusage)

View File

@ -9,7 +9,7 @@
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy=yes
log_healthy="yes"
# maximum allowed offset (in milliseconds (positive integers only))
# [default: 500]

View File

@ -9,7 +9,7 @@
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy=yes
log_healthy="yes"
# list of OVPA daemons to check (comma-separated list)
# [default: oacore,midaemon,perfalarm,ttd,ovcd,ovbbccb,perfd (v12)]

View File

@ -9,7 +9,7 @@
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy=yes
log_healthy="yes"
# OE version that should be checked (version string as displayed by 'swlist')
# [default: none]
@ -21,7 +21,7 @@ required_patches=""
# check that all installed filesets are configured?
# [default: yes]
check_filesets=yes
check_filesets="yes"
# list of filesets to exclude from the check (comma-separated list)
# [default: none]

View File

@ -9,11 +9,11 @@
# specify whether to also log passed health checks (yes|no)
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy=yes
log_healthy="yes"
# specify whether to check for new events? (yes|no)
# [default: yes]
check_events=yes
check_events="yes"
# specify the age interval for which to check events
# (format: X:mm|X:hh|X:dd|X:yy, where X stands for an integer and
@ -32,7 +32,7 @@ events_severity="CRITICAL"
# specify whether to send a test event? (yes|no)
# [default: no]
send_test_event=yes
send_test_event="yes"
# specify wait time for test event (seconds)
# [default: 60]

View File

@ -7,6 +7,7 @@
#******************************************************************************
# list of required cluster statuses (format : "parameter=value")
# Examples:
#status=up

View File

@ -7,6 +7,7 @@
#******************************************************************************
# list of required package statuses (format : "package_name:parameter=value")
# Examples:
#my_package|status=up

View File

@ -7,9 +7,10 @@
#******************************************************************************
# client name; maximum amount of warnings allowed; maximum backup age (hours/days/weeks)
testclient1;0;10h
testclient2;0;2d
testclient3;0;5w
# Examples:
#testclient1;0;10h
#testclient2;0;2d
#testclient3;0;5w
#******************************************************************************

View File

@ -12,7 +12,7 @@ state_file=discovered.file_change
# specify if file names need to be checked/flagged for meta characters [yes|no]
# [default: yes]
do_meta_check=yes
do_meta_check="yes"
# specify files or directories to check (directories are automatically expanded)
# format:
@ -20,9 +20,10 @@ do_meta_check=yes
# excl:<full path> (excludes a path)
# note: shell wild cards in the file paths are not supported! Exclusions have
# a higher priority than inclusions.
incl:file1
incl:dir1
excl:dir1/file2
# Examples:
#incl:file1
#incl:dir1
#excl:dir1/file2
#******************************************************************************

View File

@ -9,15 +9,15 @@
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy=yes
log_healthy="yes"
# specify whether to perform inode usage check
# [default: yes]
check_inodes_usage=yes
check_inodes_usage="yes"
# specify whether to perform space usage check
# [default: yes]
check_space_usage=yes
check_space_usage="yes"
# specify the maximum % of inodes usage a filesystem may reach (general threshold)
# [default: 90]
@ -31,6 +31,7 @@ max_space_usage=90
# for a given filesystem, the general threshold will be used (see above). When
# defining a threshold of 0 (zero), the check for the given filesystem
# will be skipped (this allows for exclusion of filesystems)
# Format:
# [fs:<mount_point>:<max_inode_usage_%>:<max_space_usage_%>]
# Examples:
# check myfs1 with a custom inodes + space usage threshold

View File

@ -9,15 +9,15 @@
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy=yes
log_healthy="yes"
# whether to force the use of chronyd?
# [default: no]
force_chrony=no
force_chrony="no"
# whether to force the use of ntpd?
# [default: no]
force_ntp=no
force_ntp="no"
# maximum allowed offset (in milliseconds (positive integers only))
# [default: 500]
@ -25,7 +25,7 @@ max_offset=500
# use IPv4 for ntpq
# [default: no]
ntpq_use_ipv4=no
ntpq_use_ipv4="no"
#******************************************************************************

View File

@ -17,7 +17,8 @@ log_healthy="no"
# user;<user name>;<limit name>;<soft threshold in %>;<hard threshold in %>
# note: <limit name> must exactly match the descriptor in /proc/<PID>/limits
# thresholds are optional (will not be checked when not specified)
user;postfix;Max open files;;75
# Examples:
#user;postfix;Max open files;;75
# -- process stanzas --
# supported limit checks: Max open files
@ -25,7 +26,8 @@ user;postfix;Max open files;;75
# process;<process name>;<limit name>;<soft threshold in %>;<hard threshold in %>
# note: <limit name> must exactly match the descriptor in /proc/<PID>/limits
# thresholds are optional (will not be checked when not specified)
process;nfsd;Max open files;100;80
# Examples:
#process;nfsd;Max open files;100;80
#******************************************************************************

View File

@ -7,6 +7,7 @@
#******************************************************************************
# list of required cluster statuses (format : "parameter=value")
# Examples:
#status=up

View File

@ -7,6 +7,7 @@
#******************************************************************************
# list of required package statuses (format : "package_name:parameter=value")
# Examples:
#my_package|status=up

View File

@ -10,6 +10,7 @@
# [ctid;runtime_status;boot_status]
# runtime_status should be 'running' or 'stopped'
# boot_status should be 'yes' or 'no'
# Examples:
#100;running;yes

View File

@ -28,6 +28,7 @@
# @(#) 2016-06-03: small fix [Patrick Van der Veken]
# @(#) 2018-10-28: fixed (linter) errors [Patrick Van der Veken]
# @(#) 2019-01-24: arguments fix [Patrick Van der Veken]
# @(#) 2019-01-31: Improve discovery routine + add log_healthy [Patrick Van der Veken]
# -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#******************************************************************************
@ -39,7 +40,9 @@ function check_hpux_ignite_backup
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
# backup DONE identifier
typeset _IGNITE_NEEDLE="^DONE"
typeset _VERSION="2019-01-24" # YYYY-MM-DD
typeset _IGNITE_SERVER_FILE="/var/opt/ignite/server/ignite.defs"
typeset _IGNITE_CLIENTS_DIR="/var/opt/ignite/clients"
typeset _VERSION="2019-01-31" # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="HP-UX" # uname -s match
# ------------------------- CONFIGURATION ends here ---------------------------
@ -50,6 +53,8 @@ typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _BACKUP_AGE=0
typeset _EXCLUDE_HOSTS=""
typeset _IGNITE_HOST=""
@ -89,24 +94,55 @@ esac
log "backup age to check: ${_BACKUP_AGE} days"
_EXCLUDE_HOSTS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'exclude_hosts')
[[ -n "${_EXCLUDE_HOSTS}" ]] && log "excluding hosts: $(print ${_EXCLUDE_HOSTS})"
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
yes|YES|Yes)
_LOG_HEALTHY=1
;;
*)
# do not override hc_arg
(( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
;;
esac
# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
if (( ARG_LOG > 0 ))
then
log "logging/showing passed health checks"
else
log "showing passed health checks (but not logging)"
fi
else
log "not logging/showing passed health checks"
fi
# check if this host is an Ignite-UX server
if [[ ! -f ${_IGNITE_SERVER_FILE} ]]
then
warn "host is not an Ignite-UX server"
return 1
fi
# perform check on Ignite 'client_status' files
if [[ -d /var/opt/ignite/clients ]]
if [[ -d ${_IGNITE_CLIENTS_DIR} ]]
then
_OLD_PWD="$(pwd)"
# shellcheck disable=SC2164
cd /var/opt/ignite/clients
cd ${_IGNITE_CLIENTS_DIR}
# shellcheck disable=SC2181
if (( $? > 0 ))
then
_MSG="unable to run command: cd /var/opt/ignite/clients"
log_hc "$0" 1 "${_MSG}"
warn "unable to run command: {cd ${_IGNITE_CLIENTS_DIR}}"
# dump debug info
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
return 1
fi
# check backup states
find * -prune -type l | while read _IGNITE_HOST
find -- * -prune -type l 2>/dev/null | while read -r _IGNITE_HOST
do
# check exclude
[[ "${_EXCLUDE_HOSTS#*${_IGNITE_HOST}}" != "${_EXCLUDE_HOSTS}" ]] && continue
@ -146,12 +182,15 @@ then
fi
# handle unit result
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
then
log_hc "$0" ${_STC} "${_MSG}"
_STC=0
fi
done
# check backup ages
find * -prune -type l | while read _IGNITE_HOST
find -- * -prune -type l 2>/dev/null | while read -r _IGNITE_HOST
do
# check exclude
[[ "${_EXCLUDE_HOSTS#*${_IGNITE_HOST}}" != "${_EXCLUDE_HOSTS}" ]] && continue
@ -175,23 +214,27 @@ then
fi
# handle unit result
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
then
log_hc "$0" ${_STC} "${_MSG}"
_STC=0
fi
done
# shellcheck disable=SC2164
cd "${_OLD_PWD}"
# shellcheck disable=SC2181
if (( $? > 0 ))
then
_MSG="unable to run command: cd /var/opt/ignite/clients"
_MSG="unable to run command: {cd ${_IGNITE_CLIENTS_DIR}}"
log_hc "$0" 1 "${_MSG}"
# dump debug info
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
return 1
fi
else
_MSG="Host is not an Ignite/UX server"
log_hc "$0" ${_STC} "${_MSG}"
warn "could not access/find the Ignite-UX's clients directory at ${_IGNITE_CLIENTS_DIR}"
return 1
fi
return 0
@ -208,6 +251,7 @@ CONFIG : $3 with:
PURPOSE : Checks the state and age of saved Ignite-UX client backups (should only be
run only on the Ignite-UX server). Backups with warnings are considered
to OK. Backups older than \$backup_age will not pass the health check.
LOG HEALTHY : Supported
EOT