Added archival feature (also included code for this into the reporting functions)
This commit is contained in:
parent
3be76e2d22
commit
1c3d9fd77f
@ -37,7 +37,7 @@
|
||||
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
# define the version (YYYY-MM-DD)
|
||||
typeset -r SCRIPT_VERSION="2017-12-22"
|
||||
typeset -r SCRIPT_VERSION="2017-12-26"
|
||||
# location of parent directory containing KSH functions/HC plugins
|
||||
typeset -r FPATH_PARENT="/opt/hc/lib"
|
||||
# location of custom HC configuration files
|
||||
@ -60,6 +60,8 @@ typeset -r LOCK_DIR="${TMP_DIR}/.${SCRIPT_NAME}.lock"
|
||||
typeset -r HC_MSG_FILE="${TMP_DIR}/.${SCRIPT_NAME}.hc.msg.$$" # plugin messages files
|
||||
typeset -r SEP="|"
|
||||
typeset -r LOG_DIR="/var/opt/hc"
|
||||
typeset -r LOG_FILE="${LOG_DIR}/${SCRIPT_NAME}.log"
|
||||
typeset -r ARCHIVE_DIR="${LOG_DIR}/archive"
|
||||
typeset -r EVENTS_DIR="${LOG_DIR}/events"
|
||||
typeset -r STATE_DIR="${LOG_DIR}/state"
|
||||
typeset -r STATE_PERM_DIR="${STATE_DIR}/persistent"
|
||||
@ -82,6 +84,9 @@ typeset HC_STDOUT_LOG=""
|
||||
typeset HC_STDERR_LOG=""
|
||||
typeset LINUX_DISTRO=""
|
||||
typeset LINUX_RELEASE=""
|
||||
typeset ARCHIVE_RC=0
|
||||
typeset DISABLE_RC=0
|
||||
typeset ENABLE_RC=0
|
||||
typeset RUN_RC=0
|
||||
typeset SORT_CMD=""
|
||||
typeset DEBUG_OPTS=""
|
||||
@ -96,6 +101,7 @@ typeset ARG_DISPLAY="" # display is STDOUT by default
|
||||
typeset ARG_FAIL_ID=""
|
||||
typeset ARG_HC=""
|
||||
typeset ARG_HC_ARGS="" # no extra arguments to HC plug-in by default
|
||||
typeset ARG_HISTORY=0 # include historical events is off by default
|
||||
typeset ARG_LAST=0 # report last events
|
||||
typeset ARG_LIST="" # list all by default
|
||||
typeset ARG_LOCK=1 # lock for concurrent script executions is on by default
|
||||
@ -159,6 +165,18 @@ then
|
||||
print -u2 "ERROR: you must define a value for the EXEC_USER setting in $0"
|
||||
exit 1
|
||||
fi
|
||||
# SCRIPT_VERSION
|
||||
if [[ -z "${SCRIPT_VERSION}" ]]
|
||||
then
|
||||
print -u2 "ERROR: you must define a value for the SCRIPT_VERSION setting in $0"
|
||||
exit 1
|
||||
fi
|
||||
# TMP_DIR
|
||||
if [[ -z "${TMP_DIR}" ]]
|
||||
then
|
||||
print -u2 "ERROR: you must define a value for the TMP_DIR setting in $0"
|
||||
exit 1
|
||||
fi
|
||||
# FPATH_PARENT
|
||||
if [[ -z "${FPATH_PARENT}" ]]
|
||||
then
|
||||
@ -211,6 +229,11 @@ else
|
||||
fi
|
||||
|
||||
# check for core directories
|
||||
[[ -d ${ARCHIVE_DIR} ]] || mkdir -p "${ARCHIVE_DIR}" >/dev/null 2>&1
|
||||
if [[ ! -d "${ARCHIVE_DIR}" ]] || [[ ! -w "${ARCHIVE_DIR}" ]]
|
||||
then
|
||||
print -u2 "ERROR: unable to access the archive directory at ${ARCHIVE_DIR}"
|
||||
fi
|
||||
[[ -d ${EVENTS_DIR} ]] || mkdir -p "${EVENTS_DIR}" >/dev/null 2>&1
|
||||
if [[ ! -d "${EVENTS_DIR}" ]] || [[ ! -w "${EVENTS_DIR}" ]]
|
||||
then
|
||||
@ -297,7 +320,7 @@ then
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
# --check-host,--check/--disable/--enable/--run/--show,--hc
|
||||
# --check-host,--check/--disable/--enable/--run/--show/--archive,--hc
|
||||
if [[ -n "${ARG_HC}" ]] && (( ARG_ACTION == 0 ))
|
||||
then
|
||||
print -u2 "ERROR: you must specify an action for the HC (--check/--disable/--enable/--run/--show)"
|
||||
@ -305,7 +328,7 @@ then
|
||||
fi
|
||||
if (( ARG_CHECK_HOST == 0 ))
|
||||
then
|
||||
if (( ARG_ACTION < 6 )) && [[ -z "${ARG_HC}" ]]
|
||||
if (( ARG_ACTION < 6 || ARG_ACTION == 10 )) && [[ -z "${ARG_HC}" ]]
|
||||
then
|
||||
print -u2 "ERROR: you specify a value for parameter '--hc'"
|
||||
exit 1
|
||||
@ -319,6 +342,15 @@ then
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
if (( ARG_ACTION == 10 )) || [[ -n "${ARG_HC_ARGS}" ]]
|
||||
then
|
||||
case "${ARG_HC}" in
|
||||
*,*)
|
||||
print -u2 "ERROR: you can only specify one value for '--hc' in combination with '--archive'"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
else
|
||||
# host checking has no other messages to display
|
||||
ARG_VERBOSE=0
|
||||
@ -329,8 +361,8 @@ then
|
||||
ARG_VERBOSE=0
|
||||
ARG_LOG=0
|
||||
fi
|
||||
# --log-dir
|
||||
LOG_FILE="${LOG_DIR}/${SCRIPT_NAME}.log"
|
||||
|
||||
# check log location
|
||||
if (( ARG_LOG != 0 ))
|
||||
then
|
||||
if [[ ! -d "${LOG_DIR}" ]] || [[ ! -w "${LOG_DIR}" ]]
|
||||
@ -428,10 +460,10 @@ Execute/report simple health checks (HC) on UNIX hosts.
|
||||
|
||||
Syntax: ${SCRIPT_DIR}/${SCRIPT_NAME} [--help] | [--help-terse] | [--version] |
|
||||
[--list=<needle>] | [--list-core] | [--fix-symlinks] | (--disable-all | enable-all) |
|
||||
(--check-host | ((--check | --enable | --disable | --run | --show) --hc=<list_of_checks> [--config-file=<configuration_file>] [hc-args="<arg1,arg2=val,arg3">]))
|
||||
(--check-host | ((--archive | --check | --enable | --disable | --run | --show) --hc=<list_of_checks> [--config-file=<configuration_file>] [hc-args="<arg1,arg2=val,arg3">]))
|
||||
[--display=<method>] ([--debug] [--debug-level=<level>]) [--no-monitor] [--no-log] [--no-lock]
|
||||
[--notify=<method_list>] [--mail-to=<address_list>] [--sms-to=<sms_rcpt> --sms-provider=<name>]
|
||||
[--report=<method> ( ([--last] | [--today]) | ([--reverse] [--id=<fail_id> [--detail]]) ) ]
|
||||
[--report=<method> ( ([--last] | [--today]) | ([--reverse] [--id=<fail_id> [--detail]] [--with-history]) ) ]
|
||||
|
||||
EOT
|
||||
|
||||
@ -440,6 +472,7 @@ then
|
||||
cat << EOT
|
||||
Parameters:
|
||||
|
||||
--archive : move events from the HC log file into archive log files
|
||||
--check : display HC state.
|
||||
--check-host : execute all configured HC(s) (see check_host.conf)
|
||||
--config-file : custom configuration file for a HC, may only be specified when executing a single HC plugin.
|
||||
@ -477,6 +510,7 @@ Parameters:
|
||||
--sms-to : name of person or group to which a sms alert will be send to [requires SMS core plugin]
|
||||
--today : show today's events (HC and their combined STC value)
|
||||
--version : show the timestamp of the script.
|
||||
--with-history : also include events that have been archived already (reporting)
|
||||
|
||||
EOT
|
||||
fi
|
||||
@ -598,6 +632,9 @@ CMD_LINE="$*"
|
||||
for CMD_PARAMETER in ${CMD_LINE}
|
||||
do
|
||||
case ${CMD_PARAMETER} in
|
||||
-archive|--archive)
|
||||
ARG_ACTION=10
|
||||
;;
|
||||
-check|--check)
|
||||
ARG_ACTION=1
|
||||
;;
|
||||
@ -647,6 +684,15 @@ do
|
||||
-enable-all|--enable-all)
|
||||
ARG_ACTION=7
|
||||
;;
|
||||
-f|-fix-symlinks|--fix-symlinks)
|
||||
read_config
|
||||
check_config
|
||||
build_fpath
|
||||
check_shell
|
||||
check_user
|
||||
fix_symlinks
|
||||
exit 0
|
||||
;;
|
||||
-hc=*)
|
||||
ARG_HC="${CMD_PARAMETER#-hc=}"
|
||||
;;
|
||||
@ -659,14 +705,8 @@ do
|
||||
--hc-args=*)
|
||||
ARG_HC_ARGS="${CMD_PARAMETER#--hc-args=}"
|
||||
;;
|
||||
-f|-fix-symlinks|--fix-symlinks)
|
||||
read_config
|
||||
check_config
|
||||
build_fpath
|
||||
check_shell
|
||||
check_user
|
||||
fix_symlinks
|
||||
exit 0
|
||||
-with-history|--with-history)
|
||||
ARG_HISTORY=1
|
||||
;;
|
||||
-id=*)
|
||||
ARG_FAIL_ID="${CMD_PARAMETER#-id=}"
|
||||
@ -1028,7 +1068,8 @@ case ${ARG_ACTION} in
|
||||
exists_hc "${HC_DISABLE}" && die "cannot find HC: ${HC_DISABLE}"
|
||||
log "disabling HC: ${HC_DISABLE}"
|
||||
touch "${STATE_PERM_DIR}/${HC_DISABLE}.disabled" >/dev/null 2>&1
|
||||
if (( $? == 0 ))
|
||||
DISABLE_RC=$1
|
||||
if (( DISABLE_RC == 0 ))
|
||||
then
|
||||
log "successfully disabled HC: ${HC_DISABLE}"
|
||||
else
|
||||
@ -1046,7 +1087,8 @@ case ${ARG_ACTION} in
|
||||
[[ -d ${STATE_PERM_DIR} ]] || \
|
||||
die "state directory does not exist, all HC(s) are enabled"
|
||||
rm -f "${STATE_PERM_DIR}/${HC_ENABLE}.disabled" >/dev/null 2>&1
|
||||
if (( $? == 0 ))
|
||||
ENABLE_RC=$?
|
||||
if (( ENABLE_RC == 0 ))
|
||||
then
|
||||
log "successfully enabled HC: ${HC_ENABLE}"
|
||||
else
|
||||
@ -1061,6 +1103,24 @@ case ${ARG_ACTION} in
|
||||
9) # list HC plugins
|
||||
list_hc "" "${ARG_LIST}"
|
||||
;;
|
||||
10) # archive log entries
|
||||
exists_hc "${ARG_HC}" && die "cannot find HC: ${ARG_HC}"
|
||||
log "archiving log entries for ${ARG_HC}..."
|
||||
archive_hc "${ARG_HC}"
|
||||
ARCHIVE_RC=$?
|
||||
case ${ARCHIVE_RC} in
|
||||
0)
|
||||
log "no archiving needed for ${ARG_HC}"
|
||||
;;
|
||||
1)
|
||||
log "successfully archived log entries for ${ARG_HC}"
|
||||
;;
|
||||
2)
|
||||
log "failed to archive log entries for ${ARG_HC} [RC=${ARCHIVE_RC}]"
|
||||
EXIT_CODE=1
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
|
||||
# finish up work
|
||||
|
@ -23,6 +23,64 @@
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) FUNCTION: archive_hc()
|
||||
# DOES: archive log entries for a given HC
|
||||
# EXPECTS: HC name [string]
|
||||
# RETURNS: 0=no archiving needed; 1=archiving OK; 2=archiving NOK
|
||||
# REQUIRES: n/a
|
||||
function archive_hc
|
||||
{
|
||||
(( ARG_DEBUG != 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
|
||||
typeset HC_NAME="$1"
|
||||
typeset ARCHIVE_FILE=""
|
||||
typeset YEAR_MONTH=""
|
||||
typeset LOG_COUNT=0
|
||||
typeset ARCHIVE_RC=0
|
||||
typeset SAVE_HC_LOG="${HC_LOG}.$$"
|
||||
typeset TMP_FILE="${TMP_DIR}/.$0.tmp.archive.$$"
|
||||
|
||||
# set local trap for cleanup
|
||||
trap "rm -f ${TMP_FILE} ${SAVE_LOG_FILE} >/dev/null 2>&1; return 1" 1 2 3 15
|
||||
|
||||
# isolate messages from HC, find unique %Y-%m combinations
|
||||
grep ".*${SEP}${HC_NAME}${SEP}" ${HC_LOG} 2>/dev/null |\
|
||||
cut -f1 -d"${SEP}" | cut -f1 -d' ' | cut -f1-2 -d'-' | sort -u |\
|
||||
while read YEAR_MONTH
|
||||
do
|
||||
# find all messages for that YEAR-MONTH combination
|
||||
grep "${YEAR_MONTH}.*${SEP}${HC_NAME}${SEP}" ${HC_LOG} >${TMP_FILE}
|
||||
LOG_COUNT=$(wc -l ${TMP_FILE} | cut -f1 -d' ')
|
||||
log "# of new entries to archive: ${LOG_COUNT}"
|
||||
|
||||
# combine existing archived messages and resort
|
||||
ARCHIVE_FILE="${ARCHIVE_DIR}/hc.${YEAR_MONTH}.log"
|
||||
cat ${ARCHIVE_FILE} ${TMP_FILE} | sort -u >${ARCHIVE_FILE}
|
||||
LOG_COUNT=$(wc -l ${ARCHIVE_FILE} | cut -f1 -d' ')
|
||||
log "# entries in ${ARCHIVE_FILE} now: ${LOG_COUNT}"
|
||||
|
||||
# remove archived messages from the $HC_LOG (but create a backup first!)
|
||||
cp -p ${HC_LOG} ${SAVE_HC_LOG} 2>/dev/null
|
||||
comm -23 ${HC_LOG} ${ARCHIVE_FILE} 2>/dev/null >${TMP_FILE}
|
||||
if [[ -s ${TMP_FILE} ]]
|
||||
then
|
||||
mv ${TMP_FILE} ${HC_LOG} 2>/dev/null
|
||||
LOG_COUNT=$(wc -l ${HC_LOG} | cut -f1 -d' ')
|
||||
log "# entries in ${HC_LOG} now: ${LOG_COUNT}"
|
||||
ARCHIVE_RC=1
|
||||
else
|
||||
warn "a problem occurred. Rolling back archival"
|
||||
mv ${SAVE_HC_LOG} ${HC_LOG} 2>/dev/null
|
||||
ARCHIVE_RC=2
|
||||
fi
|
||||
done
|
||||
|
||||
# clean up temporary file(s)
|
||||
rm -f ${TMP_FILE} ${SAVE_HC_LOG} >/dev/null 2>&1
|
||||
|
||||
return ${ARCHIVE_RC}
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) FUNCTION: debug()
|
||||
# DOES: handle debug messages
|
||||
@ -391,7 +449,7 @@ if (( DO_NOTIFY_SMS != 0 )) && [[ -z "${ARG_SMS_PROVIDER}" ]]
|
||||
then
|
||||
die "you cannot specify '--notify=sms' without '--sms-provider'"
|
||||
fi
|
||||
# --report/--detail/--id/--reverse/--last/--today
|
||||
# --report/--detail/--id/--reverse/--last/--today/--with-history
|
||||
if (( DO_REPORT_STD != 0 ))
|
||||
then
|
||||
if (( ARG_DETAIL != 0 )) && [[ -z "${ARG_FAIL_ID}" ]]
|
||||
@ -426,6 +484,11 @@ then
|
||||
then
|
||||
die "you cannot specify '--today' with '--id'"
|
||||
fi
|
||||
# switch on history for --last & --today
|
||||
if (( ARG_LAST != 0 )) || (( ARG_TODAY != 0 ))
|
||||
then
|
||||
ARG_HISTORY=1
|
||||
fi
|
||||
fi
|
||||
if (( DO_REPORT_STD == 0 )) && (( ARG_LAST != 0 ))
|
||||
then
|
||||
@ -439,6 +502,10 @@ if (( DO_REPORT_STD == 0 )) && (( ARG_DETAIL != 0 ))
|
||||
then
|
||||
die "you cannot specify '--detail' without '--report'"
|
||||
fi
|
||||
if (( DO_REPORT_STD == 0 )) && (( ARG_HISTORY != 0 ))
|
||||
then
|
||||
die "you cannot specify '--with-history' without '--report'"
|
||||
fi
|
||||
if (( DO_REPORT_STD == 0 )) && [[ -n "${ARG_FAIL_ID}" ]]
|
||||
then
|
||||
die "you cannot specify '--id' without '--report'"
|
||||
|
@ -30,7 +30,7 @@
|
||||
function report_std
|
||||
{
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
typeset _VERSION="2017-12-15" # YYYY-MM-DD
|
||||
typeset _VERSION="2017-12-26" # YYYY-MM-DD
|
||||
typeset _SUPPORTED_PLATFORMS="AIX,HP-UX,Linux" # uname -s match
|
||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||
|
||||
@ -52,9 +52,19 @@ typeset _HC_LAST_FAIL_ID="-"
|
||||
typeset _HC_LAST_EVENT_FAIL_ID=0
|
||||
typeset _HC_LAST_EVENT_STC=""
|
||||
typeset _ID_NEEDLE=""
|
||||
typeset _LOG_STASH=""
|
||||
typeset _REPORT_LINE=""
|
||||
typeset _SORT_CMD=""
|
||||
|
||||
# which files do we need to examine
|
||||
if (( ARG_HISTORY != 0 ))
|
||||
then
|
||||
set +f # file globbing must be on
|
||||
_LOG_STASH="${HC_LOG} ${ARCHIVE_DIR}/hc.*.log"
|
||||
else
|
||||
_LOG_STASH="${HC_LOG}"
|
||||
fi
|
||||
|
||||
# --last report
|
||||
if (( ARG_LAST != 0 ))
|
||||
then
|
||||
@ -68,14 +78,14 @@ then
|
||||
_HC_LAST_FAIL_ID="-"
|
||||
# find last event or block of events (same timestamp)
|
||||
# (but unfortunately this is only accurate to events within the SAME second!)
|
||||
_HC_LAST_TIME="$(grep ${_HC_LAST} ${HC_LOG} 2>/dev/null | sort -n | cut -f1 -d${SEP} | uniq | tail -1)"
|
||||
_HC_LAST_TIME="$(grep -h ${_HC_LAST} ${_LOG_STASH} 2>/dev/null | sort -n | cut -f1 -d${SEP} | uniq | tail -1)"
|
||||
if [[ -z "${_HC_LAST_TIME}" ]]
|
||||
then
|
||||
_HC_LAST_TIME="-"
|
||||
_HC_LAST_STC="-"
|
||||
else
|
||||
# find all STC codes for the last event and add them up
|
||||
grep "${_HC_LAST_TIME}${SEP}${HC_LAST}" ${HC_LOG} 2>/dev/null |\
|
||||
grep -h "${_HC_LAST_TIME}${SEP}${HC_LAST}" ${_LOG_STASH} 2>/dev/null |\
|
||||
while read -r _REPORT_LINE
|
||||
do
|
||||
_HC_LAST_EVENT_STC=$(print "${_REPORT_LINE}" | cut -f3 -d"${SEP}")
|
||||
@ -99,7 +109,7 @@ else
|
||||
(( ARG_TODAY != 0 )) && _ID_NEEDLE="$(date '+%Y%m%d')" # refers to timestamp of HC FAIL_ID
|
||||
|
||||
# check fail count (look for unique IDs in the 5th field of the HC log)
|
||||
_FAIL_COUNT=$(cut -f5 -d"${SEP}" ${HC_LOG} 2>/dev/null | grep -E -e "${_ID_NEEDLE}" | uniq | wc -l)
|
||||
_FAIL_COUNT=$(cut -f5 -d"${SEP}" ${_LOG_STASH} 2>/dev/null | grep -E -e "${_ID_NEEDLE}" | uniq | wc -l)
|
||||
if (( _FAIL_COUNT != 0 ))
|
||||
then
|
||||
# check for detail or not?
|
||||
@ -123,7 +133,7 @@ else
|
||||
|
||||
# print failed events
|
||||
# no extended grep here and no end $SEP!
|
||||
grep ".*${SEP}.*${SEP}.*${SEP}.*${SEP}${_ID_NEEDLE}" ${HC_LOG} 2>/dev/null |\
|
||||
grep -h ".*${SEP}.*${SEP}.*${SEP}.*${SEP}${_ID_NEEDLE}" ${_LOG_STASH} 2>/dev/null |\
|
||||
${_SORT_CMD} | while read -r _REPORT_LINE
|
||||
do
|
||||
_FAIL_F1=$(print "${_REPORT_LINE}" | cut -f1 -d"${SEP}")
|
||||
@ -141,7 +151,7 @@ else
|
||||
_EVENT_COUNT=1
|
||||
_DIR_PREFIX="$(expr substr ${ARG_FAIL_ID} 1 4)-$(expr substr ${ARG_FAIL_ID} 5 2)"
|
||||
# no extended grep here!
|
||||
grep ".*${SEP}.*${SEP}.*${SEP}.*${SEP}${_ID_NEEDLE}${SEP}" ${HC_LOG} 2>/dev/null |\
|
||||
grep -h ".*${SEP}.*${SEP}.*${SEP}.*${SEP}${_ID_NEEDLE}${SEP}" ${_LOG_STASH} 2>/dev/null |\
|
||||
${_SORT_CMD} | while read -r _REPORT_LINE
|
||||
do
|
||||
_FAIL_F1=$(print "${_REPORT_LINE}" | cut -f1 -d"${SEP}")
|
||||
|
Loading…
x
Reference in New Issue
Block a user