From 982a237d77c6796bb0f3c98ac172268ee597dce0 Mon Sep 17 00:00:00 2001 From: Patrick Van der Veken Date: Thu, 30 Jan 2020 11:43:36 +0100 Subject: [PATCH 1/3] Added possibility to do checks based on weekday in check_exadata_zfs_share_replication plugin Added data_has_newline() --- ...ck_exadata_zfs_share_replication.conf.dist | 23 ++-- .../check_exadata_zfs_share_usage.conf.dist | 1 + opt/hc/lib/core/include_data.sh | 26 +++- .../check_exadata_zfs_share_replication.sh | 116 +++++++++++------- .../exadata/check_exadata_zfs_share_usage.sh | 16 ++- 5 files changed, 116 insertions(+), 66 deletions(-) diff --git a/etc/opt/hc/check_exadata_zfs_share_replication.conf.dist b/etc/opt/hc/check_exadata_zfs_share_replication.conf.dist index 5c433e4..35142ce 100644 --- a/etc/opt/hc/check_exadata_zfs_share_replication.conf.dist +++ b/etc/opt/hc/check_exadata_zfs_share_replication.conf.dist @@ -33,21 +33,22 @@ max_replication_lag=300 # will for this given share will be skipped (this allows for exclusion of shares) # In order to check share(s) for a given ZFS appliance at least one configuration # entry must be present: either a wildcard or custom entry. +# Caveat: any share must finally resolve to one entry only. 
# Format: -# zfs:::::[] +# zfs:::::[]:[day1,day2,..|*>] # Examples: -# check rep_share1 on myzfs1 with a custom threshold of 300 seconds -# zfs:myzfs1:rep_share1:*:*:600 -# check all shares of myzfs2 with a custom threshold of 1200 seconds -# zfs:myzfs2:*:*:*:1200 -# check all shares of myzfs3 with the general threshold -# zfs:myzfs3:*:*:*: +# check rep_share1 on myzfs1 with a custom threshold of 300 seconds on every day of the week +# zfs:myzfs1:rep_share1:*:*:600:* +# check all shares of myzfs2 with a custom threshold of 1200 seconds on Sunday and Monday +# zfs:myzfs2:*:*:*:1200:Sun,Mon +# check all shares of myzfs3 with the general threshold but only on Friday +# zfs:myzfs3:*:*:*:Fri # disable all shares of myzfs4 from checking -# zfs:myzfs4:*:*:*:0 +# zfs:myzfs4:*:*:*:0:* # disable check of rep_share7 on myzfs5 -# zfs:myzfs5:rep_share7:*:*:0 -# check that rep_share4 on myzfs6 is inactive -# zfs:myzfs6:rep_share4:false:*: +# zfs:myzfs5:rep_share7:*:*:0:* +# check that rep_share4 on myzfs6 is inactive (every day of the week) +# zfs:myzfs6:rep_share4:false:*::* #****************************************************************************** diff --git a/etc/opt/hc/check_exadata_zfs_share_usage.conf.dist b/etc/opt/hc/check_exadata_zfs_share_usage.conf.dist index d18c029..e8c749c 100644 --- a/etc/opt/hc/check_exadata_zfs_share_usage.conf.dist +++ b/etc/opt/hc/check_exadata_zfs_share_usage.conf.dist @@ -33,6 +33,7 @@ max_space_usage=90 # will for this given share will be skipped (this allows for exclusion of shares) # In order to check share(s) for a given ZFS appliance at least one configuration # entry must be present: either a wildcard or custom entry. +# Caveat: any share must finally resolve to one entry only. 
# Format: # zfs::::[] # Examples: diff --git a/opt/hc/lib/core/include_data.sh b/opt/hc/lib/core/include_data.sh index df75bc5..d0559b0 100755 --- a/opt/hc/lib/core/include_data.sh +++ b/opt/hc/lib/core/include_data.sh @@ -30,7 +30,7 @@ # RETURNS: 0 function version_include_data { -typeset _VERSION="2019-07-14" # YYYY-MM-DD +typeset _VERSION="2020-01-27" # YYYY-MM-DD print "INFO: $0: ${_VERSION#version_*}" @@ -196,10 +196,30 @@ done return 0 } +# ----------------------------------------------------------------------------- +# @(#) FUNCTION: data_has_newline() +# DOES: checks if a string contains newlines +# EXPECTS: $1=haystack [string] +# OUTPUTS: n/a +# RETURNS: 0=no newline found; 1=newlines found +# REQUIRES: n/a +function data_has_newline +{ +(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}" + +typeset _HAYSTACK="${1}" + +typeset _COUNT=$(print -R "${_HAYSTACK}" | wc -l 2>/dev/null) + +(( _COUNT > 1 )) && return 1 + +return 0 +} + # ----------------------------------------------------------------------------- # @(#) FUNCTION: data_magic_quote() # DOES: magically quotes a needle in a string (default needle is: %) -# EXPECTS: $1=to be magically quoted [string]; $2=needle [string] +# EXPECTS: to be magically quoted [string]; $2=needle [string] # OUTPUTS: magically quoted [string] # RETURNS: n/a # REQUIRES: n/a @@ -217,7 +237,7 @@ return 0 # ----------------------------------------------------------------------------- # @(#) FUNCTION: data_magic_unquote() # DOES: magically unquotes a needle in a string (default needle is: %) -# EXPECTS: $1=to be magically unquoted [string]; $2=needle [string] +# EXPECTS: to be magically unquoted [string]; $2=needle [string] # OUTPUTS: magically unquoted [string] # RETURNS: n/a # REQUIRES: n/a diff --git a/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh b/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh index 8d72eca..226c683 100755 --- 
a/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh +++ b/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh @@ -20,7 +20,8 @@ # DOES: see _show_usage() # EXPECTS: see _show_usage() # REQUIRES: data_comma2space(), data_contains_string(), data_get_lvalue_from_config(), -# dump_logs(), init_hc(), linux_exec_ssh(), log_hc(), warn() +# data_has_newline(), dump_logs(), init_hc(), linux_exec_ssh(), +# log_hc(), warn() # # @(#) HISTORY: # @(#) 2019-02-18: initial version [Patrick Van der Veken] @@ -28,7 +29,8 @@ # @(#) 2019-03-16: replace 'which' [Patrick Van der Veken] # @(#) 2019-04-12: small fixes [Patrick Van der Veken] # @(#) 2019-05-14: small fixes [Patrick Van der Veken] -# @(#) 2019-07-05: help fix [Patrick Van der Veken] +# @(#) 2020-01-27: addition of day check option + +# @(#) newline config value check [Patrick Van der Veken] # ----------------------------------------------------------------------------- # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! 
#****************************************************************************** @@ -38,7 +40,7 @@ function check_exadata_zfs_share_replication { # ------------------------- CONFIGURATION starts here ------------------------- typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" -typeset _VERSION="2019-07-05" # YYYY-MM-DD +typeset _VERSION="2020-01-27" # YYYY-MM-DD typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match # replication query script -- DO NOT CHANGE -- # prj1/share1:true:idle:success:111 @@ -75,12 +77,14 @@ typeset _CFG_SSH_USER="" typeset _CFG_ZFS_HOSTS="" typeset _CFG_ZFS_HOST="" typeset _CFG_ZFS_LINE="" +typeset _CFG_REPLICATION_DAYS="" typeset _REPLICATION_ENABLED="" typeset _REPLICATION_LAG="" typeset _REPLICATION_RESULT="" typeset _SSH_BIN="" typeset _SSH_OUTPUT="" typeset _ZFS_DATA="" +typeset _WEEKDAY=$(data_lc "$(date '+%a' 2>/dev/null)") # Sun # handle arguments (originally comma-separated) for _ARG in ${_ARGS} @@ -210,6 +214,7 @@ do _CFG_REPLICATION_ENABLED="" _CFG_REPLICATION_RESULT="" _CFG_REPLICATION_LAG="" + _CFG_REPLICATION_DAYS="" # which values to use (general or custom?), keep in mind wildcards (custom will overrule wildcard entry) _CFG_ZFS_LINE=$(grep -E -e "^zfs:${_ZFS_HOST}:[*]:" ${_CONFIG_FILE} 2>/dev/null) @@ -219,6 +224,7 @@ do _CFG_REPLICATION_ENABLED=$(print "${_CFG_ZFS_LINE}" | cut -f4 -d':' 2>/dev/null) _CFG_REPLICATION_RESULT=$(print "${_CFG_ZFS_LINE}" | cut -f5 -d':' 2>/dev/null) _CFG_REPLICATION_LAG=$(print "${_CFG_ZFS_LINE}" | cut -f6 -d':' 2>/dev/null) + _CFG_REPLICATION_DAYS=$(print "${_CFG_ZFS_LINE}" | cut -f7 -d':' 2>/dev/null) # null value means general threshold if [[ -z "${_CFG_REPLICATION_LAG}" ]] then @@ -229,10 +235,18 @@ do _CFG_ZFS_LINE=$(grep -E -e "^zfs:${_ZFS_HOST}:${_REPLICATION_NAME}:" ${_CONFIG_FILE} 2>/dev/null) if [[ -n "${_CFG_ZFS_LINE}" ]] then + data_has_newline "${_CFG_ZFS_LINE}" + # shellcheck disable=SC2181 + if (( $? 
> 0 )) + then + warn "ignoring ${_ZFS_HOST}:${_REPLICATION_NAME} because it parses to multiple results in ${_CONFIG_FILE}" + continue + fi (( ARG_DEBUG > 0 )) && debug "found custom definition for ${_ZFS_HOST}:${_REPLICATION_NAME} in configuration file ${_CONFIG_FILE}" _CFG_REPLICATION_ENABLED=$(print "${_CFG_ZFS_LINE}" | cut -f4 -d':' 2>/dev/null) _CFG_REPLICATION_RESULT=$(print "${_CFG_ZFS_LINE}" | cut -f5 -d':' 2>/dev/null) _CFG_REPLICATION_LAG=$(print "${_CFG_ZFS_LINE}" | cut -f6 -d':' 2>/dev/null) + _CFG_REPLICATION_DAYS=$(print "${_CFG_ZFS_LINE}" | cut -f7 -d':' 2>/dev/null) # null value means general threshold if [[ -z "${_CFG_REPLICATION_LAG}" ]] then @@ -262,56 +276,65 @@ do # fixed defaults if missing [[ -z "${_CFG_REPLICATION_ENABLED}" || "${_CFG_REPLICATION_ENABLED}" = '*' ]] && _CFG_REPLICATION_ENABLED="true" [[ -z "${_CFG_REPLICATION_RESULT}" || "${_CFG_REPLICATION_RESULT}" = '*' ]] && _CFG_REPLICATION_RESULT="success" + _CFG_REPLICATION_DAYS=$(data_lc "${_CFG_REPLICATION_DAYS}") + [[ -z "${_CFG_REPLICATION_DAYS}" || "${_CFG_REPLICATION_DAYS}" = '*' ]] && _CFG_REPLICATION_DAYS="${_WEEKDAY}" # perform checks - # check replication enabled state (active or not?) - if [[ $(data_lc "${_REPLICATION_ENABLED}") != $(data_lc "${_CFG_REPLICATION_ENABLED}") ]] - then - _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is NOK [${_REPLICATION_ENABLED}!=${_CFG_REPLICATION_ENABLED}]" - _STC=1 - else - _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_ENABLED}==${_CFG_REPLICATION_ENABLED}]" - _STC=0 - fi - if (( _LOG_HEALTHY > 0 || _STC > 0 )) - then - log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_ENABLED}" "${_CFG_REPLICATION_ENABLED}" - fi - # check replication last result (success or not?) 
- if [[ $(data_lc "${_REPLICATION_RESULT}") != $(data_lc "${_CFG_REPLICATION_RESULT}") ]] - then - _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is NOK [${_REPLICATION_RESULT}!=${_CFG_REPLICATION_RESULT}]" - _STC=1 - else - _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_RESULT}==${_CFG_REPLICATION_RESULT}]" - _STC=0 - fi - if (( _LOG_HEALTHY > 0 || _STC > 0 )) - then - log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_RESULT}" "${_CFG_REPLICATION_RESULT}" - fi - # check replication lag - # caveat: replication lag is at initial replication - data_contains_string "${_REPLICATION_LAG}" "unknown" - # shellcheck disable=SC2181 + # do we need to perform the check today? + data_contains_string "${_CFG_REPLICATION_DAYS}" "${_WEEKDAY}" if (( $? > 0 )) then - _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is unknown" - _REPLICATION_LAG=-1 - _STC=1 - else - if (( _REPLICATION_LAG > _CFG_REPLICATION_LAG )) + # check replication enabled state (active or not?) + if [[ $(data_lc "${_REPLICATION_ENABLED}") != $(data_lc "${_CFG_REPLICATION_ENABLED}") ]] then - _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is too big [${_REPLICATION_LAG}>${_CFG_REPLICATION_LAG}]" + _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is NOK [${_REPLICATION_ENABLED}!=${_CFG_REPLICATION_ENABLED}]" _STC=1 else - _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_LAG}<=${_CFG_REPLICATION_LAG}]" + _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_ENABLED}==${_CFG_REPLICATION_ENABLED}]" _STC=0 fi - fi - if (( _LOG_HEALTHY > 0 || _STC > 0 )) - then - log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_LAG}" "${_CFG_REPLICATION_LAG}" + if (( _LOG_HEALTHY > 0 || _STC > 0 )) + then + log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_ENABLED}" "${_CFG_REPLICATION_ENABLED}" + fi + # check replication last result (success or not?) 
+ if [[ $(data_lc "${_REPLICATION_RESULT}") != $(data_lc "${_CFG_REPLICATION_RESULT}") ]] + then + _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is NOK [${_REPLICATION_RESULT}!=${_CFG_REPLICATION_RESULT}]" + _STC=1 + else + _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_RESULT}==${_CFG_REPLICATION_RESULT}]" + _STC=0 + fi + if (( _LOG_HEALTHY > 0 || _STC > 0 )) + then + log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_RESULT}" "${_CFG_REPLICATION_RESULT}" + fi + # check replication lag + # caveat: replication lag is at initial replication + data_contains_string "${_REPLICATION_LAG}" "unknown" + # shellcheck disable=SC2181 + if (( $? > 0 )) + then + _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is unknown" + _REPLICATION_LAG=-1 + _STC=1 + else + if (( _REPLICATION_LAG > _CFG_REPLICATION_LAG )) + then + _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is too big [${_REPLICATION_LAG}>${_CFG_REPLICATION_LAG}]" + _STC=1 + else + _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_LAG}<=${_CFG_REPLICATION_LAG}]" + _STC=0 + fi + fi + if (( _LOG_HEALTHY > 0 || _STC > 0 )) + then + log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_LAG}" "${_CFG_REPLICATION_LAG}" + fi + else + warn "check of ${_ZFS_HOST}:${_REPLICATION_NAME} was not configured for today" fi done @@ -328,11 +351,10 @@ CONFIG : $3 with parameters: log_healthy= ssh_user= ssh_key_file= - ssh_opts= max_replication_lag= and formatted stanzas of: - zfs::::: -PURPOSE : Checks the replication state, sync status and maximum lag of the configured ZFS hosts/shares + zfs:::::: +PURPOSE : Checks the replication state, sync status and maximum lag of the configured ZFS hosts/shares on certain days CLI: zfs > shares > replications > packages > select (action) > show LOG HEALTHY : Supported diff --git a/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh b/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh index 9f1bd9b..0c24555 100755 --- 
a/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh +++ b/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh @@ -19,8 +19,8 @@ # @(#) MAIN: check_exadata_zfs_share_usage # DOES: see _show_usage() # EXPECTS: see _show_usage() -# REQUIRES: data_comma2space(), data_get_lvalue_from_config, dump_logs(), -# init_hc(), linux_exec_ssh(), log_hc(), warn() +# REQUIRES: data_comma2space(), data_get_lvalue_from_config, data_has_newline(), +# dump_logs(), init_hc(), linux_exec_ssh(), log_hc(), warn() # # @(#) HISTORY: # @(#) 2019-02-18: initial version [Patrick Van der Veken] @@ -28,7 +28,7 @@ # @(#) 2019-04-09: fix bad math in ZFS script & HC message [Patrick Van der Veken] # @(#) 2019-04-12: small fixes [Patrick Van der Veken] # @(#) 2019-05-14: small fixes [Patrick Van der Veken] -# @(#) 2019-07-05: help fix [Patrick Van der Veken] +# @(#) 2020-01-27: newline config value check [Patrick Van der Veken] # ----------------------------------------------------------------------------- # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! #****************************************************************************** @@ -38,7 +38,7 @@ function check_exadata_zfs_share_usage { # ------------------------- CONFIGURATION starts here ------------------------- typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" -typeset _VERSION="2019-07-05" # YYYY-MM-DD +typeset _VERSION="2020-01-27" # YYYY-MM-DD typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match # usage query script -- DO NOT CHANGE -- # prj1:share1:16 @@ -233,6 +233,13 @@ do _CFG_ZFS_LINE=$(grep -E -e "^zfs:${_ZFS_HOST}:${_PROJECT_NAME}:${_SHARE_NAME}:" ${_CONFIG_FILE} 2>/dev/null) if [[ -n "${_CFG_ZFS_LINE}" ]] then + data_has_newline "${_CFG_ZFS_LINE}" + # shellcheck disable=SC2181 + if (( $? 
> 0 )) + then + warn "ignoring ${_ZFS_HOST}:${_PROJECT_NAME}:${_SHARE_NAME} because it parses to multiple results in ${_CONFIG_FILE}" + continue + fi (( ARG_DEBUG > 0 )) && debug "found custom definition for ${_ZFS_HOST}:${_PROJECT_NAME}/${_SHARE_NAME} in configuration file ${_CONFIG_FILE}" _CFG_SPACE_THRESHOLD=$(print "${_CFG_ZFS_LINE}" | cut -f5 -d':' 2>/dev/null) # null value means general threshold @@ -290,7 +297,6 @@ CONFIG : $3 with parameters: log_healthy= ssh_user= ssh_key_file= - ssh_opts= max_space_usage= and formatted stanzas of: zfs:::: From daf50c64cbc7103f8f84cffe0ebdc0cb131a8844 Mon Sep 17 00:00:00 2001 From: Patrick Van der Veken Date: Wed, 4 Mar 2020 10:15:48 +0100 Subject: [PATCH 2/3] Added possibility to do check based on an hourly schedule --- ...ck_exadata_zfs_share_replication.conf.dist | 10 +- opt/hc/lib/core/include_data.sh | 60 ++++++++- .../check_exadata_zfs_share_replication.sh | 114 ++++++++++-------- 3 files changed, 133 insertions(+), 51 deletions(-) diff --git a/etc/opt/hc/check_exadata_zfs_share_replication.conf.dist b/etc/opt/hc/check_exadata_zfs_share_replication.conf.dist index 35142ce..b1c5fd7 100644 --- a/etc/opt/hc/check_exadata_zfs_share_replication.conf.dist +++ b/etc/opt/hc/check_exadata_zfs_share_replication.conf.dist @@ -35,14 +35,18 @@ max_replication_lag=300 # entry must be present: either a wildcard or custom entry. # Caveat: any share must finally resolve to one entry only. 
# Format: -# zfs:::::[]:[day1,day2,..|*>] +# zfs:::::[]:[day1,day2,..|*>]:[|*>] +# +# : 3 letter day name (case insensitive) +# : 24 hours notation (start and end hours are inclusive) +# # Examples: # check rep_share1 on myzfs1 with a custom threshold of 300 seconds on every day of the week # zfs:myzfs1:rep_share1:*:*:600:* # check all shares of myzfs2 with a custom threshold of 1200 seconds on Sunday and Monday # zfs:myzfs2:*:*:*:1200:Sun,Mon -# check all shares of myzfs3 with the general threshold but only on Friday -# zfs:myzfs3:*:*:*:Fri +# check all shares of myzfs3 with the general threshold but only on Friday between 7am-10am +# zfs:myzfs3:*:*:*::Fri:07-10 # disable all shares of myzfs4 from checking # zfs:myzfs4:*:*:*:0:* # disable check of rep_share7 on myzfs5 diff --git a/opt/hc/lib/core/include_data.sh b/opt/hc/lib/core/include_data.sh index d0559b0..dfa05f9 100755 --- a/opt/hc/lib/core/include_data.sh +++ b/opt/hc/lib/core/include_data.sh @@ -30,7 +30,7 @@ # RETURNS: 0 function version_include_data { -typeset _VERSION="2020-01-27" # YYYY-MM-DD +typeset _VERSION="2020-03-04" # YYYY-MM-DD print "INFO: $0: ${_VERSION#version_*}" @@ -613,6 +613,64 @@ esac return 0 } +# ----------------------------------------------------------------------------- +# @(#) FUNCTION: data_expand_numerical_range() +# DOES: expand numerical range (X-Y) to comma-separated list of numbers +# EXPECTS: [string] +# OUTPUTS: [string] +# RETURNS: 0=no error occurred; <>0=some error occurred +# REQUIRES: n/a +function data_expand_numerical_range +{ +(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}" +typeset _NUM_LIST="" + +case "${1}" in + *-*) + # range operator, expand + # check if there are only 2 operands (fields) + if (( $(print "${1}" | awk -F '-' '{ print NF }' 2>/dev/null) > 2 )) + then + (( ARG_DEBUG > 0 )) && debug "in range $1 found more than one range (-) operator" + return 1 + fi + # check if X < Y + if $(print "${1}" | awk -F '-' '{ if ($1 < $2) { exit 1 }}' 
2>/dev/null) + then + (( ARG_DEBUG > 0 )) && debug "in range $1 operator Y is smaller or equal to operator X" + return 1 + fi + # expand list + _NUM_LIST=$(print "${1}"| awk -F '-' ' + BEGIN { count = 0; } + { + while ($1 + count < $2) { + if (length (NUM_LIST) == 0) { + NUM_LIST = sprintf ("%s", $1 + count); + } else { + NUM_LIST = sprintf ("%s,%s", NUM_LIST, $1 + count); + } + count++; + } + } + END { print NUM_LIST; }') + if [[ -z "${_NUM_LIST}" ]] + then + (( ARG_DEBUG > 0 )) && debug "range conversion returned empty list" + return 1 + else + print "${_NUM_LIST}" + fi + ;; + *) + # no range, return as-is + print "${1}" + ;; +esac + +return 0 +} + # ----------------------------------------------------------------------------- # @(#) FUNCTION: data_encode_url # DOES: encode URL data diff --git a/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh b/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh index 226c683..321fe20 100755 --- a/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh +++ b/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh @@ -19,9 +19,9 @@ # @(#) MAIN: check_exadata_zfs_share_replication # DOES: see _show_usage() # EXPECTS: see _show_usage() -# REQUIRES: data_comma2space(), data_contains_string(), data_get_lvalue_from_config(), -# data_has_newline(), dump_logs(), init_hc(), linux_exec_ssh(), -# log_hc(), warn() +# REQUIRES: data_comma2space(), data_contains_string(), data_expand_numerical_range(), +# data_get_lvalue_from_config(), data_has_newline(), data_is_numeric(), +# dump_logs(), init_hc(), linux_exec_ssh(), log_hc(), warn() # # @(#) HISTORY: # @(#) 2019-02-18: initial version [Patrick Van der Veken] @@ -31,6 +31,7 @@ # @(#) 2019-05-14: small fixes [Patrick Van der Veken] # @(#) 2020-01-27: addition of day check option + # @(#) newline config value check [Patrick Van der Veken] +# @(#) 2020-03-05: addition of hour check option # 
----------------------------------------------------------------------------- # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! #****************************************************************************** @@ -40,7 +41,7 @@ function check_exadata_zfs_share_replication { # ------------------------- CONFIGURATION starts here ------------------------- typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" -typeset _VERSION="2020-01-27" # YYYY-MM-DD +typeset _VERSION="2020-03-04" # YYYY-MM-DD typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match # replication query script -- DO NOT CHANGE -- # prj1/share1:true:idle:success:111 @@ -78,13 +79,16 @@ typeset _CFG_ZFS_HOSTS="" typeset _CFG_ZFS_HOST="" typeset _CFG_ZFS_LINE="" typeset _CFG_REPLICATION_DAYS="" +typeset _CFG_REPLICATION_HOURS="" typeset _REPLICATION_ENABLED="" +typeset _REPLICATION_HOURS="" typeset _REPLICATION_LAG="" typeset _REPLICATION_RESULT="" typeset _SSH_BIN="" typeset _SSH_OUTPUT="" typeset _ZFS_DATA="" typeset _WEEKDAY=$(data_lc "$(date '+%a' 2>/dev/null)") # Sun +typeset _HOUR=$(data_strip_space "$(date '+%k' 2>/dev/null)") # 7,23 etc # handle arguments (originally comma-separated) for _ARG in ${_ARGS} @@ -215,6 +219,7 @@ do _CFG_REPLICATION_RESULT="" _CFG_REPLICATION_LAG="" _CFG_REPLICATION_DAYS="" + _CFG_REPLICATION_HOURS="" # which values to use (general or custom?), keep in mind wildcards (custom will overrule wildcard entry) _CFG_ZFS_LINE=$(grep -E -e "^zfs:${_ZFS_HOST}:[*]:" ${_CONFIG_FILE} 2>/dev/null) @@ -225,6 +230,7 @@ do _CFG_REPLICATION_RESULT=$(print "${_CFG_ZFS_LINE}" | cut -f5 -d':' 2>/dev/null) _CFG_REPLICATION_LAG=$(print "${_CFG_ZFS_LINE}" | cut -f6 -d':' 2>/dev/null) _CFG_REPLICATION_DAYS=$(print "${_CFG_ZFS_LINE}" | cut -f7 -d':' 2>/dev/null) + _CFG_REPLICATION_HOURS=$(print "${_CFG_ZFS_LINE}" | cut -f8 -d':' 2>/dev/null) # null value means general threshold if [[ -z "${_CFG_REPLICATION_LAG}" ]] then @@ -247,6 +253,7 @@ do _CFG_REPLICATION_RESULT=$(print "${_CFG_ZFS_LINE}" | 
cut -f5 -d':' 2>/dev/null) _CFG_REPLICATION_LAG=$(print "${_CFG_ZFS_LINE}" | cut -f6 -d':' 2>/dev/null) _CFG_REPLICATION_DAYS=$(print "${_CFG_ZFS_LINE}" | cut -f7 -d':' 2>/dev/null) + _CFG_REPLICATION_HOURS=$(print "${_CFG_ZFS_LINE}" | cut -f8 -d':' 2>/dev/null) # null value means general threshold if [[ -z "${_CFG_REPLICATION_LAG}" ]] then @@ -278,63 +285,76 @@ do [[ -z "${_CFG_REPLICATION_RESULT}" || "${_CFG_REPLICATION_RESULT}" = '*' ]] && _CFG_REPLICATION_RESULT="success" _CFG_REPLICATION_DAYS=$(data_lc "${_CFG_REPLICATION_DAYS}") [[ -z "${_CFG_REPLICATION_DAYS}" || "${_CFG_REPLICATION_DAYS}" = '*' ]] && _CFG_REPLICATION_DAYS="${_WEEKDAY}" + if [[ -z "${_CFG_REPLICATION_HOURS}" || "${_CFG_REPLICATION_HOURS}" = '*' ]] + then + _REPLICATION_HOURS="${_HOUR}" + else + _REPLICATION_HOURS=$(data_expand_numerical_range "${_CFG_REPLICATION_HOURS}") + fi # perform checks # do we need to perform the check today? data_contains_string "${_CFG_REPLICATION_DAYS}" "${_WEEKDAY}" if (( $? > 0 )) then - # check replication enabled state (active or not?) - if [[ $(data_lc "${_REPLICATION_ENABLED}") != $(data_lc "${_CFG_REPLICATION_ENABLED}") ]] - then - _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is NOK [${_REPLICATION_ENABLED}!=${_CFG_REPLICATION_ENABLED}]" - _STC=1 - else - _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_ENABLED}==${_CFG_REPLICATION_ENABLED}]" - _STC=0 - fi - if (( _LOG_HEALTHY > 0 || _STC > 0 )) - then - log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_ENABLED}" "${_CFG_REPLICATION_ENABLED}" - fi - # check replication last result (success or not?) 
- if [[ $(data_lc "${_REPLICATION_RESULT}") != $(data_lc "${_CFG_REPLICATION_RESULT}") ]] - then - _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is NOK [${_REPLICATION_RESULT}!=${_CFG_REPLICATION_RESULT}]" - _STC=1 - else - _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_RESULT}==${_CFG_REPLICATION_RESULT}]" - _STC=0 - fi - if (( _LOG_HEALTHY > 0 || _STC > 0 )) - then - log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_RESULT}" "${_CFG_REPLICATION_RESULT}" - fi - # check replication lag - # caveat: replication lag is at initial replication - data_contains_string "${_REPLICATION_LAG}" "unknown" - # shellcheck disable=SC2181 + # do we need to perform the check this hour? + data_contains_string "${_REPLICATION_HOURS}" "${_HOUR}" if (( $? > 0 )) then - _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is unknown" - _REPLICATION_LAG=-1 - _STC=1 - else - if (( _REPLICATION_LAG > _CFG_REPLICATION_LAG )) + # check replication enabled state (active or not?) + if [[ $(data_lc "${_REPLICATION_ENABLED}") != $(data_lc "${_CFG_REPLICATION_ENABLED}") ]] then - _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is too big [${_REPLICATION_LAG}>${_CFG_REPLICATION_LAG}]" + _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is NOK [${_REPLICATION_ENABLED}!=${_CFG_REPLICATION_ENABLED}]" _STC=1 else - _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_LAG}<=${_CFG_REPLICATION_LAG}]" + _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_ENABLED}==${_CFG_REPLICATION_ENABLED}]" _STC=0 fi - fi - if (( _LOG_HEALTHY > 0 || _STC > 0 )) - then - log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_LAG}" "${_CFG_REPLICATION_LAG}" + if (( _LOG_HEALTHY > 0 || _STC > 0 )) + then + log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_ENABLED}" "${_CFG_REPLICATION_ENABLED}" + fi + # check replication last result (success or not?) 
+ if [[ $(data_lc "${_REPLICATION_RESULT}") != $(data_lc "${_CFG_REPLICATION_RESULT}") ]] + then + _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is NOK [${_REPLICATION_RESULT}!=${_CFG_REPLICATION_RESULT}]" + _STC=1 + else + _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_RESULT}==${_CFG_REPLICATION_RESULT}]" + _STC=0 + fi + if (( _LOG_HEALTHY > 0 || _STC > 0 )) + then + log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_RESULT}" "${_CFG_REPLICATION_RESULT}" + fi + # check replication lag + # caveat: replication lag is at initial replication + data_contains_string "${_REPLICATION_LAG}" "unknown" + # shellcheck disable=SC2181 + if (( $? > 0 )) + then + _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is unknown" + _REPLICATION_LAG=-1 + _STC=1 + else + if (( _REPLICATION_LAG > _CFG_REPLICATION_LAG )) + then + _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is too big [${_REPLICATION_LAG}>${_CFG_REPLICATION_LAG}]" + _STC=1 + else + _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_LAG}<=${_CFG_REPLICATION_LAG}]" + _STC=0 + fi + fi + if (( _LOG_HEALTHY > 0 || _STC > 0 )) + then + log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_LAG}" "${_CFG_REPLICATION_LAG}" + fi + else + warn "check of ${_ZFS_HOST}:${_REPLICATION_NAME} is not configured for this hour/these hours: ${_REPLICATION_HOURS}" fi else - warn "check of ${_ZFS_HOST}:${_REPLICATION_NAME} was not configured for today" + warn "check of ${_ZFS_HOST}:${_REPLICATION_NAME} is not configured for today" fi done @@ -353,7 +373,7 @@ CONFIG : $3 with parameters: ssh_key_file= max_replication_lag= and formatted stanzas of: - zfs:::::: + zfs:::::::- PURPOSE : Checks the replication state, sync status and maximum lag of the configured ZFS hosts/shares on certain days CLI: zfs > shares > replications > packages > select (action) > show LOG HEALTHY : Supported From 534c935589183bbdfcc0b6321650db6e4c1ee2fb Mon Sep 17 00:00:00 2001 From: Patrick Van der Veken Date: Fri, 6 Mar 2020 10:21:07 
+0100 Subject: [PATCH 3/3] Fix for numerical range conversion --- opt/hc/lib/core/include_data.sh | 22 +++++++++++-------- .../check_exadata_zfs_share_replication.sh | 12 +++++----- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/opt/hc/lib/core/include_data.sh b/opt/hc/lib/core/include_data.sh index dfa05f9..12c7fda 100755 --- a/opt/hc/lib/core/include_data.sh +++ b/opt/hc/lib/core/include_data.sh @@ -30,7 +30,7 @@ # RETURNS: 0 function version_include_data { -typeset _VERSION="2020-03-04" # YYYY-MM-DD +typeset _VERSION="2020-03-06" # YYYY-MM-DD print "INFO: $0: ${_VERSION#version_*}" @@ -237,7 +237,7 @@ return 0 # ----------------------------------------------------------------------------- # @(#) FUNCTION: data_magic_unquote() # DOES: magically unquotes a needle in a string (default needle is: %) -# EXPECTS: to be magically unquoted [string]; $2=needle [string] +# EXPECTS: $1=to be magically unquoted [string]; $2=needle [string] # OUTPUTS: magically unquoted [string] # RETURNS: n/a # REQUIRES: n/a @@ -616,7 +616,8 @@ return 0 # ----------------------------------------------------------------------------- # @(#) FUNCTION: data_expand_numerical_range() # DOES: expand numerical range (X-Y) to comma-separated list of numbers -# EXPECTS: [string] +# EXPECTS: $1=numerical range [string] +# $2=flag for leading zeroes <10 [0=do not add (default),1=add] # OUTPUTS: [string] # RETURNS: 0=no error occurred; <>0=some error occurred # REQUIRES: n/a @@ -624,6 +625,7 @@ function data_expand_numerical_range { (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}" typeset _NUM_LIST="" +typeset _HAS_LEAD_ZERO=${2:-0} case "${1}" in *-*) @@ -641,19 +643,21 @@ case "${1}" in return 1 fi # expand list - _NUM_LIST=$(print "${1}"| awk -F '-' ' + _NUM_LIST=$(print "${1}"| awk -F '-' -v has_lead_zero=${_HAS_LEAD_ZERO} ' BEGIN { count = 0; } { - while ($1 + count < $2) { + while ($1 + count <= $2) { + # add leading zero to sprintf when < 10 + if (has_lead_zero > 
0 && $1 + count < 10) { lead_zero = "0" } else { lead_zero = "" } if (length (NUM_LIST) == 0) { - NUM_LIST = sprintf ("%s", $1 + count); + NUM_LIST = sprintf ("%" lead_zero "2d", $1 + count); } else { - NUM_LIST = sprintf ("%s,%s", NUM_LIST, $1 + count); - } + NUM_LIST = sprintf ("%s,%" lead_zero "2d", NUM_LIST, $1 + count); } count++; } } - END { print NUM_LIST; }') + # remove space from end result and print + END { gsub(/[[:space:]]/, "", NUM_LIST); print NUM_LIST; }') if [[ -z "${_NUM_LIST}" ]] then (( ARG_DEBUG > 0 )) && debug "range conversion returned empty list" diff --git a/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh b/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh index 321fe20..7875c7e 100755 --- a/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh +++ b/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh @@ -31,7 +31,8 @@ # @(#) 2019-05-14: small fixes [Patrick Van der Veken] # @(#) 2020-01-27: addition of day check option + # @(#) newline config value check [Patrick Van der Veken] -# @(#) 2020-03-05: addition of hour check option +# @(#) 2020-03-05: addition of hour check option + fix +# @(#) 2020-03-06: fix for expanding numerical range # ----------------------------------------------------------------------------- # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! 
#****************************************************************************** @@ -41,7 +42,7 @@ function check_exadata_zfs_share_replication { # ------------------------- CONFIGURATION starts here ------------------------- typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" -typeset _VERSION="2020-03-04" # YYYY-MM-DD +typeset _VERSION="2020-03-06" # YYYY-MM-DD typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match # replication query script -- DO NOT CHANGE -- # prj1/share1:true:idle:success:111 @@ -88,7 +89,7 @@ typeset _SSH_BIN="" typeset _SSH_OUTPUT="" typeset _ZFS_DATA="" typeset _WEEKDAY=$(data_lc "$(date '+%a' 2>/dev/null)") # Sun -typeset _HOUR=$(data_strip_space "$(date '+%k' 2>/dev/null)") # 7,23 etc +typeset _HOUR=$(data_strip_space "$(date '+%H' 2>/dev/null)") # 07,23 etc # handle arguments (originally comma-separated) for _ARG in ${_ARGS} @@ -289,7 +290,8 @@ do then _REPLICATION_HOURS="${_HOUR}" else - _REPLICATION_HOURS=$(data_expand_numerical_range "${_CFG_REPLICATION_HOURS}") + # expand range with leading zeroes + _REPLICATION_HOURS=$(data_expand_numerical_range "${_CFG_REPLICATION_HOURS}" 1) fi # perform checks @@ -351,7 +353,7 @@ do log_hc "$0" ${_STC} "${_MSG}" "${_REPLICATION_LAG}" "${_CFG_REPLICATION_LAG}" fi else - warn "check of ${_ZFS_HOST}:${_REPLICATION_NAME} is not configured for this hour/these hours: ${_REPLICATION_HOURS}" + warn "check of ${_ZFS_HOST}:${_REPLICATION_NAME} is only configured for this/these hour(s): ${_REPLICATION_HOURS}" fi else warn "check of ${_ZFS_HOST}:${_REPLICATION_NAME} is not configured for today"