Compare commits
No commits in common. "master" and "20190424" have entirely different histories.
3
.github/FUNDING.yml
vendored
3
.github/FUNDING.yml
vendored
@ -1,3 +0,0 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
github: patvdv
|
@ -90,11 +90,6 @@ Additionally, there may be bundles for display or notification plugins, e.g.:
|
||||
* hc-display-terse
|
||||
* hc-notify-eif
|
||||
* hc-notify-sms
|
||||
* hc-notify-slack
|
||||
|
||||
## Configure & deploy
|
||||
|
||||
An Ansible role is available at: https://github.com/patvdv/kudos.check_health
|
||||
|
||||
## Execute (examples)
|
||||
|
||||
@ -142,14 +137,13 @@ An Ansible role is available at: https://github.com/patvdv/kudos.check_health
|
||||
* **Alerting** on failed health checks:
|
||||
```
|
||||
/opt/hc/bin/check_health.sh --hc=check_hpux_root_crontab --run --notify=mail --mail-to="alert@acme.com"
|
||||
/opt/hc/bin/check_health.sh --hc=check_linux_fs_usage --run --notify=slack
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
### Documentation
|
||||
|
||||
More documentation can be found at <https://www.kudos.be/check_health/>
|
||||
More documentation can be found at http://www.kudos.be/Projects/Health_checker.html
|
||||
|
||||
### Logo
|
||||
|
||||
|
@ -32,14 +32,12 @@ Fileset
|
||||
/opt/hc/lib/platform/aix/check_aix_subsystems.sh
|
||||
/opt/hc/lib/platform/aix/check_aix_sysbackup.sh
|
||||
/opt/hc/lib/platform/aix/check_aix_topasrec.sh
|
||||
/opt/hc/lib/platform/aix/check_aix_uptime.sh
|
||||
/etc/opt/hc
|
||||
/etc/opt/hc/check_aix_file_age.conf.dist
|
||||
/etc/opt/hc/check_aix_file_change.conf.dist
|
||||
/etc/opt/hc/check_aix_root_crontab.conf.dist
|
||||
/etc/opt/hc/check_aix_subsystems.conf.dist
|
||||
/etc/opt/hc/check_aix_sysbackup.conf.dist
|
||||
/etc/opt/hc/check_aix_uptime.conf.dist
|
||||
EOROOTFiles
|
||||
Relocatable: N
|
||||
EOFileset
|
||||
|
@ -1 +0,0 @@
|
||||
*prereq hc_aix.rte
|
@ -1,31 +0,0 @@
|
||||
Package Name: hc_notify_slack
|
||||
Package VRMF: %BUILD_DATE%
|
||||
Update: N
|
||||
Fileset
|
||||
Fileset Name: hc_notify_slack.rte
|
||||
Fileset VRMF: %BUILD_DATE%
|
||||
Fileset Description: Health Checker - core Slack plugin
|
||||
USRLIBLPPFiles
|
||||
EOUSRLIBLPPFiles
|
||||
ROOTLIBLPPFiles
|
||||
Post-installation Script: /export/nim/build/hc_notify_slack/scripts/hc_notify_slack.postinstall
|
||||
Unpost-installation Script: /export/nim/build/hc_notify_slack/scripts/hc_notify_slack.postuninstall
|
||||
EOROOTLIBLPPFiles
|
||||
Bosboot required: N
|
||||
License agreement acceptance required: N
|
||||
Include license files in this package: N
|
||||
Requisites: /export/nim/build/hc_notify_slack/hc_notify_slack.reqs
|
||||
USRFiles
|
||||
EOUSRFiles
|
||||
ROOT Part: Y
|
||||
ROOTFiles
|
||||
/opt/hc/lib
|
||||
/opt/hc/lib/core
|
||||
/opt/hc/lib/platform/aix/notify_slack.sh
|
||||
/etc/opt/hc
|
||||
/etc/opt/hc/core
|
||||
/etc/opt/hc/core/providers
|
||||
/etc/opt/hc/core/providers/notify_slack.conf.dist
|
||||
EOROOTFiles
|
||||
Relocatable: N
|
||||
EOFileset
|
@ -1,51 +0,0 @@
|
||||
#!/usr/bin/env ksh
#******************************************************************************
# @(#) post-install script for hc_notify_slack LPP package
#******************************************************************************
# @(#) Copyright (C) 2022 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# What this script does:
#   1. creates notify_slack.conf from notify_slack.conf.dist when no
#      notify_slack.conf exists yet (an existing file is left untouched)
#   2. calls the HC main script with --fix-symlinks
#
# Exit status:
#   0  finished (a failing or missing HC main script is only reported)
#   1  the plugin configuration file could not be copied
#******************************************************************************

# ------------------------- CONFIGURATION starts here -------------------------
# location of ETC dir
HC_ETC_DIR="/etc/opt/hc"
# location of check_health.sh
HC_BIN="/opt/hc/bin/check_health.sh"
PATH="$PATH:/usr/bin:/etc:/usr/sbin:/usr/ucb:/usr/bin/X11:/sbin"
# ------------------------- CONFIGURATION ends here ---------------------------

print "INFO: starting post-install script ..."

# copy plugin config file from dist (if needed)
if [[ ! -f "${HC_ETC_DIR}/core/providers/notify_slack.conf" ]]
then
    # -p keeps mode/ownership/timestamps of the .dist file on the new copy
    cp -p "${HC_ETC_DIR}/core/providers/notify_slack.conf.dist" \
        "${HC_ETC_DIR}/core/providers/notify_slack.conf" >/dev/null ||
    {
        print -u2 "ERROR: could not copy plugin config file in ${HC_ETC_DIR}/core/providers"
        exit 1
    }
fi

# refresh symbolic FPATH links
if [[ -x "${HC_BIN}" ]]
then
    # a failure here is reported but does not fail the installation
    "${HC_BIN}" --fix-symlinks || print -u2 "WARN: updating symlinks failed"
else
    print -u2 "ERROR: could not locate or execute the HC main script (${HC_BIN})"
fi

print "INFO: finished post-install script"

exit 0

#******************************************************************************
# END of script
#******************************************************************************
|
@ -1,48 +0,0 @@
|
||||
#!/usr/bin/env ksh
#******************************************************************************
# @(#) post-uninstall script for hc_notify_slack LPP package
#******************************************************************************
# @(#) Copyright (C) 2022 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# What this script does:
#   1. removes notify_slack.conf.dist from the providers configuration
#      directory (notify_slack.conf itself is not touched)
#   2. calls the HC main script with --fix-symlinks
#
# Exit status: always 0; problems are reported on stderr only.
#******************************************************************************

# ------------------------- CONFIGURATION starts here -------------------------
# location of the HC configuration files
HC_ETC_DIR="/etc/opt/hc"
# location of check_health.sh
HC_BIN="/opt/hc/bin/check_health.sh"
PATH="$PATH:/usr/bin:/etc:/usr/sbin:/usr/ucb:/usr/bin/X11:/sbin"
# ------------------------- CONFIGURATION ends here ---------------------------

print "INFO: starting post-uninstall script ..."

# remove plugin configuration file (.dist only)
if [[ -d "${HC_ETC_DIR}/core/providers" ]]
then
    # rm -f succeeds when the file is already gone; the warning goes to stderr
    # like every other diagnostic in this script
    rm -f "${HC_ETC_DIR}/core/providers/notify_slack.conf.dist" >/dev/null ||
        print -u2 "WARN: could not remove plugin config file in directory ${HC_ETC_DIR}/core/providers"
fi

# refresh symbolic FPATH links
if [[ -x "${HC_BIN}" ]]
then
    # a failure here is reported but does not fail the removal
    "${HC_BIN}" --fix-symlinks || print -u2 "WARN: updating symlinks failed"
else
    print -u2 "ERROR: could not locate or execute the HC main script (${HC_BIN})"
fi

print "INFO: finished post-uninstall script"

exit 0

#******************************************************************************
# END of script
#******************************************************************************
|
@ -17,35 +17,14 @@ sha256sums=('SKIP')
|
||||
package() {
|
||||
cd "${srcdir}/${_pkgname}"
|
||||
install -d -m 755 ${pkgdir}/opt/hc/lib
|
||||
install -d -m 755 ${pkgdir}/opt/hc/core
|
||||
install -D -m 755 opt/hc/lib/core/include_exadata.sh ${pkgdir}/opt/hc/lib/core/include_exadata.sh
|
||||
install -d -m 755 ${pkgdir}/opt/hc/lib/platform
|
||||
install -d -m 755 ${pkgdir}/opt/hc/lib/platform/exadata
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_ib_status.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_ib_status.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_megaraid.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_megaraid.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_cluster.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_cluster.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh
|
||||
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh
|
||||
install -d -m 755 ${pkgdir}/etc/opt/hc
|
||||
install -D -m 644 etc/opt/hc/check_exadata_cell_alerts.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_alerts.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_exadata_cell_celldisks.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_celldisks.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_exadata_cell_flash.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_flash.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_exadata_cell_griddisks.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_griddisks.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_exadata_cell_luns.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_luns.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_exadata_cell_megaraid.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_megaraid.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_exadata_megaraid.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_megaraid.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_exadata_zfs_cluster.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_cluster.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_exadata_zfs_logs.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_logs.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_exadata_zfs_services.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_services.conf.dist
|
||||
|
@ -25,20 +25,8 @@ This package contains platform/OS specific plugins.
|
||||
%install
|
||||
rm -rf $RPM_BUILD_ROOT
|
||||
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib
|
||||
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/core
|
||||
cp ../SOURCES/opt/hc/lib/core/include_exadata.sh $RPM_BUILD_ROOT/opt/hc/lib/core
|
||||
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/platform
|
||||
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_ib_status.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_ib_status.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_megaraid.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_megaraid.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_cluster.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_cluster.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh
|
||||
@ -46,15 +34,6 @@ cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh
|
||||
|
||||
install -d -m 755 $RPM_BUILD_ROOT/etc/opt/hc
|
||||
cp ../SOURCES/etc/opt/hc/check_exadata_cell_alerts.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_alerts.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_exadata_cell_celldisks.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_celldisks.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_exadata_cell_flash.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_flash.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_exadata_cell_griddisks.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_griddisks.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_exadata_cell_luns.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_luns.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_exadata_cell_megaraid.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_megaraid.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_exadata_megaraid.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_megaraid.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_exadata_zfs_cluster.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_cluster.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_exadata_zfs_logs.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_logs.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_exadata_zfs_services.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_services.conf.dist
|
||||
@ -95,35 +74,14 @@ echo "INFO: finished post-uninstall script"
|
||||
%files
|
||||
%defattr(-,root,root,755)
|
||||
%dir /opt/hc/lib
|
||||
%dir /opt/hc/lib/core
|
||||
%attr(755, root, root) /opt/hc/lib/core/include_exadata.sh
|
||||
%dir /opt/hc/lib/platform
|
||||
%dir /opt/hc/lib/platform/exadata
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_ib_status.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_megaraid.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_cluster.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh
|
||||
%dir /etc/opt/hc
|
||||
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_alerts.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_celldisks.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_flash.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_griddisks.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_luns.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_megaraid.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_exadata_megaraid.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_exadata_zfs_cluster.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_exadata_zfs_logs.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_exadata_zfs_services.conf.dist
|
||||
@ -132,20 +90,6 @@ echo "INFO: finished post-uninstall script"
|
||||
|
||||
|
||||
%changelog
|
||||
* Tue Jul 07 2020 <patrick@kudos.be> - 0.5.0
|
||||
- Added check_exadata_ib_status
|
||||
* Sun Jul 07 2019 <patrick@kudos.be> - 0.4.0
|
||||
- Added check_exadata_zfs_cluster
|
||||
* Tue May 14 2019 <patrick@kudos.be> - 0.3.0
|
||||
- Added include_exadata
|
||||
- Added plugin check_exadata_cell_alerts
|
||||
- Added plugin check_exadata_cell_celldisks
|
||||
- Added plugin check_exadata_cell_flash
|
||||
- Added plugin check_exadata_cell_griddisks
|
||||
- Added plugin check_exadata_cell_luns
|
||||
- Added plugin check_exadata_cell_megaraid
|
||||
- Added plugin check_exadata_cell_physicaldisks
|
||||
- Added plugin check_exadata_megaraid
|
||||
* Fri Apr 12 2019 <patrick@kudos.be> - 0.2.0
|
||||
- Added plugin check_exadata_zfs_pool_usage
|
||||
* Tue Mar 26 2019 <patrick@kudos.be> - 0.1.0
|
||||
|
@ -59,11 +59,9 @@ This is the OS/platform plugin package"
|
||||
file_permissions -u 0222 -o root -g sys
|
||||
|
||||
directory ../../../opt/hc/lib/platform/hp-ux=/opt/hc/lib/platform/hp-ux
|
||||
file -m 755 check_hpux_autofs.sh
|
||||
file -m 755 check_hpux_autopath.sh
|
||||
file -m 755 check_hpux_cdsf_cluster.sh
|
||||
file -m 755 check_hpux_cron_status.sh
|
||||
file -m 755 check_hpux_defunct_processes.sh
|
||||
file -m 755 check_hpux_drd_status.sh
|
||||
file -m 755 check_hpux_file_age.sh
|
||||
file -m 755 check_hpux_file_change.sh
|
||||
@ -73,7 +71,7 @@ This is the OS/platform plugin package"
|
||||
file -m 755 check_hpux_ignite_backup.sh
|
||||
file -m 755 check_hpux_ioscan.sh
|
||||
file -m 755 check_hpux_guid_status.sh
|
||||
file -m 755 check_hpux_hpvm_vpar_status.sh
|
||||
file -m 644 check_hpux_hpvm_vpar_status.sh
|
||||
file -m 755 check_hpux_httpd_status.sh
|
||||
file -m 755 check_hpux_kernel_params.sh
|
||||
file -m 755 check_hpux_kernel_usage.sh
|
||||
@ -88,7 +86,6 @@ This is the OS/platform plugin package"
|
||||
file -m 755 check_hpux_sshd_status.sh
|
||||
file -m 755 check_hpux_syslog.sh
|
||||
file -m 755 check_hpux_syslogd_status.sh
|
||||
file -m 755 check_hpux_uptime.sh
|
||||
file -m 755 check_hpux_vg_minor_number.sh
|
||||
end
|
||||
|
||||
@ -105,9 +102,6 @@ This is the OS/platform plugin package"
|
||||
file_permissions -u 0222 -o root -g sys
|
||||
|
||||
directory ../../../etc/opt/hc/=/etc/opt/hc
|
||||
file -m 644 check_hpux_autofs.conf.dist
|
||||
file -m 644 check_hpux_cron_status.conf.dist
|
||||
file -m 644 check_hpux_defunct_processes.conf.dist
|
||||
file -m 644 check_hpux_drd_status.conf.dist
|
||||
file -m 644 check_hpux_file_age.conf.dist
|
||||
file -m 644 check_hpux_file_change.conf.dist
|
||||
@ -119,12 +113,11 @@ This is the OS/platform plugin package"
|
||||
file -m 644 check_hpux_kernel_params.conf.dist
|
||||
file -m 644 check_hpux_kernel_usage.conf.dist
|
||||
file -m 644 check_hpux_ntp_status.conf.dist
|
||||
file -m 644 check_hpux_ovpa_status.conf.dist
|
||||
file -m 644 check_hpux_patch_version.conf.dist
|
||||
file -m 755 check_hpux_ovpa_status.conf.dist
|
||||
file -m 755 check_hpux_patch_version.conf.dist
|
||||
file -m 644 check_hpux_root_crontab.conf.dist
|
||||
file -m 644 check_hpux_sfm_status.conf.dist
|
||||
file -m 644 check_hpux_syslog.conf.dist
|
||||
file -m 644 check_hpux_uptime.conf.dist
|
||||
end
|
||||
|
||||
fileset
|
||||
|
@ -1,80 +0,0 @@
|
||||
# Depot definition:
|
||||
depot
|
||||
layout_version 1.0
|
||||
|
||||
# Vendor definition:
|
||||
vendor
|
||||
tag KUDOS
|
||||
title "KUDOS BVBA - http://www.kudos.be"
|
||||
end
|
||||
|
||||
# Bundle definitions:
|
||||
bundle
|
||||
tag HC-NOTIFY-SLACK
|
||||
title "The Health Checker (HC) for UNIX (core SLACK plugin)"
|
||||
os_name HP-UX
|
||||
revision %BUILD_DATE%
|
||||
architecture HP-UX_B.11_32/64
|
||||
machine_type ia64*
|
||||
vendor_tag KUDOS
|
||||
contents HC-NOTIFY-SLACK-MAIN,r=,a=,v=
|
||||
end
|
||||
|
||||
# Product definitions:
|
||||
product
|
||||
tag HC-NOTIFY-SLACK-MAIN
|
||||
revision %BUILD_DATE%
|
||||
category tools
|
||||
category_title Tools
|
||||
title "The Health Checker (HC) for UNIX (core SLACK plugin)"
|
||||
description "The Health Checker is a collection of scripts (plugins)
|
||||
designed to perform regular - but not intensive - health checks on
|
||||
UNIX/Linux systems. It provides plugins for AIX, HP-UX and Linux as
|
||||
well as customer specific checks. Checks may include topics such as file
|
||||
system mounts, process checks, file consistency etc.
|
||||
This is the core SLACK plugin package"
|
||||
architecture S700/S800_HP-UX_10/HP-UX_11/IA64_HP-UX_11
|
||||
machine_type 9000/7*|9000/8*|ia64*
|
||||
os_name HP-UX
|
||||
os_release ?.1?.*
|
||||
os_version [A-Z]
|
||||
directory /opt/hc
|
||||
is_locatable false
|
||||
is_patch false
|
||||
|
||||
postinstall scripts/hc_notify_slack.postinstall
|
||||
postremove scripts/hc_notify_slack.postremove
|
||||
|
||||
# Fileset definitions:
|
||||
fileset
|
||||
tag core_plugins
|
||||
title UX Health Checker (core SLACK plugins)
|
||||
is_kernel false
|
||||
is_locatable false
|
||||
is_reboot false
|
||||
is_sparse false
|
||||
is_patch false
|
||||
prerequisite HC-HPUX-MAIN
|
||||
|
||||
file_permissions -u 0222 -o root -g sys
|
||||
|
||||
directory ../../../opt/hc/lib/core=/opt/hc/lib/core
|
||||
file -m 755 notify_slack.sh
|
||||
end
|
||||
|
||||
fileset
|
||||
tag config
|
||||
title UX Health Checker (core SLACK plugins: configuration)
|
||||
is_kernel false
|
||||
is_locatable false
|
||||
is_reboot false
|
||||
is_sparse false
|
||||
is_patch false
|
||||
prerequisite HC-NOTIFY-SLACK-MAIN.core_plugins
|
||||
|
||||
file_permissions -u 0222 -o root -g sys
|
||||
|
||||
directory ../../../etc/opt/hc/core/providers=/etc/opt/hc/core/providers
|
||||
file -m 644 notify_slack.conf.dist
|
||||
end
|
||||
end
|
@ -1,51 +0,0 @@
|
||||
#!/usr/bin/env ksh
#******************************************************************************
# @(#) post-install script for HC-NOTIFY-SLACK SD package
#******************************************************************************
# @(#) Copyright (C) 2022 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# What this script does:
#   1. creates notify_slack.conf from notify_slack.conf.dist when no
#      notify_slack.conf exists yet (an existing file is left untouched)
#   2. calls the HC main script with --fix-symlinks
#
# Exit status:
#   0  finished (a failing or missing HC main script is only reported)
#   1  the plugin configuration file could not be copied
#******************************************************************************

# ------------------------- CONFIGURATION starts here -------------------------
# location of ETC dir
HC_ETC_DIR="/etc/opt/hc"
# location of check_health.sh
HC_BIN="/opt/hc/bin/check_health.sh"
PATH="$PATH:/usr/bin:/etc:/usr/sbin:/usr/ucb:/usr/bin/X11:/sbin"
# ------------------------- CONFIGURATION ends here ---------------------------

print "INFO: starting post-install script ..."

# copy plugin config file from dist (if needed)
if [[ ! -f "${HC_ETC_DIR}/core/providers/notify_slack.conf" ]]
then
    # -p keeps mode/ownership/timestamps of the .dist file on the new copy
    cp -p "${HC_ETC_DIR}/core/providers/notify_slack.conf.dist" \
        "${HC_ETC_DIR}/core/providers/notify_slack.conf" >/dev/null ||
    {
        print -u2 "ERROR: could not copy plugin config file in ${HC_ETC_DIR}/core/providers"
        exit 1
    }
fi

# refresh symbolic FPATH links
if [[ -x "${HC_BIN}" ]]
then
    # a failure here is reported but does not fail the installation
    "${HC_BIN}" --fix-symlinks || print -u2 "WARN: updating symlinks failed"
else
    print -u2 "ERROR: could not locate or execute the HC main script (${HC_BIN})"
fi

print "INFO: finished post-install script"

exit 0

#******************************************************************************
# END of script
#******************************************************************************
|
@ -1,48 +0,0 @@
|
||||
#!/usr/bin/env ksh
#******************************************************************************
# @(#) post-remove script for HC-NOTIFY-SLACK SD package
#******************************************************************************
# @(#) Copyright (C) 2022 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# What this script does:
#   1. removes notify_slack.conf.dist from the providers configuration
#      directory (notify_slack.conf itself is not touched)
#   2. calls the HC main script with --fix-symlinks
#
# Exit status: always 0; problems are reported on stderr only.
#******************************************************************************

# ------------------------- CONFIGURATION starts here -------------------------
# location of the HC configuration files
HC_ETC_DIR="/etc/opt/hc"
# location of check_health.sh
HC_BIN="/opt/hc/bin/check_health.sh"
PATH="$PATH:/usr/bin:/etc:/usr/sbin:/usr/ucb:/usr/bin/X11:/sbin"
# ------------------------- CONFIGURATION ends here ---------------------------

print "INFO: starting post-remove script ..."

# remove plugin configuration file (.dist only)
if [[ -d "${HC_ETC_DIR}/core/providers" ]]
then
    # rm -f succeeds when the file is already gone; the warning goes to stderr
    # like every other diagnostic in this script
    rm -f "${HC_ETC_DIR}/core/providers/notify_slack.conf.dist" >/dev/null ||
        print -u2 "WARN: could not remove plugin config file in directory ${HC_ETC_DIR}/core/providers"
fi

# refresh symbolic FPATH links
if [[ -x "${HC_BIN}" ]]
then
    # a failure here is reported but does not fail the removal
    "${HC_BIN}" --fix-symlinks || print -u2 "WARN: updating symlinks failed"
else
    print -u2 "ERROR: could not locate or execute the HC main script (${HC_BIN})"
fi

print "INFO: finished post-remove script"

exit 0

#******************************************************************************
# END of script
#******************************************************************************
|
@ -19,13 +19,9 @@ package() {
|
||||
install -d -m 755 ${pkgdir}/opt/hc/lib
|
||||
install -d -m 755 ${pkgdir}/opt/hc/lib/platform
|
||||
install -d -m 755 ${pkgdir}/opt/hc/lib/platform/linux
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_autofs.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_autofs.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_burp_backup.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_burp_backup.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_burp_status.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_burp_status.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_dovecot_status.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_dovecot_status.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_es_status.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_es_status.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_fail2ban_status.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_fail2ban_status.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_fetchmail_status.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_fetchmail_status.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_file_age.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_file_age.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_file_change.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_file_change.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_fs_mounts.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_fs_mounts.sh
|
||||
@ -45,15 +41,11 @@ package() {
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_hpssacli.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_hpssacli.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_process_limits.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_process_limits.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_root_crontab.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_root_crontab.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_uptime.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_uptime.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_vz_ct_counters.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_vz_ct_counters.sh
|
||||
install -D -m 755 opt/hc/lib/platform/linux/check_linux_vz_ct_status.sh ${pkgdir}/opt/hc/lib/platform/linux/check_linux_vz_ct_status.sh
|
||||
install -d -m 755 ${pkgdir}/etc/opt/hc
|
||||
install -D -m 644 etc/opt/hc/check_linux_autofs.conf.dist ${pkgdir}/etc/opt/hc/check_linux_autofs.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_burp_backup.conf.dist ${pkgdir}/etc/opt/hc/check_linux_burp_backup.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_es_status.conf.dist ${pkgdir}/etc/opt/hc/check_linux_es_status.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_fail2ban_status.conf.dist ${pkgdir}/etc/opt/hc/check_linux_fail2ban_status.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_fetchmail_status.conf.dist ${pkgdir}/etc/opt/hc/check_linux_fetchmail_status.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_file_age.conf.dist ${pkgdir}/etc/opt/hc/check_linux_file_age.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_file_change.conf.dist ${pkgdir}/etc/opt/hc/check_linux_file_change.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_fs_usage.conf.dist ${pkgdir}/etc/opt/hc/check_linux_fs_usage.conf.dist
|
||||
@ -61,12 +53,10 @@ package() {
|
||||
install -D -m 644 etc/opt/hc/check_linux_hpacucli.conf.dist ${pkgdir}/etc/opt/hc/check_linux_hpacucli.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_hplog.conf.dist ${pkgdir}/etc/opt/hc/check_linux_hplog.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_hpssacli.conf.dist ${pkgdir}/etc/opt/hc/check_linux_hpssacli.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_httpd_status.conf.dist ${pkgdir}/etc/opt/hc/check_linux_httpd_status.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_mysqld_status.conf.dist ${pkgdir}/etc/opt/hc/check_linux_mysqld_status.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_ntp_status.conf.dist ${pkgdir}/etc/opt/hc/check_linux_ntp_status.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_process_limits.conf.dist ${pkgdir}/etc/opt/hc/check_linux_process_limits.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_root_crontab.conf.dist ${pkgdir}/etc/opt/hc/check_linux_root_crontab.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_uptime.conf.dist ${pkgdir}/etc/opt/hc/check_linux_uptime.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_vz_ct_counters.conf.dist ${pkgdir}/etc/opt/hc/check_linux_vz_ct_counters.conf.dist
|
||||
install -D -m 644 etc/opt/hc/check_linux_vz_ct_status.conf.dist ${pkgdir}/etc/opt/hc/check_linux_vz_ct_status.conf.dist
|
||||
install -d -m 755 ${pkgdir}/etc/opt/hc/core
|
||||
|
@ -40,34 +40,6 @@ post_install() {
|
||||
else
|
||||
echo "ERROR: could not locate or excute the HC main script (${HC_BIN})"
|
||||
fi
|
||||
# set SELinux contexts for logrotate
|
||||
SESTATUS_BIN=$(command -v sestatus 2>/dev/null)
|
||||
if [[ -n "${SESTATUS_BIN}" ]]
|
||||
then
|
||||
IS_ENFORCING=$(${SESTATUS_BIN} | grep -c "Current mode.*enforcing" 2>/dev/null)
|
||||
if (( IS_ENFORCING > 0 ))
|
||||
then
|
||||
SEMANAGE_BIN=$(command -v semanage 2>/dev/null)
|
||||
if [[ -n "${SEMANAGE_BIN}" ]]
|
||||
then
|
||||
${SEMANAGE_BIN} fcontext -a -t var_log_t "${HC_VAR_DIR}(/check_health\.sh\.log.*)?"
|
||||
echo "INFO: SELinux fcontexts configured for log rotation"
|
||||
if [[ -d ${HC_VAR_DIR} ]]
|
||||
then
|
||||
RESTORECON_BIN=$(command -v restorecon 2>/dev/null)
|
||||
if [[ -n "${RESTORECON_BIN}" ]]
|
||||
then
|
||||
${RESTORECON_BIN} -Frv ${HC_VAR_DIR}
|
||||
echo "INFO: SELinux fcontexts set on ${HC_VAR_DIR} for log rotation"
|
||||
else
|
||||
echo "WARN: SELinux is set to 'enforcing' but could not found 'restorecon' to set fcontexts for log rotation"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "WARN: SELinux is set to 'enforcing' but could not found 'semanage' to set fcontexts for log rotation"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
echo "INFO: finished post-install script"
|
||||
}
|
||||
|
||||
|
@ -1,33 +0,0 @@
|
||||
# -- post-install --
|
||||
# Post-install hook: refreshes the health checker's symbolic FPATH links
# once the plugin files are in place.
# Globals:   HC_BIN (written), PATH (extended with the usual system dirs)
# Arguments: none
# Outputs:   INFO/WARN progress messages on stdout
# Returns:   status of the final echo; a failed symlink refresh only
#            produces a warning and never aborts the hook
post_install() {
    # ------------------------- CONFIGURATION starts here -------------------------
    # location of check_health.sh
    HC_BIN="/opt/hc/bin/check_health.sh"
    PATH="$PATH:/usr/bin:/etc:/usr/sbin:/usr/ucb:/usr/bin/X11:/sbin"
    # ------------------------- CONFIGURATION ends here ---------------------------

    echo "INFO: starting post-install script ..."

    # refresh symbolic FPATH links (silently skipped when the main script
    # is absent or not executable)
    if [[ -x "${HC_BIN}" ]]
    then
        "${HC_BIN}" --fix-symlinks || echo "WARN: updating symlinks failed"
    fi

    echo "INFO: finished post-install script"
}
|
||||
|
||||
# -- post-remove --
|
||||
# Post-remove hook: refreshes the health checker's symbolic FPATH links
# after the plugin files have been removed.
# Globals:   HC_BIN (written), PATH (extended with the usual system dirs)
# Arguments: none
# Outputs:   INFO/WARN progress messages on stdout
# Returns:   status of the final echo; a failed symlink refresh only
#            produces a warning and never aborts the hook
post_remove() {
    # ------------------------- CONFIGURATION starts here -------------------------
    # location of check_health.sh
    HC_BIN="/opt/hc/bin/check_health.sh"
    PATH="$PATH:/usr/bin:/etc:/usr/sbin:/usr/ucb:/usr/bin/X11:/sbin"
    # ------------------------- CONFIGURATION ends here ---------------------------

    echo "INFO: starting post-uninstall script ..."

    # refresh symbolic FPATH links (silently skipped when the main script
    # is absent or not executable)
    if [[ -x "${HC_BIN}" ]]
    then
        "${HC_BIN}" --fix-symlinks || echo "WARN: updating symlinks failed"
    fi

    echo "INFO: finished post-uninstall script"
}
|
@ -1,26 +0,0 @@
|
||||
# Maintainer: Patrick Van der Veken <patrick@kudos.be>
|
||||
|
||||
_pkgname="check_health"
|
||||
pkgname="hc-notify-slack"
|
||||
pkgver="%BUILD_DATE%"
|
||||
pkgrel=1
|
||||
pkgdesc="The KUDOS Health Checker (HC) for UNIX (Slack notify core plugin)"
|
||||
arch=("any")
|
||||
url="https://github.com/patvdv/${_pkgname}"
|
||||
license=('GPL3')
|
||||
makedepends=('git')
|
||||
depends=('hc-linux')
|
||||
source=("${_pkgname}::git+${url}.git#branch=master")
|
||||
install=".install"
|
||||
sha256sums=('SKIP')
|
||||
|
||||
# Stages the Slack notify plugin and its sample configuration file into
# the package root.
# Globals:   srcdir, _pkgname, pkgdir (all read)
# Arguments: none
# Returns:   1 when the source tree cannot be entered, otherwise the
#            status of the last install command
package() {
    # never run the relative-path installs below from the wrong directory
    cd "${srcdir}/${_pkgname}" || return 1

    # plugin code; pkgdir is quoted so build paths containing whitespace work
    install -d -m 755 "${pkgdir}/opt/hc/lib"
    install -d -m 755 "${pkgdir}/opt/hc/lib/core"
    install -D -m 755 opt/hc/lib/core/notify_slack.sh "${pkgdir}/opt/hc/lib/core/notify_slack.sh"

    # plugin configuration (shipped as .dist only)
    install -d -m 755 "${pkgdir}/etc/opt/hc"
    install -d -m 755 "${pkgdir}/etc/opt/hc/core"
    install -d -m 755 "${pkgdir}/etc/opt/hc/core/providers"
    install -D -m 644 etc/opt/hc/core/providers/notify_slack.conf.dist "${pkgdir}/etc/opt/hc/core/providers/notify_slack.conf.dist"
}
|
@ -4,7 +4,7 @@ Architecture: all
|
||||
Maintainer: Patrick Van der Veken <patrick@kudos.be>
|
||||
Essential: no
|
||||
Installed-Size: 50
|
||||
Pre-Depends: hc-linux
|
||||
Depends: hc-linux
|
||||
Section: tools
|
||||
Priority: extra
|
||||
Description: The KUDOS Health Checker (HC) for UNIX (CSV display core plugin).
|
||||
|
@ -4,7 +4,7 @@ Architecture: all
|
||||
Maintainer: Patrick Van der Veken <patrick@kudos.be>
|
||||
Essential: no
|
||||
Installed-Size: 50
|
||||
Pre-Depends: hc-linux
|
||||
Depends: hc-linux
|
||||
Section: tools
|
||||
Priority: extra
|
||||
Description: The KUDOS Health Checker (HC) for UNIX (init display core plugin).
|
||||
|
@ -4,7 +4,7 @@ Architecture: all
|
||||
Maintainer: Patrick Van der Veken <patrick@kudos.be>
|
||||
Essential: no
|
||||
Installed-Size: 50
|
||||
Pre-Depends: hc-linux
|
||||
Depends: hc-linux
|
||||
Section: tools
|
||||
Priority: extra
|
||||
Description: The KUDOS Health Checker (HC) for UNIX (JSON display core plugin).
|
||||
|
@ -4,7 +4,7 @@ Architecture: all
|
||||
Maintainer: Patrick Van der Veken <patrick@kudos.be>
|
||||
Essential: no
|
||||
Installed-Size: 50
|
||||
Pre-Depends: hc-linux
|
||||
Depends: hc-linux
|
||||
Section: tools
|
||||
Priority: extra
|
||||
Description: The KUDOS Health Checker (HC) for UNIX (terse display core plugin).
|
||||
|
@ -4,7 +4,7 @@ Architecture: all
|
||||
Maintainer: Patrick Van der Veken <patrick@kudos.be>
|
||||
Essential: no
|
||||
Installed-Size: 50
|
||||
Pre-Depends: hc-linux
|
||||
Depends: hc-linux
|
||||
Section: tools
|
||||
Priority: extra
|
||||
Description: The KUDOS Health Checker (HC) for UNIX (Zenoss display core plugin).
|
||||
|
@ -4,7 +4,7 @@ Architecture: all
|
||||
Maintainer: Patrick Van der Veken <patrick@kudos.be>
|
||||
Essential: no
|
||||
Installed-Size: 300
|
||||
Pre-Depends: hc-linux
|
||||
Depends: hc-linux
|
||||
Section: tools
|
||||
Priority: extra
|
||||
Description: The KUDOS Health Checker (HC) for UNIX (platform plugins).
|
||||
|
@ -4,7 +4,7 @@ Architecture: all
|
||||
Maintainer: Patrick Van der Veken <patrick@kudos.be>
|
||||
Essential: no
|
||||
Installed-Size: 300
|
||||
Depends: ksh, aptitude
|
||||
Depends: ksh
|
||||
Section: tools
|
||||
Priority: extra
|
||||
Description: The KUDOS Health Checker (HC) for UNIX.
|
||||
|
@ -16,10 +16,8 @@ chown -R root:root /opt/hc /etc/opt/hc 2>/dev/null
|
||||
chown root:root /etc/logrotate.d/check_health 2>/dev/null
|
||||
chmod 644 /etc/logrotate.d/check_health 2>/dev/null
|
||||
# copy configuration files
|
||||
if [[ -f ${HC_ETC_DIR}/core/check_health.conf.dist ]]
|
||||
if [[ ! -f ${HC_ETC_DIR}/core/check_health.conf ]]
|
||||
then
|
||||
if [[ ! -f ${HC_ETC_DIR}/core/check_health.conf ]]
|
||||
then
|
||||
# copy main configuration file
|
||||
cp -p ${HC_ETC_DIR}/core/check_health.conf.dist ${HC_ETC_DIR}/core/check_health.conf >/dev/null
|
||||
(( $? == 0 )) || \
|
||||
@ -27,14 +25,9 @@ then
|
||||
echo "ERROR: could not copy main config file in ${HC_ETC_DIR}/core"
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
else
|
||||
echo "WARN: could not check_health config .dist file in ${HC_ETC_DIR}/core"
|
||||
fi
|
||||
if [[ -f ${HC_ETC_DIR}/check_host.conf.dist ]]
|
||||
if [[ ! -f ${HC_ETC_DIR}/check_host.conf ]]
|
||||
then
|
||||
if [[ ! -f ${HC_ETC_DIR}/check_host.conf ]]
|
||||
then
|
||||
# copy host check configuration file
|
||||
cp -p ${HC_ETC_DIR}/check_host.conf.dist ${HC_ETC_DIR}/check_host.conf >/dev/null
|
||||
(( $? == 0 )) || \
|
||||
@ -42,9 +35,6 @@ then
|
||||
echo "ERROR: could not copy host check config file in ${HC_ETC_DIR}"
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
else
|
||||
echo "WARN: could not find check_host config .dist file in ${HC_ETC_DIR}"
|
||||
fi
|
||||
# refresh symbolic FPATH links for core includes & plugins
|
||||
if [[ -x ${HC_BIN} ]]
|
||||
@ -53,32 +43,4 @@ then
|
||||
else
|
||||
echo "ERROR: could not locate or excute the HC main script (${HC_BIN})"
|
||||
fi
|
||||
# set SELinux contexts for logrotate
|
||||
SESTATUS_BIN=$(command -v sestatus 2>/dev/null)
|
||||
if [[ -n "${SESTATUS_BIN}" ]]
|
||||
then
|
||||
IS_ENFORCING=$(${SESTATUS_BIN} | grep -c "Current mode.*enforcing" 2>/dev/null)
|
||||
if (( IS_ENFORCING > 0 ))
|
||||
then
|
||||
SEMANAGE_BIN=$(command -v semanage 2>/dev/null)
|
||||
if [[ -n "${SEMANAGE_BIN}" ]]
|
||||
then
|
||||
${SEMANAGE_BIN} fcontext -a -t var_log_t "${HC_VAR_DIR}(/check_health\.sh\.log.*)?"
|
||||
echo "INFO: SELinux fcontexts configured for log rotation"
|
||||
if [[ -d ${HC_VAR_DIR} ]]
|
||||
then
|
||||
RESTORECON_BIN=$(command -v restorecon 2>/dev/null)
|
||||
if [[ -n "${RESTORECON_BIN}" ]]
|
||||
then
|
||||
${RESTORECON_BIN} -Frv ${HC_VAR_DIR}
|
||||
echo "INFO: SELinux fcontexts set on ${HC_VAR_DIR} for log rotation"
|
||||
else
|
||||
echo "WARN: SELinux is set to 'enforcing' but could not found 'restorecon' to set fcontexts for log rotation"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "WARN: SELinux is set to 'enforcing' but could not found 'semanage' to set fcontexts for log rotation"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
echo "INFO: finished post-install script"
|
||||
|
@ -10,9 +10,8 @@ HC_VAR_DIR="/var/opt/hc"
|
||||
echo "INFO: starting post-uninstall script ..."
|
||||
if [[ -d ${HC_DIR} ]]
|
||||
then
|
||||
# remove all symlinks
|
||||
find ${HC_DIR} -type l -exec rm -f {} \; 2>/dev/null
|
||||
(( $? == 0 )) || echo "WARN: failed to remove symlinks in ${HC_DIR}"
|
||||
rm -rf ${HC_DIR} 2>/dev/null
|
||||
(( $? == 0 )) || echo "WARN: failed to remove ${HC_DIR}"
|
||||
fi
|
||||
if [[ -d ${HC_ETC_DIR} ]]
|
||||
then
|
||||
|
@ -4,7 +4,7 @@ Architecture: all
|
||||
Maintainer: Patrick Van der Veken <patrick@kudos.be>
|
||||
Essential: no
|
||||
Installed-Size: 50
|
||||
Pre-Depends: hc-linux
|
||||
Depends: hc-linux
|
||||
Section: tools
|
||||
Priority: extra
|
||||
Description: The KUDOS Health Checker (HC) for UNIX (EIF notify core plugin).
|
||||
|
@ -10,19 +10,14 @@ echo "INFO: starting post-install script ..."
|
||||
# debian: reset ownerships
|
||||
chown -R root:root /opt/hc /etc/opt/hc 2>/dev/null
|
||||
# copy plugin configuration file
|
||||
if [[ -f ${HC_ETC_DIR}/core/providers/notify_eif.conf.dist ]]
|
||||
if [[ ! -f ${HC_ETC_DIR}/core/providers/notify_eif.conf ]]
|
||||
then
|
||||
if [[ ! -f ${HC_ETC_DIR}/core/providers/notify_eif.conf ]]
|
||||
then
|
||||
cp -p ${HC_ETC_DIR}/core/providers/notify_eif.conf.dist ${HC_ETC_DIR}/core/providers/notify_eif.conf >/dev/null
|
||||
(( $? == 0 )) || \
|
||||
{
|
||||
echo "ERROR: could not copy plugin config file in ${HC_ETC_DIR}/core/providers"
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
else
|
||||
echo "WARN: could not find plugin config .dist file in ${HC_ETC_DIR}/core/providers"
|
||||
fi
|
||||
# refresh symbolic FPATH links
|
||||
if [[ -x ${HC_BIN} ]]
|
||||
|
@ -1,10 +0,0 @@
|
||||
Package: hc-notify-slack
|
||||
Version: %BUILD_DATE%
|
||||
Architecture: all
|
||||
Maintainer: Patrick Van der Veken <patrick@kudos.be>
|
||||
Essential: no
|
||||
Installed-Size: 50
|
||||
Pre-Depends: hc-linux
|
||||
Section: tools
|
||||
Priority: extra
|
||||
Description: The KUDOS Health Checker (HC) for UNIX (Slack notify core plugin).
|
@ -1,33 +0,0 @@
|
||||
#!/bin/bash
# Post-install script for the Slack notify plugin: resets ownerships,
# creates the active plugin configuration from its .dist template when none
# exists yet, and refreshes the health checker's symbolic FPATH links.
# Exits 1 only when the configuration template cannot be copied.

# ------------------------- CONFIGURATION starts here -------------------------
# location of the HC configuration files
HC_ETC_DIR="/etc/opt/hc"
# location of check_health.sh
HC_BIN="/opt/hc/bin/check_health.sh"
PATH="$PATH:/usr/bin:/etc:/usr/sbin:/usr/ucb:/usr/bin/X11:/sbin"
# ------------------------- CONFIGURATION ends here ---------------------------

echo "INFO: starting post-install script ..."

# debian: reset ownerships (best effort, errors are deliberately discarded)
chown -R root:root /opt/hc /etc/opt/hc 2>/dev/null

# copy plugin configuration file (never overwrites an existing one)
if [[ -f "${HC_ETC_DIR}/core/providers/notify_slack.conf.dist" ]]
then
    if [[ ! -f "${HC_ETC_DIR}/core/providers/notify_slack.conf" ]]
    then
        cp -p "${HC_ETC_DIR}/core/providers/notify_slack.conf.dist" "${HC_ETC_DIR}/core/providers/notify_slack.conf" >/dev/null || \
        {
            echo "ERROR: could not copy plugin config file in ${HC_ETC_DIR}/core/providers"
            exit 1
        }
    fi
else
    echo "WARN: could not find plugin config .dist file in ${HC_ETC_DIR}/core/providers"
fi

# refresh symbolic FPATH links (a failure here is only warned about)
if [[ -x "${HC_BIN}" ]]
then
    "${HC_BIN}" --fix-symlinks || echo "WARN: updating symlinks failed"
fi

echo "INFO: finished post-install script"
|
@ -1,32 +0,0 @@
|
||||
#!/bin/bash
# Post-uninstall script for the Slack notify plugin: on a full uninstall it
# removes the packaged .dist configuration template, then refreshes the
# health checker's symbolic FPATH links.
# Arguments: $1 - compared numerically against 0; 0 selects the uninstall
#                 branch, anything else the upgrade branch
# Exits 1 only when the .dist file cannot be removed.
# NOTE(review): the numeric test on $1 follows the RPM scriptlet convention
# (count of remaining package instances); if this script is invoked by a
# packaging system that passes action words instead, the test needs
# revisiting -- confirm against the caller.

# ------------------------- CONFIGURATION starts here -------------------------
# location of the HC configuration files
HC_ETC_DIR="/etc/opt/hc"
# location of check_health.sh
HC_BIN="/opt/hc/bin/check_health.sh"
PATH="$PATH:/usr/bin:/etc:/usr/sbin:/usr/ucb:/usr/bin/X11:/sbin"
# ------------------------- CONFIGURATION ends here ---------------------------

# update or uninstall?
if (( $1 == 0 ))
then
    echo "INFO: starting post-uninstall script ..."
    # remove plugin configuration file (.dist only)
    if [[ -d "${HC_ETC_DIR}/core/providers" ]]
    then
        rm -f "${HC_ETC_DIR}/core/providers/notify_slack.conf.dist" 2>/dev/null || \
        {
            echo "ERROR: could not remove plugin config file in ${HC_ETC_DIR}/core/providers"
            exit 1
        }
    fi
else
    echo "INFO: starting post-uninstall script (RPM upgrade)"
fi

# refresh symbolic FPATH links (a failure here is only warned about)
if [[ -x "${HC_BIN}" ]]
then
    "${HC_BIN}" --fix-symlinks || echo "WARN: updating symlinks failed"
fi

echo "INFO: finished post-uninstall script"
|
@ -4,7 +4,7 @@ Architecture: all
|
||||
Maintainer: Patrick Van der Veken <patrick@kudos.be>
|
||||
Essential: no
|
||||
Installed-Size: 50
|
||||
Pre-Depends: hc-linux
|
||||
Depends: hc-linux
|
||||
Section: tools
|
||||
Priority: extra
|
||||
Description: The KUDOS Health Checker (HC) for UNIX (SMS notify core plugin).
|
||||
|
@ -10,19 +10,14 @@ echo "INFO: starting post-install script ..."
|
||||
# debian: reset ownerships
|
||||
chown -R root:root /opt/hc /etc/opt/hc 2>/dev/null
|
||||
# copy plugin configuration file
|
||||
if [[ -f ${HC_ETC_DIR}/core/providers/notify_sms.conf.dist ]]
|
||||
if [[ ! -f ${HC_ETC_DIR}/core/providers/notify_sms.conf ]]
|
||||
then
|
||||
if [[ ! -f ${HC_ETC_DIR}/core/providers/notify_sms.conf ]]
|
||||
then
|
||||
cp -p ${HC_ETC_DIR}/core/providers/notify_sms.conf.dist ${HC_ETC_DIR}/core/providers/notify_sms.conf >/dev/null
|
||||
(( $? == 0 )) || \
|
||||
{
|
||||
echo "ERROR: could not copy plugin config file in ${HC_ETC_DIR}/core/providers"
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
else
|
||||
echo "WARN: could not find plugin config .dist file in ${HC_ETC_DIR}/core/providers"
|
||||
fi
|
||||
# refresh symbolic FPATH links
|
||||
if [[ -x ${HC_BIN} ]]
|
||||
|
@ -27,13 +27,9 @@ rm -rf $RPM_BUILD_ROOT
|
||||
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib
|
||||
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/platform
|
||||
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/platform/linux
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_autofs.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_autofs.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_burp_backup.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_burp_backup.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_burp_status.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_burp_status.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_dovecot_status.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_dovecot_status.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_es_status.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_es_status.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_fail2ban_status.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_fail2ban_status.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_fetchmail_status.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_fetchmail_status.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_file_age.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_file_age.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_file_change.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_file_change.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_fs_mounts.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_fs_mounts.sh
|
||||
@ -53,15 +49,11 @@ cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_hplog.sh $RPM_BUILD_ROOT/opt
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_hpssacli.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_hpssacli.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_process_limits.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_process_limits.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_root_crontab.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_root_crontab.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_uptime.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_uptime.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_vz_ct_counters.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_vz_ct_counters.sh
|
||||
cp ../SOURCES/opt/hc/lib/platform/linux/check_linux_vz_ct_status.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/linux/check_linux_vz_ct_status.sh
|
||||
install -d -m 755 $RPM_BUILD_ROOT/etc/opt/hc
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_autofs.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_autofs.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_burp_backup.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_burp_backup.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_es_status.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_es_status.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_fail2ban_status.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_fail2ban_status.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_fetchmail_status.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_fetchmail_status.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_file_age.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_file_age.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_file_change.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_file_change.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_fs_usage.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_fs_usage.conf.dist
|
||||
@ -69,12 +61,10 @@ cp ../SOURCES/etc/opt/hc/check_linux_hpasmcli.conf.dist $RPM_BUILD_ROOT/etc/opt/
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_hpacucli.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_hpacucli.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_hplog.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_hplog.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_hpssacli.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_hpssacli.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_httpd_status.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_httpd_status.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_mysqld_status.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_mysqld_status.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_ntp_status.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_ntp_status.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_process_limits.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_process_limits.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_root_crontab.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_root_crontab.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_uptime.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_uptime.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_vz_ct_counters.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_vz_ct_counters.conf.dist
|
||||
cp ../SOURCES/etc/opt/hc/check_linux_vz_ct_status.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_linux_vz_ct_status.conf.dist
|
||||
install -d -m 755 $RPM_BUILD_ROOT/etc/opt/hc/core
|
||||
@ -117,13 +107,9 @@ echo "INFO: finished post-uninstall script"
|
||||
%dir /opt/hc/lib
|
||||
%dir /opt/hc/lib/platform
|
||||
%dir /opt/hc/lib/platform/linux
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_autofs.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_burp_backup.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_burp_status.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_dovecot_status.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_es_status.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_fail2ban_status.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_fetchmail_status.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_file_age.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_file_change.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_fs_mounts.sh
|
||||
@ -143,15 +129,11 @@ echo "INFO: finished post-uninstall script"
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_hpssacli.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_process_limits.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_root_crontab.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_uptime.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_vz_ct_counters.sh
|
||||
%attr(755, root, root) /opt/hc/lib/platform/linux/check_linux_vz_ct_status.sh
|
||||
%dir /etc/opt/hc
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_autofs.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_burp_backup.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_es_status.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_fail2ban_status.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_fetchmail_status.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_file_age.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_file_change.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_fs_usage.conf.dist
|
||||
@ -159,12 +141,10 @@ echo "INFO: finished post-uninstall script"
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_hpacucli.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_hplog.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_hpssacli.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_httpd_status.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_mysqld_status.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_ntp_status.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_process_limits.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_root_crontab.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_uptime.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_vz_ct_counters.conf.dist
|
||||
%attr(644, root, root) /etc/opt/hc/check_linux_vz_ct_status.conf.dist
|
||||
%dir /etc/opt/hc/core
|
||||
@ -173,17 +153,6 @@ echo "INFO: finished post-uninstall script"
|
||||
%attr(644, root, root) /etc/opt/hc/core/templates/mail_body.tpl-check_linux_root_crontab
|
||||
|
||||
%changelog
|
||||
* Sat Dec 26 2020 <patrick@kudos.be> - 0.3.5
|
||||
- Added check_linux_fetchmail_status plugin
|
||||
- Added check_linux_dovecot_status plugin
|
||||
* Mon Dec 21 2020 <patrick@kudos.be> - 0.3.4
|
||||
- Added check_linux_uptime plugin
|
||||
* Fri Oct 18 2020 <patrick@kudos.be> - 0.3.3
|
||||
- Added check_linux_fail2ban_status plugin
|
||||
* Fri Nov 1 2019 <patrick@kudos.be> - 0.3.2
|
||||
- Added configuration file for check_linux_httpd_status plugin
|
||||
* Sun Jul 14 2019 <patrick@kudos.be> - 0.3.1
|
||||
- Removed check_linux_autofs plugin
|
||||
* Sat Apr 20 2019 <patrick@kudos.be> - 0.3.0
|
||||
- Removed Serviceguard plugins
|
||||
* Tue Mar 26 2019 <patrick@kudos.be> - 0.2.0
|
||||
|
@ -88,34 +88,6 @@ then
|
||||
else
|
||||
echo "ERROR: could not locate or excute the HC main script (${HC_BIN})"
|
||||
fi
|
||||
# set SELinux contexts for logrotate
|
||||
SESTATUS_BIN=$(command -v sestatus 2>/dev/null)
|
||||
if [[ -n "${SESTATUS_BIN}" ]]
|
||||
then
|
||||
IS_ENFORCING=$(${SESTATUS_BIN} | grep -c "Current mode.*enforcing" 2>/dev/null)
|
||||
if (( IS_ENFORCING > 0 ))
|
||||
then
|
||||
SEMANAGE_BIN=$(command -v semanage 2>/dev/null)
|
||||
if [[ -n "${SEMANAGE_BIN}" ]]
|
||||
then
|
||||
${SEMANAGE_BIN} fcontext -a -t var_log_t "${HC_VAR_DIR}(/check_health\.sh\.log.*)?"
|
||||
echo "INFO: SELinux fcontexts configured for log rotation"
|
||||
if [[ -d ${HC_VAR_DIR} ]]
|
||||
then
|
||||
RESTORECON_BIN=$(command -v restorecon 2>/dev/null)
|
||||
if [[ -n "${RESTORECON_BIN}" ]]
|
||||
then
|
||||
${RESTORECON_BIN} -Frv ${HC_VAR_DIR}
|
||||
echo "INFO: SELinux fcontexts set on ${HC_VAR_DIR} for log rotation"
|
||||
else
|
||||
echo "WARN: SELinux is set to 'enforcing' but could not found 'restorecon' to set fcontexts for log rotation"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "WARN: SELinux is set to 'enforcing' but could not found 'semanage' to set fcontexts for log rotation"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
echo "INFO: finished post-install script"
|
||||
|
||||
%postun
|
||||
|
@ -1,107 +0,0 @@
|
||||
%define build_timestamp %(date +"%Y%m%d")
|
||||
|
||||
Name: hc-notify-slack
|
||||
Version: %{build_timestamp}
|
||||
Release: 1
|
||||
|
||||
Summary: The KUDOS Health Checker (HC) for UNIX (Slack notify core plugin)
|
||||
Group: Tools/Monitoring
|
||||
|
||||
License: GNU General Public License either version 2 of the License, or (at your option) any later version
|
||||
URL: http://www.kudos.be
|
||||
|
||||
Requires: ksh,hc-linux
|
||||
BuildArch: noarch
|
||||
BuildRoot: %{_topdir}/%{name}-%{version}-root
|
||||
|
||||
%description
|
||||
The Health Checker is a collection of scripts (plugins) designed to perform regular - but not intensive - health checks on UNIX/Linux systems. It provides plugins for AIX, HP-UX and Linux as well as customer-specific checks. Checks may include topics such as file system mounts, process checks, file consistency etc.
|
||||
This package contains core plugins (notify).
|
||||
|
||||
%prep
|
||||
|
||||
%build
|
||||
|
||||
%install
|
||||
rm -rf $RPM_BUILD_ROOT
|
||||
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib
|
||||
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/core
|
||||
cp ../SOURCES/opt/hc/lib/core/notify_slack.sh $RPM_BUILD_ROOT/opt/hc/lib/core/notify_slack.sh
|
||||
install -d -m 755 $RPM_BUILD_ROOT/etc/opt/hc
|
||||
install -d -m 755 $RPM_BUILD_ROOT/etc/opt/hc/core
|
||||
install -d -m 755 $RPM_BUILD_ROOT/etc/opt/hc/core/providers
|
||||
cp ../SOURCES/etc/opt/hc/core/providers/notify_slack.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/core/providers/notify_slack.conf.dist
|
||||
|
||||
%post
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
# location of the HC configuration files
|
||||
HC_ETC_DIR="/etc/opt/hc"
|
||||
# location of check_health.sh
|
||||
HC_BIN="/opt/hc/bin/check_health.sh"
|
||||
PATH="$PATH:/usr/bin:/etc:/usr/sbin:/usr/ucb:/usr/bin/X11:/sbin"
|
||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||
echo "INFO: starting post-install script ..."
|
||||
# copy plugin configuration file
|
||||
if [[ ! -f ${HC_ETC_DIR}/core/providers/notify_slack.conf ]]
|
||||
then
|
||||
cp -p ${HC_ETC_DIR}/core/providers/notify_slack.conf.dist ${HC_ETC_DIR}/core/providers/notify_slack.conf >/dev/null
|
||||
(( $? == 0 )) || \
|
||||
{
|
||||
echo "ERROR: could not copy plugin config file in ${HC_ETC_DIR}/core/providers"
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
# refresh symbolic FPATH links
|
||||
if [[ -x ${HC_BIN} ]]
|
||||
then
|
||||
${HC_BIN} --fix-symlinks
|
||||
(( $? == 0 )) || echo "WARN: updating symlinks failed"
|
||||
fi
|
||||
echo "INFO: finished post-install script"
|
||||
|
||||
%postun
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
# location of the HC configuration files
|
||||
HC_ETC_DIR="/etc/opt/hc"
|
||||
# location of check_health.sh
|
||||
HC_BIN="/opt/hc/bin/check_health.sh"
|
||||
PATH="$PATH:/usr/bin:/etc:/usr/sbin:/usr/ucb:/usr/bin/X11:/sbin"
|
||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||
# update or uninstall?
|
||||
if (( $1 == 0 ))
|
||||
then
|
||||
echo "INFO: starting post-uninstall script ..."
|
||||
# remove plugin configuration file (.dist only)
|
||||
if [[ -d ${HC_ETC_DIR}/core/providers ]]
|
||||
then
|
||||
rm -f ${HC_ETC_DIR}/core/providers/notify_slack.conf.dist 2>/dev/null
|
||||
(( $? == 0 )) || \
|
||||
{
|
||||
echo "ERROR: could not remove plugin config file in ${HC_ETC_DIR}/core/providers"
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
else
|
||||
echo "INFO: starting post-uninstall script (RPM upgrade)"
|
||||
fi
|
||||
# refresh symbolic FPATH links
|
||||
if [[ -x ${HC_BIN} ]]
|
||||
then
|
||||
${HC_BIN} --fix-symlinks
|
||||
(( $? == 0 )) || echo "WARN: updating symlinks failed"
|
||||
fi
|
||||
echo "INFO: finished post-uninstall script"
|
||||
|
||||
%files
|
||||
%defattr(-,root,root,755)
|
||||
%dir /opt/hc/lib
|
||||
%dir /opt/hc/lib/core
|
||||
%attr(755, root, root) /opt/hc/lib/core/notify_slack.sh
|
||||
%dir /etc/opt/hc
|
||||
%dir /etc/opt/hc/core
|
||||
%dir /etc/opt/hc/core/providers
|
||||
%attr(644, root, root) /etc/opt/hc/core/providers/notify_slack.conf.dist
|
||||
|
||||
%changelog
|
||||
* Sun Oct 14 2022 <patrick@kudos.be> - 0.0.1
|
||||
- Initial build
|
@ -47,7 +47,6 @@ rpmbuild -bb ${BUILD_DIR}/SPECS/hc-linux-platform.spec
|
||||
# build core plugins
|
||||
rpmbuild -bb ${BUILD_DIR}/SPECS/hc-notify-eif.spec
|
||||
rpmbuild -bb ${BUILD_DIR}/SPECS/hc-notify-sms.spec
|
||||
rpmbuild -bb ${BUILD_DIR}/SPECS/hc-notify-slack.spec
|
||||
rpmbuild -bb ${BUILD_DIR}/SPECS/hc-display-init.spec
|
||||
rpmbuild -bb ${BUILD_DIR}/SPECS/hc-display-json.spec
|
||||
rpmbuild -bb ${BUILD_DIR}/SPECS/hc-display-csv.spec
|
||||
|
@ -13,11 +13,9 @@ log_healthy="no"
|
||||
|
||||
# full path to the location of mksysb images, the tool expects sub-directories
|
||||
# per host underneath this location
|
||||
# [default: /export/images]
|
||||
backup_path=/export/images
|
||||
|
||||
# name of the file containing the mksysb output
|
||||
# [default: mksysb.log]
|
||||
mksysb_log=mksysb.log
|
||||
|
||||
# maximum days before backup gets flagged as 'too old'
|
||||
|
@ -1,41 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_aix_uptime.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_aix_uptime HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# trigger event when current uptime is less than the previously recorded uptime?
|
||||
# [default: yes]
|
||||
check_reboot="yes"
|
||||
|
||||
# time to wait before the current uptime is considered for the reboot check.
|
||||
# Example: a value of 60m means the check for a previous reboot will be at earliest
|
||||
# performed 1 hour after the last reboot of the host.
|
||||
# Format: <value>m|h|d where m=minutes, h=hours, d=days (lowercase suffix).
|
||||
# if no suffix is specified, then minutes are assumed.
|
||||
# [default: 60m]
|
||||
reboot_time="60m"
|
||||
|
||||
# trigger event when current uptime goes over the maximum threshold?
|
||||
# [default: no]
|
||||
check_old_age="no"
|
||||
|
||||
# time to expire before the current uptime is considered "old age".
|
||||
# Example: a value of 365d means a must-reboot will be flagged at earliest after
|
||||
# 1 year of uptime of the host
|
||||
# Format: <value>m|h|d where m=minutes, h=hours, d=days (lowercase suffix).
|
||||
# if no suffix is specified, then minutes are assumed.
|
||||
# [default: 365d]
|
||||
old_age_time="365d"
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -11,10 +11,6 @@
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# location of the crsctl tool
|
||||
# [default: null (autodiscovery)]
|
||||
crsctl_bin=""
|
||||
|
||||
# stanza(s) of resource definitions (case sensitive).
|
||||
# Example:
|
||||
#[myresource]
|
||||
|
@ -11,10 +11,6 @@
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# location of the crsctl tool
|
||||
# [default: null (autodiscovery)]
|
||||
crsctl_bin=""
|
||||
|
||||
# list of required resource statuses
|
||||
# Format:
|
||||
# crs:<resource_name>:<*|node>=<ONLINE|OFFLINE>,<*|node>=<ONLINE|OFFLINE>,...
|
||||
|
@ -1,30 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_alerts.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_exadata_cell_alerts HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="yes"
|
||||
|
||||
# specify the user account for the dcli session to the cell server
|
||||
# [default: root]
|
||||
dcli_user=""
|
||||
|
||||
# specify the cell servers to query
|
||||
# [default: null]
|
||||
cell_servers=""
|
||||
|
||||
# specify the alert severities to report (comma-separated)
|
||||
# Possible values: critical/warning/info/clear
|
||||
# [default: critical]
|
||||
alert_severities="critical"
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,29 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_celldisks.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_exadata_cell_celldisks HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="yes"
|
||||
|
||||
# specify the user account for the dcli session to the cell server
|
||||
# [default: root]
|
||||
dcli_user=""
|
||||
|
||||
# specify the cell servers to query
|
||||
# [default: null]
|
||||
cell_servers=""
|
||||
|
||||
# specify which cell disks to exclude from the check (comma-separated)
|
||||
# [default: null]
|
||||
excluded_disks=""
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,37 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_flash.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_exadata_cell_flash HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="yes"
|
||||
|
||||
# specify the user account for the dcli session to the cell server
|
||||
# [default: root]
|
||||
dcli_user=""
|
||||
|
||||
# specify the cell servers to query
|
||||
# [default: null]
|
||||
cell_servers=""
|
||||
|
||||
# specify which flash devices to exclude from the check (comma-separated)
|
||||
# [default: null]
|
||||
excluded_devices=""
|
||||
|
||||
# specify whether to check the flash cache status (yes|no)
|
||||
# [default: yes]
|
||||
check_flashcache="yes"
|
||||
|
||||
# specify whether to check the flash log status (yes|no)
|
||||
# [default: yes]
|
||||
check_flashlog="yes"
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,33 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_griddisks.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_exadata_cell_griddisks HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="yes"
|
||||
|
||||
# specify the user account for the dcli session to the cell server
|
||||
# [default: root]
|
||||
dcli_user=""
|
||||
|
||||
# specify the cell servers to query
|
||||
# [default: null]
|
||||
cell_servers=""
|
||||
|
||||
# specify which grid disks to exclude from the check (comma-separated)
|
||||
# [default: null]
|
||||
excluded_disks=""
|
||||
|
||||
# specify whether to check the errorCount of griddisks (yes|no)
|
||||
# [default: yes]
|
||||
check_errorcount="yes"
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,29 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_luns.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_exadata_cell_luns HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="yes"
|
||||
|
||||
# specify the user account for the dcli session to the cell server
|
||||
# [default: root]
|
||||
dcli_user=""
|
||||
|
||||
# specify the cell servers to query
|
||||
# [default: null]
|
||||
cell_servers=""
|
||||
|
||||
# specify which luns to exclude from the check (comma-separated)
|
||||
# [default: null]
|
||||
excluded_luns=""
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,53 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_megaraid.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_exadata_cell_megaraid HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="yes"
|
||||
|
||||
# specify the user account for the dcli session to the cell server
|
||||
# [default: root]
|
||||
dcli_user=""
|
||||
|
||||
# specify the cell servers to query
|
||||
# [default: null]
|
||||
cell_servers=""
|
||||
|
||||
# specify whether to check the controller state (yes|no)
|
||||
# [default: yes]
|
||||
check_controller="yes"
|
||||
|
||||
# specify whether to check the BBU (battery) state (yes|no)
|
||||
# [default: yes]
|
||||
check_bbu="yes"
|
||||
|
||||
# specify whether to check the BBU (supercap) state (yes|no)
|
||||
# [default: yes]
|
||||
check_supercap="yes"
|
||||
|
||||
# specify whether to check the physical devices (yes|no)
|
||||
# [default: yes]
|
||||
check_physical="yes"
|
||||
|
||||
# specify whether to check the virtual devices (yes|no)
|
||||
# [default: yes]
|
||||
check_virtual="yes"
|
||||
|
||||
# individual checks that should be disabled (wildcards are not supported)
|
||||
# cell_exclude:<cell_server>:<controller|bbu|supercap|physical|virtual>
|
||||
# Examples:
|
||||
# disable controller check on mycell1
|
||||
# cell_exclude:mycell1:controller
|
||||
# disable bbu check on mycell2
|
||||
# cell_exclude:mycell2:bbu
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,29 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_physicaldisks.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_exadata_cell_physicaldisks HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="yes"
|
||||
|
||||
# specify the user account for the dcli session to the cell server
|
||||
# [default: root]
|
||||
dcli_user=""
|
||||
|
||||
# specify the cell servers to query
|
||||
# [default: null]
|
||||
cell_servers=""
|
||||
|
||||
# specify which physical disks to exclude from the check (comma-separated)
|
||||
# [default: null]
|
||||
excluded_disks=""
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,37 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_megaraid.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_exadata_megaraid HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="yes"
|
||||
|
||||
# specify whether to check the controller state (yes|no)
|
||||
# [default: yes]
|
||||
check_controller="yes"
|
||||
|
||||
# specify whether to check the BBU (battery) state (yes|no)
|
||||
# [default: yes]
|
||||
check_bbu="yes"
|
||||
|
||||
# specify whether to check the BBU (supercap) state (yes|no)
|
||||
# [default: yes]
|
||||
check_supercap="yes"
|
||||
|
||||
# specify whether to check the physical devices (yes|no)
|
||||
# [default: yes]
|
||||
check_physical="yes"
|
||||
|
||||
# specify whether to check the virtual devices (yes|no)
|
||||
# [default: yes]
|
||||
check_virtual="yes"
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,35 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_zfs_cluster.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_exadata_zfs_cluster HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="yes"
|
||||
|
||||
# specify the user account for the SSH session to the ZFS appliance(s)
|
||||
# [default: root]
|
||||
ssh_user=""
|
||||
|
||||
# specify the private key file for the SSH session to the ZFS appliance(s)
|
||||
# [default: ~root/.ssh/id_rsa]
|
||||
ssh_key_file=""
|
||||
|
||||
# specify additional options for the SSH session to the ZFS appliance(s)
|
||||
# [default: null]
|
||||
ssh_opts=""
|
||||
|
||||
# specify the ZFS hostname(s) to check the cluster states on
|
||||
# Format:
|
||||
# zfs:<host_name>
|
||||
# Examples:
|
||||
# zfs:myzfs1
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,7 +1,7 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_zfs_logs.conf
|
||||
# @(#) check_exadata_zfs_share_usage.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_exadata_zfs_logs HC plugin.
|
||||
# This is a configuration file for the check_exadata_zfs_share_usage HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
@ -25,7 +25,7 @@ ssh_opts=""
|
||||
|
||||
# specify the ZFS hostname(s), log name(s) & alert levels. Filters
|
||||
# should be comma-separated. Following logs are supported (filters in brackets)
|
||||
# alert (critical,major,minor)
|
||||
# alert (critical,major,minor
|
||||
# fltlog (critical,major,minor)
|
||||
# system (error)
|
||||
# scrk (failed)
|
||||
|
@ -33,26 +33,21 @@ max_replication_lag=300
|
||||
# will for this given share will be skipped (this allows for exclusion of shares)
|
||||
# In order to check share(s) for a given ZFS appliance at least one configuration
|
||||
# entry must be present: either a wildcard or custom entry.
|
||||
# Caveat: any share must finally resolve to one entry only.
|
||||
# Format:
|
||||
# zfs:<host_name>:<replication_name|*>:<true|false|*>:<success|failed|*>:[<max_replication_lag>]:[<day1,day2,..|*>]:[<hour|start_hour-end_hour|*>]
|
||||
#
|
||||
# <day>: 3 letter day name (case insensitive)
|
||||
# <hour>: 24 hours notation (start and end hours are inclusive)
|
||||
#
|
||||
# zfs:<host_name>:<replication_name|*>:<true|false|*>:<success|failed|*>:[<max_replication_lag>]
|
||||
# Examples:
|
||||
# check rep_share1 on myzfs1 with a custom threshold of 600 seconds on every day of the week
|
||||
# zfs:myzfs1:rep_share1:*:*:600:*
|
||||
# check all shares of myzfs2 with a custom threshold of 1200 seconds on Sunday and Monday
|
||||
# zfs:myzfs2:*:*:*:1200:Sun,Mon
|
||||
# check all shares of myzfs3 with the general threshold but only on Friday between 7am-10am
|
||||
# zfs:myzfs3:*:*:*:Fri:07-10
|
||||
# check rep_share1 on myzfs1 with a custom threshold of 600 seconds
|
||||
# zfs:myzfs1:rep_share1:*:*:600
|
||||
# check all shares of myzfs2 with a custom threshold of 1200 seconds
|
||||
# zfs:myzfs2:*:*:*:1200
|
||||
# check all shares of myzfs3 with the general threshold
|
||||
# zfs:myzfs3:*:*:*:
|
||||
# disable all shares of myzfs4 from checking
|
||||
# zfs:myzfs4:*:*:*:0:*
|
||||
# zfs:myzfs4:*:*:*:0
|
||||
# disable check of rep_share7 on myzfs5
|
||||
# zfs:myzfs5:rep_share7:*:*:0:*
|
||||
# check that rep_share4 on myzfs6 is inactive (every day of the week)
|
||||
# zfs:myzfs6:rep_share4:false:*:*
|
||||
# zfs:myzfs5:rep_share7:*:*:0
|
||||
# check that rep_share4 on myzfs6 is inactive
|
||||
# zfs:myzfs6:rep_share4:false:*:
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
|
@ -33,7 +33,6 @@ max_space_usage=90
|
||||
# will for this given share will be skipped (this allows for exclusion of shares)
|
||||
# In order to check share(s) for a given ZFS appliance at least one configuration
|
||||
# entry must be present: either a wildcard or custom entry.
|
||||
# Caveat: any share must finally resolve to one entry only.
|
||||
# Format:
|
||||
# zfs:<host_name>:<project_name|*>:<share_name|*>:[<usage_threshold_%>]
|
||||
# Examples:
|
||||
|
@ -1,31 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_hpux_autofs.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_hpux_autofs HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# specify whether to fix autofs (if needed)
|
||||
# [default: no]
|
||||
fix_autofs="no"
|
||||
|
||||
# number of times to attempt to restart the autofs daemon
|
||||
# (requires fix_autofs=yes)
|
||||
# [default: 3]
|
||||
retry_start=3
|
||||
|
||||
# number of seconds to sleep between daemon restart attempts
|
||||
# (requires fix_autofs=yes)
|
||||
# [default: 5]
|
||||
sleep_time=5
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,25 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_hpux_cron_status.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_hpux_cron_status HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# specify the wait time (in seconds) between registering and checking a cron task
|
||||
# [default: 10]
|
||||
wait_time=10
|
||||
|
||||
# specify the path to the cron log file
|
||||
# [default: /var/adm/cron/log]
|
||||
cron_log="/var/adm/cron/log"
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,27 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_hpux_defunct_processes.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_hpux_defunct_processes HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="yes"
|
||||
|
||||
# specify above which threshold of defunct processes to generate a HC fail event
|
||||
# If group_by_ppid is set to 'yes' then the value of 'process_threshold' will
|
||||
# apply for each PPID, otherwise it will apply to all defunct processes together
|
||||
# [default: 10]
|
||||
process_threshold=10
|
||||
|
||||
# specify whether to group by parent ID or not.
|
||||
# [default: yes]
|
||||
group_by_ppid="yes"
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -32,7 +32,7 @@ max_space_usage=90
|
||||
# defining a threshold of 0 (zero), then the check for this given filesystem
|
||||
# will be skipped (this allows for exclusion of filesystems)
|
||||
# Format:
|
||||
# fs:<mount_point>:[<max_inodes_usage_%>]:[<max_space_usage_%>]
|
||||
# fs:<mount_point>:[<max_inode_usage_%>]:[<max_space_usage_%>]
|
||||
# Examples:
|
||||
# check myfs1 with a custom inodes + space usage threshold
|
||||
#fs:myfs1:70:95
|
||||
|
@ -11,7 +11,7 @@
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# specify properties as displayed by the 'hpvmstatus -M' command
|
||||
# specify properties as displayed by the 'vzlist -a -H -o ctid,status,onboot' command
|
||||
# Format:
|
||||
# [vpar:<parid>:<runtime_status>:<boot_status>]
|
||||
# runtime_status should be 'on' or 'off'
|
||||
|
@ -1,41 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_hpux_uptime.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_hpux_uptime HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# trigger event when current uptime is less than the previously recorded uptime?
|
||||
# [default: yes]
|
||||
check_reboot="yes"
|
||||
|
||||
# time to wait before the current uptime is considered for the reboot check.
|
||||
# Example: a value of 60m means the check for a previous reboot will be at earliest
|
||||
# performed 1 hour after the last reboot of the host.
|
||||
# Format: <value>m|h|d where m=minutes, h=hours, d=days (lowercase suffix).
|
||||
# if no suffix is specified, then minutes are assumed.
|
||||
# [default: 60m]
|
||||
reboot_time="60m"
|
||||
|
||||
# trigger event when current uptime goes over the maximum threshold?
|
||||
# [default: no]
|
||||
check_old_age="no"
|
||||
|
||||
# time to expire before the current uptime is considered "old age".
|
||||
# Example: a value of 365d means a must-reboot will be flagged at earliest after
|
||||
# 1 year of uptime of the host
|
||||
# Format: <value>m|h|d where m=minutes, h=hours, d=days (lowercase suffix).
|
||||
# if no suffix is specified, then minutes are assumed.
|
||||
# [default: 365d]
|
||||
old_age_time="365d"
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,31 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_linux_autofs.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_linux_autofs HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# specify whether to fix autofs (if needed)
|
||||
# [default: no]
|
||||
fix_autofs="no"
|
||||
|
||||
# number of times to attempt to restart the autofs daemon
|
||||
# (requires fix_autofs=yes)
|
||||
# [default: 3]
|
||||
retry_start=3
|
||||
|
||||
# number of seconds to sleep between daemon restart attempts
|
||||
# (requires fix_autofs=yes)
|
||||
# [default: 5]
|
||||
sleep_time=5
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,27 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_linux_fail2ban_status.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_linux_fail2ban_status HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# specify which type of process check to use
|
||||
# Format: <auto|pgrep|sysv|systemd>. NULL value equals to "auto"
|
||||
# [default: auto]
|
||||
check_type="auto"
|
||||
|
||||
# specify the list of jails whose status should be checked (comma-separated)
|
||||
# Format: jail1,jail2,jail3,...
|
||||
# [default: none]
|
||||
check_jails=""
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -1,28 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_linux_fetchmail_status.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_linux_fetchmail_status HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# specify the regex of error codes that will be used to search the log file
|
||||
# [default: error|authfail|lockbusy|ioerr]
|
||||
error_regex="error|authfail|lockbusy|ioerr"
|
||||
|
||||
# specify fetchmail details
|
||||
# Format:
|
||||
# fetchmail:<account>:<rc_file>:<check_log=Yes|No>
|
||||
# Examples:
|
||||
#fetchmail:vmail:/srv/vmail/janedoe/.fetchmailrc:Yes
|
||||
#fetchmail:vmail:/srv/vmail/johndoe/.fetchmailrc:No
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -32,7 +32,7 @@ max_space_usage=90
|
||||
# defining a threshold of 0 (zero), then the check for this given filesystem
|
||||
# will be skipped (this allows for exclusion of filesystems)
|
||||
# Format:
|
||||
# fs:<mount_point>:[<max_inodes_usage_%>]:[<max_space_usage_%>]
|
||||
# fs:<mount_point>:[<max_inode_usage_%>]:[<max_space_usage_%>]
|
||||
# Examples:
|
||||
# check myfs1 with a custom inodes + space usage threshold
|
||||
#fs:myfs1:70:95
|
||||
|
@ -12,23 +12,18 @@
|
||||
log_healthy="no"
|
||||
|
||||
# location of 'hpacucli' tool
|
||||
# [default: /usr/sbin/hpacucli]
|
||||
hpacucli_bin="/usr/sbin/hpacucli"
|
||||
|
||||
# controller all show status (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# controller all show status (0=off, 1=on) [default: 1]
|
||||
do_acu_controller=1
|
||||
|
||||
# controller slot=X enclosure all show (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# controller slot=X enclosure all show (0=off, 1=on) [default: 1]
|
||||
do_acu_enclosure=1
|
||||
|
||||
# controller slot=X physicaldrive all show status (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# controller slot=X physicaldrive all show status (0=off, 1=on) [default: 1]
|
||||
do_acu_physical=1
|
||||
|
||||
# controller slot=X logicaldrive all show status (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# controller slot=X logicaldrive all show status (0=off, 1=on) [default: 1]
|
||||
do_acu_logical=1
|
||||
|
||||
|
||||
|
@ -12,27 +12,21 @@
|
||||
log_healthy="no"
|
||||
|
||||
# location of 'hpasmcli' tool
|
||||
# [default: /sbin/hpasmcli]
|
||||
hpasmcli_bin="/sbin/hpasmcli"
|
||||
|
||||
# SHOW FANS (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# SHOW FANS (0=off, 1=on) [default: 1]
|
||||
do_asm_fans=1
|
||||
|
||||
# SHOW DIMM (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# SHOW DIMM (0=off, 1=on) [default: 1]
|
||||
do_asm_dimm=1
|
||||
|
||||
# SHOW POWERSUPPLY (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# SHOW POWERSUPPLY (0=off, 1=on) [default: 1]
|
||||
do_asm_powersupply=1
|
||||
|
||||
# SHOW SERVER (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# SHOW SERVER (0=off, 1=on) [default: 1]
|
||||
do_asm_server=1
|
||||
|
||||
# SHOW TEMPERATURE (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# SHOW TEMPERATURE (0=off, 1=on) [default: 1]
|
||||
do_asm_temperature=1
|
||||
|
||||
|
||||
|
@ -12,7 +12,6 @@
|
||||
log_healthy="no"
|
||||
|
||||
# location of 'hplog' tool
|
||||
# [default: /sbin/hplog]
|
||||
hplog_bin="/sbin/hplog"
|
||||
|
||||
# comma-separated list of severities to search for (case insensitive)
|
||||
|
@ -12,23 +12,18 @@
|
||||
log_healthy="no"
|
||||
|
||||
# location of 'hpssacli' tool
|
||||
# [default: /usr/sbin/hpssacli]
|
||||
hpssacli_bin="/usr/sbin/hpssacli"
|
||||
|
||||
# controller all show status (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# controller all show status (0=off, 1=on) [default: 1]
|
||||
do_ssa_controller=1
|
||||
|
||||
# controller slot=X enclosure all show (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# controller slot=X enclosure all show (0=off, 1=on) [default: 1]
|
||||
do_ssa_enclosure=1
|
||||
|
||||
# controller slot=X physicaldrive all show status (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# controller slot=X physicaldrive all show status (0=off, 1=on) [default: 1]
|
||||
do_ssa_physical=1
|
||||
|
||||
# controller slot=X logicaldrive all show status (0=off, 1=on)
|
||||
# [default: 1]
|
||||
# controller slot=X logicaldrive all show status (0=off, 1=on) [default: 1]
|
||||
do_ssa_logical=1
|
||||
|
||||
#******************************************************************************
|
||||
|
@ -1,34 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_linux_httpd_status.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_linux_httpd_status HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# specify which type of process check to use
|
||||
# Format: <auto|pgrep|sysv|systemd>. NULL value equals to "auto"
|
||||
# [default: auto]
|
||||
# [release >20191101]
|
||||
check_type="auto"
|
||||
|
||||
# specify name of the httpd binary
|
||||
# Format: httpd|apache|apache2
|
||||
# [default: httpd]
|
||||
# [release >20201010]
|
||||
httpd_bin="httpd"
|
||||
|
||||
# specify custom path to the httpd binary
|
||||
# [default: null]
|
||||
# [release >20201010]
|
||||
httpd_path=""
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -19,11 +19,6 @@ force_chrony="no"
|
||||
# [default: no]
|
||||
force_ntp="no"
|
||||
|
||||
# whether to force the use of systemd-timesyncd?
|
||||
# [default: no]
|
||||
# [release >20220129]
|
||||
force_systemd="no"
|
||||
|
||||
# maximum allowed offset in milliseconds (positive integers only)
|
||||
# [default: 500]
|
||||
max_offset=500
|
||||
|
@ -1,41 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_linux_uptime.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_linux_uptime HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# specify whether to also log passed health checks
|
||||
# (warning: this may rapidly grow the HC log)
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# trigger event when current uptime is less than previously recorded uptime?
|
||||
# [default: yes]
|
||||
check_reboot="yes"
|
||||
|
||||
# time to wait before the current uptime is considered for the reboot check.
|
||||
# Example: a value of 60m means the check for a previous reboot will be at earliest
|
||||
# performed 1 hour after the last reboot of the host.
|
||||
# Format: <value>m|h|d where m=minutes, h=hours, d=days (lowercase suffix).
|
||||
# if no suffix is specified, then minutes are assumed.
|
||||
# [default: 60m]
|
||||
reboot_time="60m"
|
||||
|
||||
# trigger event when current uptime goes over the maximum threshold?
|
||||
# [default: no]
|
||||
check_old_age="no"
|
||||
|
||||
# time to expire before the current uptime is considered "old age".
|
||||
# Example: a value of 365d means a must-reboot will be flagged at earliest after
|
||||
# 1 year of uptime of the host
|
||||
# Format: <value>m|h|d where m=minutes, h=hours, d=days (lowercase suffix).
|
||||
# if no suffix is specified, then minutes are assumed.
|
||||
# [default: 365d]
|
||||
old_age_time="365d"
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -11,12 +11,6 @@
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# specify UBC names to exclude (comma-separated list) (release >20200411)
|
||||
# [default: none]
|
||||
# [release >20200411]
|
||||
# Examples: numfile,numpty
|
||||
exclude_counters=""
|
||||
|
||||
# specify containers for which to check the UBC
|
||||
# Format:
|
||||
# [ct:<ct_id>]
|
||||
|
@ -11,18 +11,13 @@
|
||||
# [default: no]
|
||||
log_healthy="no"
|
||||
|
||||
# specify properties as displayed by the following commands:
|
||||
# OpenVZ 6.x: 'vzlist -a -H -o ctid,status,onboot'
|
||||
# OpenVZ 7.x: 'prlctl list --info -a' (release >20200411)
|
||||
# specify properties as displayed by the 'vzlist -a -H -o ctid,status,onboot' command
|
||||
# Format:
|
||||
# [ct:<ct_id>:<runtime_status>:<boot_status>]
|
||||
# runtime_status should be 'running' or 'stopped'
|
||||
# boot_status should be 'yes' or 'no' (OpenVZ 6.x)
|
||||
# boot_status should be 'on' or 'off' (OpenVZ 7.x)
|
||||
# Examples (OpenVZ 6.x):
|
||||
# boot_status should be 'yes' or 'no'
|
||||
# Examples:
|
||||
#ct:100:running:yes
|
||||
# Examples (OpenVZ 7.x) (release >20200411):
|
||||
#ct:621240e6-fa79-406a-a3f9-26de47b5d789:running:on
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
|
@ -1,7 +1,7 @@
|
||||
#******************************************************************************
|
||||
# @(#) check_serviceguard_package_status.conf
|
||||
# @(#) check_linux_sg_package_status.conf
|
||||
#******************************************************************************
|
||||
# This is a configuration file for the check_serviceguard_package_status HC plugin.
|
||||
# This is a configuration file for the check_linux_sg_package_status HC plugin.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
@ -10,31 +10,6 @@
|
||||
# maximum timeout for HC calls/plugins (seconds) [default: 60]
|
||||
HC_TIME_OUT=60
|
||||
|
||||
# enable/disable logging of passed health checks (warning: this may rapidly grow the HC log)
|
||||
# see also: '--log-healthy' command-line parameter
|
||||
# 'log_healthy' setting in HC plugin configuration file(s)
|
||||
# [values: Yes|No]
|
||||
HC_LOG_HEALTHY="No"
|
||||
|
||||
# enable/disable healing logic if available in HC plugin(s)
|
||||
# see also: '--no-fix' command-line parameter
|
||||
# [values: Yes|No]
|
||||
HC_WILL_FIX="Yes"
|
||||
|
||||
# cache "last" reporting entries. Set to 'Yes' to speed up reporting of the last
|
||||
# registered HC events
|
||||
# [values: Yes|No]
|
||||
HC_REPORT_CACHE_LAST="No"
|
||||
|
||||
# cache "today" reporting entries. Set to 'Yes' to speed up reporting of today's
|
||||
# registered HC events
|
||||
# [values: Yes|No]
|
||||
HC_REPORT_CACHE_TODAY="No"
|
||||
|
||||
# Show log entry count(s) after archiving. Disabling this will speed up archiving.
|
||||
# [values: Yes|No]
|
||||
HC_COUNT_ARCHIVES="Yes"
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
|
@ -1,15 +0,0 @@
|
||||
#******************************************************************************
|
||||
# @(#) notify_slack.conf
|
||||
#******************************************************************************
|
||||
# This is the configuration file for the notify_slack.sh provider.
|
||||
# All lines starting with a '#' are comment lines.
|
||||
# [default: indicates hardcoded script values if no value is defined here]
|
||||
#******************************************************************************
|
||||
|
||||
# URL of the webhook of the Slack app
|
||||
SLACK_WEBHOOK=""
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# End of FILE
|
||||
#******************************************************************************
|
@ -23,8 +23,8 @@
|
||||
# REQUIRES: ksh88/93 (mksh/pdksh will probably work too but YMMV)
|
||||
# build_fpath(), check_config(), check_core(), check_lock_dir(),
|
||||
# check_params(), check_platform(), check_user(), check_shell(),
|
||||
# display_usage(), do_cleanup, fix_symlinks(), get_disable_comment(),
|
||||
# read_config() + include functions
|
||||
# display_usage(), do_cleanup, fix_symlinks(), read_config()
|
||||
# + include functions
|
||||
# For other pre-requisites see the documentation in display_usage()
|
||||
# REQUIRES (OPTIONAL): display_*(), notify_*(), report_*()
|
||||
# EXITS: 0=no errors encountered, >0=some errors encountered
|
||||
@ -38,7 +38,7 @@
|
||||
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
# define the version (YYYY-MM-DD)
|
||||
typeset -r SCRIPT_VERSION="2021-03-31"
|
||||
typeset -r SCRIPT_VERSION="2019-04-03"
|
||||
# location of parent directory containing KSH functions/HC plugins
|
||||
typeset -r FPATH_PARENT="/opt/hc/lib"
|
||||
# location of custom HC configuration files
|
||||
@ -86,15 +86,14 @@ typeset EXIT_CODE=0
|
||||
typeset FDIR=""
|
||||
typeset FFILE=""
|
||||
typeset FPATH=""
|
||||
typeset HC_ARCHIVE=""
|
||||
typeset HC_CHECK=""
|
||||
typeset HC_COMMENT=""
|
||||
typeset HC_DISABLE=""
|
||||
typeset HC_ENABLE=""
|
||||
typeset HC_RUN=""
|
||||
typeset HC_FAIL_ID=""
|
||||
# shellcheck disable=SC2034
|
||||
typeset HC_FILE_LINE=""
|
||||
typeset HC_LOG_HEALTHY=""
|
||||
typeset HC_NOW=""
|
||||
typeset HC_TIME_OUT=60
|
||||
typeset HC_MIN_TIME_OUT=30
|
||||
@ -102,20 +101,10 @@ typeset HC_MIN_TIME_OUT=30
|
||||
typeset HC_MSG_VAR=""
|
||||
typeset HC_STDOUT_LOG=""
|
||||
typeset HC_STDERR_LOG=""
|
||||
set -A HC_STDOUT_LOG_ARRAY
|
||||
set -A HC_STDERR_LOG_ARRAY
|
||||
# shellcheck disable=SC2034
|
||||
typeset HC_COUNT_ARCHIVES=""    # Yes|No toggle from the configuration file: show log entry count(s) after archiving
|
||||
typeset HC_WILL_FIX=""
|
||||
# shellcheck disable=SC2034
|
||||
typeset HC_REPORT_CACHE_LAST=""
|
||||
# shellcheck disable=SC2034
|
||||
typeset HC_REPORT_CACHE_TODAY=""
|
||||
# shellcheck disable=SC2034
|
||||
typeset LINUX_DISTRO=""
|
||||
# shellcheck disable=SC2034
|
||||
typeset LINUX_RELEASE=""
|
||||
typeset PREVIOUS_HC_TIME_OUT=""
|
||||
typeset ARCHIVE_RC=0
|
||||
typeset DISABLE_RC=0
|
||||
typeset ENABLE_RC=0
|
||||
@ -131,7 +120,6 @@ typeset DEBUG_OPTS=""
|
||||
# command-line parameters
|
||||
typeset ARG_ACTION=0 # HC action flag
|
||||
typeset ARG_CHECK_HOST=0 # host check is off by default
|
||||
typeset ARG_COMMENT=""
|
||||
typeset ARG_CONFIG_FILE="" # custom configuration file for a HC, none by default
|
||||
typeset ARG_DEBUG=0 # debug is off by default
|
||||
typeset ARG_DEBUG_LEVEL=0 # debug() only by default
|
||||
@ -144,10 +132,8 @@ typeset ARG_HC_ARGS="" # no extra arguments to HC plug-in by default
|
||||
typeset ARG_HISTORY=0 # include historical events is off by default
|
||||
typeset ARG_LAST=0 # report last events
|
||||
typeset ARG_LIST="" # list all by default
|
||||
typeset ARG_LIST_DETAILS=0 # list with full details is off by default
|
||||
typeset ARG_LOCK=1 # lock for concurrent script executions is on by default
|
||||
typeset ARG_LOG=1 # logging is on by default
|
||||
typeset ARG_NO_FIX=0 # fix/healing is not disabled by default
|
||||
typeset ARG_LOG_HEALTHY=0 # logging of healthy health checks is off by default
|
||||
typeset ARG_MONITOR=1 # killing long running HC processes is on by default
|
||||
typeset ARG_NEWER=""
|
||||
@ -249,55 +235,32 @@ return 0
|
||||
function check_core
|
||||
{
|
||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
|
||||
typeset INCLUDE_FILE=""
|
||||
|
||||
# check include_core (MUST be present)
|
||||
if [[ ! -r ${FPATH_PARENT}/core/include_core.sh || ! -h ${FPATH_PARENT}/core/include_core ]]
|
||||
# check and include core helper libs
|
||||
if [[ -r ${FPATH_PARENT}/core/include_core.sh && -h ${FPATH_PARENT}/core/include_core ]]
|
||||
then
|
||||
print -u2 "ERROR: library file ${FPATH_PARENT}/core/include_core(.sh) is not present (tip: run --fix-symlinks)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# PDKSH hack (IS_PDKSH is not yet set here)
|
||||
case "${KSH_VERSION}" in
|
||||
*MIRBSD*|*PD*|*LEGACY*)
|
||||
# shellcheck source=/dev/null
|
||||
. ${FPATH_PARENT}/core/include_core.sh
|
||||
if [[ -r ${FPATH_PARENT}/core/include_data.sh && -h ${FPATH_PARENT}/core/include_data ]]
|
||||
then
|
||||
(( ARG_DEBUG > 0 )) && print -u2 "DEBUG: including ${FPATH_PARENT}/core/include_data.sh"
|
||||
else
|
||||
print -u2 "ERROR: library file ${FPATH_PARENT}/core/include_core.sh is not present (tip: run --fix-symlinks)"
|
||||
exit 1
|
||||
fi
|
||||
if [[ -r ${FPATH_PARENT}/core/include_data.sh && -h ${FPATH_PARENT}/core/include_data ]]
|
||||
then
|
||||
# shellcheck source=/dev/null
|
||||
. ${FPATH_PARENT}/core/include_data.sh
|
||||
else
|
||||
else
|
||||
print -u2 "ERROR: library file ${FPATH_PARENT}/core/include_data.sh is not present (tip: run --fix-symlinks)"
|
||||
exit 1
|
||||
fi
|
||||
if [[ -r ${FPATH_PARENT}/core/include_os.sh && -h ${FPATH_PARENT}/core/include_os ]]
|
||||
then
|
||||
(( ARG_DEBUG > 0 )) && print -u2 "DEBUG: including ${FPATH_PARENT}/core/include_os.sh"
|
||||
fi
|
||||
if [[ -r ${FPATH_PARENT}/core/include_os.sh && -h ${FPATH_PARENT}/core/include_os ]]
|
||||
then
|
||||
# shellcheck source=/dev/null
|
||||
. ${FPATH_PARENT}/core/include_os.sh
|
||||
else
|
||||
else
|
||||
print -u2 "ERROR: library file ${FPATH_PARENT}/core/include_os.sh is not present (tip: run --fix-symlinks)"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
# include include_*
|
||||
find ${FPATH_PARENT}/core -name "include_*.sh" -type f -print 2>/dev/null | while read -r INCLUDE_FILE
|
||||
do
|
||||
if [[ -h ${INCLUDE_FILE%%.sh} ]]
|
||||
then
|
||||
# shellcheck source=/dev/null
|
||||
(( ARG_DEBUG > 0 )) && print -u2 "DEBUG: including ${INCLUDE_FILE}"
|
||||
# shellcheck source=/dev/null
|
||||
. "${INCLUDE_FILE}"
|
||||
else
|
||||
print -u2 "ERROR: library file ${INCLUDE_FILE} exists but has no symlink. Run --fix-symlinks"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
esac
|
||||
fi
|
||||
|
||||
# check for core directories
|
||||
[[ -d ${ARCHIVE_DIR} ]] || mkdir -p "${ARCHIVE_DIR}" >/dev/null 2>&1
|
||||
@ -340,19 +303,18 @@ function check_lock_dir
|
||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
|
||||
if (( ARG_LOCK > 0 ))
|
||||
then
|
||||
mkdir "${LOCK_DIR}" >/dev/null 2>/dev/null || {
|
||||
mkdir ${LOCK_DIR} >/dev/null || {
|
||||
print -u2 "ERROR: unable to acquire lock ${LOCK_DIR}"
|
||||
ARG_VERBOSE=0 warn "unable to acquire lock ${LOCK_DIR}"
|
||||
if [[ -f ${LOCK_DIR}/.pid ]]
|
||||
then
|
||||
# shellcheck disable=SC2086
|
||||
typeset LOCK_PID=$(<"${LOCK_DIR}/.pid")
|
||||
typeset LOCK_PID="$(<${LOCK_DIR}/.pid)"
|
||||
print -u2 "ERROR: active health checker running on PID: ${LOCK_PID}"
|
||||
ARG_VERBOSE=0 warn "active health checker running on PID: ${LOCK_PID}. Exiting!"
|
||||
fi
|
||||
exit 1
|
||||
}
|
||||
print $$ > "${LOCK_DIR}/.pid"
|
||||
print $$ >${LOCK_DIR}/.pid
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && print "DEBUG: locking has been disabled"
|
||||
fi
|
||||
@ -476,14 +438,11 @@ then
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
# --comment
|
||||
if [[ -n "${ARG_COMMENT}" ]]
|
||||
# --log-healthy
|
||||
if (( ARG_LOG_HEALTHY > 0 && ARG_ACTION != 4 ))
|
||||
then
|
||||
if (( ARG_ACTION != 2 )) && (( ARG_ACTION != 6 ))
|
||||
then
|
||||
print -u2 "ERROR: you can only use '--comment' in combination with '--disable' or '--disable-all'"
|
||||
print -u2 "ERROR: you can only use '--log-healthy' in combination with '--run'"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
# check log location
|
||||
if (( ARG_LOG > 0 ))
|
||||
@ -560,9 +519,9 @@ case "${KSH_VERSION}" in
|
||||
if [[ -z "${ERRNO}" ]]
|
||||
then
|
||||
# shellcheck disable=SC2154
|
||||
(( ARG_DEBUG > 0 )) && debug "running ksh: ${.sh.version}"
|
||||
(( ARG_DEBUG > 0 )) && print "running ksh: ${.sh.version}"
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "running ksh: ksh88 or older"
|
||||
(( ARG_DEBUG > 0 )) && print "running ksh: ksh88 or older"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
@ -586,9 +545,9 @@ cat << EOT
|
||||
Execute/report simple health checks (HC) on UNIX hosts.
|
||||
|
||||
Syntax: ${SCRIPT_DIR}/${SCRIPT_NAME} [--help] | [--help-terse] | [--version] |
|
||||
[--list=<needle>] | [--list-details] | [--list-core] | [--list-include] | [--fix-symlinks] | [--show-stats] | (--archive-all | --disable-all [--comment=<text>] | --enable-all) | [--fix-logs [--with-history]] |
|
||||
(--check-host | ((--archive | --check | --enable | --disable [--comment=<text>] | --run [--timeout=<secs>] | --show) --hc=<list_of_checks> [--config-file=<configuration_file>] [hc-args="<arg1,arg2=val,arg3">]))
|
||||
[--display=<method>] ([--debug] [--debug-level=<level>]) [--log-healthy] [--no-fix] [--no-log] [--no-lock] [--no-monitor] [[--flip-rc] [--with-rc=<count|max|sum>]]]
|
||||
[--list=<needle>] | [--list-core] | [--list-include] | [--fix-symlinks] | [--show-stats] | (--archive-all | --disable-all | --enable-all) | [--fix-logs [--with-history]] |
|
||||
(--check-host | ((--archive | --check | --enable | --disable | --run [--timeout=<secs>] | --show) --hc=<list_of_checks> [--config-file=<configuration_file>] [hc-args="<arg1,arg2=val,arg3">]))
|
||||
[--display=<method>] ([--debug] [--debug-level=<level>]) [--log-healthy] [--no-monitor] [--no-log] [--no-lock] [[--flip-rc] [--with-rc=<count|max|sum>]]]
|
||||
[--notify=<method_list>] [--mail-to=<address_list>] [--sms-to=<sms_rcpt> --sms-provider=<name>]
|
||||
[--report=<method> [--with-history] ( ([--last] | [--today]) | [(--older|--newer)=<date>] | [--reverse] [--id=<fail_id> [--detail]] )]
|
||||
|
||||
@ -603,7 +562,6 @@ Parameters:
|
||||
--archive-all : move events for all HCs from the HC log file into archive log files
|
||||
--check : display HC state.
|
||||
--check-host : execute all configured HC(s) (see check_host.conf)
|
||||
--comment : add comment to requested action (--disable). WARNING: comments may not contain spaces!
|
||||
--config-file : custom configuration file for a HC (may only be specified when executing a single HC plugin)
|
||||
--debug : run script in debug mode
|
||||
--debug-level : level of debugging information to show (0,1,2)
|
||||
@ -622,23 +580,18 @@ Parameters:
|
||||
in double quotes (example: --hc_args="arg1,arg2=value,arg3").
|
||||
--id : value of a FAIL ID (must be specified as uninterrupted sequence of numbers)
|
||||
--last : show the last (failed) events for each HC and their combined STC value
|
||||
--list : show the available health checks in a terse manner. Use --list-details for a more extensive list.
|
||||
--list-details : show the available health checks with following details included:
|
||||
--list : show the available health checks. Use <needle> to search with wildcards. Following details are shown:
|
||||
- health check (plugin) name
|
||||
- state of the HC plugin (disabled/enabled)
|
||||
- version of the HC plugin
|
||||
- whether the HC plugin requires a configuration file in ${CONFIG_DIR}
|
||||
- whether the HC plugin is scheduled by cron
|
||||
- whether the plugin contains a facility for --log-healthy and/or whether it is enabled
|
||||
- whether the plugin contains fix/healing logic (see --no-fix)
|
||||
--list-core : show the available core plugins (mail,SMS,...)
|
||||
--list-include : show the available includes/libraries
|
||||
--log-healthy : log/show also passed health checks. By default this is off when the plugin support this feature.
|
||||
(overrides \$HC_LOG_HEALTHY and can itself be overridden by --no-log to disable all logging)
|
||||
(can be overridden by --no-log to disable all logging)
|
||||
--mail-to : list of e-mail address(es) to which an e-mail alert will be send to [requires mail core plugin]
|
||||
--newer : show the (failed) events for each HC that are newer than the given date
|
||||
--no-fix : do not apply fix/healing logic for failed health checks (if available)
|
||||
(overrides \$HC_WILL_FIX)
|
||||
--no-lock : disable locking to allow concurrent script executions
|
||||
--no-log : do not log any messages to the script log file or health check results.
|
||||
--no-monitor : do not stop the execution of a HC after \$HC_TIME_OUT seconds
|
||||
@ -652,7 +605,7 @@ Parameters:
|
||||
--sms-provider : name of a supported SMS provider (see \$SMS_PROVIDERS) [requires SMS core plugin]
|
||||
--sms-to : name of person or group to which a sms alert will be send to [requires SMS core plugin]
|
||||
--timeout : maximum runtime of a HC plugin in seconds (overrides \$HC_TIME_OUT)
|
||||
--today : show (failed) events of today (HC and their combined STC value)
|
||||
--today : show today's (failed) events (HC and their combined STC value)
|
||||
--version : show the timestamp of the script.
|
||||
--with-history : also include events that have been archived already (reporting)
|
||||
--with-rc : define RC handling (plugin) when --flip-rc is used
|
||||
@ -675,28 +628,16 @@ function do_cleanup
|
||||
log "performing cleanup ..."
|
||||
|
||||
# remove temporary files
|
||||
[[ -f "${HC_MSG_FILE}" ]] && rm -f "${HC_MSG_FILE}" >/dev/null 2>&1
|
||||
[[ -f "${HC_MSG_FILE}" ]] && rm -f ${HC_MSG_FILE} >/dev/null 2>&1
|
||||
|
||||
# remove left over plugin log files (pop from array stacks)
|
||||
FILE_COUNT=1
|
||||
while (( FILE_COUNT <= ${#HC_STDOUT_LOG_ARRAY[*]} ))
|
||||
do
|
||||
(( ARG_DEBUG > 0 )) && debug "cleaning up plugin log file ${HC_STDOUT_LOG_ARRAY[FILE_COUNT]}"
|
||||
[[ -f "${HC_STDOUT_LOG_ARRAY[FILE_COUNT]}" ]] && rm -f "${HC_STDOUT_LOG_ARRAY[FILE_COUNT]}" >/dev/null 2>&1
|
||||
FILE_COUNT=$(( FILE_COUNT + 1 ))
|
||||
done
|
||||
FILE_COUNT=1
|
||||
while (( FILE_COUNT <= ${#HC_STDERR_LOG_ARRAY[*]} ))
|
||||
do
|
||||
(( ARG_DEBUG > 0 )) && debug "cleaning up plugin log file ${HC_STDERR_LOG_ARRAY[FILE_COUNT]}"
|
||||
[[ -f "${HC_STDERR_LOG_ARRAY[FILE_COUNT]}" ]] && rm -f "${HC_STDERR_LOG_ARRAY[FILE_COUNT]}" >/dev/null 2>&1
|
||||
FILE_COUNT=$(( FILE_COUNT + 1 ))
|
||||
done
|
||||
# remove trailing log files
|
||||
[[ -f "${HC_STDOUT_LOG}" ]] && rm -f ${HC_STDOUT_LOG} >/dev/null 2>&1
|
||||
[[ -f "${HC_STDERR_LOG}" ]] && rm -f ${HC_STDERR_LOG} >/dev/null 2>&1
|
||||
|
||||
# remove lock directory
|
||||
if [[ -d ${LOCK_DIR} ]]
|
||||
then
|
||||
rm -rf "${LOCK_DIR}" >/dev/null 2>&1
|
||||
rm -rf ${LOCK_DIR} >/dev/null 2>&1
|
||||
log "${LOCK_DIR} lock directory removed"
|
||||
fi
|
||||
|
||||
@ -721,7 +662,6 @@ typeset FSYML=""
|
||||
# find missing symlinks (do not skip core plug-ins here)
|
||||
print "${FPATH}" | tr ':' '\n' 2>/dev/null | while read -r FDIR
|
||||
do
|
||||
# shellcheck disable=SC2086
|
||||
find ${FDIR} -type f -print 2>/dev/null | while read -r FFILE
|
||||
do
|
||||
FSYML="${FFILE%.sh}"
|
||||
@ -740,7 +680,7 @@ done
|
||||
print "${FPATH}" | tr ':' '\n' 2>/dev/null | while read -r FDIR
|
||||
do
|
||||
# do not use 'find -type l' here!
|
||||
# shellcheck disable=SC2010,SC2086
|
||||
# shellcheck disable=SC2010
|
||||
ls ${FDIR} 2>/dev/null | grep -v "\." 2>/dev/null | while read -r FSYML
|
||||
do
|
||||
# check if file is a dead symlink
|
||||
@ -778,24 +718,6 @@ else
|
||||
. "${CONFIG_FILE}"
|
||||
fi
|
||||
|
||||
# reconcile global settings w/ cmd-line parameters
|
||||
if (( ARG_LOG_HEALTHY == 0 ))
|
||||
then
|
||||
case "${HC_LOG_HEALTHY}" in
|
||||
yes|YES|Yes)
|
||||
ARG_LOG_HEALTHY=1
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
if (( ARG_NO_FIX == 0 ))
|
||||
then
|
||||
case "${HC_WILL_FIX}" in
|
||||
no|NO|No)
|
||||
ARG_NO_FIX=1
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
@ -843,12 +765,6 @@ do
|
||||
fi
|
||||
ARG_CHECK_HOST=1
|
||||
;;
|
||||
-comment=*)
|
||||
ARG_COMMENT="${CMD_PARAMETER#-comment=}"
|
||||
;;
|
||||
--comment=*)
|
||||
ARG_COMMENT="${CMD_PARAMETER#--comment=}"
|
||||
;;
|
||||
-config-file=*)
|
||||
ARG_CONFIG_FILE="${CMD_PARAMETER#-config-file=}"
|
||||
;;
|
||||
@ -992,11 +908,6 @@ do
|
||||
ARG_ACTION=9
|
||||
fi
|
||||
;;
|
||||
-list-details|--list-details)
|
||||
# shellcheck disable=SC2034
|
||||
ARG_LIST_DETAILS=1
|
||||
ARG_ACTION=9
|
||||
;;
|
||||
-list-hc|--list-hc|-list-all|--list-all)
|
||||
print -u2 "WARN: deprecated option. Use --list | --list=<needle>"
|
||||
exit 0
|
||||
@ -1045,13 +956,8 @@ do
|
||||
# shellcheck disable=SC2034
|
||||
ARG_NOTIFY="${CMD_PARAMETER#--notify=}"
|
||||
;;
|
||||
-no-fix|--no-fix)
|
||||
ARG_NO_FIX=1
|
||||
;;
|
||||
-no-log|--no-log)
|
||||
ARG_LOG=0
|
||||
# --no-log always means --no-fix!
|
||||
ARG_NO_FIX=1
|
||||
;;
|
||||
-no-lock|--no-lock)
|
||||
ARG_LOCK=0
|
||||
@ -1249,13 +1155,7 @@ case ${ARG_ACTION} in
|
||||
# shellcheck disable=SC2181
|
||||
if (( $? == 0 ))
|
||||
then
|
||||
HC_COMMENT=$(get_disable_comment "${HC_CHECK}")
|
||||
if [[ -n "${HC_COMMENT}" ]]
|
||||
then
|
||||
log "HC ${HC_CHECK} is currently disabled (${HC_COMMENT})"
|
||||
else
|
||||
log "HC ${HC_CHECK} is currently disabled"
|
||||
fi
|
||||
else
|
||||
log "HC ${HC_CHECK} is currently enabled"
|
||||
fi
|
||||
@ -1277,11 +1177,6 @@ case ${ARG_ACTION} in
|
||||
exists_hc "${HC_DISABLE}" && die "cannot find HC: ${HC_DISABLE}"
|
||||
log "disabling HC: ${HC_DISABLE}"
|
||||
touch "${STATE_PERM_DIR}/${HC_DISABLE}.disabled" >/dev/null 2>&1
|
||||
# write comment if supplied
|
||||
if [[ -n "${ARG_COMMENT}" ]]
|
||||
then
|
||||
print "${ARG_COMMENT}" >"${STATE_PERM_DIR}/${HC_DISABLE}.disabled"
|
||||
fi
|
||||
# shellcheck disable=SC2181
|
||||
if (( $? == 0 ))
|
||||
then
|
||||
@ -1329,7 +1224,7 @@ case ${ARG_ACTION} in
|
||||
# re-initialize messages stash (log of failed checks)
|
||||
# shellcheck disable=SC2034
|
||||
HC_MSG_VAR=""
|
||||
: > "${HC_MSG_FILE}" 2>/dev/null
|
||||
: >${HC_MSG_FILE} 2>/dev/null
|
||||
# shellcheck disable=SC2181
|
||||
if (( $? > 0 ))
|
||||
then
|
||||
@ -1367,16 +1262,13 @@ case ${ARG_ACTION} in
|
||||
# set & initialize STDOUT/STDERR locations (not in init_hc()!)
|
||||
HC_STDOUT_LOG="${TMP_DIR}/${HC_RUN}.stdout.log.$$"
|
||||
HC_STDERR_LOG="${TMP_DIR}/${HC_RUN}.stderr.log.$$"
|
||||
# push plugin log files to array stacks (index starts at 1!)
|
||||
HC_STDOUT_LOG_ARRAY[${#HC_STDOUT_LOG_ARRAY[*]}+1]="${HC_STDOUT_LOG}"
|
||||
HC_STDERR_LOG_ARRAY[${#HC_STDERR_LOG_ARRAY[*]}+1]="${HC_STDERR_LOG}"
|
||||
: > "${HC_STDOUT_LOG}" 2>/dev/null
|
||||
: >${HC_STDOUT_LOG} 2>/dev/null
|
||||
# shellcheck disable=SC2181
|
||||
if (( $? > 0 ))
|
||||
then
|
||||
die "unable to reset the \${HC_STDOUT_LOG} file"
|
||||
fi
|
||||
: > "${HC_STDERR_LOG}" 2>/dev/null
|
||||
: >${HC_STDERR_LOG} 2>/dev/null
|
||||
# shellcheck disable=SC2181
|
||||
if (( $? > 0 ))
|
||||
then
|
||||
@ -1386,7 +1278,6 @@ case ${ARG_ACTION} in
|
||||
# --check-host handling: alternative configuration file, mangle ARG_CONFIG_FILE & HC_TIME_OUT
|
||||
if (( ARG_CHECK_HOST == 1 ))
|
||||
then
|
||||
PREVIOUS_HC_TIME_OUT=${HC_TIME_OUT}
|
||||
ARG_CONFIG_FILE="" # reset from previous call
|
||||
RUN_CONFIG_FILE=$(grep -i -E -e "^hc:${HC_RUN}:" ${HOST_CONFIG_FILE} 2>/dev/null | cut -f3 -d':')
|
||||
[[ -n "${RUN_CONFIG_FILE}" ]] && ARG_CONFIG_FILE="${CONFIG_DIR}/${RUN_CONFIG_FILE}"
|
||||
@ -1396,14 +1287,13 @@ case ${ARG_ACTION} in
|
||||
(( RUN_TIME_OUT > HC_TIME_OUT )) && HC_TIME_OUT=${RUN_TIME_OUT}
|
||||
else
|
||||
# reset for next HC
|
||||
HC_TIME_OUT=${PREVIOUS_HC_TIME_OUT}
|
||||
HC_TIME_OUT=60
|
||||
fi
|
||||
fi
|
||||
|
||||
# run HC with or without monitor
|
||||
if (( ARG_MONITOR == 0 ))
|
||||
then
|
||||
# shellcheck disable=SC2086
|
||||
${HC_RUN} ${ARG_HC_ARGS}
|
||||
RUN_RC=$?
|
||||
EXIT_CODE=${RUN_RC}
|
||||
@ -1442,7 +1332,6 @@ case ${ARG_ACTION} in
|
||||
# SLEEP_PID is the PID of the sleep subshell itself
|
||||
SLEEP_PID=$!
|
||||
|
||||
# shellcheck disable=SC2086
|
||||
${HC_RUN} ${ARG_HC_ARGS} &
|
||||
CHILD_PID=$!
|
||||
log "spawning child process with time-out of ${HC_TIME_OUT} secs for HC call [PID=${CHILD_PID}]"
|
||||
@ -1493,7 +1382,7 @@ case ${ARG_ACTION} in
|
||||
handle_hc "${HC_RUN}"
|
||||
# exit with return code from handle_hc() (see --flip-rc)
|
||||
EXIT_CODE=$?
|
||||
rm -f "${HC_MSG_FILE}" >/dev/null 2>&1
|
||||
rm -f ${HC_MSG_FILE} >/dev/null 2>&1
|
||||
done
|
||||
;;
|
||||
5) # show info on HC (single)
|
||||
@ -1513,11 +1402,6 @@ case ${ARG_ACTION} in
|
||||
exists_hc "${HC_DISABLE}" && die "cannot find HC: ${HC_DISABLE}"
|
||||
log "disabling HC: ${HC_DISABLE}"
|
||||
touch "${STATE_PERM_DIR}/${HC_DISABLE}.disabled" >/dev/null 2>&1
|
||||
# write comment if supplied
|
||||
if [[ -n "${ARG_COMMENT}" ]]
|
||||
then
|
||||
print "${ARG_COMMENT}" >"${STATE_PERM_DIR}/${HC_DISABLE}.disabled"
|
||||
fi
|
||||
DISABLE_RC=$?
|
||||
if (( DISABLE_RC == 0 ))
|
||||
then
|
||||
@ -1592,7 +1476,26 @@ case ${ARG_ACTION} in
|
||||
esac
|
||||
;;
|
||||
13) # archive current log entries for all HCs
|
||||
archive_hc_all
|
||||
list_hc "list" | while read -r HC_ARCHIVE
|
||||
do
|
||||
# check for HC (function)
|
||||
exists_hc "${HC_ARCHIVE}" && die "cannot find HC: ${HC_ARCHIVE}"
|
||||
log "archiving current log entries for HC: ${HC_ARCHIVE}"
|
||||
archive_hc "${HC_ARCHIVE}"
|
||||
ARCHIVE_RC=$?
|
||||
case ${ARCHIVE_RC} in
|
||||
0)
|
||||
log "no archiving needed for ${HC_ARCHIVE}"
|
||||
;;
|
||||
1)
|
||||
log "successfully archived log entries for ${HC_ARCHIVE}"
|
||||
;;
|
||||
2)
|
||||
log "failed to archive log entries for ${HC_ARCHIVE} [RC=${ARCHIVE_RC}]"
|
||||
EXIT_CODE=1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
;;
|
||||
esac
|
||||
|
||||
|
@ -31,7 +31,7 @@
|
||||
function display_init
|
||||
{
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
typeset _VERSION="2020-10-10" # YYYY-MM-DD
|
||||
typeset _VERSION="2019-03-22" # YYYY-MM-DD
|
||||
typeset _SUPPORTED_PLATFORMS="AIX,HP-UX,Linux" # uname -s match
|
||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||
|
||||
@ -139,7 +139,7 @@ fi
|
||||
|
||||
# print status line (but also check for terminal support)
|
||||
# shellcheck disable=SC1117
|
||||
printf "%-45s %50s\t[ %8s ]%s\n" \
|
||||
printf "%-30s %50s\t[ %8s ]%s\n" \
|
||||
"${_DISPLAY_HC}" \
|
||||
"(${_DISPLAY_CFG})" \
|
||||
"${_DISPLAY_COLOR}${_DISPLAY_CODE}${_NORMAL}" \
|
||||
|
@ -30,7 +30,7 @@
|
||||
# RETURNS: 0
|
||||
function version_include_core
|
||||
{
|
||||
typeset _VERSION="2022-10-14" # YYYY-MM-DD
|
||||
typeset _VERSION="2019-04-03" # YYYY-MM-DD
|
||||
|
||||
print "INFO: $0: ${_VERSION#version_*}"
|
||||
|
||||
@ -50,7 +50,6 @@ typeset HC_NAME="${1}"
|
||||
typeset ARCHIVE_FILE=""
|
||||
typeset ARCHIVE_RC=0
|
||||
typeset YEAR_MONTH=""
|
||||
typeset COUNT_STATS=1
|
||||
typeset LOG_COUNT=0
|
||||
typeset PRE_LOG_COUNT=0
|
||||
typeset TODO_LOG_COUNT=0
|
||||
@ -63,137 +62,65 @@ typeset TMP2_FILE="${TMP_DIR}/.$0.tmp2.archive.$$"
|
||||
# shellcheck disable=SC2064
|
||||
trap "rm -f ${TMP1_FILE} ${TMP2_FILE} ${SAVE_LOG_FILE} >/dev/null 2>&1; return 1" 1 2 3 15
|
||||
|
||||
# get pre-archive log count
|
||||
PRE_LOG_COUNT=$(wc -l "${HC_LOG}" 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
|
||||
# get pre-archive log count
|
||||
PRE_LOG_COUNT=$(wc -l ${HC_LOG} 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
|
||||
if (( PRE_LOG_COUNT == 0 ))
|
||||
then
|
||||
warn "${HC_LOG} is empty, nothing to archive"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# check log count toggle (only affects $LOG_COUNT)
|
||||
case "${HC_COUNT_ARCHIVES}" in
|
||||
No|no|NO)
|
||||
COUNT_STATS=0
|
||||
;;
|
||||
*)
|
||||
: # default is to do additional stats
|
||||
;;
|
||||
esac
|
||||
|
||||
# isolate messages from HC, find unique %Y-%m combinations
|
||||
grep ".*${LOG_SEP}${HC_NAME}${LOG_SEP}" "${HC_LOG}" 2>/dev/null |\
|
||||
grep ".*${LOG_SEP}${HC_NAME}${LOG_SEP}" ${HC_LOG} 2>/dev/null |\
|
||||
cut -f1 -d"${LOG_SEP}" 2>/dev/null | cut -f1 -d' ' 2>/dev/null |\
|
||||
cut -f1-2 -d'-' 2>/dev/null | sort -u 2>/dev/null |\
|
||||
while read -r YEAR_MONTH
|
||||
do
|
||||
# skip messages with bogus date
|
||||
if [[ -z "${YEAR_MONTH}" ]]
|
||||
then
|
||||
warn "skipping bogus log lines (lines without proper datestamp)"
|
||||
continue
|
||||
fi
|
||||
|
||||
# find all messages for that YEAR-MONTH combination
|
||||
grep "${YEAR_MONTH}.*${LOG_SEP}${HC_NAME}${LOG_SEP}" "${HC_LOG}" >"${TMP1_FILE}"
|
||||
TODO_LOG_COUNT=$(wc -l "${TMP1_FILE}" 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
|
||||
grep "${YEAR_MONTH}.*${LOG_SEP}${HC_NAME}${LOG_SEP}" ${HC_LOG} >${TMP1_FILE}
|
||||
TODO_LOG_COUNT=$(wc -l ${TMP1_FILE} 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
|
||||
log "# of entries in ${YEAR_MONTH} to archive: ${TODO_LOG_COUNT}"
|
||||
|
||||
# combine existing archived messages and resort
|
||||
ARCHIVE_FILE="${ARCHIVE_DIR}/hc.${YEAR_MONTH}.log"
|
||||
cat "${ARCHIVE_FILE}" "${TMP1_FILE}" 2>/dev/null | sort -u >"${TMP2_FILE}" 2>/dev/null
|
||||
mv "${TMP2_FILE}" "${ARCHIVE_FILE}" 2>/dev/null || {
|
||||
cat ${ARCHIVE_FILE} ${TMP1_FILE} 2>/dev/null | sort -u >${TMP2_FILE} 2>/dev/null
|
||||
mv ${TMP2_FILE} ${ARCHIVE_FILE} 2>/dev/null || {
|
||||
warn "failed to move archive file, aborting"
|
||||
return 2
|
||||
}
|
||||
if (( COUNT_STATS > 0 ))
|
||||
then
|
||||
LOG_COUNT=$(wc -l "${ARCHIVE_FILE}" 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
|
||||
LOG_COUNT=$(wc -l ${ARCHIVE_FILE} 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
|
||||
log "# of entries in ${ARCHIVE_FILE} now: ${LOG_COUNT}"
|
||||
fi
|
||||
|
||||
# remove archived messages from the $HC_LOG (but create a backup first!)
|
||||
cp -p "${HC_LOG}" "${SAVE_HC_LOG}" 2>/dev/null
|
||||
cp -p ${HC_LOG} ${SAVE_HC_LOG} 2>/dev/null
|
||||
# compare with the sorted $HC_LOG
|
||||
sort "${HC_LOG}" >"${TMP1_FILE}"
|
||||
comm -23 "${TMP1_FILE}" "${ARCHIVE_FILE}" 2>/dev/null >"${TMP2_FILE}"
|
||||
sort ${HC_LOG} >${TMP1_FILE}
|
||||
comm -23 ${TMP1_FILE} ${ARCHIVE_FILE} 2>/dev/null >${TMP2_FILE}
|
||||
|
||||
# check archive action (HC_LOG should not be empty unless it contained
|
||||
# only messages from one single HC plugin before archival)
|
||||
if [[ -s ${TMP2_FILE} ]] || (( PRE_LOG_COUNT == TODO_LOG_COUNT ))
|
||||
then
|
||||
mv "${TMP2_FILE}" "${HC_LOG}" 2>/dev/null || {
|
||||
mv ${TMP2_FILE} ${HC_LOG} 2>/dev/null || {
|
||||
warn "failed to move HC log file, aborting"
|
||||
return 2
|
||||
}
|
||||
if (( COUNT_STATS > 0 ))
|
||||
then
|
||||
LOG_COUNT=$(wc -l "${HC_LOG}" 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
|
||||
LOG_COUNT=$(wc -l ${HC_LOG} 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
|
||||
log "# entries in ${HC_LOG} now: ${LOG_COUNT}"
|
||||
fi
|
||||
ARCHIVE_RC=1
|
||||
else
|
||||
warn "a problem occurred. Rolling back archival"
|
||||
mv "${SAVE_HC_LOG}" "${HC_LOG}" 2>/dev/null
|
||||
mv ${SAVE_HC_LOG} ${HC_LOG} 2>/dev/null
|
||||
ARCHIVE_RC=2
|
||||
fi
|
||||
done
|
||||
|
||||
# clean up temporary file(s)
|
||||
rm -f "${TMP1_FILE}" "${TMP2_FILE}" "${SAVE_HC_LOG}" >/dev/null 2>&1
|
||||
rm -f ${TMP1_FILE} ${TMP2_FILE} ${SAVE_HC_LOG} >/dev/null 2>&1
|
||||
|
||||
return ${ARCHIVE_RC}
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: archive_hc_all()
# DOES: archive log entries for all HCs found in $HC_LOG (one archive_hc() call
#       per HC, HCs with the most log entries first)
# EXPECTS: n/a
# RETURNS: 0 (archive failures are reported through warn() only)
# REQUIRES: ${HC_LOG}, ${LOG_SEP}, archive_hc(), die(), exists_hc(), log(),
#           warn()
function archive_hc_all
{
# keep every work variable local: without typeset the loop variables would be
# created in the global scope whenever the loop runs in the current shell
typeset HC_ARCHIVE=""
typeset HC_COUNT=""
typeset ARCHIVE_RC=0

# build list with all HCs in $HC_LOG and sort them by highest number of messages
# first (to speed up the archiving operation)
log "parsing log file ${HC_LOG} for messages ..."
awk -F"${LOG_SEP}" '
    {
        # only do records with a proper HC name in $2
        if ($2 ~ /check_/) {
            count[$2]++;
        }
    }

    END {
        # emit "<count>:<hc name>" so the list can be sorted numerically
        for (hc in count) {
            print count[hc] ":" hc;
        }
    }' "${HC_LOG}" 2>/dev/null | sort -rn 2>/dev/null |\
while IFS=":" read -r HC_COUNT HC_ARCHIVE
do
    # check for HC (function)
    # NOTE(review): die() fires when exists_hc() returns 0; this mirrors the other
    # exists_hc call sites in this file -- confirm exists_hc's return convention
    exists_hc "${HC_ARCHIVE}" && die "cannot find HC: ${HC_ARCHIVE}"

    log "archiving ${HC_COUNT} log entries for HC ${HC_ARCHIVE}"
    archive_hc "${HC_ARCHIVE}"
    ARCHIVE_RC=$?

    # translate the archive_hc() return code into a log/warning message
    case ${ARCHIVE_RC} in
        0)
            log "no archiving needed for ${HC_ARCHIVE}"
            ;;
        1)
            log "successfully archived log entries for ${HC_ARCHIVE}"
            ;;
        2)
            warn "failed to archive log entries for ${HC_ARCHIVE} [RC=${ARCHIVE_RC}]"
            ;;
    esac
done

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) FUNCTION: count_log_errors()
|
||||
# DOES: check hc log file(s) for rogue entries (=lines with NF<>$NUM_LOG_FIELDS
|
||||
@ -211,7 +138,6 @@ function count_log_errors
|
||||
typeset LOG_STASH="${1}"
|
||||
typeset ERROR_COUNT=0
|
||||
|
||||
# shellcheck disable=SC2002,SC2086
|
||||
ERROR_COUNT=$(cat ${LOG_STASH} 2>/dev/null | awk -F"${LOG_SEP}" '
|
||||
BEGIN { num = 0 }
|
||||
{
|
||||
@ -221,7 +147,7 @@ ERROR_COUNT=$(cat ${LOG_STASH} 2>/dev/null | awk -F"${LOG_SEP}" '
|
||||
}
|
||||
END { print num }' 2>/dev/null)
|
||||
|
||||
print "${ERROR_COUNT}"
|
||||
print ${ERROR_COUNT}
|
||||
|
||||
return 0
|
||||
}
|
||||
@ -263,7 +189,7 @@ then
|
||||
print - "$*" | while read -r LOG_LINE
|
||||
do
|
||||
# shellcheck disable=SC2153
|
||||
print "${NOW}: ERROR: [$$]:" "${LOG_LINE}" >>"${LOG_FILE}"
|
||||
print "${NOW}: ERROR: [$$]:" "${LOG_LINE}" >>${LOG_FILE}
|
||||
done
|
||||
fi
|
||||
print - "$*" | while read -r LOG_LINE
|
||||
@ -307,7 +233,6 @@ DO_DISPLAY_CUSTOM9=0
|
||||
DO_NOTIFY_EIF=0
|
||||
DO_NOTIFY_MAIL=0
|
||||
DO_NOTIFY_SMS=0
|
||||
DO_NOTIFY_SLACK=0
|
||||
DO_REPORT_STD=0
|
||||
HAS_DISPLAY_CSV=0
|
||||
HAS_DISPLAY_INIT=0
|
||||
@ -326,13 +251,12 @@ HAS_DISPLAY_CUSTOM9=0
|
||||
HAS_NOTIFY_EIF=0
|
||||
HAS_NOTIFY_MAIL=0
|
||||
HAS_NOTIFY_SMS=0
|
||||
HAS_NOTIFY_SLACK=0
|
||||
HAS_REPORT_STD=0
|
||||
|
||||
# check which core display/notification plugins are installed
|
||||
# do not use a while-do loop here because mksh/pdksh does not pass updated
|
||||
# variables back from the sub shell (only works for true ksh88/ksh93)
|
||||
# shellcheck disable=SC2010,SC2086
|
||||
# shellcheck disable=SC2010
|
||||
for FFILE in $(ls -1 ${FPATH_PARENT}/core/*.sh 2>/dev/null | grep -v "include_" 2>/dev/null)
|
||||
do
|
||||
case "${FFILE}" in
|
||||
@ -404,10 +328,6 @@ do
|
||||
HAS_NOTIFY_EIF=1
|
||||
(( ARG_DEBUG > 0 )) && debug "notify_eif plugin is available"
|
||||
;;
|
||||
*notify_slack.sh)
|
||||
HAS_NOTIFY_SLACK=1
|
||||
(( ARG_DEBUG > 0 )) && debug "notify_slack plugin is available"
|
||||
;;
|
||||
*report_std.sh)
|
||||
# shellcheck disable=SC2034
|
||||
HAS_REPORT_STD=1
|
||||
@ -565,9 +485,6 @@ then
|
||||
*mail*) # by mail
|
||||
DO_NOTIFY_MAIL=1
|
||||
;;
|
||||
*slack*) # by Slack
|
||||
DO_NOTIFY_SLACK=1
|
||||
;;
|
||||
*sms*) # by sms
|
||||
DO_NOTIFY_SMS=1
|
||||
;;
|
||||
@ -726,10 +643,8 @@ return 0
|
||||
function dump_logs
|
||||
{
|
||||
log "=== STDOUT ==="
|
||||
# shellcheck disable=SC2086
|
||||
log "$(<${HC_STDOUT_LOG})"
|
||||
log "=== STDERR ==="
|
||||
# shellcheck disable=SC2086
|
||||
log "$(<${HC_STDERR_LOG})"
|
||||
|
||||
return 0
|
||||
@ -818,26 +733,24 @@ fi
|
||||
trap "[[ -f ${TMP_FILE} ]] && rm -f ${TMP_FILE} >/dev/null 2>&1; return 1" 1 2 3 15
|
||||
|
||||
# check and rewrite log file(s)
|
||||
# shellcheck disable=SC2086
|
||||
find ${LOG_STASH} -type f -print 2>/dev/null | while read -r FIX_FILE
|
||||
do
|
||||
log "fixing log file ${FIX_FILE} ..."
|
||||
|
||||
# count before rewrite
|
||||
STASH_COUNT=$(wc -l "${FIX_FILE}" 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
|
||||
STASH_COUNT=$(wc -l ${FIX_FILE} 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
|
||||
|
||||
# does it have errors?
|
||||
ERROR_COUNT=$(count_log_errors "${FIX_FILE}")
|
||||
ERROR_COUNT=$(count_log_errors ${FIX_FILE})
|
||||
|
||||
# we count the empty lines (again)
|
||||
EMPTY_COUNT=$(grep -c -E -e '^$' "${FIX_FILE}" 2>/dev/null)
|
||||
EMPTY_COUNT=$(grep -c -E -e '^$' ${FIX_FILE} 2>/dev/null)
|
||||
|
||||
# rewrite if needed
|
||||
if (( ERROR_COUNT > 0 ))
|
||||
then
|
||||
: >"${TMP_FILE}" 2>/dev/null
|
||||
# shellcheck disable=SC2002
|
||||
cat "${FIX_FILE}" 2>/dev/null | awk -F"${LOG_SEP}" -v OFS="${LOG_SEP}" '
|
||||
: >${TMP_FILE} 2>/dev/null
|
||||
cat ${FIX_FILE} 2>/dev/null | awk -F"${LOG_SEP}" -v OFS="${LOG_SEP}" '
|
||||
|
||||
BEGIN { max_log_fields = '"${NUM_LOG_FIELDS}"'
|
||||
max_fields = (max_log_fields - 1) * 2
|
||||
@ -913,10 +826,10 @@ do
|
||||
# correct log line, no rewrite needed
|
||||
print $0
|
||||
}
|
||||
}' >"${TMP_FILE}" 2>/dev/null
|
||||
}' >${TMP_FILE} 2>/dev/null
|
||||
|
||||
# count after rewrite (include empty lines again in the count)
|
||||
TMP_COUNT=$(wc -l "${TMP_FILE}" 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
|
||||
TMP_COUNT=$(wc -l ${TMP_FILE} 2>/dev/null | cut -f1 -d' ' 2>/dev/null)
|
||||
TMP_COUNT=$(( TMP_COUNT + EMPTY_COUNT ))
|
||||
|
||||
# bail out when we do not have enough records
|
||||
@ -927,16 +840,16 @@ do
|
||||
fi
|
||||
|
||||
# swap log file (but create a backup first!)
|
||||
cp -p "${FIX_FILE}" "${SAVE_TMP_FILE}" 2>/dev/null
|
||||
cp -p ${FIX_FILE} ${SAVE_TMP_FILE} 2>/dev/null
|
||||
# shellcheck disable=SC2181
|
||||
if (( $? == 0 ))
|
||||
then
|
||||
mv "${TMP_FILE}" "${FIX_FILE}" 2>/dev/null
|
||||
mv ${TMP_FILE} ${FIX_FILE} 2>/dev/null
|
||||
# shellcheck disable=SC2181
|
||||
if (( $? > 0 ))
|
||||
then
|
||||
warn "failed to move/update log file, rolling back"
|
||||
mv "${SAVE_TMP_FILE}" "${FIX_FILE}" 2>/dev/null
|
||||
mv ${SAVE_TMP_FILE} ${FIX_FILE} 2>/dev/null
|
||||
return 2
|
||||
fi
|
||||
FIX_RC=1
|
||||
@ -946,7 +859,7 @@ do
|
||||
fi
|
||||
|
||||
# clean up temporary file(s)
|
||||
rm -f "${SAVE_TMP_FILE}" "${TMP_FILE}" >/dev/null 2>&1
|
||||
rm -f ${SAVE_TMP_FILE} ${TMP_FILE} >/dev/null 2>&1
|
||||
else
|
||||
log "no fixing needed for ${FIX_FILE}"
|
||||
fi
|
||||
@ -957,33 +870,12 @@ done
|
||||
return ${FIX_RC}
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: get_disable_comment()
# DOES: retrieve comment for a disabled HC
# EXPECTS: HC name [string]
# OUTPUTS: comment [string] (contents of the HC's non-empty .disabled file;
#          nothing when that file is missing or empty)
# RETURNS: 0
# REQUIRES: n/a (reads ${STATE_PERM_DIR})
function get_disable_comment
{
# read the argument before touching the shell options: 'set' with a value that
# is not an option string would replace the positional parameters
typeset COMMENT_HC="${1}"
typeset COMMENT_FILE="${STATE_PERM_DIR}/${COMMENT_HC}.disabled"

(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"

# only a non-empty state file carries a comment
if [[ -s "${COMMENT_FILE}" ]]
then
    cat "${COMMENT_FILE}" 2>/dev/null
fi

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) FUNCTION: handle_hc()
|
||||
# DOES: handle HC results
|
||||
# EXPECTS: 1=HC name [string], $HC_MSG_FILE temporary file
|
||||
# RETURNS: 0 or $HC_STC_RC
|
||||
# REQUIRES: data_contains_string(), data_is_numeric(), die(), display_*(),
|
||||
# notify_*(), warn()
|
||||
# REQUIRES: die(), display_*(), notify_*(), warn()
|
||||
function handle_hc
|
||||
{
|
||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
|
||||
@ -997,21 +889,12 @@ typeset ONE_MSG_TEXT=""
|
||||
typeset ONE_MSG_CUR_VAL=""
|
||||
typeset ONE_MSG_EXP_VAL=""
|
||||
typeset ALL_MSG_STC=0
|
||||
typeset HC_CACHE_LAST_DATE=""
|
||||
typeset HC_CACHE_TODAY_DATE=""
|
||||
typeset HC_CACHE_LAST_NOW="$(date '+%Y-%m-%d %H:%M:%S' 2>/dev/null)" # see also $HC_NOW
|
||||
typeset HC_CACHE_TODAY_NOW="$(date '+%Y-%m-%d' 2>/dev/null)" # see also $HC_NOW
|
||||
typeset HC_REPORT_CACHE_LAST_STUB="${STATE_PERM_DIR}/cache.report-last"
|
||||
typeset HC_REPORT_CACHE_LAST_FILE=""
|
||||
typeset HC_REPORT_CACHE_TODAY_FILE="${STATE_PERM_DIR}/cache.report-today"
|
||||
typeset LOG_STRING_GOOD=""
|
||||
typeset LOG_STRING_FAIL=""
|
||||
|
||||
if [[ -s ${HC_MSG_FILE} ]]
|
||||
then
|
||||
# load messages file into memory
|
||||
# do not use array: max 1024 items in ksh88; regular variable is only 32-bit memory limited
|
||||
HC_MSG_VAR=$(<"${HC_MSG_FILE}")
|
||||
HC_MSG_VAR=$(<${HC_MSG_FILE})
|
||||
|
||||
# DEBUG: dump TMP file
|
||||
if (( ARG_DEBUG > 0 ))
|
||||
@ -1029,7 +912,6 @@ else
|
||||
# nothing to do, respect current EXIT_CODE
|
||||
if (( EXIT_CODE > 0 ))
|
||||
then
|
||||
# shellcheck disable=SC2086
|
||||
return ${EXIT_CODE}
|
||||
else
|
||||
return 0
|
||||
@ -1247,56 +1129,15 @@ then
|
||||
ONE_MSG_EXP_VAL=$(data_magic_unquote "${ONE_MSG_EXP_VAL}")
|
||||
fi
|
||||
fi
|
||||
|
||||
printf "%s${LOG_SEP}%s${LOG_SEP}%s${LOG_SEP}%s${LOG_SEP}" \
|
||||
"${ONE_MSG_TIME}" \
|
||||
"${HC_NAME}" \
|
||||
${ONE_MSG_STC} \
|
||||
"${ONE_MSG_TEXT}" >>${HC_LOG}
|
||||
if (( ONE_MSG_STC > 0 ))
|
||||
then
|
||||
# build log string
|
||||
LOG_STRING_FAIL=$(printf "%s${LOG_SEP}%s${LOG_SEP}%s${LOG_SEP}%s${LOG_SEP}%s${LOG_SEP}" "${ONE_MSG_TIME}" "${HC_NAME}" "${ONE_MSG_STC}" "${ONE_MSG_TEXT}" "${HC_FAIL_ID}")
|
||||
|
||||
# do atomic log update
|
||||
# shellcheck disable=SC1117
|
||||
print "${LOG_STRING_FAIL}" >>"${HC_LOG}"
|
||||
|
||||
# cache report (--report --last)
|
||||
HC_REPORT_CACHE_LAST_FILE="${HC_REPORT_CACHE_LAST_STUB}-${HC_NAME}"
|
||||
case "${HC_REPORT_CACHE_LAST}" in
|
||||
Yes|yes|YES)
|
||||
# fetch date of last cache entry (did we rollover from last HC event?)
|
||||
HC_CACHE_LAST_DATE=$(tail -n 1 "${HC_REPORT_CACHE_LAST_FILE}" 2>/dev/null | cut -f1 -d"${LOG_SEP}" 2>/dev/null)
|
||||
if [[ -z "${HC_CACHE_LAST_DATE}" ]] || [[ "${HC_CACHE_LAST_DATE}" != "${HC_CACHE_LAST_NOW}" ]]
|
||||
then
|
||||
# set and update cache file
|
||||
print "${LOG_STRING_FAIL}" >"${HC_REPORT_CACHE_LAST_FILE}"
|
||||
else
|
||||
# append cache file
|
||||
print "${LOG_STRING_FAIL}" >>"${HC_REPORT_CACHE_LAST_FILE}"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
# remove cache file if it exists
|
||||
[[ -f "${HC_REPORT_CACHE_LAST_FILE}" ]] && rm -f "${HC_REPORT_CACHE_LAST_FILE}" >/dev/null 2>/dev/null
|
||||
;;
|
||||
esac
|
||||
# cache report (--report --today)
|
||||
case "${HC_REPORT_CACHE_TODAY}" in
|
||||
Yes|yes|YES)
|
||||
# fetch date of last cache entry (did we rollover midnight?)
|
||||
HC_CACHE_TODAY_DATE=$(tail -n 1 "${HC_REPORT_CACHE_TODAY_FILE}" 2>/dev/null | cut -f1 -d"${LOG_SEP}" 2>/dev/null | awk '{ print $1 }' 2>/dev/null)
|
||||
if [[ -z "${HC_CACHE_TODAY_DATE}" ]] || [[ "${HC_CACHE_TODAY_DATE}" != "${HC_CACHE_TODAY_NOW}" ]]
|
||||
then
|
||||
# rotate and update cache file
|
||||
(( ARG_DEBUG > 0 )) && debug "rotating today's cache file at ${HC_REPORT_CACHE_TODAY_FILE}"
|
||||
print "${LOG_STRING_FAIL}" >"${HC_REPORT_CACHE_TODAY_FILE}"
|
||||
else
|
||||
# append cache file
|
||||
print "${LOG_STRING_FAIL}" >>"${HC_REPORT_CACHE_TODAY_FILE}"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
# remove cache file if it exists
|
||||
[[ -f "${HC_REPORT_CACHE_TODAY_FILE}" ]] && rm -f "${HC_REPORT_CACHE_TODAY_FILE}" >/dev/null 2>/dev/null
|
||||
;;
|
||||
esac
|
||||
printf "%s${LOG_SEP}\n" "${HC_FAIL_ID}" >>${HC_LOG}
|
||||
# RC handling (max/sum/count)
|
||||
if (( ARG_FLIP_RC > 0 ))
|
||||
then
|
||||
@ -1316,53 +1157,8 @@ then
|
||||
HC_STC_RC=$(( HC_STC_RC + 1 ))
|
||||
fi
|
||||
else
|
||||
# build log string
|
||||
LOG_STRING_GOOD=$(printf "%s${LOG_SEP}%s${LOG_SEP}%s${LOG_SEP}%s${LOG_SEP}" "${ONE_MSG_TIME}" "${HC_NAME}" "${ONE_MSG_STC}" "${ONE_MSG_TEXT}")
|
||||
|
||||
# do atomic log update
|
||||
# shellcheck disable=SC1117
|
||||
print "${LOG_STRING_GOOD}" >>"${HC_LOG}"
|
||||
|
||||
# cache report (--report --last)
|
||||
HC_REPORT_CACHE_LAST_FILE="${HC_REPORT_CACHE_LAST_STUB}-${HC_NAME}"
|
||||
case "${HC_REPORT_CACHE_LAST}" in
|
||||
Yes|yes|YES)
|
||||
# fetch date of last cache entry (did we rollover from last HC event?)
|
||||
HC_CACHE_LAST_DATE=$(tail -n 1 "${HC_REPORT_CACHE_LAST_FILE}" 2>/dev/null | cut -f1 -d"${LOG_SEP}" 2>/dev/null)
|
||||
if [[ -z "${HC_CACHE_LAST_DATE}" ]] || [[ "${HC_CACHE_LAST_DATE}" != "${HC_CACHE_LAST_NOW}" ]]
|
||||
then
|
||||
# set and update cache file
|
||||
print "${LOG_STRING_GOOD}" >"${HC_REPORT_CACHE_LAST_FILE}"
|
||||
else
|
||||
# append cache file
|
||||
print "${LOG_STRING_GOOD}" >>"${HC_REPORT_CACHE_LAST_FILE}"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
# remove cache file if it exists
|
||||
[[ -f "${HC_REPORT_CACHE_LAST_FILE}" ]] && rm -f "${HC_REPORT_CACHE_LAST_FILE}" >/dev/null 2>/dev/null
|
||||
;;
|
||||
esac
|
||||
# cache report (--report --today)
|
||||
case "${HC_REPORT_CACHE_TODAY}" in
|
||||
Yes|yes|YES)
|
||||
# fetch date of last cache entry (did we rollover midnight?)
|
||||
HC_CACHE_TODAY_DATE=$(tail -n 1 "${HC_REPORT_CACHE_TODAY_FILE}" 2>/dev/null | cut -f1 -d"${LOG_SEP}" 2>/dev/null | awk '{ print $1 }' 2>/dev/null)
|
||||
if [[ -z "${HC_CACHE_TODAY_DATE}" ]] || [[ "${HC_CACHE_TODAY_DATE}" != "${HC_CACHE_TODAY_NOW}" ]]
|
||||
then
|
||||
# rotate and update cache file
|
||||
(( ARG_DEBUG > 0 )) && debug "rotating today's cache file at ${HC_REPORT_CACHE_TODAY_FILE}"
|
||||
print "${LOG_STRING_GOOD}" >"${HC_REPORT_CACHE_TODAY_FILE}"
|
||||
else
|
||||
# append cache file
|
||||
print "${LOG_STRING_GOOD}" >>"${HC_REPORT_CACHE_TODAY_FILE}"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
# remove cache file if it exists
|
||||
[[ -f ${HC_REPORT_CACHE_TODAY_FILE} ]] && rm -f "${HC_REPORT_CACHE_TODAY_FILE}" >/dev/null 2>/dev/null
|
||||
;;
|
||||
esac
|
||||
printf "\n" >>${HC_LOG}
|
||||
fi
|
||||
done
|
||||
|
||||
@ -1379,14 +1175,14 @@ then
|
||||
then
|
||||
# cut off the path and the .$$ part from the file location
|
||||
HC_STDOUT_LOG_SHORT="${HC_STDOUT_LOG##*/}"
|
||||
mv "${HC_STDOUT_LOG}" "${EVENTS_DIR}/${DIR_PREFIX}/${HC_FAIL_ID}/${HC_STDOUT_LOG_SHORT%.*}" >/dev/null 2>&1 || \
|
||||
mv ${HC_STDOUT_LOG} "${EVENTS_DIR}/${DIR_PREFIX}/${HC_FAIL_ID}/${HC_STDOUT_LOG_SHORT%.*}" >/dev/null 2>&1 || \
|
||||
die "failed to move ${HC_STDOUT_LOG} to event directory at ${1}"
|
||||
fi
|
||||
if [[ -f ${HC_STDERR_LOG} ]]
|
||||
then
|
||||
# cut off the path and the .$$ part from the file location
|
||||
HC_STDERR_LOG_SHORT="${HC_STDERR_LOG##*/}"
|
||||
mv "${HC_STDERR_LOG}" "${EVENTS_DIR}/${DIR_PREFIX}/${HC_FAIL_ID}/${HC_STDERR_LOG_SHORT%.*}" >/dev/null 2>&1 || \
|
||||
mv ${HC_STDERR_LOG} "${EVENTS_DIR}/${DIR_PREFIX}/${HC_FAIL_ID}/${HC_STDERR_LOG_SHORT%.*}" >/dev/null 2>&1 || \
|
||||
die "failed to move ${HC_STDERR_LOG} to event directory at ${1}"
|
||||
fi
|
||||
fi
|
||||
@ -1425,17 +1221,6 @@ then
|
||||
warn "notify_sms plugin is not avaible, cannot send alert via sms!"
|
||||
fi
|
||||
fi
|
||||
# by Slack?
|
||||
if (( DO_NOTIFY_SLACK == 1 ))
|
||||
then
|
||||
if (( HAS_NOTIFY_SLACK == 1 ))
|
||||
then
|
||||
# call plugin
|
||||
notify_slack "${HC_NAME}" "${HC_FAIL_ID}"
|
||||
else
|
||||
warn "notify_slack plugin is not avaible, cannot send alert via Slack!"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
@ -1460,7 +1245,6 @@ fi
|
||||
function handle_timeout
|
||||
{
|
||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
|
||||
# shellcheck disable=SC2086
|
||||
[[ -n "${CHILD_PID}" ]] && kill -s TERM ${CHILD_PID}
|
||||
warn "child process with PID ${CHILD_PID} has been forcefully stopped"
|
||||
# shellcheck disable=SC2034
|
||||
@ -1590,8 +1374,8 @@ esac
|
||||
|
||||
# mangle $ARG_HC to build the full list of HCs to be executed
|
||||
ARG_HC=""
|
||||
grep -i '^hc:' "${HOST_CONFIG_FILE}" 2>/dev/null |\
|
||||
while IFS=':' read -r _ HC_EXEC _ _
|
||||
grep -i '^hc:' ${HOST_CONFIG_FILE} 2>/dev/null |\
|
||||
while IFS=':' read _ HC_EXEC _ _
|
||||
do
|
||||
ARG_HC="${ARG_HC},${HC_EXEC}"
|
||||
done
|
||||
@ -1652,23 +1436,19 @@ case "${OS_NAME}" in
|
||||
# check system crontabs
|
||||
if (( CRON_COUNT == 0 ))
|
||||
then
|
||||
# shellcheck disable=SC2002,SC2086
|
||||
CRON_COUNT=$(cat ${CRON_SYS_LOCATIONS} 2>/dev/null | grep -c -E -e "^[^#].*${CRON_HC}" 2>/dev/null)
|
||||
fi
|
||||
# check anacron
|
||||
if (( CRON_COUNT == 0 ))
|
||||
then
|
||||
# shellcheck disable=SC2002,SC2086
|
||||
CRON_COUNT=$(cat ${CRON_ANACRON_LOCATIONS} 2>/dev/null | grep -c -E -e "^[^#].*${CRON_HC}" 2>/dev/null)
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
# use default root crontab
|
||||
CRON_COUNT=$(crontab -l 2>/dev/null | grep -c -E -e "^[^#].*${CRON_HC}" 2>/dev/null)
|
||||
;;
|
||||
esac
|
||||
|
||||
# shellcheck disable=SC2086
|
||||
return ${CRON_COUNT}
|
||||
}
|
||||
|
||||
@ -1700,11 +1480,11 @@ printf "%80s\n" | tr ' ' -
|
||||
print "${FPATH}" | tr ':' '\n' 2>/dev/null | grep "core$" | sort 2>/dev/null | while read -r FDIR
|
||||
do
|
||||
# exclude core helper librar(y|ies)
|
||||
# shellcheck disable=SC2010,SC2086
|
||||
# shellcheck disable=SC2010
|
||||
ls -1 ${FDIR}/*.sh 2>/dev/null | grep -v "include_" | sort 2>/dev/null | while read -r FFILE
|
||||
do
|
||||
# cache script contents in memory
|
||||
FSCRIPT="$(<${FFILE})"
|
||||
FSCRIPT=$(<${FFILE})
|
||||
|
||||
# reset state
|
||||
FSTATE="enabled"
|
||||
@ -1746,12 +1526,12 @@ print -n "Dead links: "
|
||||
print "${FPATH}" | tr ':' '\n' 2>/dev/null | grep "core$" 2>/dev/null | while read -r FDIR
|
||||
do
|
||||
# do not use 'find -type l' here!
|
||||
# shellcheck disable=SC2010,SC1117,SC2086
|
||||
# shellcheck disable=SC2010,SC1117
|
||||
ls ${FDIR} 2>/dev/null | grep -v "\." 2>/dev/null | while read -r FFILE
|
||||
do
|
||||
if [[ -h "${FDIR}/${FFILE}" ]] && [[ ! -f "${FDIR}/${FFILE}" ]]
|
||||
then
|
||||
printf "%s " "${FFILE##*/}"
|
||||
printf "%s " ${FFILE##*/}
|
||||
fi
|
||||
done
|
||||
done
|
||||
@ -1783,7 +1563,6 @@ typeset FCONFIG=""
|
||||
typeset FSTATE=""
|
||||
typeset FFILE=""
|
||||
typeset FHEALTHY=""
|
||||
typeset FFIX=0
|
||||
typeset FSCHEDULED=0
|
||||
typeset FSCRIPT=""
|
||||
typeset HAS_FCONFIG=0
|
||||
@ -1801,44 +1580,21 @@ fi
|
||||
# print header
|
||||
if [[ "${FACTION}" != "list" ]]
|
||||
then
|
||||
if (( ARG_LIST_DETAILS > 0 ))
|
||||
then
|
||||
# shellcheck disable=SC1117
|
||||
printf "%-40s\t%-8s\t%s\t\t%s\t%s\t%s\t%s\n" "Health Check" "State" "Version" "Config?" "Sched?" "H+?" "Fix?"
|
||||
printf "%-40s\t%-8s\t%s\t\t%s\t%s\t%s\n" "Health Check" "State" "Version" "Config?" "Sched?" "H+?"
|
||||
# shellcheck disable=SC2183,SC1117
|
||||
printf "%110s\n" | tr ' ' -
|
||||
else
|
||||
# shellcheck disable=SC1117
|
||||
printf "%-40s\t%-8s\n" "Health Check" "State"
|
||||
# shellcheck disable=SC2183,SC1117
|
||||
printf "%60s\n" | tr ' ' -
|
||||
fi
|
||||
printf "%100s\n" | tr ' ' -
|
||||
fi
|
||||
print "${FPATH}" | tr ':' '\n' 2>/dev/null | grep -v "core$" 2>/dev/null | sort 2>/dev/null |\
|
||||
while read -r FDIR
|
||||
do
|
||||
# shellcheck disable=SC2012,SC2086
|
||||
ls -1 ${FDIR}/${FNEEDLE} 2>/dev/null | sort 2>/dev/null | while read -r FFILE
|
||||
do
|
||||
# cache script contents in memory
|
||||
FSCRIPT="$(<${FFILE})"
|
||||
FSCRIPT=$(<${FFILE})
|
||||
|
||||
# --list (basic)
|
||||
# find function name but skip helper functions in the plug-in file (function _name)
|
||||
FNAME=$(print -R "${FSCRIPT}" | grep -E -e "^function[[:space:]]+[^_]" 2>/dev/null)
|
||||
|
||||
# check state
|
||||
DISABLE_FFILE="$(print ${FFILE##*/} | sed 's/\.sh$//')"
|
||||
if [[ -f "${STATE_PERM_DIR}/${DISABLE_FFILE}.disabled" ]]
|
||||
then
|
||||
FSTATE="disabled"
|
||||
else
|
||||
FSTATE="enabled"
|
||||
fi
|
||||
|
||||
# --list-details
|
||||
if (( ARG_LIST_DETAILS > 0 ))
|
||||
then
|
||||
# look for version string (cut off comments but don't use [:space:] in tr)
|
||||
FVERSION=$(print -R "${FSCRIPT}" | grep '^typeset _VERSION=' 2>/dev/null |\
|
||||
awk 'match($0,/[0-9]+-[0-9]+-[0-9]+/){print substr($0, RSTART, RLENGTH)}' 2>/dev/null)
|
||||
@ -1892,14 +1648,14 @@ do
|
||||
FCONFIG="No"
|
||||
FHEALTHY="N/S"
|
||||
fi
|
||||
# check fix
|
||||
if (( $(print -R "${FSCRIPT}" | grep -c -E -e "_HC_CAN_FIX=1" 2>/dev/null) > 0 ))
|
||||
# check state
|
||||
DISABLE_FFILE="$(print ${FFILE##*/} | sed 's/\.sh$//')"
|
||||
if [[ -f "${STATE_PERM_DIR}/${DISABLE_FFILE}.disabled" ]]
|
||||
then
|
||||
FFIX="Yes"
|
||||
FSTATE="disabled"
|
||||
else
|
||||
FFIX="No"
|
||||
FSTATE="enabled"
|
||||
fi
|
||||
|
||||
# reset state when unlinked
|
||||
[[ -h ${FFILE%%.*} ]] || FSTATE="unlinked"
|
||||
# check scheduling
|
||||
@ -1911,28 +1667,18 @@ do
|
||||
else
|
||||
FSCHEDULED="Yes"
|
||||
fi
|
||||
fi
|
||||
|
||||
# show results
|
||||
if [[ "${FACTION}" != "list" ]]
|
||||
then
|
||||
if (( ARG_LIST_DETAILS > 0 ))
|
||||
then
|
||||
# shellcheck disable=SC1117
|
||||
printf "%-40s\t%-8s\t%s\t%s\t%s\t%s\t%s\n" \
|
||||
printf "%-40s\t%-8s\t%s\t%s\t%s\t%s\n" \
|
||||
"${FNAME#function *}" \
|
||||
"${FSTATE}" \
|
||||
"${FVERSION#typeset _VERSION=*}" \
|
||||
"${FCONFIG}" \
|
||||
"${FSCHEDULED}" \
|
||||
"${FHEALTHY}" \
|
||||
"${FFIX}"
|
||||
else
|
||||
# shellcheck disable=SC1117
|
||||
printf "%-40s\t%-8s\n" \
|
||||
"${FNAME#function *}" \
|
||||
"${FSTATE}"
|
||||
fi
|
||||
"${FHEALTHY}"
|
||||
else
|
||||
# shellcheck disable=SC1117
|
||||
printf "%s\n" "${FNAME#function *}"
|
||||
@ -1948,12 +1694,12 @@ then
|
||||
print "${FPATH}" | tr ':' '\n' 2>/dev/null | grep -v "core" 2>/dev/null | while read -r FDIR
|
||||
do
|
||||
# do not use 'find -type l' here!
|
||||
# shellcheck disable=SC2010,SC1117,SC2086
|
||||
# shellcheck disable=SC2010,SC1117
|
||||
ls ${FDIR} 2>/dev/null | grep -v "\." 2>/dev/null | while read -r FFILE
|
||||
do
|
||||
if [[ -h "${FDIR}/${FFILE}" ]] && [[ ! -f "${FDIR}/${FFILE}" ]]
|
||||
then
|
||||
printf "%s " "${FFILE##*/}"
|
||||
printf "%s " ${FFILE##*/}
|
||||
fi
|
||||
done
|
||||
done
|
||||
@ -1967,18 +1713,10 @@ fi
|
||||
# legend
|
||||
if [[ "${FACTION}" != "list" ]]
|
||||
then
|
||||
if (( ARG_LIST_DETAILS > 0 ))
|
||||
then
|
||||
print
|
||||
print "Config?: plugin has a default configuration file (Yes/No)"
|
||||
print "Sched? : plugin is scheduled through cron (Yes/No)"
|
||||
print "H+? : plugin can choose whether to log/show passed health checks (Yes/No/Supported/Not supported)"
|
||||
print "Fix? : plugin contains fix/healing logic (Yes/No) -- not used by default!"
|
||||
else
|
||||
print
|
||||
print "Tip: use --list-details to see a list of health checks with more details"
|
||||
|
||||
fi
|
||||
fi
|
||||
|
||||
return 0
|
||||
@ -2011,11 +1749,11 @@ printf "%100s\n" | tr ' ' -
|
||||
print "${FPATH}" | tr ':' '\n' 2>/dev/null | grep "core$" 2>/dev/null | sort 2>/dev/null | while read -r FDIR
|
||||
do
|
||||
# exclude core helper librar(y|ies)
|
||||
# shellcheck disable=SC2010,SC2086
|
||||
# shellcheck disable=SC2010
|
||||
ls -1 ${FDIR}/*.sh 2>/dev/null | grep "include_" 2>/dev/null | sort 2>/dev/null | while read -r FFILE
|
||||
do
|
||||
# cache script contents in memory
|
||||
FSCRIPT="$(<${FFILE})"
|
||||
FSCRIPT=$(<${FFILE})
|
||||
|
||||
# find function name
|
||||
FNAME=$(print -R "${FSCRIPT}" | grep -E -e "^function[[:space:]].*version_" 2>/dev/null)
|
||||
@ -2048,12 +1786,12 @@ print -n "Dead links: "
|
||||
print "${FPATH}" | tr ':' '\n' 2>/dev/null | grep "core$" 2>/dev/null | while read -r FDIR
|
||||
do
|
||||
# do not use 'find -type l' here!
|
||||
# shellcheck disable=SC2010,SC1117,SC2086
|
||||
# shellcheck disable=SC2010,SC1117
|
||||
ls ${FDIR} 2>/dev/null | grep -v "\." 2>/dev/null | while read -r FFILE
|
||||
do
|
||||
if [[ -h "${FDIR}/${FFILE}" ]] && [[ ! -f "${FDIR}/${FFILE}" ]]
|
||||
then
|
||||
printf "%s " "${FFILE##*/}"
|
||||
printf "%s " ${FFILE##*/}
|
||||
fi
|
||||
done
|
||||
done
|
||||
@ -2084,7 +1822,7 @@ then
|
||||
then
|
||||
print - "$*" | while read -r LOG_LINE
|
||||
do
|
||||
print "${NOW}: INFO: [$$]:" "${LOG_LINE}" >>"${LOG_FILE}"
|
||||
print "${NOW}: INFO: [$$]:" "${LOG_LINE}" >>${LOG_FILE}
|
||||
done
|
||||
fi
|
||||
if (( ARG_VERBOSE > 0 ))
|
||||
@ -2153,7 +1891,7 @@ fi
|
||||
|
||||
# save the HC failure message for now
|
||||
print "${HC_STC}${MSG_SEP}${HC_NOW}${MSG_SEP}${HC_MSG_TEXT}${MSG_SEP}${HC_MSG_CUR_VAL}${MSG_SEP}${HC_MSG_EXP_VAL}" \
|
||||
>>"${HC_MSG_FILE}"
|
||||
>>${HC_MSG_FILE}
|
||||
|
||||
return 0
|
||||
}
|
||||
@ -2208,14 +1946,13 @@ awk -F"${LOG_SEP}" '{
|
||||
}
|
||||
}
|
||||
}
|
||||
' "${HC_LOG}" 2>/dev/null
|
||||
' ${HC_LOG} 2>/dev/null
|
||||
|
||||
# archived events
|
||||
print; print
|
||||
print -R "--- ARCHIVED events --"
|
||||
print
|
||||
# shellcheck disable=SC2086
|
||||
find ${ARCHIVE_DIR} -type f -name "hc.*.log" 2>/dev/null | sort -rn 2>/dev/null | while read -r _ARCHIVE_FILE
|
||||
find ${ARCHIVE_DIR} -type f -name "hc.*.log" 2>/dev/null | while read -r _ARCHIVE_FILE
|
||||
do
|
||||
print "${_ARCHIVE_FILE}:"
|
||||
awk -F"${LOG_SEP}" '{
|
||||
@ -2251,7 +1988,7 @@ do
|
||||
}
|
||||
}
|
||||
}
|
||||
' "${_ARCHIVE_FILE}" 2>/dev/null
|
||||
' ${_ARCHIVE_FILE} 2>/dev/null
|
||||
done
|
||||
|
||||
return 0
|
||||
@ -2292,7 +2029,7 @@ then
|
||||
then
|
||||
print - "$*" | while read -r LOG_LINE
|
||||
do
|
||||
print "${NOW}: WARN: [$$]:" "${LOG_LINE}" >>"${LOG_FILE}"
|
||||
print "${NOW}: WARN: [$$]:" "${LOG_LINE}" >>${LOG_FILE}
|
||||
done
|
||||
fi
|
||||
if (( ARG_VERBOSE > 0 ))
|
||||
|
@ -30,7 +30,7 @@
|
||||
# RETURNS: 0
|
||||
function version_include_data
|
||||
{
|
||||
typeset _VERSION="2020-12-27" # YYYY-MM-DD
|
||||
typeset _VERSION="2019-04-20" # YYYY-MM-DD
|
||||
|
||||
print "INFO: $0: ${_VERSION#version_*}"
|
||||
|
||||
@ -51,7 +51,7 @@ typeset _PARAMETER="${1}"
|
||||
typeset _LVALUE=""
|
||||
typeset _RC=0
|
||||
|
||||
_LVALUE=$(grep -i "^${_PARAMETER} *=" "${_CONFIG_FILE}" | cut -f2- -d'=')
|
||||
_LVALUE=$(grep -i "^${_PARAMETER} *=" ${_CONFIG_FILE} | cut -f2- -d'=')
|
||||
|
||||
if [[ -n "${_LVALUE}" ]]
|
||||
then
|
||||
@ -196,26 +196,6 @@ done
|
||||
return 0
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: data_has_newline()
# DOES: checks if a string contains newlines
# EXPECTS: $1=haystack [string]
# OUTPUTS: n/a
# RETURNS: 0=no newline found; 1=newlines found
# REQUIRES: n/a
function data_has_newline
{
# read the argument before touching the shell options: 'set' with a value that
# is not an option string would replace the positional parameters
typeset _HAYSTACK="${1}"
# a literal newline, spelled out so that no $'...' quoting support is needed
typeset _NEWLINE='
'

(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"

# pattern match inside the shell: no sub-process and no dependency on the
# output format or availability of an external line counter
case "${_HAYSTACK}" in
    *"${_NEWLINE}"*)
        return 1
        ;;
esac

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) FUNCTION: data_magic_quote()
|
||||
# DOES: magically quotes a needle in a string (default needle is: %)
|
||||
@ -237,7 +217,7 @@ return 0
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) FUNCTION: data_magic_unquote()
|
||||
# DOES: magically unquotes a needle in a string (default needle is: %)
|
||||
# EXPECTS: $1=to be magically unquoted [string]; $2=needle [string]
|
||||
# EXPECTS: to be magically unquoted [string]; $2=needle [string]
|
||||
# OUTPUTS: magically unquoted [string]
|
||||
# RETURNS: n/a
|
||||
# REQUIRES: n/a
|
||||
@ -613,68 +593,6 @@ esac
|
||||
return 0
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: data_expand_numerical_range()
# DOES: expand numerical range (X-Y) to comma-separated list of numbers
# EXPECTS: $1=numerical range [string]
#          $2=flag for leading zeroes <10 [0=do not add (default),1=add]
# OUTPUTS: [string]
# RETURNS: 0=no error occurred; <>0=some error occurred
# REQUIRES: n/a
function data_expand_numerical_range
{
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
typeset _NUM_LIST=""
typeset _HAS_LEAD_ZERO=${2:-0}

case "${1}" in
    *-*)
        # range operator, expand
        # check if there are only 2 operands (fields)
        if (( $(print -R "${1}" | awk -F '-' '{ print NF }' 2>/dev/null) > 2 ))
        then
            (( ARG_DEBUG > 0 )) && debug "in range $1 found more than one range (-) operator"
            return 1
        fi
        # check if X < Y: awk exits non-zero only when X < Y holds, so a
        # zero exit status means the range is empty or reversed
        if print -R "${1}" | awk -F '-' '{ if ($1 < $2) { exit 1 }}' 2>/dev/null
        then
            (( ARG_DEBUG > 0 )) && debug "in range $1 operand Y is smaller than or equal to operand X"
            return 1
        fi
        # expand list
        _NUM_LIST=$(print -R "${1}" | awk -F '-' -v has_lead_zero="${_HAS_LEAD_ZERO}" '
            BEGIN { count = 0; }
            {
                while ($1 + count <= $2) {
                    # add leading zero to sprintf when < 10
                    if (has_lead_zero > 0 && $1 + count < 10) { lead_zero = "0" } else { lead_zero = "" }
                    if (length(NUM_LIST) == 0) {
                        NUM_LIST = sprintf("%" lead_zero "2d", $1 + count);
                    } else {
                        NUM_LIST = sprintf("%s,%" lead_zero "2d", NUM_LIST, $1 + count);
                    }
                    count++;
                }
            }
            # the %2d conversions pad with blanks: strip them before printing
            END { gsub(/[[:space:]]/, "", NUM_LIST); print NUM_LIST; }')
        if [[ -z "${_NUM_LIST}" ]]
        then
            (( ARG_DEBUG > 0 )) && debug "range conversion returned empty list"
            return 1
        else
            print "${_NUM_LIST}"
        fi
        ;;
    *)
        # no range, return as-is
        print "${1}"
        ;;
esac

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) FUNCTION: data_encode_url
|
||||
# DOES: encode URL data
|
||||
@ -937,7 +855,6 @@ print ${_EPOCH}
|
||||
# REQUIRES: n/a
|
||||
function data_epoch2date
|
||||
{
|
||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
|
||||
typeset _UNIX_EPOCH="${1}"
|
||||
typeset _CONVERT_DATE=""
|
||||
|
||||
@ -963,154 +880,6 @@ fi
|
||||
return 0
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: data_timestring_to_days()
# DOES: converts a specific timestring to days (rounded to integer)
# EXPECTS: time definition with a single s|m|h|d suffix [string]
# OUTPUTS: time value in days [string]
# RETURNS: 0=conversion OK; 1=conversion failed
# REQUIRES: n/a
function data_timestring_to_days
{
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
typeset _DAYS=""
# the time value is everything in front of the one-character unit suffix
typeset _VALUE="${1%?}"

# a bare suffix (or an empty argument) cannot be converted
if [[ -z "${_VALUE}" ]]
then
    (( ARG_DEBUG > 0 )) && debug "no time value specified in front of the suffix"
    return 1
fi

case "${1}" in
    *s|*S)
        _DAYS=$(( ${_VALUE} / 60 / 60 / 24 ))
        ;;
    *m|*M)
        _DAYS=$(( ${_VALUE} / 60 / 24 ))
        ;;
    *h|*H)
        _DAYS=$(( ${_VALUE} / 24 ))
        ;;
    *d|*D)
        # already expressed in days, pass through unchanged
        _DAYS="${_VALUE}"
        ;;
    *)
        # invalid suffix specified
        (( ARG_DEBUG > 0 )) && debug "invalid time suffix specified (only s[econds]|m[inutes]|h[ours]|d[days] allowed)"
        return 1
        ;;
esac

print "${_DAYS}"

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: data_timestring_to_hours()
# DOES: converts a specific timestring to hours (rounded to integer)
# EXPECTS: time definition with a single s|m|h|d suffix [string]
# OUTPUTS: time value in hours [string]
# RETURNS: 0=conversion OK; 1=conversion failed
# REQUIRES: n/a
function data_timestring_to_hours
{
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
typeset _HOURS=""
# the time value is everything in front of the one-character unit suffix
typeset _VALUE="${1%?}"

# a bare suffix (or an empty argument) cannot be converted
if [[ -z "${_VALUE}" ]]
then
    (( ARG_DEBUG > 0 )) && debug "no time value specified in front of the suffix"
    return 1
fi

case "${1}" in
    *s|*S)
        # seconds -> minutes -> hours (3600 seconds per hour)
        _HOURS=$(( ${_VALUE} / 60 / 60 ))
        ;;
    *m|*M)
        _HOURS=$(( ${_VALUE} / 60 ))
        ;;
    *h|*H)
        # already expressed in hours, pass through unchanged
        _HOURS="${_VALUE}"
        ;;
    *d|*D)
        _HOURS=$(( ${_VALUE} * 24 ))
        ;;
    *)
        # invalid suffix specified
        (( ARG_DEBUG > 0 )) && debug "invalid time suffix specified (only s[econds]|m[inutes]|h[ours]|d[days] allowed)"
        return 1
        ;;
esac

print "${_HOURS}"

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: data_timestring_to_mins()
# DOES: converts a specific timestring to minutes (rounded to integer)
# EXPECTS: time definition with a single s|m|h|d suffix [string]
# OUTPUTS: time value in minutes [string]
# RETURNS: 0=conversion OK; 1=conversion failed
# REQUIRES: n/a
function data_timestring_to_mins
{
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
typeset _MINUTES=""
# the time value is everything in front of the one-character unit suffix
typeset _VALUE="${1%?}"

# a bare suffix (or an empty argument) cannot be converted
if [[ -z "${_VALUE}" ]]
then
    (( ARG_DEBUG > 0 )) && debug "no time value specified in front of the suffix"
    return 1
fi

case "${1}" in
    *s|*S)
        _MINUTES=$(( ${_VALUE} / 60 ))
        ;;
    *m|*M)
        # already expressed in minutes, pass through unchanged
        _MINUTES="${_VALUE}"
        ;;
    *h|*H)
        _MINUTES=$(( ${_VALUE} * 60 ))
        ;;
    *d|*D)
        _MINUTES=$(( ${_VALUE} * 60 * 24 ))
        ;;
    *)
        # invalid suffix specified
        (( ARG_DEBUG > 0 )) && debug "invalid time suffix specified (only s[econds]|m[inutes]|h[ours]|d[days] allowed)"
        return 1
        ;;
esac

print "${_MINUTES}"

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: data_timestring_to_secs()
# DOES: converts a specific timestring to seconds (rounded to integer)
# EXPECTS: time definition with a single s|m|h|d suffix [string]
# OUTPUTS: time value in seconds [string]
# RETURNS: 0=conversion OK; 1=conversion failed
# REQUIRES: n/a
function data_timestring_to_secs
{
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
typeset _SECONDS=""
# the time value is everything in front of the one-character unit suffix
typeset _VALUE="${1%?}"

# a bare suffix (or an empty argument) cannot be converted
if [[ -z "${_VALUE}" ]]
then
    (( ARG_DEBUG > 0 )) && debug "no time value specified in front of the suffix"
    return 1
fi

case "${1}" in
    *s|*S)
        # already expressed in seconds, pass through unchanged
        _SECONDS="${_VALUE}"
        ;;
    *m|*M)
        _SECONDS=$(( ${_VALUE} * 60 ))
        ;;
    *h|*H)
        _SECONDS=$(( ${_VALUE} * 60 * 60 ))
        ;;
    *d|*D)
        _SECONDS=$(( ${_VALUE} * 60 * 60 * 24 ))
        ;;
    *)
        # invalid suffix specified
        (( ARG_DEBUG > 0 )) && debug "invalid time suffix specified (only s[econds]|m[inutes]|h[ours]|d[days] allowed)"
        return 1
        ;;
esac

print "${_SECONDS}"

return 0
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
||||
|
@ -1,85 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#******************************************************************************
|
||||
# @(#) include_exadata.sh
|
||||
#******************************************************************************
|
||||
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is a free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#******************************************************************************
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: include_exadata
|
||||
# DOES: helper functions for Exadata related functions
|
||||
#
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: version_include_exadata()
# DOES: dummy function for version placeholder
# EXPECTS: n/a
# OUTPUTS: "INFO: <$0>: <version date>" on STDOUT
# RETURNS: 0
function version_include_exadata
{
typeset _VERSION="2019-05-14"                               # YYYY-MM-DD

# NOTE(review): the '#version_*' prefix strip is a no-op on a date string;
# kept as-is so the output matches the other version_include_* functions
print "INFO: $0: ${_VERSION#version_*}"

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: exadata_exec_dcli()
# DOES: execute a command via dcli
# EXPECTS: 1=options [string], 2=user [string], 3=host(s) [string],
#          4=SSH options [string], 5=command [string]
# RETURNS: exit code of remote command; 255 when user, host(s) or command is
#          missing or when dcli cannot be found
# OUTPUTS: STDOUT from DCLI call
# REQUIRES: dcli command-line utility, warn()
function exadata_exec_dcli
{
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
typeset _DCLI_OPTS="${1}"
typeset _DCLI_USER="${2}"
typeset _DCLI_HOSTS="${3}"
typeset _SSH_OPTS="${4}"
typeset _DCLI_COMMAND="${5}"
typeset _DCLI_BIN=""
typeset _DCLI_RC=0

if [[ -z "${_DCLI_USER}" || -z "${_DCLI_HOSTS}" || -z "${_DCLI_COMMAND}" ]]
then
    return 255
fi

# find dcli
_DCLI_BIN="$(command -v dcli 2>>"${HC_STDERR_LOG}")"
if [[ -z "${_DCLI_BIN}" || ! -x "${_DCLI_BIN}" ]]
then
    # don't spoil STDOUT
    ARG_VERBOSE=0 warn "could not determine location for {dcli} (or it is not installed here)"
    return 255
fi

# execute dcli; ${_DCLI_OPTS} is split on purpose (it may hold several
# switches), whereas the SSH options must reach '-s' as ONE argument
# shellcheck disable=SC2086
if [[ -z "${_SSH_OPTS}" ]]
then
    "${_DCLI_BIN}" ${_DCLI_OPTS} -l "${_DCLI_USER}" -c "${_DCLI_HOSTS}" "${_DCLI_COMMAND}" 2>>"${HC_STDERR_LOG}" </dev/null
    _DCLI_RC=$?
else
    "${_DCLI_BIN}" ${_DCLI_OPTS} -l "${_DCLI_USER}" -c "${_DCLI_HOSTS}" -s "${_SSH_OPTS}" "${_DCLI_COMMAND}" 2>>"${HC_STDERR_LOG}" </dev/null
    _DCLI_RC=$?
fi

return ${_DCLI_RC}
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
@ -16,7 +16,7 @@
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: include_os
|
||||
# @(#) MAIN: include_OS
|
||||
# DOES: helper functions for OS related functions
|
||||
#
|
||||
# -----------------------------------------------------------------------------
|
||||
@ -30,107 +30,13 @@
|
||||
# RETURNS: 0
|
||||
function version_include_os
|
||||
{
|
||||
typeset _VERSION="2019-07-14" # YYYY-MM-DD
|
||||
typeset _VERSION="2019-03-16" # YYYY-MM-DD
|
||||
|
||||
print "INFO: $0: ${_VERSION#version_*}"
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: linux_change_service()
# DOES: stops, starts or restarts a specific service
# EXPECTS: $1=name of service [string]; $2=action [stop/start/restart]
# OUTPUTS: n/a
# RETURNS: 0=success; 1=error
# REQUIRES: linux_get_init(), linux_has_systemd_service()
function linux_change_service
{
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
typeset _SERVICE="${1}"
typeset _ACTION="${2}"
typeset _CHECK_SYSTEMD_SERVICE=0

# linux only
check_platform 'Linux' || {
    (( ARG_DEBUG > 0 )) && debug "may only run on platform(s): Linux"
    return 1
}

# check action; the accepted upper/mixed-case spellings are normalised to
# lowercase because that is the form handed to systemctl/service below
case "${_ACTION}" in
    start|START|Start)
        _ACTION="start"
        (( ARG_DEBUG > 0 )) && debug "requesting service start"
        ;;
    stop|STOP|Stop)
        _ACTION="stop"
        (( ARG_DEBUG > 0 )) && debug "requesting service stop"
        ;;
    restart|RESTART|Restart)
        _ACTION="restart"
        (( ARG_DEBUG > 0 )) && debug "requesting service restart"
        ;;
    *)
        (( ARG_DEBUG > 0 )) && debug "requesting unknown service action"
        return 1
        ;;
esac

[[ -n "${LINUX_INIT}" ]] || linux_get_init
case "${LINUX_INIT}" in
    'systemd')
        (( ARG_DEBUG > 0 )) && debug "is a systemd managed host"
        _CHECK_SYSTEMD_SERVICE=$(linux_has_systemd_service "${_SERVICE}")
        if (( _CHECK_SYSTEMD_SERVICE > 0 ))
        then
            systemctl --quiet "${_ACTION}" "${_SERVICE}" >>"${HC_STDOUT_LOG}" 2>>"${HC_STDERR_LOG}"
            return $?
        else
            warn "systemd unit file not found {${_SERVICE}}"
            return 1
        fi
        ;;
    'upstart')
        (( ARG_DEBUG > 0 )) && debug "is an upstart managed host"
        warn "code for upstart managed systems not implemented, NOOP"
        return 1
        ;;
    'sysv')
        (( ARG_DEBUG > 0 )) && debug "is a sysv managed host"
        service "${_SERVICE}" "${_ACTION}" >>"${HC_STDOUT_LOG}" 2>>"${HC_STDERR_LOG}"
        return $?
        ;;
    *)
        (( ARG_DEBUG > 0 )) && debug "unknown init system for this host?"
        return 1
        ;;
esac

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: linux_exec_ssh()
# DOES: execute a shell command remotely via SSH
# EXPECTS: 1=options [string], 2=user [string], 3=host [string], 4=command [string]
# RETURNS: exit code of remote command; 255 when user, host or command is missing
# OUTPUTS: STDOUT from SSH call
# REQUIRES: ssh command-line utility
function linux_exec_ssh
{
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
typeset _SSH_OPTS="${1}"
typeset _SSH_USER="${2}"
typeset _SSH_HOST="${3}"
typeset _SSH_COMMAND="${4}"

if [[ -z "${_SSH_USER}" || -z "${_SSH_HOST}" || -z "${_SSH_COMMAND}" ]]
then
    return 255
fi

# ${_SSH_OPTS} is split on purpose (it may hold several switches); the
# command is passed as one quoted word so that wildcards and spacing are
# interpreted on the remote side instead of being expanded locally
# shellcheck disable=SC2086
ssh ${_SSH_OPTS} -l "${_SSH_USER}" "${_SSH_HOST}" "${_SSH_COMMAND}" 2>>"${HC_STDERR_LOG}" </dev/null

return $?
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) FUNCTION: linux_get_distro()
|
||||
# DOES: get Linux distribution name & version, sets $LINUX_DISTRO & $LINUX_RELEASE
|
||||
@ -212,75 +118,12 @@ elif [[ -r /usr/share/upstart ]]
|
||||
then
|
||||
# shellcheck disable=SC2034
|
||||
LINUX_INIT="upstart"
|
||||
|
||||
fi
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: linux_has_service()
# DOES: check if a specific service is present and/or enabled
# EXPECTS: name of service [string]
# OUTPUTS: 0=not present; 1=present (not enabled); 2=present (enabled)
# RETURNS: 0=success; 1=error
# REQUIRES: linux_get_init(), linux_has_systemd_service()
function linux_has_service
{
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
typeset _SERVICE="${1}"
typeset _HAS_SERVICE=0
typeset _CHECK_SYSTEMD_SERVICE=0

# linux only
check_platform 'Linux' || {
    warn "may only run on platform(s): Linux"
    return 1
}

[[ -n "${LINUX_INIT}" ]] || linux_get_init
case "${LINUX_INIT}" in
    'systemd')
        (( ARG_DEBUG > 0 )) && debug "is a systemd managed host"
        _CHECK_SYSTEMD_SERVICE=$(linux_has_systemd_service "${_SERVICE}")
        if (( _CHECK_SYSTEMD_SERVICE > 0 ))
        then
            # unit exists: is-enabled decides between 1 and 2
            if systemctl --quiet is-enabled "${_SERVICE}" >>"${HC_STDOUT_LOG}" 2>>"${HC_STDERR_LOG}"
            then
                _HAS_SERVICE=2
            else
                _HAS_SERVICE=1
            fi
        else
            warn "systemd unit file not found {${_SERVICE}}"
            return 1
        fi
        ;;
    'upstart')
        (( ARG_DEBUG > 0 )) && debug "is an upstart managed host"
        warn "code for upstart managed systems not implemented, NOOP"
        return 1
        ;;
    'sysv')
        (( ARG_DEBUG > 0 )) && debug "is a sysv managed host"
        # 'chkconfig <name>' succeeds when the service is configured to start
        # in the current runlevel, i.e. it is present and enabled
        # NOTE(review): a failure cannot tell 'absent' from 'disabled' here,
        # so it is reported as 0 -- confirm whether callers need value 1
        if chkconfig "${_SERVICE}" >>"${HC_STDOUT_LOG}" 2>>"${HC_STDERR_LOG}"
        then
            _HAS_SERVICE=2
        else
            _HAS_SERVICE=0
        fi
        ;;
    *)
        (( ARG_DEBUG > 0 )) && debug "unknown init system for this host?"
        return 1
        ;;
esac

print ${_HAS_SERVICE}

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) FUNCTION: linux_has_crm()
|
||||
# DOES: check if Corosync (CRM version) is running
|
||||
@ -406,70 +249,30 @@ return ${_RC}
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) FUNCTION: linux_runs_service()
|
||||
# DOES: check if a specific service is running (active)
|
||||
# EXPECTS: name of service [string]
|
||||
# OUTPUTS: 0=not running/not active; 1=running/active
|
||||
# RETURNS: 0=success; 1=error
|
||||
# REQUIRES: linux_get_init(), linux_has_systemd_service()
|
||||
function linux_runs_service
|
||||
# @(#) FUNCTION: linux_exec_ssh()
|
||||
# DOES: execute a shell command remotely via SSH
|
||||
# EXPECTS: 1=options [string], 2=user [string], 3=host [string], 4=command [string]
|
||||
# RETURNS: exit code of remote command
|
||||
# OUTPUTS: STDOUT from SSH call
|
||||
# REQUIRES: ssh command-line utility
|
||||
function linux_exec_ssh
|
||||
{
|
||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
|
||||
typeset _SERVICE="${1}"
|
||||
typeset _RUNS_SERVICE=0
|
||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
|
||||
typeset _SSH_OPTS="${1}"
|
||||
typeset _SSH_USER="${2}"
|
||||
typeset _SSH_HOST="${3}"
|
||||
typeset _SSH_COMMAND="${4}"
|
||||
|
||||
# linux only
|
||||
check_platform 'Linux' || {
|
||||
(( ARG_DEBUG > 0 )) && debug "may only run on platform(s): Linux"
|
||||
return 1
|
||||
if [[ -z "${_SSH_USER}" || -z "${_SSH_HOST}" || -z "${_SSH_COMMAND}" ]]
|
||||
then
|
||||
return 255
|
||||
fi
|
||||
# shellcheck disable=SC2086
|
||||
ssh ${_SSH_OPTS} -l ${_SSH_USER} ${_SSH_HOST} ${_SSH_COMMAND} 2>>${HC_STDERR_LOG} </dev/null
|
||||
|
||||
return $?
|
||||
}
|
||||
|
||||
[[ -n "${LINUX_INIT}" ]] || linux_get_init
|
||||
case "${LINUX_INIT}" in
|
||||
'systemd')
|
||||
(( ARG_DEBUG > 0 )) && debug "is a systemd managed host"
|
||||
_CHECK_SYSTEMD_SERVICE=$(linux_has_systemd_service "${_SERVICE}")
|
||||
if (( _CHECK_SYSTEMD_SERVICE > 0 ))
|
||||
then
|
||||
systemctl --quiet is-active ${_SERVICE} >>${HC_STDOUT_LOG} 2>>${HC_STDERR_LOG}
|
||||
if (( $? > 0 ))
|
||||
then
|
||||
_RUNS_SERVICE=0
|
||||
else
|
||||
_RUNS_SERVICE=1
|
||||
fi
|
||||
else
|
||||
warn "systemd unit file not found {${_SERVICE}}"
|
||||
return 1
|
||||
fi
|
||||
;;
|
||||
'upstart')
|
||||
(( ARG_DEBUG > 0 )) && debug "is an upstart managed host"
|
||||
warn "code for upstart managed systems not implemented, NOOP"
|
||||
return 1
|
||||
;;
|
||||
'sysv')
|
||||
(( ARG_DEBUG > 0 )) && debug "is a sysv managed host"
|
||||
service ${_SERVICE} status >>${HC_STDOUT_LOG} 2>>${HC_STDERR_LOG}
|
||||
if (( $? > 0 ))
|
||||
then
|
||||
_RUNS_SERVICE=0
|
||||
else
|
||||
_RUNS_SERVICE=1
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
(( ARG_DEBUG > 0 )) && debug "unknown init system for this host?"
|
||||
return 1
|
||||
;;
|
||||
esac
|
||||
|
||||
print ${_RUNS_SERVICE}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
||||
|
@ -30,7 +30,7 @@
|
||||
function notify_mail
|
||||
{
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
typeset _VERSION="2022-02-16" # YYYY-MM-DD
|
||||
typeset _VERSION="2019-04-20" # YYYY-MM-DD
|
||||
typeset _SUPPORTED_PLATFORMS="AIX,HP-UX,Linux" # uname -s match
|
||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||
|
||||
@ -58,7 +58,6 @@ typeset _MAIL_ATTACH_BIT=""
|
||||
typeset _MAIL_METHOD=""
|
||||
typeset _MAIL_RC=0
|
||||
typeset _MAILX_BIN=""
|
||||
typeset _MAILX_ATTACH_SWITCH=""
|
||||
typeset _MUTT_BIN=""
|
||||
typeset _SENDMAIL_BIN=""
|
||||
typeset _UUENCODE_BIN=""
|
||||
@ -91,17 +90,6 @@ case "${OS_NAME}" in
|
||||
if [[ -x ${_MAILX_BIN} ]] && [[ -n "${_MAILX_BIN}" ]]
|
||||
then
|
||||
_MAIL_METHOD="mailx"
|
||||
# check which switch we need for attaching a file:
|
||||
# Debian/Ubuntu: -A file
|
||||
# Redhat (Heirloom version): -a file
|
||||
${_MAILX_BIN} -h 2>&1 | grep -q -e "-a FILE" >/dev/null 2>/dev/null
|
||||
# shellcheck disable=SC2181
|
||||
if (( $? == 0 ))
|
||||
then
|
||||
_MAILX_ATTACH_SWITCH="-a "
|
||||
else
|
||||
_MAILX_ATTACH_SWITCH="-A "
|
||||
fi
|
||||
else
|
||||
_MAIL_METHOD="sendmail"
|
||||
fi
|
||||
@ -134,18 +122,18 @@ then
|
||||
[[ -r "${_MAIL_INFO_TPL}" ]] || die "cannot read mail info template at ${_MAIL_INFO_TPL}"
|
||||
eval "cat << __EOT
|
||||
$(sed 's/[\$`]/\\&/g;s/<## @\([^ ]*\) ##>/${\1}/g' <${_MAIL_INFO_TPL})
|
||||
__EOT" >"${_TMP1_MAIL_FILE}"
|
||||
__EOT" >${_TMP1_MAIL_FILE}
|
||||
fi
|
||||
|
||||
# create header part
|
||||
[[ -r "${_MAIL_HEADER_TPL}" ]] || die "cannot read mail header template at ${_MAIL_HEADER_TPL}"
|
||||
eval "cat << __EOT
|
||||
$(sed 's/[\$`]/\\&/g;s/<## @\([^ ]*\) ##>/${\1}/g' <${_MAIL_HEADER_TPL})
|
||||
__EOT" >>"${_TMP1_MAIL_FILE}"
|
||||
print "" >>"${_TMP1_MAIL_FILE}"
|
||||
__EOT" >>${_TMP1_MAIL_FILE}
|
||||
print "" >>${_TMP1_MAIL_FILE}
|
||||
|
||||
# create body part (from $HC_MSG_VAR)
|
||||
print "${HC_MSG_VAR}" | while IFS=${MSG_SEP} read -r _MAIL_MSG_STC _ _MAIL_MSG_TEXT _MAIL_MSG_CUR_VAL _MAIL_MSG_EXP_VAL
|
||||
print "${HC_MSG_VAR}" | while IFS=${MSG_SEP} read _MAIL_MSG_STC _ _MAIL_MSG_TEXT _MAIL_MSG_CUR_VAL _MAIL_MSG_EXP_VAL
|
||||
do
|
||||
# magically unquote if needed
|
||||
if [[ -n "${_MAIL_MSG_TEXT}" ]]
|
||||
@ -187,8 +175,8 @@ done
|
||||
[[ -r "${_MAIL_BODY_TPL}" ]] || die "cannot read mail body template at ${_MAIL_BODY_TPL}"
|
||||
eval "cat << __EOT
|
||||
$(sed 's/[\$`]/\\&/g;s/<## @\([^ ]*\) ##>/${\1}/g' <${_MAIL_BODY_TPL})
|
||||
__EOT" >>"${_TMP1_MAIL_FILE}"
|
||||
print "" >>"${_TMP1_MAIL_FILE}"
|
||||
__EOT" >>${_TMP1_MAIL_FILE}
|
||||
print "" >>${_TMP1_MAIL_FILE}
|
||||
|
||||
# HC STDOUT log? (drop the .$$ bit)
|
||||
_MAIL_STDOUT_LOG="${EVENTS_DIR}/${DIR_PREFIX}/${_MAIL_FAIL_ID}/${_HC_STDOUT_LOG_SHORT%.*}"
|
||||
@ -196,14 +184,7 @@ if [[ -s "${_MAIL_STDOUT_LOG}" ]]
|
||||
then
|
||||
# shellcheck disable=SC2034
|
||||
_MAIL_STDOUT_MSG="${_MAIL_STDOUT_LOG}"
|
||||
case "${_MAIL_METHOD}" in
|
||||
"mailx")
|
||||
_MAIL_ATTACH_BIT="${_MAILX_ATTACH_SWITCH} ${_MAIL_STDOUT_LOG}"
|
||||
;;
|
||||
*)
|
||||
_MAIL_ATTACH_BIT="-a ${_MAIL_STDOUT_LOG}"
|
||||
;;
|
||||
esac
|
||||
else
|
||||
# shellcheck disable=SC2034
|
||||
_MAIL_STDOUT_MSG="no log file available"
|
||||
@ -214,14 +195,7 @@ if [[ -s "${_MAIL_STDERR_LOG}" ]]
|
||||
then
|
||||
# shellcheck disable=SC2034
|
||||
_MAIL_STDERR_MSG="${_MAIL_STDERR_LOG}"
|
||||
case "${_MAIL_METHOD}" in
|
||||
"mailx")
|
||||
_MAIL_ATTACH_BIT="${_MAIL_ATTACH_BIT} ${_MAILX_ATTACH_SWITCH} ${_MAIL_STDERR_LOG}"
|
||||
;;
|
||||
*)
|
||||
_MAIL_ATTACH_BIT="${_MAIL_ATTACH_BIT} -a ${_MAIL_STDERR_LOG}"
|
||||
;;
|
||||
esac
|
||||
else
|
||||
# shellcheck disable=SC2034
|
||||
_MAIL_STDERR_MSG="no log file available"
|
||||
@ -231,29 +205,28 @@ fi
|
||||
[[ -r ${_MAIL_FOOTER_TPL} ]] || die "cannot read mail body template at ${_MAIL_FOOTER_TPL}"
|
||||
eval "cat << __EOT
|
||||
$(sed 's/[\$`]/\\&/g;s/<## @\([^ ]*\) ##>/${\1}/g' <${_MAIL_FOOTER_TPL})
|
||||
__EOT" >>"${_TMP1_MAIL_FILE}"
|
||||
__EOT" >>${_TMP1_MAIL_FILE}
|
||||
|
||||
# combine and send message components
|
||||
case "${_MAIL_METHOD}" in
|
||||
"mailx")
|
||||
# remove non-ASCII characters to avoid Exchange ATT00001.bin
|
||||
# shellcheck disable=SC2086
|
||||
tr -cd '[:print:]\n' < "${_TMP1_MAIL_FILE}" 2>/dev/null |\
|
||||
cat ${_TMP1_MAIL_FILE} | tr -cd '[:print:]\n' 2>/dev/null |\
|
||||
${_MAILX_BIN} ${_MAIL_ATTACH_BIT} -s "${_SUBJ_MSG}" "${ARG_MAIL_TO}"
|
||||
_MAIL_RC=$?
|
||||
;;
|
||||
"mutt")
|
||||
# attach bit goes at the end
|
||||
# shellcheck disable=SC2086
|
||||
${_MUTT_BIN} -s "${_SUBJ_MSG}" "${ARG_MAIL_TO}" ${_MAIL_ATTACH_BIT} <"${_TMP1_MAIL_FILE}"
|
||||
cat ${_TMP1_MAIL_FILE} 2>/dev/null |\
|
||||
${_MUTT_BIN} -s "${_SUBJ_MSG}" "${ARG_MAIL_TO}" ${_MAIL_ATTACH_BIT}
|
||||
_MAIL_RC=$?
|
||||
;;
|
||||
"sendmail")
|
||||
[[ -s "${_MAIL_STDOUT_LOG}" ]] && \
|
||||
uuencode "${_MAIL_STDOUT_LOG}" stdout.log >>"${_TMP2_MAIL_FILE}" 2>/dev/null
|
||||
uuencode ${_MAIL_STDOUT_LOG} stdout.log >>${_TMP2_MAIL_FILE} 2>/dev/null
|
||||
[[ -s "${_MAIL_STDERR_LOG}" ]] && \
|
||||
uuencode "${_MAIL_STDERR_LOG}" stderr.log >>"${_TMP2_MAIL_FILE}" 2>/dev/null
|
||||
cat "${_TMP1_MAIL_FILE}" "${_TMP2_MAIL_FILE}" 2>/dev/null | ${_SENDMAIL_BIN} -t
|
||||
uuencode ${_MAIL_STDERR_LOG} stderr.log >>${_TMP2_MAIL_FILE} 2>/dev/null
|
||||
cat ${_TMP1_MAIL_FILE} ${_TMP2_MAIL_FILE} 2>/dev/null | ${_SENDMAIL_BIN} -t
|
||||
_MAIL_RC=$?
|
||||
;;
|
||||
*)
|
||||
@ -269,8 +242,8 @@ else
|
||||
fi
|
||||
|
||||
# clean up temporary files
|
||||
[[ -f ${_TMP1_MAIL_FILE} ]] && rm -f "${_TMP1_MAIL_FILE}" >/dev/null 2>&1
|
||||
[[ -f ${_TMP2_MAIL_FILE} ]] && rm -f "${_TMP2_MAIL_FILE}" >/dev/null 2>&1
|
||||
[[ -f ${_TMP1_MAIL_FILE} ]] && rm -f ${_TMP1_MAIL_FILE} >/dev/null 2>&1
|
||||
[[ -f ${_TMP2_MAIL_FILE} ]] && rm -f ${_TMP2_MAIL_FILE} >/dev/null 2>&1
|
||||
|
||||
return 0
|
||||
}
|
||||
|
@ -1,135 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#******************************************************************************
|
||||
# @(#) notify_slack.sh
|
||||
#******************************************************************************
|
||||
# @(#) Copyright (C) 2022 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is a free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#******************************************************************************
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: notify_slack
|
||||
# DOES: send message to slack app
|
||||
# EXPECTS: 1=HC name [string], 2=HC FAIL_ID [string]
|
||||
# RETURNS: 0
|
||||
# REQUIRES: data_contains_string(), data_get_lvalue_from_config(), data_magic_unquote(),
|
||||
# init_hc(), log(), warn(), curl
|
||||
#
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
# @(#) FUNCTION: notify_slack()
# DOES: posts a "health check failed" message to a Slack incoming webhook
# EXPECTS: 1=HC name [string], 2=HC FAIL_ID [string]
# RETURNS: 0=message handed over to Slack; 1=configuration or delivery problem
# REQUIRES: data_contains_string(), data_get_lvalue_from_config(),
#           data_magic_unquote(), init_hc(), log(), warn(), die(), curl
function notify_slack
{
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/core/providers/$0.conf"
typeset _VERSION="2022-10-14"                               # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="AIX,HP-UX,Linux"              # uname -s match
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"

typeset _SLACK_HC="$1"
typeset _SLACK_FAIL_ID="$2"

typeset _SLACK_TEXT=""
# declared locally so that text from an earlier call cannot leak into this one
typeset _SLACK_BODY=""
typeset _SLACK_JSON=""
typeset _SLACK_PAYLOAD=""
typeset _SLACK_MSG_STC=""
typeset _SLACK_MSG_TEXT=""
typeset _SLACK_MSG_CUR_VAL=""
typeset _SLACK_MSG_EXP_VAL=""
typeset _CURL_BIN=""
typeset _CURL_RC=0
typeset _SLACK_WEBHOOK=""

# handle config file
if [[ ! -r "${_CONFIG_FILE}" ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read required config values
_SLACK_WEBHOOK=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'SLACK_WEBHOOK')
if [[ -z "${_SLACK_WEBHOOK}" ]]
then
    warn "no value set for 'SLACK_WEBHOOK' in ${_CONFIG_FILE}"
    return 1
fi

# create header part
_SLACK_TEXT="${EXEC_USER}@${HOST_NAME}: HC ${_SLACK_HC} failed, FAIL_ID=${_SLACK_FAIL_ID}"

# create body part (from $HC_MSG_VAR)
# NOTE: relies on ksh running the last stage of a pipeline in the current
# shell, otherwise the text collected in _SLACK_BODY is lost after 'done'
print "${HC_MSG_VAR}" | while IFS=${MSG_SEP} read -r _SLACK_MSG_STC _ _SLACK_MSG_TEXT _SLACK_MSG_CUR_VAL _SLACK_MSG_EXP_VAL
do
    # magically unquote if needed (a non-zero status from
    # data_contains_string() is treated as "needle present")
    if [[ -n "${_SLACK_MSG_TEXT}" ]]
    then
        data_contains_string "${_SLACK_MSG_TEXT}" "${MAGIC_QUOTE}"
        # shellcheck disable=SC2181
        if (( $? > 0 ))
        then
            _SLACK_MSG_TEXT=$(data_magic_unquote "${_SLACK_MSG_TEXT}")
        fi
    fi
    if [[ -n "${_SLACK_MSG_CUR_VAL}" ]]
    then
        data_contains_string "${_SLACK_MSG_CUR_VAL}" "${MAGIC_QUOTE}"
        # shellcheck disable=SC2181
        if (( $? > 0 ))
        then
            _SLACK_MSG_CUR_VAL=$(data_magic_unquote "${_SLACK_MSG_CUR_VAL}")
        fi
    fi
    if [[ -n "${_SLACK_MSG_EXP_VAL}" ]]
    then
        data_contains_string "${_SLACK_MSG_EXP_VAL}" "${MAGIC_QUOTE}"
        # shellcheck disable=SC2181
        if (( $? > 0 ))
        then
            _SLACK_MSG_EXP_VAL=$(data_magic_unquote "${_SLACK_MSG_EXP_VAL}")
        fi
    fi
    # only messages with a non-zero status code end up in the alert
    if (( _SLACK_MSG_STC > 0 ))
    then
        # shellcheck disable=SC1117
        _SLACK_BODY=$(printf "%s\n%s\n" "${_SLACK_BODY}" "${_SLACK_MSG_TEXT}")
    fi
done

# send message
# find 'curl'
_CURL_BIN="$(command -v curl 2>/dev/null)"
if [[ -z "${_CURL_BIN}" || ! -x "${_CURL_BIN}" ]]
then
    die "unable to send message to Slack - curl is not installed here"
fi

# build the JSON payload: header, blank line, body. Double quotes and
# backslashes are escaped and line feeds are turned into \n so that the
# message text cannot break the JSON document
_SLACK_JSON=$(print -R "${_SLACK_TEXT}

${_SLACK_BODY}" | sed 's/["\\]/\\&/g' | awk 'NR > 1 { printf "%s", "\\n" } { printf "%s", $0 }')
_SLACK_PAYLOAD='payload={"text": "'"${_SLACK_JSON}"'" }'

if (( ARG_DEBUG == 0 ))
then
    "${_CURL_BIN}" --silent --data-urlencode "${_SLACK_PAYLOAD}" "${_SLACK_WEBHOOK}" >/dev/null 2>&1
    _CURL_RC=$?
else
    # debug mode: let curl report on STDOUT/STDERR
    "${_CURL_BIN}" --data-urlencode "${_SLACK_PAYLOAD}" "${_SLACK_WEBHOOK}"
    _CURL_RC=$?
fi

# do not claim success when curl could not deliver the message
if (( _CURL_RC > 0 ))
then
    warn "unable to send message to Slack - curl failed with RC=${_CURL_RC}"
    return 1
fi

log "Slack alert sent: ${_SLACK_HC} failed, FAIL_ID=${_SLACK_FAIL_ID}"

return 0
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
@ -20,8 +20,7 @@
|
||||
# DOES: send sms alert
|
||||
# EXPECTS: 1=HC name [string], 2=HC FAIL_ID [string]
|
||||
# RETURNS: 0
|
||||
# REQUIRES: data_encode_url(), data_get_lvalue_from_config(), init_hc(),
|
||||
# log(), warn(), curl
|
||||
# REQUIRES: data_get_lvalue_from_config(), init_hc(), log(), warn()
|
||||
#
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
@ -32,7 +31,7 @@ function notify_sms
|
||||
{
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
typeset _CONFIG_FILE="${CONFIG_DIR}/core/providers/$0.conf"
|
||||
typeset _VERSION="2022-10-14" # YYYY-MM-DD
|
||||
typeset _VERSION="2018-10-28" # YYYY-MM-DD
|
||||
typeset _SUPPORTED_PLATFORMS="AIX,HP-UX,Linux" # uname -s match
|
||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||
|
||||
|
@ -30,7 +30,7 @@
|
||||
function report_std
|
||||
{
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
typeset _VERSION="2020-04-08" # YYYY-MM-DD
|
||||
typeset _VERSION="2019-03-16" # YYYY-MM-DD
|
||||
typeset _SUPPORTED_PLATFORMS="AIX,HP-UX,Linux" # uname -s match
|
||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||
|
||||
@ -59,14 +59,8 @@ typeset _OLDER_MONTH=""
|
||||
typeset _OLDER_YEAR=""
|
||||
typeset _NEWER_MONTH=""
|
||||
typeset _NEWER_YEAR=""
|
||||
typeset _HC_REPORT_CACHE_LAST_STUB="${STATE_PERM_DIR}/cache.report-last"
|
||||
typeset _HC_REPORT_CACHE_LAST_FILE=""
|
||||
typeset _HC_REPORT_CACHE_TODAY_FILE="${STATE_PERM_DIR}/cache.report-today"
|
||||
typeset _USE_CACHE=0
|
||||
typeset _USE_ANY_CACHE=0
|
||||
typeset _CACHE_NOTE_BIT=""
|
||||
|
||||
# set archive log stash (never use cache files)
|
||||
# set archive log stash
|
||||
if (( ARG_HISTORY > 0 )) || [[ -n "${ARG_OLDER}" ]] || [[ -n "${ARG_NEWER}" ]]
|
||||
then
|
||||
set +f # file globbing must be on
|
||||
@ -154,46 +148,10 @@ then
|
||||
_HC_LAST_TIME=""
|
||||
_HC_LAST_STC=0
|
||||
_HC_LAST_FAIL_ID="-"
|
||||
_USE_CACHE=0
|
||||
|
||||
# check for cache usage
|
||||
if (( ARG_HISTORY == 0 ))
|
||||
then
|
||||
case "${HC_REPORT_CACHE_LAST}" in
|
||||
Yes|yes|YES)
|
||||
_HC_REPORT_CACHE_LAST_FILE="${_HC_REPORT_CACHE_LAST_STUB}-${_HC_LAST}"
|
||||
# check if cache file exists
|
||||
if [[ -s "${_HC_REPORT_CACHE_LAST_FILE}" ]]
|
||||
then
|
||||
_LOG_STASH="${_HC_REPORT_CACHE_LAST_FILE}"
|
||||
_USE_CACHE=1
|
||||
_USE_ANY_CACHE=1
|
||||
(( ARG_DEBUG > 0 )) && debug "setting log stash to cache file at ${_HC_REPORT_CACHE_LAST_FILE}"
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "HC_REPORT_CACHE_LAST is enabled but unable to find cache file at ${_HC_REPORT_CACHE_LAST_FILE}"
|
||||
# reset log stash to current log
|
||||
_LOG_STASH="${HC_LOG} ${_LOG_STASH}"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
# no caching: reset log stash to current log
|
||||
(( ARG_DEBUG > 0 )) && debug "HC_REPORT_CACHE_LAST is disabled"
|
||||
_LOG_STASH="${HC_LOG} ${_LOG_STASH}"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# determine LAST_TIME from cache or log(s)
|
||||
if (( ARG_HISTORY == 0 )) && (( _USE_CACHE > 1 ))
|
||||
then
|
||||
_HC_LAST_TIME="$(tail -n 1 ${_LOG_STASH} 2>/dev/null | cut -f1 -d${LOG_SEP} 2>/dev/null)"
|
||||
else
|
||||
# find last event or block of events (same timestamp)
|
||||
# (but unfortunately this is only accurate to events within the SAME second!)
|
||||
# shellcheck disable=SC2086
|
||||
_HC_LAST_TIME="$(grep -h ${_HC_LAST} ${_LOG_STASH} 2>/dev/null | sort -n 2>/dev/null | cut -f1 -d${LOG_SEP} 2>/dev/null | uniq 2>/dev/null | tail -1 2>/dev/null)"
|
||||
fi
|
||||
|
||||
if [[ -z "${_HC_LAST_TIME}" ]]
|
||||
then
|
||||
_HC_LAST_TIME="-"
|
||||
@ -223,26 +181,12 @@ then
|
||||
fi
|
||||
# report on findings
|
||||
# shellcheck disable=SC1117
|
||||
if (( _USE_CACHE > 0 ))
|
||||
then
|
||||
printf "| %-40s | %-20s | %-14s | %-4s (C)\n" \
|
||||
"${_HC_LAST}" "${_HC_LAST_TIME}" "${_HC_LAST_FAIL_ID}" "${_HC_LAST_STC}"
|
||||
else
|
||||
printf "| %-40s | %-20s | %-14s | %-4s\n" \
|
||||
"${_HC_LAST}" "${_HC_LAST_TIME}" "${_HC_LAST_FAIL_ID}" "${_HC_LAST_STC}"
|
||||
fi
|
||||
done
|
||||
# spacer
|
||||
print
|
||||
# disclaimer & note(s)
|
||||
if (( _USE_ANY_CACHE > 0 ))
|
||||
then
|
||||
print "NOTE: entries suffixed by (C) indicate results were retrieved from a cache file. If you wish to use "
|
||||
print " the real log files then disable HC_REPORT_CACHE_LAST in ${CONFIG_FILE}"
|
||||
fi
|
||||
(( ARG_HISTORY == 0 )) && _CACHE_NOTE_BIT="for non-cached entries: "
|
||||
print "NOTE: ${_CACHE_NOTE_BIT}this report only shows the overall combined status of all events of each HC"
|
||||
print " within exactly the *same* time stamp (seconds precise). It may therefore fail to report certain FAIL IDs."
|
||||
# disclaimer
|
||||
print "NOTE: this report only shows the overall combined status of all events of each HC within exactly"
|
||||
print " the *same* time stamp (seconds precise). It may therefore fail to report certain FAIL IDs."
|
||||
print " Use '--report' to get the exact list of failure events."
|
||||
# other reports
|
||||
else
|
||||
@ -254,31 +198,7 @@ else
|
||||
(( _IS_VALID_ID > 0 )) || die "invalid ID specified"
|
||||
_ID_NEEDLE="${ARG_FAIL_ID}"
|
||||
fi
|
||||
# set today's needle and check cache usage
|
||||
if (( ARG_TODAY > 0 ))
|
||||
then
|
||||
_ID_NEEDLE="$(date '+%Y%m%d')" # refers to timestamp of HC FAIL_ID
|
||||
# do not use a cache file when --with-history
|
||||
if (( ARG_HISTORY == 0 ))
|
||||
then
|
||||
case "${HC_REPORT_CACHE_TODAY}" in
|
||||
Yes|yes|YES)
|
||||
# check if cache file exists
|
||||
if [[ -s "${_HC_REPORT_CACHE_TODAY_FILE}" ]]
|
||||
then
|
||||
_LOG_STASH="${_HC_REPORT_CACHE_TODAY_FILE}"
|
||||
_USE_CACHE=1
|
||||
(( ARG_DEBUG > 0 )) && debug "setting log stash to today's cache file at ${_HC_REPORT_CACHE_TODAY_FILE}"
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "HC_REPORT_CACHE_TODAY is enabled but unable to find cache file at ${_HC_REPORT_CACHE_TODAY_FILE}"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
(( ARG_DEBUG > 0 )) && debug "HC_REPORT_CACHE_TODAY is disabled"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
fi
|
||||
(( ARG_TODAY > 0 )) && _ID_NEEDLE="$(date '+%Y%m%d')" # refers to timestamp of HC FAIL_ID
|
||||
|
||||
# reverse?
|
||||
if (( ARG_REVERSE == 0 ))
|
||||
@ -342,9 +262,9 @@ else
|
||||
split (events[i], event, "|");
|
||||
printf ("\n| %-20s | %-14s | %-40s | %-s", event[1], event[5], event[2], event[4]);
|
||||
}
|
||||
printf ("\n\nSUMMARY: %s failed HC event(s) found.\n\n", event_count);
|
||||
printf ("\n\nSUMMARY: %s failed HC event(s) found.\n", event_count);
|
||||
} else {
|
||||
printf ("\nSUMMARY: 0 failed HC events found.\n\n");
|
||||
printf ("\nSUMMARY: 0 failed HC events found.\n");
|
||||
}
|
||||
}
|
||||
' 2>/dev/null
|
||||
@ -396,28 +316,17 @@ else
|
||||
# shellcheck disable=SC2183,SC1117
|
||||
printf "%80s\n" | tr ' ' -
|
||||
fi
|
||||
|
||||
# add notes
|
||||
# cache or not?
|
||||
if (( _USE_CACHE > 0 ))
|
||||
then
|
||||
print "NOTE: results were retrieved from a cache file. If you wish to use the real log files then"
|
||||
print " remove ${_HC_REPORT_CACHE_TODAY_FILE} and/or disable HC_REPORT_CACHE_TODAY in ${CONFIG_FILE}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# add general notes
|
||||
# history or not?
|
||||
# general note: history or not?
|
||||
if (( ARG_HISTORY > 0 ))
|
||||
then
|
||||
print "NOTE: showing results with all history (archive) included (--with-history)"
|
||||
else
|
||||
print "NOTE: showing only results of current entries (use --with-history to view all entries)"
|
||||
print "NOTE: showing results only of current log entries (use --with-history to view all entries)"
|
||||
fi
|
||||
|
||||
# check consistency of log(s)
|
||||
# note: should no longer be necessary as of release 19/05/2019 but we keep the
|
||||
# check alive for now
|
||||
# shellcheck disable=SC2086
|
||||
find ${_LOG_STASH} -type f -print 2>/dev/null | while read -r _CHECK_FILE
|
||||
do
|
||||
|
@ -29,8 +29,6 @@
|
||||
# @(#) 2019-01-24: arguments fix [Patrick Van der Veken]
|
||||
# @(#) 2019-03-09: added support for --log-healthy [Patrick Van der Veken]
|
||||
# @(#) 2019-03-16: replace 'which' [Patrick Van der Veken]
|
||||
# @(#) 2020-09-05: fix log_hc call for failed checksum + quote fix [Patrick Van der Veken]
|
||||
# @(#) 2021-01-13: fix log_hc call (wrong logic) [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
@ -40,7 +38,7 @@ function check_aix_file_change
|
||||
{
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
|
||||
typeset _VERSION="2021-01-13" # YYYY-MM-DD
|
||||
typeset _VERSION="2019-03-16" # YYYY-MM-DD
|
||||
typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
|
||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||
|
||||
@ -277,7 +275,7 @@ do
|
||||
openssl-sha256)
|
||||
if (( _USE_OPENSSL == 1 ))
|
||||
then
|
||||
_FILE_CKSUM=$(${_OPENSSL_BIN} dgst -sha256 "${_FILE_TO_CHECK}" 2>>${HC_STDERR_LOG} | cut -f2 -d'=' | tr -d ' ')
|
||||
_FILE_CKSUM=$(${_OPENSSL_BIN} dgst -sha256 ${_FILE_TO_CHECK} 2>>${HC_STDERR_LOG} | cut -f2 -d'=' | tr -d ' ')
|
||||
_FILE_TYPE="openssl-sha256"
|
||||
else
|
||||
_MSG="cannot compute checksum [${_FILE_TYPE}] for ${_FILE_TO_CHECK}"
|
||||
@ -287,7 +285,7 @@ do
|
||||
cksum-crc32)
|
||||
if (( _USE_CKSUM == 1 ))
|
||||
then
|
||||
_FILE_CKSUM=$(${_CKSUM_BIN} "${_FILE_TO_CHECK}" 2>>${HC_STDERR_LOG} | cut -f1 -d' ')
|
||||
_FILE_CKSUM=$(${_CKSUM_BIN} ${_FILE_TO_CHECK} 2>>${HC_STDERR_LOG} | cut -f1 -d' ')
|
||||
_FILE_TYPE="cksum-crc32"
|
||||
else
|
||||
_MSG="cannot compute checksum [${_FILE_TYPE}] for ${_FILE_TO_CHECK}"
|
||||
@ -303,11 +301,11 @@ do
|
||||
# new file
|
||||
if (( _USE_OPENSSL == 1 ))
|
||||
then
|
||||
_FILE_CKSUM=$(${_OPENSSL_BIN} dgst -sha256 "${_FILE_TO_CHECK}" 2>>${HC_STDERR_LOG} | cut -f2 -d'=' | tr -d ' ')
|
||||
_FILE_CKSUM=$(${_OPENSSL_BIN} dgst -sha256 ${_FILE_TO_CHECK} 2>>${HC_STDERR_LOG} | cut -f2 -d'=' | tr -d ' ')
|
||||
_FILE_TYPE="openssl-sha256"
|
||||
elif (( _USE_CKSUM == 1 ))
|
||||
then
|
||||
_FILE_CKSUM=$(${_CKSUM_BIN} "${_FILE_TO_CHECK}" 2>>${HC_STDERR_LOG} | cut -f1 -d' ')
|
||||
_FILE_CKSUM=$(${_CKSUM_BIN} ${_FILE_TO_CHECK} 2>>${HC_STDERR_LOG} | cut -f1 -d' ')
|
||||
_FILE_TYPE="cksum-crc32"
|
||||
else
|
||||
_MSG="cannot compute checksum (openssl/cksum) for ${_FILE_TO_CHECK}"
|
||||
@ -323,7 +321,7 @@ do
|
||||
fi
|
||||
|
||||
# bounce failures back and jump to next file
|
||||
if (( _LOG_HEALTHY > 0 && _STC > 0 ))
|
||||
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
|
||||
then
|
||||
log_hc "$0" ${_STC} "${_MSG}"
|
||||
continue
|
||||
@ -349,7 +347,7 @@ do
|
||||
printf "%s|%s|%s\n" "${_FILE_TO_CHECK}" "${_FILE_TYPE}" "${_FILE_CKSUM}" >>${_TMP2_FILE}
|
||||
|
||||
# report with curr/exp values
|
||||
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
|
||||
if (( _LOG_HEALTHY > 0 ))
|
||||
then
|
||||
log_hc "$0" ${_STC} "${_MSG}" "${_FILE_CKSUM}" "${_STATE_FILE_CKSUM}"
|
||||
continue
|
||||
|
@ -1,349 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#------------------------------------------------------------------------------
|
||||
# @(#) check_aix_uptime
|
||||
#------------------------------------------------------------------------------
|
||||
# @(#) Copyright (C) 2020 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is a free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#------------------------------------------------------------------------------
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: check_aix_uptime
|
||||
# DOES: see _show_usage()
|
||||
# EXPECTS: see _show_usage()
|
||||
# REQUIRES: data_is_numeric(), data_timestring_to_mins(), data_comma2space(),
|
||||
# data_get_lvalue_from_config(), debug(), init_hc(), log(), log_hc(), warn()
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2020-12-21: initial version [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function check_aix_uptime
{
# PURPOSE: health check that compares the current uptime of the host (taken
#          from the elapsed time of PID 1) against:
#            1) the uptime recorded by the previous run in ${_STATE_FILE}
#               ('reboot' check, enabled unless check_reboot=no)
#            2) a maximum allowed uptime ('old age' check, enabled only when
#               check_old_age=yes)
# EXPECTS: optional comma-separated HC arguments; only 'help' is handled
# CONFIG:  log_healthy, check_reboot, reboot_time (default 60m),
#          check_old_age, old_age_time (default 365d)
# RETURNS: 0=check(s) completed (results are reported through log_hc());
#          1=configuration, uptime or state file could not be processed
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _STATE_FILE="${STATE_PERM_DIR}/current.uptime"
typeset _VERSION="2020-12-21"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="AIX"                      # uname -s match
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _LOG_HEALTHY=0
typeset _CFG_HEALTHY=""
typeset _CFG_CHECK_REBOOT=""
typeset _CFG_REBOOT_TIME=""
typeset _CFG_CHECK_OLD_AGE=""
typeset _CFG_OLD_AGE_TIME=""
typeset _CHECK_REBOOT=""
typeset _REBOOT_TIME=""
typeset _REBOOT_TIME_MINS=""
typeset _CHECK_OLD_AGE=""
typeset _OLD_AGE_TIME=""
typeset _OLD_AGE_TIME_MINS=""
typeset _CURRENT_UPTIME=""
typeset _CURRENT_UPTIME_MINS=""
typeset _INIT_TIME=""
typeset _PREVIOUS_UPTIME=""
typeset _PREVIOUS_UPTIME_MINS=""
typeset _THRESHOLD_UPTIME_MINS=""

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle config file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read required config values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
# reboot check is ON unless explicitly disabled
_CFG_CHECK_REBOOT=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_reboot')
case "${_CFG_CHECK_REBOOT}" in
    no|No|NO)
        _CHECK_REBOOT=0
        ;;
    *)
        _CHECK_REBOOT=1
        ;;
esac
_CFG_REBOOT_TIME=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'reboot_time')
if [[ -z "${_CFG_REBOOT_TIME}" ]]
then
    # default
    _REBOOT_TIME="60m"
else
    _REBOOT_TIME="${_CFG_REBOOT_TIME}"
fi
# old age check is OFF unless explicitly enabled
# (pattern accepts yes/YES/Yes, same spellings as for 'log_healthy')
_CFG_CHECK_OLD_AGE=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_old_age')
case "${_CFG_CHECK_OLD_AGE}" in
    yes|YES|Yes)
        _CHECK_OLD_AGE=1
        ;;
    *)
        _CHECK_OLD_AGE=0
        ;;
esac
_CFG_OLD_AGE_TIME=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'old_age_time')
if [[ -z "${_CFG_OLD_AGE_TIME}" ]]
then
    # default
    _OLD_AGE_TIME="365d"
else
    _OLD_AGE_TIME="${_CFG_OLD_AGE_TIME}"
fi

# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

#------------------------------------------------------------------------------
# read uptime from init process
# shellcheck disable=SC1007
_INIT_TIME=$(UNIX95= ps -p 1 -o etime 2>/dev/null | tail -1)
if [[ -n "${_INIT_TIME}" ]]
then
    # calculate exact uptime (seconds); etime is formatted as [[dd-]hh:]mm:ss
    # so a value with only 2 fields is minutes:seconds (NOT hours:minutes)
    _CURRENT_UPTIME=$(print "${_INIT_TIME}" |\
        awk '
            BEGIN { days = 0; hours = 0; mins = 0; secs = 0 };
            {
                gsub(/[ \t]/, "");
                # get days
                split ($0, day_str, "-");
                if (2 in day_str) {
                    # string has days
                    days = day_str[1];
                    fields = split (day_str[2], hour_str, ":");
                } else {
                    fields = split (day_str[1], hour_str, ":");
                }
                # get hours/minutes/seconds
                if (fields >= 3) {
                    # hh:mm:ss
                    hours = hour_str[1];
                    mins = hour_str[2];
                    secs = hour_str[3];
                } else if (fields == 2) {
                    # mm:ss (uptime is less than 1 hour)
                    mins = hour_str[1];
                    secs = hour_str[2];
                } else {
                    secs = hour_str[1];
                }
            }
            END {
                time = (days * 24 * 60 * 60) + (hours * 60 * 60) + (mins * 60) + secs;
                print time;
            }' 2>/dev/null)
    data_is_numeric "${_CURRENT_UPTIME}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        warn "unable to calculate uptime value (seconds)"
        (( ARG_DEBUG )) && debug "_CURRENT_UPTIME=${_CURRENT_UPTIME}"
        return 1
    fi
else
    warn "uptime of INIT (1) process cannot be determined"
    return 1
fi

#------------------------------------------------------------------------------
# read state file (uptime in seconds as recorded by the previous run)
if [[ -r ${_STATE_FILE} ]]
then
    _PREVIOUS_UPTIME=$(<"${_STATE_FILE}")
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        warn "failed to read state file at ${_STATE_FILE}"
        _PREVIOUS_UPTIME=""
    fi
fi

#------------------------------------------------------------------------------
# convert uptimes values
_CURRENT_UPTIME_MINS=$(( _CURRENT_UPTIME / 60 ))
data_is_numeric "${_CURRENT_UPTIME_MINS}"
# shellcheck disable=SC2181
if (( $? > 0 ))
then
    warn "unable to calculate current uptime value (minutes)"
    (( ARG_DEBUG )) && debug "_CURRENT_UPTIME_MINS=${_CURRENT_UPTIME_MINS}"
    return 1
fi
_PREVIOUS_UPTIME_MINS=$(( _PREVIOUS_UPTIME / 60 ))
# validate the *previous* value here (not the current one again)
data_is_numeric "${_PREVIOUS_UPTIME_MINS}"
# shellcheck disable=SC2181
if (( $? > 0 ))
then
    warn "unable to calculate previous uptime value (minutes)"
    (( ARG_DEBUG )) && debug "_PREVIOUS_UPTIME_MINS=${_PREVIOUS_UPTIME_MINS}"
    return 1
fi

#------------------------------------------------------------------------------
# check reboot event
if (( _CHECK_REBOOT > 0 ))
then
    # convert _REBOOT_TIME to minutes
    _REBOOT_TIME_MINS=$(data_timestring_to_mins "${_REBOOT_TIME}")
    data_is_numeric "${_REBOOT_TIME_MINS}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        warn "unable to calculate 'reboot_time' value from configuration file ${_CONFIG_FILE}"
        (( ARG_DEBUG )) && debug "_REBOOT_TIME=${_REBOOT_TIME}"
        return 1
    fi

    # previous uptime missing?
    if [[ -z "${_PREVIOUS_UPTIME}" ]]
    then
        if (( ARG_LOG > 0 ))
        then
            print "${_CURRENT_UPTIME}" >"${_STATE_FILE}"
            # shellcheck disable=SC2181
            if (( $? > 0 ))
            then
                warn "failed to update state file at ${_STATE_FILE}"
                return 1
            else
                log "unable to find previously recorded uptime, resetting to current uptime"
                return 0
            fi
        else
            log "unable to find previously recorded uptime, resetting to current uptime"
        fi
    else
        # current uptime + reboot time is smaller than previous uptime?
        _THRESHOLD_UPTIME_MINS=$(( _CURRENT_UPTIME_MINS + _REBOOT_TIME_MINS ))
        if (( _THRESHOLD_UPTIME_MINS < _PREVIOUS_UPTIME_MINS ))
        then
            _MSG="reboot check: current uptime is NOK; check if reboot occurred"
            _STC=1
        else
            _MSG="reboot check: current uptime is OK"
            _STC=0
        fi
    fi
    # only report when a comparison was actually made: _MSG stays empty when
    # no previous uptime was available (avoids logging an empty HC event)
    if [[ -n "${_MSG}" ]] && (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}" "${_THRESHOLD_UPTIME_MINS}" "${_PREVIOUS_UPTIME_MINS}"
    fi

    # update state file
    if (( ARG_LOG > 0 ))
    then
        print "${_CURRENT_UPTIME}" >"${_STATE_FILE}"
        # shellcheck disable=SC2181
        if (( $? > 0 ))
        then
            warn "failed to update state file at ${_STATE_FILE}"
            return 1
        fi
    fi
else
    log "reboot check: not enabled"
fi

#------------------------------------------------------------------------------
# check old age event
if (( _CHECK_OLD_AGE > 0 ))
then
    # convert _OLD_AGE_TIME to minutes
    _OLD_AGE_TIME_MINS=$(data_timestring_to_mins "${_OLD_AGE_TIME}")
    data_is_numeric "${_OLD_AGE_TIME_MINS}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        warn "unable to calculate 'old_age_time' value from configuration file ${_CONFIG_FILE}"
        (( ARG_DEBUG )) && debug "_OLD_AGE_TIME=${_OLD_AGE_TIME}"
        return 1
    fi

    # are we old age yet?
    if (( _CURRENT_UPTIME_MINS > _OLD_AGE_TIME_MINS ))
    then
        _MSG="old_age check: current uptime is NOK; old age has arrived (>${_OLD_AGE_TIME})"
        _STC=1
    else
        _MSG="old_age check: current uptime is OK"
        _STC=0
    fi
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}" "${_CURRENT_UPTIME_MINS}" "${_OLD_AGE_TIME_MINS}"
    fi
else
    log "old age check: not enabled"
fi

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function _show_usage
{
# Prints the help text of the health check on STDOUT.
# EXPECTS: 1=HC name [string], 2=HC version [string], 3=path of the HC
#          configuration file [string]
# RETURNS: 0
typeset _USAGE_NAME="$1"
typeset _USAGE_VERSION="$2"
typeset _USAGE_CONFIG="$3"

cat <<- EOT
NAME : ${_USAGE_NAME}
VERSION : ${_USAGE_VERSION}
CONFIG : ${_USAGE_CONFIG} with:
log_healthy=<yes|no>
check_reboot=<yes|no>
reboot_time=<timestring>
check_old_age=<yes|no>
old_age_time=<timestring>
PURPOSE : Checks for unexpected/unplanned reboot events based on uptime
values.
Checks whether the host has been up and running for too much time.
LOG HEALTHY : Supported

EOT

return 0
}
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# END of script
|
||||
#------------------------------------------------------------------------------
|
@ -23,7 +23,6 @@
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2019-04-20: merged HP-UX+Linux version + fixes [Patrick Van der Veken]
|
||||
# @(#) 2019-04-26: made _CRSCTL_BIN path configurable + fix [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
@ -33,7 +32,7 @@ function check_clusterware_resource_config
|
||||
{
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
|
||||
typeset _VERSION="2019-04-26" # YYYY-MM-DD
|
||||
typeset _VERSION="2019-04-20" # YYYY-MM-DD
|
||||
typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
|
||||
typeset _MAX_LENGTH_VALUE_STRING=30
|
||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||
@ -89,19 +88,6 @@ case "${_CFG_HEALTHY}" in
|
||||
(( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
|
||||
;;
|
||||
esac
|
||||
_CRSCTL_BIN=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'crsctl_bin')
|
||||
if [[ -z "${_CRSCTL_BIN}" ]]
|
||||
then
|
||||
_CRSCTL_BIN="$(command -v crsctl 2>>${HC_STDERR_LOG})"
|
||||
[[ -n "${_CRSCTL_BIN}" ]] && (( ARG_DEBUG > 0 )) && debug "crsctl path: ${_CRSCTL_BIN} (discover)"
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "crsctl path: ${_CRSCTL_BIN} (config)"
|
||||
fi
|
||||
if [[ -z "${_CRSCTL_BIN}" || ! -x ${_CRSCTL_BIN} ]]
|
||||
then
|
||||
warn "could not determine location for CRS {crsctl} (or it is not installed here)"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# log_healthy
|
||||
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
|
||||
@ -130,18 +116,25 @@ then
|
||||
fi
|
||||
|
||||
# get resource information from crsctl
|
||||
for _RES_INSTANCE in ${_RES_INSTANCES}
|
||||
do
|
||||
${_CRSCTL_BIN} status resource ${_RES_INSTANCE} -f 2>>${HC_STDERR_LOG} |\
|
||||
_CRSCTL_BIN="$(command -v crsctl 2>>${HC_STDERR_LOG})"
|
||||
if [[ -z "${_CRSCTL_BIN}" || ! -x ${_CRSCTL_BIN} ]]
|
||||
then
|
||||
warn "CRS {crsctl} is not installed here"
|
||||
return 1
|
||||
else
|
||||
for _RES_INSTANCE in ${_RES_INSTANCES}
|
||||
do
|
||||
crsctl status resource ${_RES_INSTANCE} -f 2>>${HC_STDERR_LOG} |\
|
||||
tr -d ' \t' >${_RES_RUN_FILE}.${_RES_INSTANCE} 2>/dev/null
|
||||
[[ -s ${_RES_RUN_FILE}.${_RES_INSTANCE} ]] || {
|
||||
_MSG="unable to run command: {${_CRSCTL_BIN} status resource -f ${_RES_INSTANCE}}"
|
||||
[[ -s ${_RES_RUN_FILE}.${_RES_INSTANCE} ]] || {
|
||||
_MSG="unable to gather configuration cluster resource ${_RES_INSTANCE}"
|
||||
log_hc "$0" 1 "${_MSG}"
|
||||
# dump debug info
|
||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
|
||||
return 1
|
||||
}
|
||||
done
|
||||
}
|
||||
done
|
||||
fi
|
||||
|
||||
# gather resource information from healthcheck configuration
|
||||
for _RES_INSTANCE in ${_RES_INSTANCES}
|
||||
@ -228,7 +221,6 @@ NAME : $1
|
||||
VERSION : $2
|
||||
CONFIG : $3 with parameters:
|
||||
log_healthy=<yes|no>
|
||||
crsctl_bin=<path_to_crsctl>
|
||||
and formatted stanzas for resource definitions
|
||||
PURPOSE : Checks the configuration of Clusterware resources (parameters/values)
|
||||
(comparing serialized strings from the HC configuration file to the
|
||||
|
@ -24,7 +24,6 @@
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2019-04-20: merged HP-UX+Linux version [Patrick Van der Veken]
|
||||
# @(#) 2019-04-26: made _CRSCTL_BIN path configurable + fix [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
@ -34,7 +33,7 @@ function check_clusterware_resource_status
|
||||
{
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
|
||||
typeset _VERSION="2019-04-26" # YYYY-MM-DD
|
||||
typeset _VERSION="2019-04-20" # YYYY-MM-DD
|
||||
typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
|
||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||
|
||||
@ -86,19 +85,6 @@ case "${_CFG_HEALTHY}" in
|
||||
(( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
|
||||
;;
|
||||
esac
|
||||
_CRSCTL_BIN=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'crsctl_bin')
|
||||
if [[ -z "${_CRSCTL_BIN}" ]]
|
||||
then
|
||||
_CRSCTL_BIN="$(command -v crsctl 2>>${HC_STDERR_LOG})"
|
||||
[[ -n "${_CRSCTL_BIN}" ]] && (( ARG_DEBUG > 0 )) && debug "crsctl path: ${_CRSCTL_BIN} (discover)"
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "crsctl path: ${_CRSCTL_BIN} (config)"
|
||||
fi
|
||||
if [[ -z "${_CRSCTL_BIN}" || ! -x ${_CRSCTL_BIN} ]]
|
||||
then
|
||||
warn "could not determine location for CRS {crsctl} (or it is not installed here)"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# log_healthy
|
||||
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
|
||||
@ -114,16 +100,24 @@ else
|
||||
log "not logging/showing passed health checks"
|
||||
fi
|
||||
|
||||
# check for clusterware
|
||||
_CRSCTL_BIN="$(command -v crsctl 2>>${HC_STDERR_LOG})"
|
||||
if [[ -z "${_CRSCTL_BIN}" || ! -x ${_CRSCTL_BIN} ]]
|
||||
then
|
||||
warn "CRS {crsctl} is not installed here"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# do resource status checks
|
||||
grep -E -e "^crs:" ${_CONFIG_FILE} 2>/dev/null |\
|
||||
while IFS=":" read -r _ _CRS_RESOURCE _CRS_STATES
|
||||
do
|
||||
# get actual resource info
|
||||
(( ARG_DEBUG > 0 )) && debug "checking for resource: ${_CRS_RESOURCE}"
|
||||
_CRSCTL_STATUS=$(${_CRSCTL_BIN} status resource "${_CRS_RESOURCE}" 2>>${HC_STDERR_LOG})
|
||||
_CRSCTL_STATUS=$(crsctl status resource "${_CRS_RESOURCE}" 2>>${HC_STDERR_LOG})
|
||||
if (( $? > 0 )) || [[ -z "${_CRSCTL_STATUS}" ]]
|
||||
then
|
||||
_MSG="unable to run command: {${_CRSCTL_BIN} status resource ${_CRS_RESOURCE}}"
|
||||
_MSG="unable to run command: {crsctl status resource ${_CRS_RESOURCE}}"
|
||||
log_hc "$0" 1 "${_MSG}"
|
||||
# dump debug info
|
||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
|
||||
@ -208,7 +202,6 @@ NAME : $1
|
||||
VERSION : $2
|
||||
CONFIG : $3 with parameters:
|
||||
log_healthy=<yes|no>
|
||||
crsctl_bin=<path_to_crsctl>
|
||||
and formatted stanzas:
|
||||
crs:<resource_name>:<*|node>=<ONLINE|OFFLINE>,<*|node>=<ONLINE|OFFLINE>,...
|
||||
PURPOSE : Checks the STATE of CRS resource(s)
|
||||
|
@ -1,297 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_alerts.sh
|
||||
#******************************************************************************
|
||||
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is a free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#******************************************************************************
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: check_exadata_cell_alerts
|
||||
# DOES: see _show_usage()
|
||||
# EXPECTS: see _show_usage()
|
||||
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config,
|
||||
# data_lc(), data_list_contains_string(), data_is_numeric(),
|
||||
# data_get_lvalue_from_config(), dump_logs(), exadata_exec_dcli(),
|
||||
# init_hc(), log_hc(), warn()
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function check_exadata_cell_alerts
{
# Health check plugin: reports new entries in the alert history of Exadata
# cell servers. For each configured cell server the alert history is fetched
# through exadata_exec_dcli() and every alert whose sequence ID is higher than
# the one recorded in the per-server state file is reported via log_hc() when
# its severity is in the configured 'alert_severities' list.
# Arguments: comma-separated plugin arguments; 'help' shows the usage text.
# Returns:   0 on completion, 1 on configuration or state file errors.
# NOTE: inside a ksh 'function' definition $0 holds the function name.

# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
#celadm03: name: 2
#celadm03: alertMessage: "VD bad block table cleared on Adapter 0 VD Target 2"
#celadm03: alertSequenceID: 2
#celadm03: alertShortName: Hardware
#celadm03: alertType: Stateless
#celadm03: beginTime: 2019-04-21T08:17:44+02:00
#celadm03: endTime:
#celadm03: examinedBy:
#celadm03: notificationState: non-deliverable
#celadm03: sequenceBeginTime: 2019-04-21T08:17:44+02:00
#celadm03: severity: info
#celadm03: alertAction: Informational.
typeset _CELL_COMMAND="cellcli -e 'LIST ALERTHISTORY DETAIL'"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
# DEBUG_OPTS is deliberately unquoted: it may hold several 'set' options
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
typeset _CFG_ALERT_SEVERITIES=""
typeset _CELL_OUTPUT=""
typeset _LAST_SEQUENCE=0
typeset _STATE_FILE=""
typeset _ALERT_DESCRIPTION=""
typeset _ALERT_SEQUENCE=""
typeset _ALERT_SEVERITY=""

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
_CFG_ALERT_SEVERITIES=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'alert_severities')
if [[ -z "${_CFG_ALERT_SEVERITIES}" ]]
then
    warn "no alert severities specified in configuration file at ${_CONFIG_FILE}"
    return 1
else
    # severities are compared in lower case (awk lower-cases them as well)
    _CFG_ALERT_SEVERITIES=$(data_lc "${_CFG_ALERT_SEVERITIES}")
fi

# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    # every cell server has its own pointer: never inherit the value of the
    # previously processed server (e.g. when a fresh state file is created)
    _LAST_SEQUENCE=0

    # check state file
    _STATE_FILE="${STATE_PERM_DIR}/${_CFG_CELL_SERVER}.alerts"
    (( ARG_DEBUG > 0 )) && debug "checking/reading state file at ${_STATE_FILE}"
    if [[ -r ${_STATE_FILE} ]]
    then
        _LAST_SEQUENCE=$(<"${_STATE_FILE}")
        if [[ -z "${_LAST_SEQUENCE}" ]]
        then
            (( ARG_DEBUG > 0 )) && debug "no recorded last log entry for ${_CFG_CELL_SERVER}, resetting to 0"
            _LAST_SEQUENCE=0
        else
            (( ARG_DEBUG > 0 )) && debug "recorded last log entry for ${_CFG_CELL_SERVER}: ${_LAST_SEQUENCE}"
        fi
    else
        : >"${_STATE_FILE}"
        # shellcheck disable=SC2181
        (( $? > 0 )) && {
            warn "failed to create new state file at ${_STATE_FILE}"
            return 1
        }
        log "created new state file at ${_STATE_FILE}"
    fi

    # execute remote command
    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
    # empty _CELL_OUTPUT means alert history reset
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        _MSG="did not discover cell data or one of the discoveries failed"
        _STC=2
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}"
        fi
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        continue
    else
        # empty alert history?
        if [[ -z "${_CELL_OUTPUT}" ]]
        then
            # zero the state file
            if (( ARG_LOG > 0 ))
            then
                warn "null resetting the current log pointer for ${_CFG_CELL_SERVER}"
                : >"${_STATE_FILE}" 2>>"${HC_STDERR_LOG}"
            fi
        fi
    fi

    # perform checks on cell data
    # awk emits one 'description|sequence|severity' record per alert
    # NOTE(review): a record is only emitted once an alertDescription line was
    # seen; the sample output above shows alertMessage only -- confirm that
    # the attribute is always present in the cellcli output
    print -R "${_CELL_OUTPUT}" | awk '

        BEGIN { found = 0; alert_description = ""; alert_sequence = ""; alert_severity = ""; }

        {
            # split cell data line: <cell>:<attribute>:<value>
            split ($0, cell_line, ":");

            if ( cell_line[2] ~ /alertDescription/ ) {
                found = 1;
                # take everything after the 2nd colon so that a description
                # containing colons is not truncated
                alert_description = $0;
                sub (/^[^:]*:[^:]*:/, "", alert_description);
                # strip leading spaces & quotes
                gsub (/^[[:space:]]*/, "", alert_description);
                gsub (/\"/, "", alert_description);
            }
            if ( cell_line[2] ~ /alertSequenceID/ ) {
                alert_sequence = cell_line[3];
                # strip spaces
                gsub (/[[:space:]]/, "", alert_sequence);
            };
            if ( cell_line[2] ~ /severity/ ) {
                alert_severity = cell_line[3];
                # strip spaces
                gsub (/[[:space:]]/, "", alert_severity);
            };
            if ( alert_description != "" && alert_sequence != "" && alert_severity != "" && found ) {
                printf "%s|%s|%s\n", alert_description, alert_sequence, tolower (alert_severity)
                found = 0; alert_description = ""; alert_sequence = ""; alert_severity = "";
            }
        }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _ALERT_DESCRIPTION _ALERT_SEQUENCE _ALERT_SEVERITY
    do
        # check for numeric
        data_is_numeric "${_ALERT_SEQUENCE}"
        # shellcheck disable=SC2181
        if (( $? > 0 ))
        then
            warn "non-numeric sequence ID encountered: [${_CFG_CELL_SERVER}/${_ALERT_SEVERITY}/${_ALERT_SEQUENCE}/${_ALERT_DESCRIPTION}]"
            continue
        fi
        if (( _ALERT_SEQUENCE > _LAST_SEQUENCE ))
        then
            # check severities list (RC 0 is treated as "not in the list")
            data_list_contains_string "${_CFG_ALERT_SEVERITIES}" "${_ALERT_SEVERITY}"
            # shellcheck disable=SC2181
            if (( $? == 0 ))
            then
                (( ARG_DEBUG > 0 )) && debug "ignoring alert because of severity: [${_CFG_CELL_SERVER}/${_ALERT_SEVERITY}/${_ALERT_SEQUENCE}/${_ALERT_DESCRIPTION}]"
                continue
            else
                # a new alert is a failed check: always report it, regardless
                # of the log_healthy setting
                _MSG="ID=${_ALERT_SEQUENCE} (${_ALERT_SEVERITY}) ${_ALERT_DESCRIPTION}"
                log_hc "$0" 1 "${_CFG_CELL_SERVER}: ${_MSG}"
            fi
        else
            if (( _LOG_HEALTHY > 0 ))
            then
                _MSG="no (new) messages discovered from ${_CFG_CELL_SERVER}"
                log_hc "$0" 0 "${_MSG}"
            fi
        fi
        # rewrite log pointer from the last log entry we discovered
        if (( ARG_LOG > 0 ))
        then
            (( _ALERT_SEQUENCE == 0 )) && _ALERT_SEQUENCE=${_LAST_SEQUENCE}
            (( ARG_DEBUG > 0 )) && debug "updating last log entry for ${_CFG_CELL_SERVER} to ${_ALERT_SEQUENCE}"
            print "${_ALERT_SEQUENCE}" >"${_STATE_FILE}" 2>>"${HC_STDERR_LOG}"
        fi
    done

    # add dcli output to stdout log
    print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
    print "${_CELL_OUTPUT}" >>"${HC_STDOUT_LOG}"
done

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function _show_usage
{
# Prints the help text of the check_exadata_cell_alerts plugin to stdout.
# Arguments: $1 - plugin name, $2 - plugin version, $3 - configuration file
# Returns:   0
# The here-document body is kept unindented on purpose ('<<-' only strips
# leading tabs).
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
alert_severities=<list_of_severities_to_report_on>
PURPOSE : Checks the alert history on cell servers (via dcli)
dcli> cellcli -e 'LIST ALERTHISTORY DETAIL'
CAVEAT : Requires a working dcli setup for the root user
LOG HEALTHY : Supported

EOT

return 0
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
@ -1,246 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_celldisks.sh
|
||||
#******************************************************************************
|
||||
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is a free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#******************************************************************************
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: check_exadata_cell_celldisks
|
||||
# DOES: see _show_usage()
|
||||
# EXPECTS: see _show_usage()
|
||||
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config,
|
||||
# dump_logs(), exadata_exec_dcli(), init_hc(), log_hc(), warn()
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function check_exadata_cell_celldisks
{
# Health check plugin: verifies that every cell disk on the configured Exadata
# cell servers reports the target status ('normal'). Cell data is collected
# per server through exadata_exec_dcli(), parsed with awk into
# 'server|disk|status' records and reported via log_hc().
# Arguments: comma-separated plugin arguments; 'help' shows the usage text.
# Returns:   0 on completion, 1 on configuration or discovery errors.
# NOTE: inside a ksh 'function' definition $0 holds the function name.

# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
#celadm01:name: CD_00_celadm01
#celadm01:comment:
#celadm01:creationTime: 2017-08-29T12:46:36+02:00
#celadm01:deviceName: /dev/sda
#celadm01:devicePartition: /dev/sda3
#celadm01:diskType: HardDisk
#celadm01:errorCount: 0
#celadm01:freeSpace: 379.3125G
#celadm01:id: a6ef7710-6fe7-456e-9571-9ee3c8e53c3f
#celadm01:physicalDisk: P6KRUV
#celadm01:size: 7.1194915771484375T
#celadm01:status: normal
typeset _CELL_COMMAND="cellcli -e 'LIST CELLDISK DETAIL'"
typeset _TARGET_STATUS="normal"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
# DEBUG_OPTS is deliberately unquoted: it may hold several 'set' options
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
typeset _CFG_EXCLUDED_DISKS=""
typeset _CELL_OUTPUT=""
typeset _CELL_DATA=""
typeset _CELL_SERVER=""
typeset _CELL_DISK=""
typeset _DISK_STATUS=""
typeset _CELL_ALL_RC=0
typeset _CELL_RC=0

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
_CFG_EXCLUDED_DISKS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_disks')
if [[ -n "${_CFG_EXCLUDED_DISKS}" ]]
then
    log "excluding following cell disk(s) from the check: ${_CFG_EXCLUDED_DISKS}"
fi

# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
# NOTE: _CELL_DATA/_CELL_ALL_RC are set inside a piped loop; this relies on
# ksh running the last stage of a pipeline in the current shell
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
    _CELL_RC=$?
    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
    then
        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        continue
    else
        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
        # shellcheck disable=SC1117
        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
    fi
done

# validate cell data
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
then
    _MSG="did not discover cell data or one of the discoveries failed"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 1
fi

# perform checks on cell data
# awk emits one 'server|disk|status' record per cell disk
print -R "${_CELL_DATA}" | awk '

    BEGIN { found = 0; cell_disk = ""; disk_status = ""; }

    {
        # split cell data line: <cell>:<attribute>:<value>
        split ($0, cell_line, ":");

        if ( cell_line[2] ~ /name/ ) {
            found = 1;
            cell_disk = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", cell_disk);
        }
        if ( cell_line[2] ~ /status/ ) {
            disk_status = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", disk_status);
        };
        if ( cell_disk != "" && disk_status != "" && found ) {
            printf "%s|%s|%s\n", cell_line[1], cell_disk, disk_status
            found = 0; cell_disk = ""; disk_status = "";
        }
    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _CELL_DISK _DISK_STATUS
do
    # check exclusion list (non-zero RC is treated as "disk is in the list")
    data_list_contains_string "${_CFG_EXCLUDED_DISKS}" "${_CELL_DISK}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "ignoring cell disk ${_CELL_DISK}"
    else
        if [[ "${_DISK_STATUS}" != "${_TARGET_STATUS}" ]]
        then
            _MSG="status of cell disk ${_CELL_SERVER}:/${_CELL_DISK} is NOK (${_DISK_STATUS}!=${_TARGET_STATUS})"
            _STC=1
        else
            _MSG="status of cell disk ${_CELL_SERVER}:/${_CELL_DISK} is OK (${_DISK_STATUS}==${_TARGET_STATUS})"
            _STC=0
        fi
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}" "${_DISK_STATUS}" "${_TARGET_STATUS}"
        fi
    fi
done

# add dcli output to stdout log
print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
print "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function _show_usage
{
# Prints the help text of the check_exadata_cell_celldisks plugin to stdout.
# Arguments: $1 - plugin name, $2 - plugin version, $3 - configuration file
# Returns:   0
# The here-document body is kept unindented on purpose ('<<-' only strips
# leading tabs).
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
excluded_disks=<list_of_cell_disks_to_exclude>
PURPOSE : Checks the status of cell disks on cell servers (via dcli)
dcli> cellcli -e 'LIST CELLDISK DETAIL'
Target attributes:
* Status: normal
CAVEAT : Requires a working dcli setup for the root user
LOG HEALTHY : Supported

EOT

return 0
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
@ -1,308 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_flash.sh
|
||||
#******************************************************************************
|
||||
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is a free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#******************************************************************************
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: check_exadata_cell_flash
|
||||
# DOES: see _show_usage()
|
||||
# EXPECTS: see _show_usage()
|
||||
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config,
|
||||
# dump_logs(), exadata_exec_dcli(), init_hc(), log_hc(), warn()
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function check_exadata_cell_flash
{
# Health check plugin: verifies that the flash cache and/or flash log devices
# on the configured Exadata cell servers report the target status ('normal').
# Which of the two is queried is controlled by the 'check_flashcache' and
# 'check_flashlog' configuration values. Cell data is collected per server
# through exadata_exec_dcli(), parsed with awk into 'server|device|status'
# records and reported via log_hc().
# Arguments: comma-separated plugin arguments; 'help' shows the usage text.
# Returns:   0 on completion, 1 on configuration or discovery errors.
# NOTE: inside a ksh 'function' definition $0 holds the function name.

# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
#celadm01: name: celadm01_FLASHCACHE
#celadm01: cellDisk: FD_00_celadm01,FD_01_#celadm01
#celadm01: creationTime: 2017-06-07T18:48:54+02:00
#celadm01: degradedCelldisks:
#celadm01: effectiveCacheSize: 5.821319580078125T
#celadm01: id: 42423718-e520-4d14-95df-cefc798f528f
#celadm01: size: 5.821319580078125T
#celadm01: status: normal
typeset _CELL_FLASHCACHE_COMMAND="cellcli -e 'LIST FLASHCACHE DETAIL'"
# cell query command -- DO NOT CHANGE --
#celadm01: name: celadm01_FLASHLOG
#celadm01: cellDisk: FD_00_celadm01,FD_01_#celadm01
#celadm01: creationTime: 2017-06-07T18:48:52+02:00
#celadm01: degradedCelldisks:
#celadm01: effectiveSize: 512M
#celadm01: efficiency: 99.37209135951484
#celadm01: id: 40de35b1-84c7-45db-82ec-9eea5f38b40b
#celadm01: size: 512M
#celadm01: status: normal
typeset _CELL_FLASHLOG_COMMAND="cellcli -e 'LIST FLASHLOG DETAIL'"
typeset _TARGET_STATUS="normal"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
# DEBUG_OPTS is deliberately unquoted: it may hold several 'set' options
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
typeset _CFG_EXCLUDED_DEVICES=""
typeset _CFG_CHECK_FLASHCACHE=""
typeset _CHECK_FLASHCACHE=0
typeset _CFG_CHECK_FLASHLOG=""
typeset _CHECK_FLASHLOG=0
typeset _CELL_OUTPUT=""
typeset _CELL_DATA=""
typeset _CELL_SERVER=""
typeset _FLASH_DEVICE=""
typeset _FLASH_STATUS=""
typeset _CELL_ALL_RC=0
typeset _CELL_RC=0

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
_CFG_EXCLUDED_DEVICES=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_devices')
if [[ -n "${_CFG_EXCLUDED_DEVICES}" ]]
then
    log "excluding following flash devices from the check: ${_CFG_EXCLUDED_DEVICES}"
fi
_CFG_CHECK_FLASHCACHE=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_flashcache')
case "${_CFG_CHECK_FLASHCACHE}" in
    yes|YES|Yes)
        _CHECK_FLASHCACHE=1
        ;;
    *)
        _CHECK_FLASHCACHE=0
        ;;
esac
(( _CHECK_FLASHCACHE > 0 )) || log "checking flash cache has been disabled"
_CFG_CHECK_FLASHLOG=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_flashlog')
case "${_CFG_CHECK_FLASHLOG}" in
    yes|YES|Yes)
        _CHECK_FLASHLOG=1
        ;;
    *)
        _CHECK_FLASHLOG=0
        ;;
esac
(( _CHECK_FLASHLOG > 0 )) || log "checking flash log has been disabled"

# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
# NOTE: _CELL_DATA/_CELL_ALL_RC are set inside a piped loop; this relies on
# ksh running the last stage of a pipeline in the current shell
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    # flash cache
    if (( _CHECK_FLASHCACHE > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "executing remote cell script (flash cache) on ${_CFG_CELL_SERVER}"
        _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_FLASHCACHE_COMMAND}" 2>>"${HC_STDERR_LOG}")
        _CELL_RC=$?
        if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
        then
            _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
            warn "unable to discover cell data on ${_CFG_CELL_SERVER} (flash cache)"
            (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
            # NOTE: this also skips the flash log discovery for this server
            continue
        else
            # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
            # shellcheck disable=SC1117
            _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
        fi
    fi
    # flash log
    if (( _CHECK_FLASHLOG > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "executing remote cell script (flash log) on ${_CFG_CELL_SERVER}"
        _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_FLASHLOG_COMMAND}" 2>>"${HC_STDERR_LOG}")
        _CELL_RC=$?
        if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
        then
            _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
            warn "unable to discover cell data on ${_CFG_CELL_SERVER} (flash log)"
            (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
            continue
        else
            # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
            # shellcheck disable=SC1117
            _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
        fi
    fi
done

# validate cell data
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
then
    _MSG="did not discover cell data or one of the discoveries failed"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 1
fi

# perform checks on cell data
# awk emits one 'server|device|status' record per flash device
print -R "${_CELL_DATA}" | awk '

    BEGIN { found = 0; flash_device = ""; flash_status = ""; }

    {
        # split cell data line: <cell>:<attribute>:<value>
        split ($0, cell_line, ":");

        if ( cell_line[2] ~ /name/ ) {
            found = 1;
            flash_device = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", flash_device);
        }
        if ( cell_line[2] ~ /status/ ) {
            flash_status = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", flash_status);
        };
        if ( flash_device != "" && flash_status != "" && found ) {
            printf "%s|%s|%s\n", cell_line[1], flash_device, flash_status
            found = 0; flash_device = ""; flash_status = "";
        }
    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _FLASH_DEVICE _FLASH_STATUS
do
    # check exclusion list (non-zero RC is treated as "device is in the list")
    data_list_contains_string "${_CFG_EXCLUDED_DEVICES}" "${_FLASH_DEVICE}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "ignoring flash device ${_FLASH_DEVICE}"
    else
        if [[ "${_FLASH_STATUS}" != "${_TARGET_STATUS}" ]]
        then
            _MSG="status of flash device ${_CELL_SERVER}:/${_FLASH_DEVICE} is NOK (${_FLASH_STATUS}!=${_TARGET_STATUS})"
            _STC=1
        else
            _MSG="status of flash device ${_CELL_SERVER}:/${_FLASH_DEVICE} is OK (${_FLASH_STATUS}==${_TARGET_STATUS})"
            _STC=0
        fi
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}" "${_FLASH_STATUS}" "${_TARGET_STATUS}"
        fi
    fi
done

# add dcli output to stdout log
# NOTE: _CELL_DATA holds the combined flash cache + flash log output, so it
# is written under each enabled header
if (( _CHECK_FLASHCACHE > 0 ))
then
    print "==== {dcli ${_CELL_FLASHCACHE_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
    print "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"
fi
if (( _CHECK_FLASHLOG > 0 ))
then
    print "==== {dcli ${_CELL_FLASHLOG_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
    print "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"
fi

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function _show_usage
{
# Prints the help text of the check_exadata_cell_flash plugin to stdout.
# Arguments: $1 - plugin name, $2 - plugin version, $3 - configuration file
# Returns:   0
# The here-document body is kept unindented on purpose ('<<-' only strips
# leading tabs).
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
excluded_devices=<list_of_flash_devices_to_exclude>
check_flashcache=<yes|no>
check_flashlog=<yes|no>
PURPOSE : Checks the status of the flash cache/log devices on cell servers (via dcli)
dcli> cellcli -e 'LIST FLASHCACHE DETAIL'
dcli> cellcli -e 'LIST FLASHLOG DETAIL'
Target attributes:
* Flash cache: normal [optional]
* Flash log: normal [optional]
LOG HEALTHY : Supported

EOT

return 0
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
@ -1,288 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_griddisks.sh
|
||||
#******************************************************************************
|
||||
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is a free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#******************************************************************************
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: check_exadata_cell_griddisks
|
||||
# DOES: see _show_usage()
|
||||
# EXPECTS: see _show_usage()
|
||||
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config,
|
||||
# dump_logs(), exadata_exec_dcli(), init_hc(), log_hc(), warn()
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Health check plugin: queries every configured Exadata cell server (through
# exadata_exec_dcli) with "cellcli -e 'LIST GRIDDISK DETAIL'" and reports, per
# grid disk, whether its status equals _TARGET_STATUS and (optionally) whether
# its errorCount exceeds _TARGET_ERRORS.
# Arguments: comma-separated plugin arguments; "help" shows the usage text.
# Returns:   0 when the checks were performed (or help was shown),
#            1 when the configuration is unusable or cell discovery failed.
# Results are reported via log_hc(); raw dcli output is appended to
# ${HC_STDOUT_LOG}, error output to ${HC_STDERR_LOG}.
function check_exadata_cell_griddisks
{
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
#celadm01: name: RECOC5_CD_03_celadm01
#celadm01: asmDiskGroupName: RECOC5
#celadm01: asmDiskName: RECOC5_CD_03_CELADM01
#celadm01: asmFailGroupName: CELADM01
#celadm01: availableTo:
#celadm01: cachedBy:
#celadm01: cachingPolicy: none
#celadm01: cellDisk: CD_03_celadm01
#celadm01: comment: "Cluster DB diskgroup RECOC5"
#celadm01: creationTime: 2017-09-14T16:21:31+02:00
#celadm01: diskType: HardDisk
#celadm01: errorCount: 0
#celadm01: id: 67c31489-1ab4-4649-85f5-1e65ef9af213
#celadm01: size: 118G
#celadm01: status: active
typeset _CELL_COMMAND="cellcli -e 'LIST GRIDDISK DETAIL'"
typeset _TARGET_STATUS="active"
typeset _TARGET_ERRORS=0
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_CHECK_ERRORCOUNT=""
typeset _CHECK_ERRORCOUNT=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
typeset _CFG_EXCLUDED_DISKS=""
typeset _CELL_OUTPUT=""
typeset _CELL_DATA=""
# read target of the result loop; declared here so it stays function-local
typeset _CELL_SERVER=""
typeset _GRID_DISK=""
typeset _DISK_ERRORS=0
typeset _DISK_STATUS=""
typeset _CELL_ALL_RC=0
typeset _CELL_RC=0

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r "${_CONFIG_FILE}" ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    # no user configured: fall back to root
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
_CFG_EXCLUDED_DISKS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_disks')
if [[ -n "${_CFG_EXCLUDED_DISKS}" ]]
then
    log "excluding following grid disk(s) from the check: ${_CFG_EXCLUDED_DISKS}"
fi
# errorCount checking is enabled unless explicitly switched off
_CFG_CHECK_ERRORCOUNT=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_errorcount')
case "${_CFG_CHECK_ERRORCOUNT}" in
    no|NO|No)
        _CHECK_ERRORCOUNT=0
        ;;
    *)
        _CHECK_ERRORCOUNT=1
        ;;
esac
(( _CHECK_ERRORCOUNT > 0 )) || log "checking errorCount has been disabled"

# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
# NOTE(review): _CELL_DATA/_CELL_ALL_RC are set inside a piped while loop; this
# relies on the shell running the last pipeline stage in the current shell
# (ksh behaviour) -- confirm before porting to another shell
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
    _CELL_RC=$?
    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
    then
        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        continue
    else
        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
        # shellcheck disable=SC1117
        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
    fi
done

# validate cell data
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
then
    _MSG="did not discover cell data or one of the discoveries failed"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 1
fi

# perform checks on cell data
# awk turns the "<cell>: <attribute>: <value>" lines into one record per grid
# disk: <cell>|<grid disk>|<errorCount>|<status>. A record is emitted as soon
# as both the name and the status of a disk have been seen.
print -R "${_CELL_DATA}" | awk '

    BEGIN { found = 0; grid_disk = ""; disk_errors = 0; disk_status = ""; }

    {
        # split cell data line
        split ($0, cell_line, ":");

        if ( cell_line[2] ~ /name/ ) {
            found = 1;
            grid_disk = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", grid_disk);
        }
        if ( cell_line[2] ~ /errorCount/ ) {
            disk_errors = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", disk_errors);
        };

        if ( cell_line[2] ~ /status/ ) {
            disk_status = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", disk_status);
        };
        if ( grid_disk != "" && disk_status != "" && found ) {
            printf "%s|%s|%s|%s\n", cell_line[1], grid_disk, disk_errors, disk_status
            found = 0; grid_disk = ""; disk_errors = 0; disk_status = "";
        }
    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _GRID_DISK _DISK_ERRORS _DISK_STATUS
do
    # check exclusion list
    # NOTE(review): data_list_contains_string() is defined elsewhere; a non-zero
    # return code is treated here as "disk is excluded" -- confirm against helper
    data_list_contains_string "${_CFG_EXCLUDED_DISKS}" "${_GRID_DISK}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "ignoring grid disk ${_GRID_DISK}"
    else
        # errorCount
        if (( _CHECK_ERRORCOUNT > 0 ))
        then
            # compare against the configured threshold (_TARGET_ERRORS)
            if (( _DISK_ERRORS > _TARGET_ERRORS ))
            then
                _MSG="error count of grid disk ${_CELL_SERVER}:/${_GRID_DISK} is NOK (${_DISK_ERRORS}!=${_TARGET_ERRORS})"
                _STC=1
            else
                _MSG="error count of grid disk ${_CELL_SERVER}:/${_GRID_DISK} is OK (${_DISK_ERRORS}==${_TARGET_ERRORS})"
                _STC=0
            fi
            if (( _LOG_HEALTHY > 0 || _STC > 0 ))
            then
                log_hc "$0" ${_STC} "${_MSG}" "${_DISK_ERRORS}" "${_TARGET_ERRORS}"
            fi
        fi

        # status
        if [[ "${_DISK_STATUS}" != "${_TARGET_STATUS}" ]]
        then
            _MSG="status of grid disk ${_CELL_SERVER}:/${_GRID_DISK} is NOK (${_DISK_STATUS}!=${_TARGET_STATUS})"
            _STC=1
        else
            _MSG="status of grid disk ${_CELL_SERVER}:/${_GRID_DISK} is OK (${_DISK_STATUS}==${_TARGET_STATUS})"
            _STC=0
        fi
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}" "${_DISK_STATUS}" "${_TARGET_STATUS}"
        fi
    fi
done

# add dcli output to stdout log
print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
print "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Prints the help text of the check_exadata_cell_griddisks plugin to stdout.
# Arguments: $1 - plugin name
#            $2 - plugin version
#            $3 - path of the configuration file
# Returns:   0
function _show_usage
{
# parameter names and target values below mirror what the plugin actually
# reads/uses: config key 'dcli_user' and target status 'active'
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
excluded_disks=<list_of_grid_disks_to_exclude>
check_errorcount=<yes|no>
PURPOSE : Checks the status of grid disks on cell servers (via dcli)
dcli> cellcli -e 'LIST GRIDDISK DETAIL'
Target attributes:
* Status: active
* Error count: 0 [optional]
LOG HEALTHY : Supported

EOT

return 0
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
@ -1,241 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_luns.sh
|
||||
#******************************************************************************
|
||||
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is a free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#******************************************************************************
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: check_exadata_cell_luns
|
||||
# DOES: see _show_usage()
|
||||
# EXPECTS: see _show_usage()
|
||||
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config,
|
||||
# dump_logs(), exadata_exec_dcli(), init_hc(), log_hc(), warn()
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Health check plugin: queries every configured Exadata cell server (through
# exadata_exec_dcli) with "cellcli -e 'LIST LUN DETAIL'" and reports, per LUN,
# whether its status equals _TARGET_STATUS.
# Arguments: comma-separated plugin arguments; "help" shows the usage text.
# Returns:   0 when the checks were performed (or help was shown),
#            1 when the configuration is unusable or cell discovery failed.
# Results are reported via log_hc(); raw dcli output is appended to
# ${HC_STDOUT_LOG}, error output to ${HC_STDERR_LOG}.
function check_exadata_cell_luns
{
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
#celadm01: name: 0_2
#celadm01: deviceName: /dev/sdc
#celadm01: diskType: HardDisk
#celadm01: id: 0_2
#celadm01: isSystemLun: FALSE
#celadm01: lunSize: 7.1522655487060546875T
#celadm01: lunUID: 0_2
#celadm01: physicalDrives: 8:2
#celadm01: raidLevel: 0
#celadm01: lunWriteCacheMode: "WriteBack, ReadAheadNone, Direct, No Write Cache if Bad BBU"
#celadm01: status: normal
typeset _CELL_COMMAND="cellcli -e 'LIST LUN DETAIL'"
typeset _TARGET_STATUS="normal"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
typeset _CFG_EXCLUDED_LUNS=""
typeset _CELL_OUTPUT=""
typeset _CELL_DATA=""
# read target of the result loop; declared here so it stays function-local
typeset _CELL_SERVER=""
typeset _LUN=""
typeset _LUN_STATUS=""
typeset _CELL_ALL_RC=0
typeset _CELL_RC=0

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r "${_CONFIG_FILE}" ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    # no user configured: fall back to root
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
_CFG_EXCLUDED_LUNS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_luns')
[[ -n "${_CFG_EXCLUDED_LUNS}" ]] && log "excluding following LUNs from the check: ${_CFG_EXCLUDED_LUNS}"

# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
# NOTE(review): _CELL_DATA/_CELL_ALL_RC are set inside a piped while loop; this
# relies on the shell running the last pipeline stage in the current shell
# (ksh behaviour) -- confirm before porting to another shell
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
    _CELL_RC=$?
    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
    then
        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        continue
    else
        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
        # shellcheck disable=SC1117
        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
    fi
done

# validate cell data
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
then
    _MSG="did not discover cell data or one of the discoveries failed"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 1
fi

# perform checks on cell data
# awk turns the "<cell>: <attribute>: <value>" lines into one record per LUN:
# <cell>|<lun>|<status>. A record is emitted as soon as both the name and the
# status of a LUN have been seen.
print -R "${_CELL_DATA}" | awk '

    BEGIN { found = 0; lun = ""; lun_status = ""; }

    {
        # split cell data line
        split ($0, cell_line, ":");

        if ( cell_line[2] ~ /name/ ) {
            found = 1;
            lun = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", lun);
        }
        if ( cell_line[2] ~ /status/ ) {
            lun_status = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", lun_status);
        };
        if ( lun != "" && lun_status != "" && found ) {
            printf "%s|%s|%s\n", cell_line[1], lun, lun_status
            found = 0; lun = ""; lun_status = "";
        }
    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _LUN _LUN_STATUS
do
    # check exclusion list
    # NOTE(review): data_list_contains_string() is defined elsewhere; a non-zero
    # return code is treated here as "LUN is excluded" -- confirm against helper
    data_list_contains_string "${_CFG_EXCLUDED_LUNS}" "${_LUN}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "ignoring LUN ${_LUN}"
    else
        if [[ "${_LUN_STATUS}" != "${_TARGET_STATUS}" ]]
        then
            _MSG="status of LUN ${_CELL_SERVER}:/${_LUN} is NOK (${_LUN_STATUS}!=${_TARGET_STATUS})"
            _STC=1
        else
            _MSG="status of LUN ${_CELL_SERVER}:/${_LUN} is OK (${_LUN_STATUS}==${_TARGET_STATUS})"
            _STC=0
        fi
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}" "${_LUN_STATUS}" "${_TARGET_STATUS}"
        fi
    fi
done

# add dcli output to stdout log
print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
print "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Prints the help text of the check_exadata_cell_luns plugin to stdout.
# Arguments: $1 - plugin name
#            $2 - plugin version
#            $3 - path of the configuration file
# Returns:   0
function _show_usage
{
# the parameter name below mirrors the config key the plugin actually reads
# ('dcli_user')
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
excluded_luns=<list_of_luns_to_exclude>
PURPOSE : Checks the status of LUNs on cell servers (via dcli)
dcli> cellcli -e 'LIST LUN DETAIL'
Target attributes:
* Status: normal
LOG HEALTHY : Supported

EOT

return 0
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
@ -1,512 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_megaraid.sh
|
||||
#******************************************************************************
|
||||
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is a free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#******************************************************************************
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: check_exadata_cell_megaraid
|
||||
# DOES: see _show_usage()
|
||||
# EXPECTS: see _show_usage()
|
||||
# REQUIRES: data_comma2space(), data_comma2newline(), data_contains_string(),
|
||||
# data_get_lvalue_from_config, dump_logs(), exadata_exec_dcli(),
|
||||
# init_hc(), log_hc(), warn()
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
|
||||
# @(#) 2019-07-08: update _CELL_COMMAND [Patrick Van der Veken]
|
||||
# @(#) 2019-07-18: added supercap check, see Oracle bug 28564584 + exclusion
|
||||
# logic for components (cell_exclude) [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function check_exadata_cell_megaraid
|
||||
{
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
|
||||
typeset _VERSION="2019-07-18" # YYYY-MM-DD
|
||||
typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
|
||||
# cell query command -- DO NOT CHANGE --
|
||||
typeset _CELL_COMMAND="/opt/MegaRAID/storcli/storcli64 -ShowSummary -aALL"
|
||||
typeset _SUPERCAP_COMMAND="/opt/MegaRAID/storcli/storcli64 /c0/cv show all"
|
||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||
|
||||
# set defaults
|
||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
|
||||
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
|
||||
typeset _ARGS=$(data_comma2space "$*")
|
||||
typeset _ARG=""
|
||||
typeset _MSG=""
|
||||
typeset _STC=0
|
||||
typeset _CFG_HEALTHY=""
|
||||
typeset _LOG_HEALTHY=0
|
||||
typeset _CFG_DCLI_USER=""
|
||||
typeset _CFG_CELL_SERVERS=""
|
||||
typeset _CFG_CELL_SERVER=""
|
||||
typeset _CFG_CHECK_CONTROLLER=""
|
||||
typeset _CHECK_CONTROLLER=0
|
||||
typeset _CFG_CHECK_BBU=""
|
||||
typeset _CHECK_BBU=0
|
||||
typeset _CFG_CHECK_SUPERCAP=""
|
||||
typeset _CHECK_SUPERCAP=0
|
||||
typeset _CFG_CHECK_PHYSICAL=""
|
||||
typeset _CHECK_PHYSICAL=0
|
||||
typeset _CFG_CHECK_VIRTUAL=""
|
||||
typeset _CHECK_VIRTUAL=0
|
||||
typeset _CFG_EXCLUDES=""
|
||||
typeset _CELL_OUTPUT=""
|
||||
typeset _CELL_DATA=""
|
||||
typeset _RAID_DEVICE=""
|
||||
typeset _RAID_DEVICE_TYPE=""
|
||||
typeset _RAID_STATUS=""
|
||||
typeset _SUPERCAP_STATUS=""
|
||||
typeset _CELL_ALL_RC=0
|
||||
typeset _CELL_RC=0
|
||||
|
||||
# handle arguments (originally comma-separated)
|
||||
for _ARG in ${_ARGS}
|
||||
do
|
||||
case "${_ARG}" in
|
||||
help)
|
||||
_show_usage $0 ${_VERSION} ${_CONFIG_FILE} && return 0
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# handle configuration file
|
||||
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
|
||||
if [[ ! -r ${_CONFIG_FILE} ]]
|
||||
then
|
||||
warn "unable to read configuration file at ${_CONFIG_FILE}"
|
||||
return 1
|
||||
fi
|
||||
# read configuration values
|
||||
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
|
||||
case "${_CFG_HEALTHY}" in
|
||||
yes|YES|Yes)
|
||||
_LOG_HEALTHY=1
|
||||
;;
|
||||
*)
|
||||
# do not override hc_arg
|
||||
(( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
|
||||
;;
|
||||
esac
|
||||
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
|
||||
if [[ -z "${_CFG_DCLI_USER}" ]]
|
||||
then
|
||||
_CFG_DCLI_USER="root"
|
||||
log "will use DCLI user ${_CFG_DCLI_USER}"
|
||||
fi
|
||||
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
|
||||
if [[ -z "${_CFG_CELL_SERVERS}" ]]
|
||||
then
|
||||
warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
|
||||
return 1
|
||||
fi
|
||||
_CFG_CHECK_CONTROLLER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_controller')
|
||||
case "${_CFG_CHECK_CONTROLLER}" in
|
||||
no|NO|No)
|
||||
_CHECK_CONTROLLER=0
|
||||
;;
|
||||
*)
|
||||
_CHECK_CONTROLLER=1
|
||||
;;
|
||||
esac
|
||||
(( _CHECK_CONTROLLER > 0 )) || log "checking controller has been disabled"
|
||||
_CFG_CHECK_BBU=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_bbu')
|
||||
case "${_CFG_CHECK_BBU}" in
|
||||
no|NO|No)
|
||||
_CHECK_BBU=0
|
||||
;;
|
||||
*)
|
||||
_CHECK_BBU=1
|
||||
;;
|
||||
esac
|
||||
(( _CHECK_BBU > 0 )) || log "checking bbu (battery) has been disabled"
|
||||
_CFG_CHECK_SUPERCAP=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_supercap')
|
||||
case "${_CFG_CHECK_SUPERCAP}" in
|
||||
no|NO|No)
|
||||
_CHECK_SUPERCAP=0
|
||||
;;
|
||||
*)
|
||||
_CHECK_SUPERCAP=1
|
||||
;;
|
||||
esac
|
||||
(( _CHECK_SUPERCAP > 0 )) || log "checking bbu (supercap) has been disabled"
|
||||
_CFG_CHECK_PHYSICAL=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_physical')
|
||||
case "${_CFG_CHECK_PHYSICAL}" in
|
||||
no|NO|No)
|
||||
_CHECK_PHYSICAL=0
|
||||
;;
|
||||
*)
|
||||
_CHECK_PHYSICAL=1
|
||||
;;
|
||||
esac
|
||||
(( _CHECK_PHYSICAL > 0 )) || log "checking physical has been disabled"
|
||||
_CFG_CHECK_VIRTUAL=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_virtual')
|
||||
case "${_CFG_CHECK_VIRTUAL}" in
|
||||
no|NO|No)
|
||||
_CHECK_VIRTUAL=0
|
||||
;;
|
||||
*)
|
||||
_CHECK_VIRTUAL=1
|
||||
;;
|
||||
esac
|
||||
(( _CHECK_VIRTUAL > 0 )) || log "checking virtual has been disabled"
|
||||
_CFG_EXCLUDES=$(grep -i -E -e '^cell_exclude:' ${_CONFIG_FILE} 2>/dev/null)
|
||||
|
||||
# log_healthy
|
||||
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
|
||||
if (( _LOG_HEALTHY > 0 ))
|
||||
then
|
||||
if (( ARG_LOG > 0 ))
|
||||
then
|
||||
log "logging/showing passed health checks"
|
||||
else
|
||||
log "showing passed health checks (but not logging)"
|
||||
fi
|
||||
else
|
||||
log "not logging/showing passed health checks"
|
||||
fi
|
||||
|
||||
# gather cell data (serialized way to have better control of output & errors)
|
||||
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
|
||||
do
|
||||
(( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
|
||||
_CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>${HC_STDERR_LOG})
|
||||
_CELL_RC=$?
|
||||
if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
|
||||
then
|
||||
_CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
|
||||
warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
|
||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
|
||||
continue
|
||||
else
|
||||
# _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
|
||||
# shellcheck disable=SC1117
|
||||
_CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
|
||||
fi
|
||||
done
|
||||
|
||||
# validate cell data
|
||||
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
|
||||
then
|
||||
_MSG="did not discover cell data or one of the discoveries failed"
|
||||
_STC=2
|
||||
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
|
||||
then
|
||||
log_hc "$0" ${_STC} "${_MSG}"
|
||||
fi
|
||||
return 1
|
||||
fi
|
||||
|
||||
# perform checks on cell data
|
||||
print -R "${_CELL_DATA}" | awk '
|
||||
|
||||
BEGIN { found_controller = 0; controller_status = "";
|
||||
found_bbu = 0; bbu_status = "";
|
||||
found_physical = 0; physical_device = ""; physical_status = "";
|
||||
found_virtual = 0; vitual_device = ""; virtual_status = "";
|
||||
status = "";
|
||||
}
|
||||
|
||||
{
|
||||
# split cell data line
|
||||
split ($0, cell_line, ":");
|
||||
|
||||
# find markers
|
||||
if ( cell_line[2] ~ /Controller/ ) {
|
||||
found_controller = 1;
|
||||
}
|
||||
if ( cell_line[2] ~ /BBU/ ) {
|
||||
found_bbu = 1;
|
||||
}
|
||||
if ( cell_line[2] ~ /Connector/ ) {
|
||||
found_physical = 1;
|
||||
physical_device = cell_line[4];
|
||||
# strip leading spaces
|
||||
gsub (/^[[:space:]]*/, "", physical_device);
|
||||
}
|
||||
if ( cell_line[2] ~ /Virtual drive/ ) {
|
||||
found_virtual = 1;
|
||||
virtual_device = cell_line[3];
|
||||
# strip leading spaces
|
||||
gsub (/^[[:space:]]*/, "", virtual_device);
|
||||
}
|
||||
|
||||
# find attributes
|
||||
if ( cell_line[2] ~ /Status/ ) {
|
||||
status = cell_line[3];
|
||||
# strip spaces
|
||||
gsub (/[[:space:]]/, "", status);
|
||||
if (found_controller > 0 ) { controller_status = status }
|
||||
if (found_bbu > 0 ) {
|
||||
# delete the PITA "PD" string
|
||||
gsub (/[[:space:]]*PD[[:space:]]*/, "", status);
|
||||
bbu_status = status;
|
||||
}
|
||||
};
|
||||
if ( cell_line[2] ~ /State/ ) {
|
||||
status = cell_line[3];
|
||||
# strip spaces
|
||||
gsub (/[[:space:]]/, "", status);
|
||||
if (found_physical > 0 ) { physical_status = status }
|
||||
if (found_virtual > 0 ) { virtual_status = status }
|
||||
};
|
||||
|
||||
# report results
|
||||
if ( controller_status != "" && found_controller ) {
|
||||
printf "%s|%s|%s|%s\n", cell_line[1], "CONTROLLER", "", controller_status
|
||||
found_controller = 0; controller_status = ""; status = "";
|
||||
}
|
||||
if ( bbu_status != "" && found_bbu ) {
|
||||
printf "%s|%s|%s|%s\n", cell_line[1], "BBU", "", bbu_status
|
||||
found_bbu = 0; bbu_status = ""; status = "";
|
||||
}
|
||||
if ( physical_device != "" && physical_status != "" && found_physical ) {
|
||||
printf "%s|%s|%s|%s\n", cell_line[1], "PHYSICAL", physical_device, physical_status
|
||||
found_physical = 0; physical_device = ""; physical_status = ""; status = "";
|
||||
}
|
||||
if ( virtual_device != "" && virtual_status != "" && found_virtual ) {
|
||||
printf "%s|%s|%s|%s\n", cell_line[1], "VIRTUAL", virtual_device, virtual_status
|
||||
found_virtual = 0; virtual_device = ""; virtual_status = ""; status = "";
|
||||
}
|
||||
|
||||
}' 2>>${HC_STDERR_LOG} | while IFS='|' read -r _CELL_SERVER _RAID_DEVICE_TYPE _RAID_DEVICE _RAID_STATUS
|
||||
do
|
||||
case "${_RAID_DEVICE_TYPE}" in
|
||||
CONTROLLER)
|
||||
if (( _CHECK_CONTROLLER > 0 ))
|
||||
then
|
||||
# check for exclusion
|
||||
$(data_contains_string "${_CFG_EXCLUDES}" "${_CELL_SERVER}:controller")
|
||||
if (( $? == 0 ))
|
||||
then
|
||||
_TARGET_STATUS="Optimal"
|
||||
if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
|
||||
then
|
||||
_MSG="state of controller on ${_CELL_SERVER} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
|
||||
_STC=1
|
||||
else
|
||||
_MSG="state of controller on ${_CELL_SERVER} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
|
||||
_STC=0
|
||||
fi
|
||||
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
|
||||
then
|
||||
log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
|
||||
fi
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "excluded check for controller on ${_CELL_SERVER}"
|
||||
fi
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "skipping check for controller (disabled) [${_CELL_SERVER}]"
|
||||
fi
|
||||
;;
|
||||
BBU)
|
||||
if (( _CHECK_BBU > 0 ))
|
||||
then
|
||||
# check for exclusion
|
||||
$(data_contains_string "${_CFG_EXCLUDES}" "${_CELL_SERVER}:bbu")
|
||||
if (( $? == 0 ))
|
||||
then
|
||||
_TARGET_STATUS="Healthy"
|
||||
if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
|
||||
then
|
||||
_MSG="state of bbu (battery) on ${_CELL_SERVER} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
|
||||
_STC=1
|
||||
else
|
||||
_MSG="state of bbu (battery) on ${_CELL_SERVER} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
|
||||
_STC=0
|
||||
fi
|
||||
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
|
||||
then
|
||||
log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
|
||||
fi
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "excluded check for BBU (battery) on ${_CELL_SERVER}"
|
||||
fi
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "skipping check for bbu (battery) (disabled) [${_CELL_SERVER}]"
|
||||
fi
|
||||
;;
|
||||
PHYSICAL)
|
||||
if (( _CHECK_PHYSICAL > 0 ))
|
||||
then
|
||||
# check for exclusion
|
||||
$(data_contains_string "${_CFG_EXCLUDES}" "${_CELL_SERVER}:physical")
|
||||
if (( $? == 0 ))
|
||||
then
|
||||
_TARGET_STATUS="Online"
|
||||
if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
|
||||
then
|
||||
_MSG="state of physical device ${_CELL_SERVER}:/${_RAID_DEVICE} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
|
||||
_STC=1
|
||||
else
|
||||
_MSG="state of physical device on ${_CELL_SERVER}:/${_RAID_DEVICE} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
|
||||
_STC=0
|
||||
fi
|
||||
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
|
||||
then
|
||||
log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
|
||||
fi
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "excluded check for physical devices on ${_CELL_SERVER}"
|
||||
fi
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "skipping check for physical device [${_CELL_SERVER}:/${_RAID_DEVICE}] (disabled)"
|
||||
fi
|
||||
;;
|
||||
VIRTUAL)
|
||||
if (( _CHECK_VIRTUAL > 0 ))
|
||||
then
|
||||
# check for exclusion
|
||||
$(data_contains_string "${_CFG_EXCLUDES}" "${_CELL_SERVER}:virtual")
|
||||
if (( $? == 0 ))
|
||||
then
|
||||
_TARGET_STATUS="Optimal"
|
||||
if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
|
||||
then
|
||||
_MSG="state of virtual device ${_CELL_SERVER}:/${_RAID_DEVICE} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
|
||||
_STC=1
|
||||
else
|
||||
_MSG="state of virtual device on ${_CELL_SERVER}:/${_RAID_DEVICE} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
|
||||
_STC=0
|
||||
fi
|
||||
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
|
||||
then
|
||||
log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
|
||||
fi
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "excluded check for virtual devices on ${_CELL_SERVER}"
|
||||
fi
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "skipping check for virtual device [${_CELL_SERVER}:/${_RAID_DEVICE}] (disabled)"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# add dcli output to stdout log
|
||||
print "==== {dcli ${_CELL_COMMAND} [${_CFG_CELL_SERVER}]} ====" >>${HC_STDOUT_LOG}
|
||||
print "${_CELL_DATA}" >>${HC_STDOUT_LOG}
|
||||
|
||||
# check if we need to check the BBU (supercap). Use different storcli query
|
||||
# see Oracle Bug 28564584 : X5-2 Aspen w/storcli utility shows false bbu failed status
|
||||
if (( _CHECK_SUPERCAP > 0 ))
|
||||
then
|
||||
_CELL_DATA=""
|
||||
# gather cell data (serialized way to have better control of output & errors)
|
||||
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
|
||||
do
|
||||
(( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
|
||||
_CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_SUPERCAP_COMMAND}" 2>>${HC_STDERR_LOG})
|
||||
_CELL_RC=$?
|
||||
if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
|
||||
then
|
||||
_CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
|
||||
warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
|
||||
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
|
||||
continue
|
||||
else
|
||||
# _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
|
||||
# shellcheck disable=SC1117
|
||||
_CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
|
||||
fi
|
||||
done
|
||||
|
||||
# validate cell data
|
||||
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
|
||||
then
|
||||
_MSG="did not discover cell data or one of the discoveries failed"
|
||||
_STC=2
|
||||
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
|
||||
then
|
||||
log_hc "$0" ${_STC} "${_MSG}"
|
||||
fi
|
||||
return 1
|
||||
fi
|
||||
|
||||
# perform checks on cell data
|
||||
_TARGET_STATUS="Optimal"
|
||||
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
|
||||
do
|
||||
# check for exclusion
|
||||
$(data_contains_string "${_CFG_EXCLUDES}" "${_CELL_SERVER}:supercap")
|
||||
if (( $? == 0 ))
|
||||
then
|
||||
_SUPERCAP_STATUS=$(print -R "${_CELL_DATA}" | grep -c -E -e "^${_CFG_CELL_SERVER}: *State *${_TARGET_STATUS}" 2>/dev/null)
|
||||
if (( _SUPERCAP_STATUS == 0 ))
|
||||
then
|
||||
_MSG="state of BBU (supercap) device on ${_CFG_CELL_SERVER} is NOK"
|
||||
_STC=1
|
||||
else
|
||||
_MSG="state of BBU (supercap) device on ${_CFG_CELL_SERVER} is OK"
|
||||
_STC=0
|
||||
fi
|
||||
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
|
||||
then
|
||||
log_hc "$0" ${_STC} "${_MSG}" "Non-optimal" "${_TARGET_STATUS}"
|
||||
fi
|
||||
else
|
||||
(( ARG_DEBUG > 0 )) && debug "excluded check for bbu (supercap) on ${_CELL_SERVER}"
|
||||
fi
|
||||
done
|
||||
|
||||
# add dcli output to stdout log
|
||||
print "==== {dcli ${_SUPERCAP_COMMAND}} ====" >>${HC_STDOUT_LOG}
|
||||
print "${_CELL_DATA}" >>${HC_STDOUT_LOG}
|
||||
fi
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function _show_usage
{
# Prints the help text of the plugin on stdout.
# ARGS:    $1 - plugin name
#          $2 - plugin version
#          $3 - path of the plugin configuration file
# RETURNS: always 0
# NOTE(review): the account parameter is documented as 'dcli_user' (was the
#   misspelt 'dlci_user'); the sibling cell plugins read 'dcli_user' from their
#   configuration file -- confirm this plugin reads the same key.
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
check_controller=<yes|no>
check_bbu=<yes|no>
check_supercap=<yes|no>
check_physical=<yes|no>
check_virtual=<yes|no>
and formatted stanzas of:
cell_exclude:<cell_server>:<component>
PURPOSE : 1) Checks the status of MegaRAID device(s) on cell servers (via dcli)
dcli> /opt/MegaRAID/MegaCli/MegaCli64 -ShowSummary -aALL
Target attributes:
* Controller: Optimal [optional]
* BBU (battery): Healthy [optional]
* Physical devices: Online [optional]
* Virtual devices: Optimal [optional]
2) Checks the status of the Supercap (battery):
dcli> /opt/MegaRAID/storcli/storcli64 /c0/cv show all
CAVEAT : Requires a working dcli setup for the root user
LOG HEALTHY : Supported

EOT

return 0
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
@ -1,252 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_cell_physicaldisks.sh
|
||||
#******************************************************************************
|
||||
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is a free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#******************************************************************************
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: check_exadata_cell_physicaldisks
|
||||
# DOES: see _show_usage()
|
||||
# EXPECTS: see _show_usage()
|
||||
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config,
|
||||
# dump_logs(), exadata_exec_dcli(), init_hc(), log_hc(), warn()
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function check_exadata_cell_physicaldisks
{
# Health check: queries the physical disks of every configured Exadata cell
# server (via exadata_exec_dcli) and reports each disk whose status differs
# from the target status.
# ARGS:    $* - comma-separated plugin arguments; only "help" is handled here
# GLOBALS: reads CONFIG_DIR, ARG_CONFIG_FILE, ARG_DEBUG, ARG_DEBUG_LEVEL,
#          ARG_LOG, ARG_LOG_HEALTHY, DEBUG_OPTS, HC_STDOUT_LOG, HC_STDERR_LOG
# RETURNS: 0 when the check ran (disk results are reported through log_hc),
#          1 when the configuration is unusable or the discovery failed
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
# sample output (each line is prefixed by the name of the cell server):
#celadm01: name: 8:5
#celadm01: deviceId: 12
#celadm01: deviceName: /dev/sdf
#celadm01: diskType: HardDisk
#celadm01: enclosureDeviceId: 8
#celadm01: errOtherCount: 0
#celadm01: luns: 0_5
#celadm01: makeModel: "HGST H7280A520SUN8.0T"
#celadm01: physicalFirmware: PD51
#celadm01: physicalInsertTime: 2017-06-07T14:24:51+02:00
#celadm01: physicalInterface: sas
#celadm01: physicalSerial: P9MG6V
#celadm01: physicalSize: 7.1536639072000980377197265625T
#celadm01: slotNumber: 5
#celadm01: status: normal
typeset _CELL_COMMAND="cellcli -e 'LIST PHYSICALDISK DETAIL'"
typeset _TARGET_STATUS="normal"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
# shellcheck disable=SC2086
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
typeset _CFG_EXCLUDED_DISKS=""
typeset _CELL_OUTPUT=""
typeset _CELL_DATA=""
# target of the 'read' in the check loop; declared here so it stays local
typeset _CELL_SERVER=""
typeset _PHYSICAL_DISK=""
typeset _DISK_STATUS=""
typeset _CELL_ALL_RC=0
typeset _CELL_RC=0

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r "${_CONFIG_FILE}" ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    # fall back to root when no dcli user has been configured
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
_CFG_EXCLUDED_DISKS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_disks')
if [[ -n "${_CFG_EXCLUDED_DISKS}" ]]
then
    log "excluding following physical disk(s) from the check: ${_CFG_EXCLUDED_DISKS}"
fi

# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
# NOTE: _CELL_DATA and _CELL_ALL_RC are read after this loop, which relies on
#       ksh running the last stage of a pipeline in the current shell
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
    _CELL_RC=$?
    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
    then
        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        continue
    else
        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
        # shellcheck disable=SC1117
        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
    fi
done

# validate cell data
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
then
    _MSG="did not discover cell data or one of the discoveries failed"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 1
fi

# perform checks on cell data
# the awk filter emits one "<cell_server>|<disk_name>|<status>" record per disk:
# a 'name' line opens a record, the next 'status' line completes it
print -R "${_CELL_DATA}" | awk '

    BEGIN { found = 0; physical_disk = ""; disk_status = ""; }

    {
        # split cell data line
        split ($0, cell_line, ":");

        if ( cell_line[2] ~ /name/ ) {
            found = 1;
            physical_disk = cell_line[3];
            # disk names such as 8:5 contain the field separator themselves
            if (cell_line[4] != "") {
                physical_disk = physical_disk ":" cell_line[4];
            }
            # strip spaces
            gsub (/[[:space:]]/, "", physical_disk);
        }
        if ( cell_line[2] ~ /status/ ) {
            disk_status = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", disk_status);
        };
        if ( physical_disk != "" && disk_status != "" && found ) {
            printf "%s|%s|%s\n", cell_line[1], physical_disk, disk_status
            found = 0; physical_disk = ""; disk_status = "";
        }
    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _PHYSICAL_DISK _DISK_STATUS
do
    # check exclusion list
    # NOTE(review): a non-zero return code is treated as "disk is excluded";
    #               confirm against data_list_contains_string()
    data_list_contains_string "${_CFG_EXCLUDED_DISKS}" "${_PHYSICAL_DISK}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "ignoring physical disk ${_PHYSICAL_DISK}"
    else
        # status
        if [[ "${_DISK_STATUS}" != "${_TARGET_STATUS}" ]]
        then
            _MSG="status of physical disk ${_CELL_SERVER}:/${_PHYSICAL_DISK} is NOK (${_DISK_STATUS}!=${_TARGET_STATUS})"
            _STC=1
        else
            _MSG="status of physical disk ${_CELL_SERVER}:/${_PHYSICAL_DISK} is OK (${_DISK_STATUS}==${_TARGET_STATUS})"
            _STC=0
        fi
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}" "${_DISK_STATUS}" "${_TARGET_STATUS}"
        fi
    fi
done

# add dcli output to stdout log (-R: write the cell data verbatim, without
# interpreting backslash sequences)
print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
print -R "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function _show_usage
{
# Prints the help text of the plugin on stdout.
# ARGS:    $1 - plugin name
#          $2 - plugin version
#          $3 - path of the plugin configuration file
# RETURNS: always 0
# NOTE: the account parameter is 'dcli_user' (the help text used to say
#       'dlci_user', which is not the key the plugin reads)
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
excluded_disks=<list_of_physical_disks_to_exclude>
PURPOSE : Checks the status of physical disks on cell servers (via dcli)
dcli> cellcli -e 'LIST PHYSICALDISK DETAIL'
Target attributes:
* Status: normal
LOG HEALTHY : Supported

EOT

return 0
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
@ -1,171 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_ib_status.sh
|
||||
#******************************************************************************
|
||||
# @(#) Copyright (C) 2020 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is a free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#******************************************************************************
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: check_exadata_ib_status
|
||||
# DOES: see _show_usage()
|
||||
# EXPECTS: see _show_usage()
|
||||
# REQUIRES: data_comma2space(), dump_logs(), init_hc(), log_hc(), warn()
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2020-07-07: initial version [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function check_exadata_ib_status
{
# Health check: verifies that all local Infiniband ports report state ACTIVE
# (ibstatus) and that exactly 2 Infiniband switches are reporting (ibswitches).
# ARGS:    $* - comma-separated plugin arguments; only "help" is handled here
# GLOBALS: reads ARG_DEBUG, ARG_DEBUG_LEVEL, ARG_LOG, ARG_LOG_HEALTHY,
#          DEBUG_OPTS, HC_STDOUT_LOG, HC_STDERR_LOG
# RETURNS: 0 when the check ran (results are reported through log_hc),
#          1 when the IB tools are missing or could not be executed
# ------------------------- CONFIGURATION starts here -------------------------
typeset _VERSION="2020-07-07"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
typeset _IBSTATUS_BIN="/usr/sbin/ibstatus"
typeset _IBSWITCHES_BIN="/usr/sbin/ibswitches"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
# shellcheck disable=SC2086
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _LOG_HEALTHY=0
typeset _IBSTATUS_OUTPUT=""
typeset _IBSWITCHES_OUTPUT=""
typeset _NUM_INACTIVE_PORTS=0
typeset _NUM_SWITCHES=0

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            # this plugin has no configuration file, so only name & version
            # are passed to the usage routine
            _show_usage "$0" "${_VERSION}" && return 0
            ;;
    esac
done

# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# check IB tools: both binaries must be present and executable
if [[ ! -x "${_IBSTATUS_BIN}" || ! -x "${_IBSWITCHES_BIN}" ]]
then
    warn "IB tools are not installed here. This is not an Exadata compute node?"
    return 1
fi

# gather infiniband status data
(( ARG_DEBUG > 0 )) && debug "executing command {${_IBSTATUS_BIN}}"
_IBSTATUS_OUTPUT=$(${_IBSTATUS_BIN} 2>>"${HC_STDERR_LOG}")
# shellcheck disable=SC2181
if (( $? > 0 )) || [[ -z "${_IBSTATUS_OUTPUT}" ]]
then
    _MSG="unable to run command {${_IBSTATUS_BIN}}"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
    return 1
fi
(( ARG_DEBUG > 0 )) && debug "executing command {${_IBSWITCHES_BIN}}"
_IBSWITCHES_OUTPUT=$(${_IBSWITCHES_BIN} 2>>"${HC_STDERR_LOG}")
# shellcheck disable=SC2181
if (( $? > 0 )) || [[ -z "${_IBSWITCHES_OUTPUT}" ]]
then
    _MSG="unable to run command {${_IBSWITCHES_BIN}}"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
    return 1
fi

# perform checks on IB status data:
# count the 'state:' lines that do not mention ACTIVE
_NUM_INACTIVE_PORTS=$(print -R "${_IBSTATUS_OUTPUT}" | grep -E -e '^[[:space:]]+state:' 2>/dev/null | grep -c -v "ACTIVE" 2>/dev/null)
if (( _NUM_INACTIVE_PORTS > 0 ))
then
    _MSG="${_NUM_INACTIVE_PORTS} IB port(s) are/is in state INACTIVE"
    _STC=1
else
    _MSG="all IB port(s) are/is in ACTIVE state"
    _STC=0
fi
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
then
    log_hc "$0" ${_STC} "${_MSG}"
fi

# perform checks on IB switches data: every output line counts as one switch
_NUM_SWITCHES=$(print -R "${_IBSWITCHES_OUTPUT}" | wc -l 2>/dev/null)
if (( _NUM_SWITCHES != 2 ))
then
    _MSG="only ${_NUM_SWITCHES} IB switch(es) are/is reporting (${_NUM_SWITCHES}<>2)"
    _STC=1
else
    _MSG="${_NUM_SWITCHES} IB switch(es) are/is reporting"
    _STC=0
fi
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
then
    log_hc "$0" ${_STC} "${_MSG}"
fi

# add IB output to stdout log
print "==== {${_IBSTATUS_BIN}} ====" >>"${HC_STDOUT_LOG}"
print "${_IBSTATUS_OUTPUT}" >>"${HC_STDOUT_LOG}"

print "==== {${_IBSWITCHES_BIN}} ====" >>"${HC_STDOUT_LOG}"
print "${_IBSWITCHES_OUTPUT}" >>"${HC_STDOUT_LOG}"

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function _show_usage
{
# Prints the help text of the plugin on stdout.
# ARGS:    $1 - plugin name
#          $2 - plugin version
# RETURNS: always 0
cat <<- EOT
NAME : $1
VERSION : $2
PURPOSE : 1) Checks that (local) Infiniband ports are in ACTIVE state
2) Checks that Infiniband switches are present (should be 2)
LOG HEALTHY : Supported

EOT

return 0
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
@ -1,427 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_megaraid.sh
|
||||
#******************************************************************************
|
||||
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is a free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#******************************************************************************
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: check_exadata_megaraid
|
||||
# DOES: see _show_usage()
|
||||
# EXPECTS: see _show_usage()
|
||||
# REQUIRES: data_comma2space(), data_comma2newline(), data_contains_string(),
|
||||
# data_get_lvalue_from_config, dump_logs(), init_hc(), log_hc(), warn()
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
|
||||
# @(#) 2019-07-08: update _MEGACLI_BIN [Patrick Van der Veken]
|
||||
# @(#) 2019-07-18: added supercap check, see Oracle bug 28564584 [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function check_exadata_megaraid
{
# Health check: runs the local RAID CLI and verifies the state of the
# controller, the BBU (battery), the physical and the virtual devices; then
# optionally verifies the supercap state with a second query.
# ARGS:    $* - comma-separated plugin arguments; only "help" is handled here
# GLOBALS: reads CONFIG_DIR, ARG_CONFIG_FILE, ARG_DEBUG, ARG_DEBUG_LEVEL,
#          ARG_LOG, ARG_LOG_HEALTHY, DEBUG_OPTS, HC_STDOUT_LOG, HC_STDERR_LOG
# RETURNS: 0 when the check ran (results are reported through log_hc),
#          1 when the configuration is unreadable, the CLI is missing or a
#          CLI query failed
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-07-18"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
typeset _MEGACLI_BIN="/opt/MegaRAID/storcli/storcli64"
typeset _MEGACLI_COMMAND="-ShowSummary -aALL"
typeset _SUPERCAP_COMMAND="/c0/cv show all"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
# shellcheck disable=SC2086
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_CHECK_CONTROLLER=""
typeset _CHECK_CONTROLLER=0
typeset _CFG_CHECK_BBU=""
typeset _CHECK_BBU=0
typeset _CFG_CHECK_SUPERCAP=""
typeset _CHECK_SUPERCAP=0
typeset _CFG_CHECK_PHYSICAL=""
typeset _CHECK_PHYSICAL=0
typeset _CFG_CHECK_VIRTUAL=""
typeset _CHECK_VIRTUAL=0
typeset _CLI_OUTPUT=""
typeset _CLI_DATA=""
typeset _RAID_DEVICE=""
typeset _RAID_DEVICE_TYPE=""
typeset _RAID_STATUS=""
typeset _SUPERCAP_STATUS=""
# expected state of the component under test; declared here so it stays local
typeset _TARGET_STATUS=""

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r "${_CONFIG_FILE}" ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
# every component check below is enabled unless explicitly set to 'no'
_CFG_CHECK_CONTROLLER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_controller')
case "${_CFG_CHECK_CONTROLLER}" in
    no|NO|No)
        _CHECK_CONTROLLER=0
        ;;
    *)
        _CHECK_CONTROLLER=1
        ;;
esac
(( _CHECK_CONTROLLER > 0 )) || log "checking controller has been disabled"
_CFG_CHECK_BBU=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_bbu')
case "${_CFG_CHECK_BBU}" in
    no|NO|No)
        _CHECK_BBU=0
        ;;
    *)
        _CHECK_BBU=1
        ;;
esac
(( _CHECK_BBU > 0 )) || log "checking bbu has been disabled"
_CFG_CHECK_SUPERCAP=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_supercap')
case "${_CFG_CHECK_SUPERCAP}" in
    no|NO|No)
        _CHECK_SUPERCAP=0
        ;;
    *)
        _CHECK_SUPERCAP=1
        ;;
esac
(( _CHECK_SUPERCAP > 0 )) || log "checking bbu (supercap) has been disabled"
_CFG_CHECK_PHYSICAL=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_physical')
case "${_CFG_CHECK_PHYSICAL}" in
    no|NO|No)
        _CHECK_PHYSICAL=0
        ;;
    *)
        _CHECK_PHYSICAL=1
        ;;
esac
(( _CHECK_PHYSICAL > 0 )) || log "checking physical has been disabled"
_CFG_CHECK_VIRTUAL=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_virtual')
case "${_CFG_CHECK_VIRTUAL}" in
    no|NO|No)
        _CHECK_VIRTUAL=0
        ;;
    *)
        _CHECK_VIRTUAL=1
        ;;
esac
(( _CHECK_VIRTUAL > 0 )) || log "checking virtual has been disabled"

# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# check megacli
if [[ -z "${_MEGACLI_BIN}" || ! -x "${_MEGACLI_BIN}" ]]
then
    warn "MegaCLI is not installed here. This is not an Exadata compute node?"
    return 1
fi

# gather MegaCLI data
(( ARG_DEBUG > 0 )) && debug "executing MegaCLI command"
# the command string is expanded unquoted on purpose: each word must reach the
# binary as a separate argument
# shellcheck disable=SC2086
_CLI_OUTPUT=$(${_MEGACLI_BIN} ${_MEGACLI_COMMAND} 2>>"${HC_STDERR_LOG}")
# shellcheck disable=SC2181
if (( $? > 0 )) || [[ -z "${_CLI_OUTPUT}" ]]
then
    _MSG="unable to query MegaRAID controller"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
    return 1
fi

# perform checks on MegaCLI data
# the awk filter emits "<TYPE>|<device>|<status>" records with TYPE being one
# of CONTROLLER, BBU, PHYSICAL or VIRTUAL (device is empty for the first two)
print -R "${_CLI_OUTPUT}" | awk '

    BEGIN { found_controller = 0; controller_status = "";
            found_bbu = 0; bbu_status = "";
            found_physical = 0; physical_device = ""; physical_status = "";
            found_virtual = 0; virtual_device = ""; virtual_status = "";
            status = "";
    }

    {
        # split cell data line
        split ($0, cell_line, ":");

        # find markers
        if ( cell_line[1] ~ /Controller/ ) {
            found_controller = 1;
        }
        if ( cell_line[1] ~ /BBU/ ) {
            found_bbu = 1;
        }
        if ( cell_line[1] ~ /Connector/ ) {
            found_physical = 1;
            physical_device = cell_line[3];
            # strip leading & trailing spaces
            gsub (/^[[:space:]]*/, "", physical_device);
            gsub (/[[:space:]]*$/, "", physical_device);
        }
        if ( cell_line[1] ~ /Virtual drive/ ) {
            found_virtual = 1;
            virtual_device = cell_line[2];
            # strip leading spaces
            gsub (/^[[:space:]]*/, "", virtual_device);
        }

        # find attributes
        if ( cell_line[1] ~ /Status/ ) {
            status = cell_line[2];
            # strip spaces
            gsub (/[[:space:]]/, "", status);
            if (found_controller > 0 ) { controller_status = status }
            if (found_bbu > 0 ) {
                # delete the PITA "PD" string
                gsub (/[[:space:]]*PD[[:space:]]*/, "", status);
                bbu_status = status;
            }
        };
        if ( cell_line[1] ~ /State/ ) {
            status = cell_line[2];
            # strip spaces
            gsub (/[[:space:]]/, "", status);
            if (found_physical > 0 ) { physical_status = status }
            if (found_virtual > 0 ) { virtual_status = status }
        };

        # report results
        if ( controller_status != "" && found_controller ) {
            printf "%s|%s|%s\n", "CONTROLLER", "", controller_status
            found_controller = 0; controller_status = ""; status = "";
        }
        if ( bbu_status != "" && found_bbu ) {
            printf "%s|%s|%s\n", "BBU", "", bbu_status
            found_bbu = 0; bbu_status = ""; status = "";
        }
        if ( physical_device != "" && physical_status != "" && found_physical ) {
            printf "%s|%s|%s\n", "PHYSICAL", physical_device, physical_status
            found_physical = 0; physical_device = ""; physical_status = ""; status = "";
        }
        if ( virtual_device != "" && virtual_status != "" && found_virtual ) {
            printf "%s|%s|%s\n", "VIRTUAL", virtual_device, virtual_status
            found_virtual = 0; virtual_device = ""; virtual_status = ""; status = "";
        }

    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _RAID_DEVICE_TYPE _RAID_DEVICE _RAID_STATUS
do
    case "${_RAID_DEVICE_TYPE}" in
        CONTROLLER)
            if (( _CHECK_CONTROLLER > 0 ))
            then
                _TARGET_STATUS="Optimal"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of controller is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of controller is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "excluded check for controller"
            fi
            ;;
        BBU)
            if (( _CHECK_BBU > 0 ))
            then
                _TARGET_STATUS="Healthy"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of bbu (battery) is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of bbu (battery) is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for bbu (disabled)"
            fi
            ;;
        PHYSICAL)
            if (( _CHECK_PHYSICAL > 0 ))
            then
                _TARGET_STATUS="Online"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of physical device ${_RAID_DEVICE} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of physical device on ${_RAID_DEVICE} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for physical device [${_RAID_DEVICE}] (disabled)"
            fi
            ;;
        VIRTUAL)
            if (( _CHECK_VIRTUAL > 0 ))
            then
                _TARGET_STATUS="Optimal"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of virtual device ${_RAID_DEVICE} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of virtual device on ${_RAID_DEVICE} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for virtual device [${_RAID_DEVICE}] (disabled)"
            fi
            ;;
    esac
done

# add CLI output to stdout log (the captured output lives in _CLI_OUTPUT)
print "==== {${_MEGACLI_BIN} ${_MEGACLI_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
print -R "${_CLI_OUTPUT}" >>"${HC_STDOUT_LOG}"

# check if we need to check the BBU (supercap). Use different storcli query
# see Oracle Bug 28564584 : X5-2 Aspen w/storcli utility shows false bbu failed status
if (( _CHECK_SUPERCAP > 0 ))
then
    # gather MegaCLI data
    (( ARG_DEBUG > 0 )) && debug "executing Supercap command"
    # unquoted on purpose, see the first query above
    # shellcheck disable=SC2086
    _CLI_OUTPUT=$(${_MEGACLI_BIN} ${_SUPERCAP_COMMAND} 2>>"${HC_STDERR_LOG}")
    # shellcheck disable=SC2181
    if (( $? > 0 )) || [[ -z "${_CLI_OUTPUT}" ]]
    then
        _MSG="unable to query MegaRAID controller"
        _STC=2
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}"
        fi
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        return 1
    fi

    # perform checks on MegaCLI data: the supercap is OK when at least one
    # 'State' line carries the target status
    _TARGET_STATUS="Optimal"
    _SUPERCAP_STATUS=$(print -R "${_CLI_OUTPUT}" | grep -c -E -e "^State *${_TARGET_STATUS}" 2>/dev/null)
    if (( _SUPERCAP_STATUS == 0 ))
    then
        _MSG="state of BBU (supercap) device is NOK"
        _STC=1
    else
        _MSG="state of BBU (supercap) device is OK"
        _STC=0
    fi
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}" "Non-optimal" "${_TARGET_STATUS}"
    fi

    # add CLI output to stdout log
    print "==== {${_MEGACLI_BIN} ${_SUPERCAP_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
    print -R "${_CLI_OUTPUT}" >>"${HC_STDOUT_LOG}"
else
    (( ARG_DEBUG > 0 )) && debug "skipping check for bbu (supercap) (disabled)"
fi

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function _show_usage
{
# Prints the help text of the MegaRAID plugin on STDOUT.
#   $1: name of the plugin
#   $2: version of the plugin
#   $3: path of the configuration file
# Always returns 0.
typeset _USAGE_NAME="$1"
typeset _USAGE_VERSION="$2"
typeset _USAGE_CONFIG="$3"

# one argument per output line; the trailing empty argument yields the
# closing blank line of the help text
printf '%s\n' \
    "NAME : ${_USAGE_NAME}" \
    "VERSION : ${_USAGE_VERSION}" \
    "CONFIG : ${_USAGE_CONFIG} with parameters:" \
    'log_healthy=<yes|no>' \
    'check_controller=<yes|no>' \
    'check_bbu=<yes|no>' \
    'check_supercap=<yes|no>' \
    'check_physical=<yes|no>' \
    'check_virtual=<yes|no>' \
    'PURPOSE : 1) Checks the status of MegaRAID device(s)' \
    '# /opt/MegaRAID/MegaCli/MegaCli64 -ShowSummary -aALL' \
    'Target attributes:' \
    '* Controller: Optimal [optional]' \
    '* BBU (battery): Healthy [optional]' \
    '* Physical devices: Online [optional]' \
    '* Virtual devices: Optimal [optional]' \
    '2) Checks the status of the Supercap (battery):' \
    'dcli> /opt/MegaRAID/storcli/storcli64 /c0/cv show all' \
    'LOG HEALTHY : Supported' \
    ''

return 0
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
@ -1,264 +0,0 @@
|
||||
#!/usr/bin/env ksh
|
||||
#******************************************************************************
|
||||
# @(#) check_exadata_zfs_cluster.sh
|
||||
#******************************************************************************
|
||||
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the same terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
|
||||
#******************************************************************************
|
||||
#
|
||||
# DOCUMENTATION (MAIN)
|
||||
# -----------------------------------------------------------------------------
|
||||
# @(#) MAIN: check_exadata_zfs_cluster
|
||||
# DOES: see _show_usage()
|
||||
# EXPECTS: see _show_usage()
|
||||
# REQUIRES: data_comma2space(), data_get_lvalue_from_config(), dump_logs(),
|
||||
# data_strip_outer_space(), init_hc(), linux_exec_ssh(), log_hc(), warn()
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2019-07-05: initial version [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function check_exadata_zfs_cluster
{
# Health check plugin: queries every ZFS appliance listed in the configuration
# file (stanzas 'zfs:<host_name>') over SSH and compares the reported cluster
# state and cluster link states with their target values.
#   $*: comma-separated plugin arguments; only 'help' is handled here
# Returns 0 when the checks could be performed (results are reported through
# log_hc()), 1 when the configuration file is unreadable, the SSH key file is
# not readable, ssh is missing, no 'zfs:' stanzas are defined or no cluster
# data could be collected from any appliance.
# NOTE: "$0" is used as the plugin name throughout (configuration file name,
# init_hc(), log_hc()); this relies on ksh setting $0 to the function name
# inside functions defined with the 'function' keyword.
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-07-05"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cluster query script -- DO NOT CHANGE --
# example output of the script:
#   state=AKCS_CLUSTERED
#   link=clustron3_ng3:0/clustron_uart:0 = AKCIOS_ACTIVE
#   link=clustron3_ng3:0/clustron_uart:1 = AKCIOS_ACTIVE
#   link=clustron3_ng3:0/dlpi:0 = AKCIOS_ACTIVE
typeset _ZFS_SCRIPT="
script
run('configuration cluster');
printf('state=%s\n', get('state'));
var links = run('links');
var links_array = links.split('\n');
for (var i = 0; i < links_array.length; ++i) {
if (links_array[i] != '') {
printf('link=%s\n', links_array[i].replace(/^\s+|\s+$/g,''));
}
}"
# target state of the cluster
typeset _CLUSTER_TARGET="AKCS_CLUSTERED"
# target state of the cluster links
typeset _LINK_TARGET="AKCIOS_ACTIVE"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
# DEBUG_OPTS stays unquoted so that multiple shell options split into words
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_SSH_KEY_FILE=""
typeset _CFG_SSH_OPTS=""
typeset _CFG_SSH_USER=""
typeset _CFG_ZFS_HOSTS=""
typeset _CFG_ZFS_HOST=""
typeset _SSH_BIN=""
typeset _SSH_OUTPUT=""
typeset _SSH_LINE=""
typeset _ZFS_DATA=""
typeset _ZFS_HOST=""
typeset _CLUSTER_LINE=""
typeset _CLUSTER_STATE=""
typeset _LINK_STATE=""

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r "${_CONFIG_FILE}" ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_SSH_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'ssh_user')
if [[ -z "${_CFG_SSH_USER}" ]]
then
    _CFG_SSH_USER="root"
fi
_CFG_SSH_KEY_FILE=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'ssh_key_file')
_CFG_SSH_OPTS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'ssh_opts')
# add quiet mode
_CFG_SSH_OPTS="${_CFG_SSH_OPTS} -q"
if [[ -n "${_CFG_SSH_KEY_FILE}" ]]
then
    if [[ -r "${_CFG_SSH_KEY_FILE}" ]]
    then
        log "will use SSH key ${_CFG_SSH_KEY_FILE}"
        _CFG_SSH_OPTS="${_CFG_SSH_OPTS} -i ${_CFG_SSH_KEY_FILE}"
    else
        warn "will use SSH key ${_CFG_SSH_KEY_FILE}, but file does not exist"
        return 1
    fi
fi

# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# check ssh
_SSH_BIN="$(command -v ssh 2>>${HC_STDERR_LOG})"
if [[ ! -x ${_SSH_BIN} || -z "${_SSH_BIN}" ]]
then
    warn "SSH is not installed here"
    return 1
fi

# gather ZFS hostnames (for this we need at least one 'zfs:<host_name>' line)
_CFG_ZFS_HOSTS=$(grep -i -E -e '^zfs:' "${_CONFIG_FILE}" 2>/dev/null | cut -f2 -d':' 2>/dev/null | sort -u 2>/dev/null)
if [[ -z "${_CFG_ZFS_HOSTS}" ]]
then
    warn "no monitoring rules defined in ${_CONFIG_FILE}"
    return 1
fi

# gather ZFS cluster data
# NOTE: _ZFS_DATA is filled inside the 'while' loops at the end of a pipeline;
# this requires a shell that runs the last pipeline stage in the current
# shell environment (as ksh does)
print "${_CFG_ZFS_HOSTS}" | while read -r _CFG_ZFS_HOST
do
    (( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}"
    _SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG})
    # shellcheck disable=SC2181
    if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]]
    then
        warn "unable to discover cluster data on ${_CFG_ZFS_HOST}"
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        continue
    else
        # mangle SSH output by prefixing each line with '<hostname>#'
        print "${_SSH_OUTPUT}" | while read -r _SSH_LINE
        do
            if [[ -z "${_ZFS_DATA}" ]]
            then
                _ZFS_DATA="${_CFG_ZFS_HOST}#${_SSH_LINE}"
            else
                # shellcheck disable=SC1117
                _ZFS_DATA=$(printf "%s\n%s#%s" "${_ZFS_DATA}" "${_CFG_ZFS_HOST}" "${_SSH_LINE}")
            fi
        done
    fi
done

# process cluster status data
if [[ -z "${_ZFS_DATA}" ]]
then
    _MSG="did not discover any ZFS cluster data"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 1
fi
print "${_ZFS_DATA}" | while IFS='#' read -r _ZFS_HOST _CLUSTER_LINE
do
    (( ARG_DEBUG > 0 )) && debug "parsing cluster data for appliance: ${_ZFS_HOST}"

    # split up cluster data & perform checks
    case "${_CLUSTER_LINE}" in
        link=*)
            # line format: link=<link_name> = <link_state>, state is the 3rd '=' field
            _LINK_STATE=$(data_strip_outer_space "$(print "${_CLUSTER_LINE}" | cut -f3 -d'=' 2>/dev/null)")

            if [[ "${_LINK_STATE}" != "${_LINK_TARGET}" ]]
            then
                _MSG="${_ZFS_HOST} cluster link state is NOK (${_LINK_STATE}!=${_LINK_TARGET})"
                _STC=1
            else
                _MSG="${_ZFS_HOST} cluster link state is OK (${_LINK_STATE}==${_LINK_TARGET})"
                _STC=0
            fi
            if (( _LOG_HEALTHY > 0 || _STC > 0 ))
            then
                log_hc "$0" ${_STC} "${_MSG}" "${_LINK_STATE}" "${_LINK_TARGET}"
            fi
            ;;
        state=*)
            # line format: state=<cluster_state>
            _CLUSTER_STATE="${_CLUSTER_LINE#state=}"

            if [[ "${_CLUSTER_STATE}" != "${_CLUSTER_TARGET}" ]]
            then
                _MSG="${_ZFS_HOST} cluster state is NOK (${_CLUSTER_STATE}!=${_CLUSTER_TARGET})"
                _STC=1
            else
                _MSG="${_ZFS_HOST} cluster state is OK (${_CLUSTER_STATE}==${_CLUSTER_TARGET})"
                _STC=0
            fi
            if (( _LOG_HEALTHY > 0 || _STC > 0 ))
            then
                log_hc "$0" ${_STC} "${_MSG}" "${_CLUSTER_STATE}" "${_CLUSTER_TARGET}"
            fi
            ;;
    esac
done

return 0
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
function _show_usage
{
# Prints the help text of the ZFS cluster plugin on STDOUT.
#   $1: name of the plugin
#   $2: version of the plugin
#   $3: path of the configuration file
# Always returns 0.
typeset _USAGE_NAME="$1"
typeset _USAGE_VERSION="$2"
typeset _USAGE_CONFIG="$3"

# one argument per output line; the trailing empty argument yields the
# closing blank line of the help text
printf '%s\n' \
    "NAME : ${_USAGE_NAME}" \
    "VERSION : ${_USAGE_VERSION}" \
    "CONFIG : ${_USAGE_CONFIG} with parameters:" \
    'log_healthy=<yes|no>' \
    'ssh_user=<ssh_user_account>' \
    'ssh_key_file=<ssh_private_key_file>' \
    'ssh_opts=<ssh_options>' \
    'and formatted stanzas of:' \
    'zfs:<host_name>' \
    'PURPOSE : Checks the state of the cluster and its links' \
    'CLI: zfs > configuration > cluster > show' \
    'LOG HEALTHY : Supported' \
    ''

return 0
}
|
||||
|
||||
#******************************************************************************
|
||||
# END of script
|
||||
#******************************************************************************
|
@ -19,14 +19,12 @@
|
||||
# @(#) MAIN: check_exadata_zfs_logs
|
||||
# DOES: see _show_usage()
|
||||
# EXPECTS: see _show_usage()
|
||||
# REQUIRES: data_comma2space(), data_get_lvalue_from_config(), dump_logs(),
|
||||
# init_hc(), linux_exec_ssh(), log_hc(), warn()
|
||||
# REQUIRES: data_comma2space(), dump_logs(), init_hc(), linux_exec_ssh(),
|
||||
# log_hc(), warn()
|
||||
#
|
||||
# @(#) HISTORY:
|
||||
# @(#) 2019-02-18: initial version [Patrick Van der Veken]
|
||||
# @(#) 2019-03-16: replace 'which' [Patrick Van der Veken]
|
||||
# @(#) 2019-05-14: _STC fix [Patrick Van der Veken]
|
||||
# @(#) 2019-07-05: help fix [Patrick Van der Veken]
|
||||
# -----------------------------------------------------------------------------
|
||||
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
#******************************************************************************
|
||||
@ -36,7 +34,7 @@ function check_exadata_zfs_logs
|
||||
{
|
||||
# ------------------------- CONFIGURATION starts here -------------------------
|
||||
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
|
||||
typeset _VERSION="2019-07-05" # YYYY-MM-DD
|
||||
typeset _VERSION="2019-03-16" # YYYY-MM-DD
|
||||
typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
|
||||
# ------------------------- CONFIGURATION ends here ---------------------------
|
||||
|
||||
@ -277,7 +275,6 @@ do
|
||||
fi
|
||||
else
|
||||
: >${_STATE_FILE}
|
||||
# shellcheck disable=SC2181
|
||||
(( $? > 0 )) && {
|
||||
warn "failed to create new state file at ${_STATE_FILE}"
|
||||
return 1
|
||||
@ -287,7 +284,6 @@ do
|
||||
|
||||
(( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST} for log ${_ZFS_LOG}"
|
||||
_SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG})
|
||||
# shellcheck disable=SC2181
|
||||
if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]]
|
||||
then
|
||||
warn "unable to discover ${_ZFS_LOG} log data on ${_CFG_ZFS_HOST}"
|
||||
@ -424,7 +420,7 @@ do
|
||||
;;
|
||||
esac
|
||||
else
|
||||
if (( _LOG_HEALTHY > 0 ))
|
||||
if (( _LOG_HEALTHY > 0 || _STC > 0 ))
|
||||
then
|
||||
_MSG="no (new) messages discovered from ${_CFG_ZFS_HOST}:/${_ZFS_LOG}"
|
||||
log_hc "$0" 0 "${_MSG}"
|
||||
@ -455,17 +451,16 @@ CONFIG : $3 with parameters:
|
||||
log_healthy=<yes|no>
|
||||
ssh_user=<ssh_user_account>
|
||||
ssh_key_file=<ssh_private_key_file>
|
||||
ssh_opts=<ssh_options>
|
||||
and formatted stanzas of:
|
||||
zfs:<host_name>:<alert|fltlog|scrk|system>:<filters>
|
||||
PURPOSE : Checks the ZFS logs for (new) entries with particular alert level(s)
|
||||
PURPOSE : checks the ZFS logs for (new) entries with particular alert level(s)
|
||||
Following logs are supported (filters in brackets):
|
||||
* alert (critical,major,minor)
|
||||
* fltlog (critical,major,minor)
|
||||
* system (error)
|
||||
* scrk (failed)
|
||||
CLI: zfs > maintenance > logs > select (log) > show
|
||||
CAVEAT: Plugin will use state files to track 'seen' messages. However each
|
||||
CAVEAT: plugin will use state files to track 'seen' messages. However each
|
||||
check will only retrieve the default 100 last log entries. So it
|
||||
is possible that log entries are lost between health checks (this
|
||||
can be avoided by scheduling the check quicker than the likely
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user