From 62095c510e3a592055693951c2b157d294ef80a2 Mon Sep 17 00:00:00 2001 From: Patrick Van der Veken Date: Thu, 16 May 2019 16:23:06 +0200 Subject: [PATCH] Added Exadata stuff: - Added include_exadata - Added plugin check_exadata_cell_alerts - Added plugin check_exadata_cell_celldisks - Added plugin check_exadata_cell_flash - Added plugin check_exadata_cell_griddisks - Added plugin check_exadata_cell_luns - Added plugin check_exadata_cell_megaraid - Added plugin check_exadata_cell_physicaldisks - Added plugin check_exadata_megaraid --- .../linux/ARCH/hc-exadata-platform/PKGBUILD | 18 + .../linux/SPECS/hc-exadata-platform.spec | 46 ++ .../hc/check_exadata_cell_alerts.conf.dist | 30 ++ .../hc/check_exadata_cell_celldisks.conf.dist | 29 ++ etc/opt/hc/check_exadata_cell_flash.conf.dist | 37 ++ .../hc/check_exadata_cell_griddisks.conf.dist | 33 ++ etc/opt/hc/check_exadata_cell_luns.conf.dist | 29 ++ .../hc/check_exadata_cell_megaraid.conf.dist | 41 ++ ...check_exadata_cell_physicaldisks.conf.dist | 29 ++ etc/opt/hc/check_exadata_megaraid.conf.dist | 33 ++ etc/opt/hc/check_exadata_zfs_logs.conf.dist | 2 +- opt/hc/bin/check_health.sh | 46 +- opt/hc/lib/core/include_exadata.sh | 85 ++++ opt/hc/lib/core/include_os.sh | 2 +- .../exadata/check_exadata_cell_alerts.sh | 297 +++++++++++++ .../exadata/check_exadata_cell_celldisks.sh | 246 +++++++++++ .../exadata/check_exadata_cell_flash.sh | 308 ++++++++++++++ .../exadata/check_exadata_cell_griddisks.sh | 288 +++++++++++++ .../exadata/check_exadata_cell_luns.sh | 241 +++++++++++ .../exadata/check_exadata_cell_megaraid.sh | 392 ++++++++++++++++++ .../check_exadata_cell_physicaldisks.sh | 252 +++++++++++ .../exadata/check_exadata_megaraid.sh | 365 ++++++++++++++++ .../exadata/check_exadata_zfs_logs.sh | 15 +- .../exadata/check_exadata_zfs_pool_usage.sh | 11 +- .../exadata/check_exadata_zfs_services.sh | 14 +- .../check_exadata_zfs_share_replication.sh | 16 +- .../exadata/check_exadata_zfs_share_usage.sh | 11 +- 27 files changed, 2863 
insertions(+), 53 deletions(-) create mode 100644 etc/opt/hc/check_exadata_cell_alerts.conf.dist create mode 100644 etc/opt/hc/check_exadata_cell_celldisks.conf.dist create mode 100644 etc/opt/hc/check_exadata_cell_flash.conf.dist create mode 100644 etc/opt/hc/check_exadata_cell_griddisks.conf.dist create mode 100644 etc/opt/hc/check_exadata_cell_luns.conf.dist create mode 100644 etc/opt/hc/check_exadata_cell_megaraid.conf.dist create mode 100644 etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist create mode 100644 etc/opt/hc/check_exadata_megaraid.conf.dist create mode 100644 opt/hc/lib/core/include_exadata.sh create mode 100644 opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh create mode 100644 opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh create mode 100644 opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh create mode 100644 opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh create mode 100644 opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh create mode 100644 opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh create mode 100644 opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh create mode 100644 opt/hc/lib/platform/exadata/check_exadata_megaraid.sh diff --git a/build/exadata/linux/ARCH/hc-exadata-platform/PKGBUILD b/build/exadata/linux/ARCH/hc-exadata-platform/PKGBUILD index 11167f5..2e9f2f6 100644 --- a/build/exadata/linux/ARCH/hc-exadata-platform/PKGBUILD +++ b/build/exadata/linux/ARCH/hc-exadata-platform/PKGBUILD @@ -17,14 +17,32 @@ sha256sums=('SKIP') package() { cd "${srcdir}/${_pkgname}" install -d -m 755 ${pkgdir}/opt/hc/lib + install -d -m 755 ${pkgdir}/opt/hc/lib/core + install -D -m 755 opt/hc/lib/core/include_exadata.sh ${pkgdir}/opt/hc/lib/core/include_exadata.sh install -d -m 755 ${pkgdir}/opt/hc/lib/platform install -d -m 755 ${pkgdir}/opt/hc/lib/platform/exadata + install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh 
${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh + install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh + install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh + install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh + install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh + install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh + install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh + install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_megaraid.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_megaraid.sh install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh install -d -m 755 ${pkgdir}/etc/opt/hc + install -D -m 644 etc/opt/hc/check_exadata_cell_alerts.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_alerts.conf.dist + 
install -D -m 644 etc/opt/hc/check_exadata_cell_celldisks.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_celldisks.conf.dist + install -D -m 644 etc/opt/hc/check_exadata_cell_flash.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_flash.conf.dist + install -D -m 644 etc/opt/hc/check_exadata_cell_griddisks.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_griddisks.conf.dist + install -D -m 644 etc/opt/hc/check_exadata_cell_luns.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_luns.conf.dist + install -D -m 644 etc/opt/hc/check_exadata_cell_megaraid.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_megaraid.conf.dist + install -D -m 644 etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist + install -D -m 644 etc/opt/hc/check_exadata_megaraid.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_megaraid.conf.dist install -D -m 644 etc/opt/hc/check_exadata_zfs_logs.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_logs.conf.dist install -D -m 644 etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist install -D -m 644 etc/opt/hc/check_exadata_zfs_services.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_services.conf.dist diff --git a/build/exadata/linux/SPECS/hc-exadata-platform.spec b/build/exadata/linux/SPECS/hc-exadata-platform.spec index 901720f..d66e77f 100644 --- a/build/exadata/linux/SPECS/hc-exadata-platform.spec +++ b/build/exadata/linux/SPECS/hc-exadata-platform.spec @@ -25,8 +25,18 @@ This package contains platform/OS specific plugins. 
%install rm -rf $RPM_BUILD_ROOT install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib +install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/core +cp ../SOURCES/opt/hc/lib/core/include_exadata.sh $RPM_BUILD_ROOT/opt/hc/lib/core install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/platform install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata +cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh +cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh +cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh +cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh +cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh +cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh +cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh +cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_megaraid.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_megaraid.sh cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh @@ -34,6 +44,14 @@ cp 
../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh install -d -m 755 $RPM_BUILD_ROOT/etc/opt/hc +cp ../SOURCES/etc/opt/hc/check_exadata_cell_alerts.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_alerts.conf.dist +cp ../SOURCES/etc/opt/hc/check_exadata_cell_celldisks.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_celldisks.conf.dist +cp ../SOURCES/etc/opt/hc/check_exadata_cell_flash.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_flash.conf.dist +cp ../SOURCES/etc/opt/hc/check_exadata_cell_griddisks.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_griddisks.conf.dist +cp ../SOURCES/etc/opt/hc/check_exadata_cell_luns.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_luns.conf.dist +cp ../SOURCES/etc/opt/hc/check_exadata_cell_megaraid.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_megaraid.conf.dist +cp ../SOURCES/etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist +cp ../SOURCES/etc/opt/hc/check_exadata_megaraid.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_megaraid.conf.dist cp ../SOURCES/etc/opt/hc/check_exadata_zfs_logs.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_logs.conf.dist cp ../SOURCES/etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist cp ../SOURCES/etc/opt/hc/check_exadata_zfs_services.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_services.conf.dist @@ -74,14 +92,32 @@ echo "INFO: finished post-uninstall script" %files %defattr(-,root,root,755) %dir /opt/hc/lib +%dir /opt/hc/lib/core +%attr(755, root, root) /opt/hc/lib/core/include_exadata.sh %dir /opt/hc/lib/platform %dir /opt/hc/lib/platform/exadata +%attr(755, root, root) 
/opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh +%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh +%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh +%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh +%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh +%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh +%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh +%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_megaraid.sh %attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh %attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh %attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh %attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh %attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh %dir /etc/opt/hc +%attr(644, root, root) /etc/opt/hc/check_exadata_cell_alerts.conf.dist +%attr(644, root, root) /etc/opt/hc/check_exadata_cell_celldisks.conf.dist +%attr(644, root, root) /etc/opt/hc/check_exadata_cell_flash.conf.dist +%attr(644, root, root) /etc/opt/hc/check_exadata_cell_griddisks.conf.dist +%attr(644, root, root) /etc/opt/hc/check_exadata_cell_luns.conf.dist +%attr(644, root, root) /etc/opt/hc/check_exadata_cell_megaraid.conf.dist +%attr(644, root, root) /etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist +%attr(644, root, root) /etc/opt/hc/check_exadata_megaraid.conf.dist %attr(644, root, root) /etc/opt/hc/check_exadata_zfs_logs.conf.dist %attr(644, root, root) /etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist %attr(644, root, root) /etc/opt/hc/check_exadata_zfs_services.conf.dist @@ -90,6 +126,16 @@ echo "INFO: finished post-uninstall script" %changelog +* Tue May 14 2019 - 0.3.0 +- Added 
include_exadata +- Added plugin check_exadata_cell_alerts +- Added plugin check_exadata_cell_celldisks +- Added plugin check_exadata_cell_flash +- Added plugin check_exadata_cell_griddisks +- Added plugin check_exadata_cell_luns +- Added plugin check_exadata_cell_megaraid +- Added plugin check_exadata_cell_physicaldisks +- Added plugin check_exadata_megaraid * Fri Apr 12 2019 - 0.2.0 - Added plugin check_exadata_zfs_pool_usage * Tue Mar 26 2019 - 0.1.0 diff --git a/etc/opt/hc/check_exadata_cell_alerts.conf.dist b/etc/opt/hc/check_exadata_cell_alerts.conf.dist new file mode 100644 index 0000000..23e5df8 --- /dev/null +++ b/etc/opt/hc/check_exadata_cell_alerts.conf.dist @@ -0,0 +1,30 @@ +#****************************************************************************** +# @(#) check_exadata_cell_alerts.conf +#****************************************************************************** +# This is a configuration file for the check_exadata_cell_alerts HC plugin. +# All lines starting with a '#' are comment lines. 
+# [default: indicates hardcoded script values if no value is defined here] +#****************************************************************************** + +# specify whether to also log passed health checks +# (warning: this may rapidly grow the HC log) +# [default: no] +log_healthy="yes" + +# specify the user account for the dcli session to the cell server +# [default: root] +dcli_user="" + +# specify the cell servers to query +# [default: null] +cell_servers="" + +# specify the alert severities to report (comma-separated) +# Possible values: critical/warning/info/clear +# [default: critical] +alert_severities="critical" + + +#****************************************************************************** +# End of FILE +#****************************************************************************** diff --git a/etc/opt/hc/check_exadata_cell_celldisks.conf.dist b/etc/opt/hc/check_exadata_cell_celldisks.conf.dist new file mode 100644 index 0000000..c4a8968 --- /dev/null +++ b/etc/opt/hc/check_exadata_cell_celldisks.conf.dist @@ -0,0 +1,29 @@ +#****************************************************************************** +# @(#) check_exadata_cell_celldisks.conf +#****************************************************************************** +# This is a configuration file for the check_exadata_cell_celldisks HC plugin. +# All lines starting with a '#' are comment lines. 
+# [default: indicates hardcoded script values if no value is defined here] +#****************************************************************************** + +# specify whether to also log passed health checks +# (warning: this may rapidly grow the HC log) +# [default: no] +log_healthy="yes" + +# specify the user account for the dcli session to the cell server +# [default: root] +dcli_user="" + +# specify the cell servers to query +# [default: null] +cell_servers="" + +# specify which cell disks to exclude from the check (comma-separated) +# [default: null] +excluded_disks="" + + +#****************************************************************************** +# End of FILE +#****************************************************************************** diff --git a/etc/opt/hc/check_exadata_cell_flash.conf.dist b/etc/opt/hc/check_exadata_cell_flash.conf.dist new file mode 100644 index 0000000..fea18d6 --- /dev/null +++ b/etc/opt/hc/check_exadata_cell_flash.conf.dist @@ -0,0 +1,37 @@ +#****************************************************************************** +# @(#) check_exadata_cell_flash.conf +#****************************************************************************** +# This is a configuration file for the check_exadata_cell_flash HC plugin. +# All lines starting with a '#' are comment lines. 
+# [default: indicates hardcoded script values if no value is defined here] +#****************************************************************************** + +# specify whether to also log passed health checks +# (warning: this may rapidly grow the HC log) +# [default: no] +log_healthy="yes" + +# specify the user account for the dcli session to the cell server +# [default: root] +dcli_user="" + +# specify the cell servers to query +# [default: null] +cell_servers="" + +# specify which flash devices to exclude from the check (comma-separated) +# [default: null] +excluded_devices="" + +# specify whether to check the flash cache status (yes|no) +# [default: yes] +check_flashcache="yes" + +# specify whether to check the flash log status (yes|no) +# [default: yes] +check_flashlog="yes" + + +#****************************************************************************** +# End of FILE +#****************************************************************************** diff --git a/etc/opt/hc/check_exadata_cell_griddisks.conf.dist b/etc/opt/hc/check_exadata_cell_griddisks.conf.dist new file mode 100644 index 0000000..295052e --- /dev/null +++ b/etc/opt/hc/check_exadata_cell_griddisks.conf.dist @@ -0,0 +1,33 @@ +#****************************************************************************** +# @(#) check_exadata_cell_griddisks.conf +#****************************************************************************** +# This is a configuration file for the check_exadata_cell_griddisks HC plugin. +# All lines starting with a '#' are comment lines. 
+# [default: indicates hardcoded script values if no value is defined here] +#****************************************************************************** + +# specify whether to also log passed health checks +# (warning: this may rapidly grow the HC log) +# [default: no] +log_healthy="yes" + +# specify the user account for the dcli session to the cell server +# [default: root] +dcli_user="" + +# specify the cell servers to query +# [default: null] +cell_servers="" + +# specify which grid disks to exclude from the check (comma-separated) +# [default: null] +excluded_disks="" + +# specify whether to check the errorCount of griddisks (yes|no) +# [default: yes] +check_errorcount="yes" + + +#****************************************************************************** +# End of FILE +#****************************************************************************** diff --git a/etc/opt/hc/check_exadata_cell_luns.conf.dist b/etc/opt/hc/check_exadata_cell_luns.conf.dist new file mode 100644 index 0000000..2bcab1a --- /dev/null +++ b/etc/opt/hc/check_exadata_cell_luns.conf.dist @@ -0,0 +1,29 @@ +#****************************************************************************** +# @(#) check_exadata_cell_luns.conf +#****************************************************************************** +# This is a configuration file for the check_exadata_cell_luns HC plugin. +# All lines starting with a '#' are comment lines. 
+# [default: indicates hardcoded script values if no value is defined here] +#****************************************************************************** + +# specify whether to also log passed health checks +# (warning: this may rapidly grow the HC log) +# [default: no] +log_healthy="yes" + +# specify the user account for the dcli session to the cell server +# [default: root] +dcli_user="" + +# specify the cell servers to query +# [default: null] +cell_servers="" + +# specify which luns to exclude from the check (comma-separated) +# [default: null] +excluded_luns="" + + +#****************************************************************************** +# End of FILE +#****************************************************************************** diff --git a/etc/opt/hc/check_exadata_cell_megaraid.conf.dist b/etc/opt/hc/check_exadata_cell_megaraid.conf.dist new file mode 100644 index 0000000..a751457 --- /dev/null +++ b/etc/opt/hc/check_exadata_cell_megaraid.conf.dist @@ -0,0 +1,41 @@ +#****************************************************************************** +# @(#) check_exadata_cell_megaraid.conf +#****************************************************************************** +# This is a configuration file for the check_exadata_cell_megaraid HC plugin. +# All lines starting with a '#' are comment lines. 
+# [default: indicates hardcoded script values if no value is defined here] +#****************************************************************************** + +# specify whether to also log passed health checks +# (warning: this may rapidly grow the HC log) +# [default: no] +log_healthy="yes" + +# specify the user account for the dcli session to the cell server +# [default: root] +dcli_user="" + +# specify the cell servers to query +# [default: null] +cell_servers="" + +# specify whether to check the controller state (yes|no) +# [default: yes] +check_controller="yes" + +# specify whether to check the BBU (battery) state (yes|no) +# [default: yes] +check_bbu="yes" + +# specify whether to check the physical devices (yes|no) +# [default: yes] +check_physical="yes" + +# specify whether to check the virtual devices (yes|no) +# [default: yes] +check_virtual="yes" + + +#****************************************************************************** +# End of FILE +#****************************************************************************** diff --git a/etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist b/etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist new file mode 100644 index 0000000..0fe75c5 --- /dev/null +++ b/etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist @@ -0,0 +1,29 @@ +#****************************************************************************** +# @(#) check_exadata_cell_physicaldisks.conf +#****************************************************************************** +# This is a configuration file for the check_exadata_cell_physicaldisks HC plugin. +# All lines starting with a '#' are comment lines. 
+# [default: indicates hardcoded script values if no value is defined here] +#****************************************************************************** + +# specify whether to also log passed health checks +# (warning: this may rapidly grow the HC log) +# [default: no] +log_healthy="yes" + +# specify the user account for the dcli session to the cell server +# [default: root] +dcli_user="" + +# specify the cell servers to query +# [default: null] +cell_servers="" + +# specify which physical disks to exclude from the check (comma-separated) +# [default: null] +excluded_disks="" + + +#****************************************************************************** +# End of FILE +#****************************************************************************** diff --git a/etc/opt/hc/check_exadata_megaraid.conf.dist b/etc/opt/hc/check_exadata_megaraid.conf.dist new file mode 100644 index 0000000..c469bad --- /dev/null +++ b/etc/opt/hc/check_exadata_megaraid.conf.dist @@ -0,0 +1,33 @@ +#****************************************************************************** +# @(#) check_exadata_megaraid.conf +#****************************************************************************** +# This is a configuration file for the check_exadata_megaraid HC plugin. +# All lines starting with a '#' are comment lines. 
+# [default: indicates hardcoded script values if no value is defined here] +#****************************************************************************** + +# specify whether to also log passed health checks +# (warning: this may rapidly grow the HC log) +# [default: no] +log_healthy="yes" + +# specify whether to check the controller state (yes|no) +# [default: yes] +check_controller="yes" + +# specify whether to check the BBU (battery) state (yes|no) +# [default: yes] +check_bbu="yes" + +# specify whether to check the physical devices (yes|no) +# [default: yes] +check_physical="yes" + +# specify whether to check the virtual devices (yes|no) +# [default: yes] +check_virtual="yes" + + +#****************************************************************************** +# End of FILE +#****************************************************************************** diff --git a/etc/opt/hc/check_exadata_zfs_logs.conf.dist b/etc/opt/hc/check_exadata_zfs_logs.conf.dist index 9960c12..e964782 100644 --- a/etc/opt/hc/check_exadata_zfs_logs.conf.dist +++ b/etc/opt/hc/check_exadata_zfs_logs.conf.dist @@ -25,7 +25,7 @@ ssh_opts="" # specify the ZFS hostname(s), log name(s) & alert levels. Filters # should be comma-separated. 
Following logs are supported (filters in brackets) -# alert (critical,major,minor +# alert (critical,major,minor) # fltlog (critical,major,minor) # system (error) # scrk (failed) diff --git a/opt/hc/bin/check_health.sh b/opt/hc/bin/check_health.sh index c72c44a..a4086bd 100755 --- a/opt/hc/bin/check_health.sh +++ b/opt/hc/bin/check_health.sh @@ -38,7 +38,7 @@ # ------------------------- CONFIGURATION starts here ------------------------- # define the version (YYYY-MM-DD) -typeset -r SCRIPT_VERSION="2019-04-03" +typeset -r SCRIPT_VERSION="2019-05-14" # location of parent directory containing KSH functions/HC plugins typeset -r FPATH_PARENT="/opt/hc/lib" # location of custom HC configuration files @@ -235,32 +235,28 @@ return 0 function check_core { (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}" +typeset INCLUDE_FILE="" -# check and include core helper libs -if [[ -r ${FPATH_PARENT}/core/include_core.sh && -h ${FPATH_PARENT}/core/include_core ]] +# check include_core (MUST be present) +if [[ ! -r ${FPATH_PARENT}/core/include_core.sh || ! -h ${FPATH_PARENT}/core/include_core ]] then - # shellcheck source=/dev/null - . ${FPATH_PARENT}/core/include_core.sh -else - print -u2 "ERROR: library file ${FPATH_PARENT}/core/include_core.sh is not present (tip: run --fix-symlinks)" - exit 1 -fi -if [[ -r ${FPATH_PARENT}/core/include_data.sh && -h ${FPATH_PARENT}/core/include_data ]] -then - # shellcheck source=/dev/null - . ${FPATH_PARENT}/core/include_data.sh -else - print -u2 "ERROR: library file ${FPATH_PARENT}/core/include_data.sh is not present (tip: run --fix-symlinks)" - exit 1 -fi -if [[ -r ${FPATH_PARENT}/core/include_os.sh && -h ${FPATH_PARENT}/core/include_os ]] -then - # shellcheck source=/dev/null - . 
${FPATH_PARENT}/core/include_os.sh -else - print -u2 "ERROR: library file ${FPATH_PARENT}/core/include_os.sh is not present (tip: run --fix-symlinks)" + print -u2 "ERROR: library file ${FPATH_PARENT}/core/include_core(.sh) is not present (tip: run --fix-symlinks)" exit 1 fi +# include include_* +find ${FPATH_PARENT}/core -name "include_*.sh" -type f -print 2>/dev/null | while read INCLUDE_FILE +do + if [[ -h ${INCLUDE_FILE%%.sh} ]] + then + # shellcheck source=/dev/null + (( ARG_DEBUG > 0 )) && print -u2 "DEBUG: including ${INCLUDE_FILE}" + # shellcheck source=/dev/null + . ${INCLUDE_FILE} + else + print -u2 "ERROR: library file ${INCLUDE_FILE} exists but has no symlink. Run --fix-symlinks" + exit 1 + fi +done # check for core directories [[ -d ${ARCHIVE_DIR} ]] || mkdir -p "${ARCHIVE_DIR}" >/dev/null 2>&1 @@ -519,9 +515,9 @@ case "${KSH_VERSION}" in if [[ -z "${ERRNO}" ]] then # shellcheck disable=SC2154 - (( ARG_DEBUG > 0 )) && print "running ksh: ${.sh.version}" + (( ARG_DEBUG > 0 )) && debug "running ksh: ${.sh.version}" else - (( ARG_DEBUG > 0 )) && print "running ksh: ksh88 or older" + (( ARG_DEBUG > 0 )) && debug "running ksh: ksh88 or older" fi ;; esac diff --git a/opt/hc/lib/core/include_exadata.sh b/opt/hc/lib/core/include_exadata.sh new file mode 100644 index 0000000..e5765b8 --- /dev/null +++ b/opt/hc/lib/core/include_exadata.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env ksh +#****************************************************************************** +# @(#) include_exadata.sh +#****************************************************************************** +# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved. +# +# This program is a free software; you can redistribute it and/or modify +# it under the same terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details +#****************************************************************************** +# +# DOCUMENTATION (MAIN) +# ----------------------------------------------------------------------------- +# @(#) MAIN: include_exadata +# DOES: helper functions for Exadata related functions +# +# ----------------------------------------------------------------------------- +# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! +#****************************************************************************** + +# ----------------------------------------------------------------------------- +# @(#) FUNCTION: version_include_exadata() +# DOES: dummy function for version placeholder +# EXPECTS: n/a +# RETURNS: 0 +function version_include_exadata +{ +typeset _VERSION="2019-05-14" # YYYY-MM-DD + +print "INFO: $0: ${_VERSION#version_*}" + +return 0 +} + +# ----------------------------------------------------------------------------- +# @(#) FUNCTION: exadata_exec_dcli() +# DOES: execute a command via dcli +# EXPECTS: 1=options [string], 2=user [string], 3=host(s) [string], +# 4=SSH options [string], 5=command [string] +# RETURNS: exit code of remote command +# OUTPUTS: STDOUT from DCLI call +# REQUIRES: dcli command-line utility +function exadata_exec_dcli +{ +(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS} +typeset _DCLI_OPTS="${1}" +typeset _DCLI_USER="${2}" +typeset _DCLI_HOSTS="${3}" +typeset _SSH_OPTS="${4}" +typeset _DCLI_COMMAND="${5}" +typeset _DCLI_BIN="" + +if [[ -z "${_DCLI_USER}" || -z "${_DCLI_HOSTS}" || -z "${_DCLI_COMMAND}" ]] +then + return 255 +fi + +# find dcli +_DCLI_BIN="$(command -v dcli 2>>${HC_STDERR_LOG})" +if [[ -z "${_DCLI_BIN}" || ! 
-x ${_DCLI_BIN} ]] +then + # don't spoil STDOUT + ARG_VERBOSE=0 warn "could not determine location for {dcli} (or it is not installed here)" + return 255 +fi + +# execute dcli +if [[ -z "${_SSH_OPTS}" ]] +then + ${_DCLI_BIN} ${_DCLI_OPTS} -l ${_DCLI_USER} -c "${_DCLI_HOSTS}" "${_DCLI_COMMAND}" 2>>${HC_STDERR_LOG} >${HC_STDERR_LOG} 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS} +init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}" +typeset _ARGS=$(data_comma2space "$*") +typeset _ARG="" +typeset _MSG="" +typeset _STC=0 +typeset _CFG_HEALTHY="" +typeset _LOG_HEALTHY=0 +typeset _CFG_DCLI_USER="" +typeset _CFG_CELL_SERVERS="" +typeset _CFG_CELL_SERVER="" +typeset _CFG_ALERT_SEVERITIES="" +typeset _CELL_OUTPUT="" +typeset _CELL_DATA="" +typeset _LAST_SEQUENCE=0 +typeset _STATE_FILE="" +typeset _ALERT_DESCRIPTION="" +typeset _ALERT_SEQUENCE="" +typeset _ALERT_SEVERITY="" + +# handle arguments (originally comma-separated) +for _ARG in ${_ARGS} +do + case "${_ARG}" in + help) + _show_usage $0 ${_VERSION} ${_CONFIG_FILE} && return 0 + ;; + esac +done + +# handle configuration file +[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}" +if [[ ! 
-r ${_CONFIG_FILE} ]] +then + warn "unable to read configuration file at ${_CONFIG_FILE}" + return 1 +fi +# read configuration values +_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy') +case "${_CFG_HEALTHY}" in + yes|YES|Yes) + _LOG_HEALTHY=1 + ;; + *) + # do not override hc_arg + (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0 + ;; +esac +_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user') +if [[ -z "${_CFG_DCLI_USER}" ]] +then + _CFG_DCLI_USER="root" + log "will use DCLI user ${_CFG_DCLI_USER}" +fi +_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers') +if [[ -z "${_CFG_CELL_SERVERS}" ]] +then + warn "no cell servers specified in configuration file at ${_CONFIG_FILE}" + return 1 +fi +_CFG_ALERT_SEVERITIES=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'alert_severities') +if [[ -z "${_CFG_ALERT_SEVERITIES}" ]] +then + warn "no alert severities specified in configuration file at ${_CONFIG_FILE}" + return 1 +else + _CFG_ALERT_SEVERITIES=$(data_lc "${_CFG_ALERT_SEVERITIES}") +fi + +# log_healthy +(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1 +if (( _LOG_HEALTHY > 0 )) +then + if (( ARG_LOG > 0 )) + then + log "logging/showing passed health checks" + else + log "showing passed health checks (but not logging)" + fi +else + log "not logging/showing passed health checks" +fi + +# gather cell data (serialized way to have better control of output & errors) +data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER +do + # check state file + _STATE_FILE="${STATE_PERM_DIR}/${_CFG_CELL_SERVER}.alerts" + (( ARG_DEBUG > 0 )) && debug "checking/reading state file at ${_STATE_FILE}" + if [[ -r ${_STATE_FILE} ]] + then + _LAST_SEQUENCE=$(<"${_STATE_FILE}") + if [[ -z "${_LAST_SEQUENCE}" ]] + then + (( ARG_DEBUG > 0 )) && debug "no recorded last log entry for ${_CFG_CELL_SERVER}, resetting to 0" + _LAST_SEQUENCE=0 + else + (( ARG_DEBUG > 0 )) 
&& debug "recorded last log entry for ${_CFG_CELL_SERVER}: ${_LAST_SEQUENCE}" + fi + else + : >${_STATE_FILE} + # shellcheck disable=SC2181 + (( $? > 0 )) && { + warn "failed to create new state file at ${_STATE_FILE}" + return 1 + } + log "created new state file at ${_STATE_FILE}" + fi + + # execute remote command + (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}" + _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>${HC_STDERR_LOG}) + # empty _CELL_OUTPUT means alert history reset + # shellcheck disable=SC2181 + if (( $? > 0 )) + then + _MSG="did not discover cell data or one of the discoveries failed" + _STC=2 + if (( _LOG_HEALTHY > 0 || _STC > 0 )) + then + log_hc "$0" ${_STC} "${_MSG}" + fi + (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs + continue + else + # empty alert history? + if [[ -z "${_CELL_OUTPUT}" ]] + then + # zero the state file + if (( ARG_LOG > 0 )) + then + warn "null resetting the current log pointer for ${_CFG_CELL_SERVER}" + : >${_STATE_FILE} 2>>${HC_STDERR_LOG} + fi + fi + fi + + # perform checks on cell data + print -R "${_CELL_OUTPUT}" | awk ' + + BEGIN { found = 0; alert_description = ""; alert_sequence = ""; alert_severity = ""; } + + { + # split cell data line + split ($0, cell_line, ":"); + + if ( cell_line[2] ~ /alertDescription/ ) { + found = 1; + alert_description = cell_line[3]; + # strip leading spaces & quotes + gsub (/^[[:space:]]*/, "", alert_description); + gsub (/\"/, "", alert_description); + } + if ( cell_line[2] ~ /alertSequenceID/ ) { + alert_sequence = cell_line[3]; + # strip spaces + gsub (/[[:space:]]/, "", alert_sequence); + }; + if ( cell_line[2] ~ /severity/ ) { + alert_severity = cell_line[3]; + # strip spaces + gsub (/[[:space:]]/, "", alert_severity); + }; + if ( alert_description != "" && alert_sequence != "" && alert_severity != "" && found ) { + printf "%s|%s|%s\n", alert_description, alert_sequence, tolower 
(alert_severity) + found = 0; alert_description = ""; alert_sequence = ""; alert_severity = ""; + } + }' 2>>${HC_STDERR_LOG} | while IFS='|' read -r _ALERT_DESCRIPTION _ALERT_SEQUENCE _ALERT_SEVERITY + do + # check for numeric + data_is_numeric "${_ALERT_SEQUENCE}" + # shellcheck disable=SC2181 + if (( $? > 0 )) + then + warn "non-numeric sequence ID encountered: [${_CFG_CELL_SERVER}/${_ALERT_SEVERITY}/${_ALERT_SEQUENCE}/${_ALERT_DESCRIPTION}]" + continue + fi + if (( _ALERT_SEQUENCE > _LAST_SEQUENCE )) + then + # check severities list + data_list_contains_string "${_CFG_ALERT_SEVERITIES}" "${_ALERT_SEVERITY}" + # shellcheck disable=SC2181 + if (( $? == 0 )) + then + (( ARG_DEBUG > 0 )) && debug "ignoring alert because of severity: [${_CFG_CELL_SERVER}/${_ALERT_SEVERITY}/${_ALERT_SEQUENCE}/${_ALERT_DESCRIPTION}]" + continue + else + _MSG="ID=${_ALERT_SEQUENCE} (${_ALERT_SEVERITY}) ${_ALERT_DESCRIPTION}" + if (( _LOG_HEALTHY > 0 )) + then + log_hc "$0" 1 "${_CFG_CELL_SERVER}: ${_MSG}" + fi + fi + else + if (( _LOG_HEALTHY > 0 )) + then + _MSG="no (new) messages discovered from ${_CFG_CELL_SERVER}" + log_hc "$0" 0 "${_MSG}" + fi + fi + # rewrite log pointer from the last log entry we discovered + if (( ARG_LOG > 0 )) + then + (( _ALERT_SEQUENCE == 0 )) && _ALERT_SEQUENCE=${_LAST_SEQUENCE} + (( ARG_DEBUG > 0 )) && debug "updating last log entry for ${_CFG_CELL_SERVER} to ${_ALERT_SEQUENCE}" + print "${_ALERT_SEQUENCE}" >${_STATE_FILE} 2>>${HC_STDERR_LOG} + fi + done + + # add dcli output to stdout log + print "==== {dcli ${_CELL_COMMAND}} ====" >>${HC_STDOUT_LOG} + print "${_CELL_DATA}" >>${HC_STDOUT_LOG} +done + +return 0 +} + +# ----------------------------------------------------------------------------- +function _show_usage +{ +cat <<- EOT +NAME : $1 +VERSION : $2 +CONFIG : $3 with parameters: + log_healthy= + dlci_user= + cell_servers= + alert_severities= +PURPOSE : Checks the alert history on cell servers (via dcli) + dcli> cellcli -e 'LIST ALERTHISTORY DETAIL' 
+CAVEAT : Requires a working dcli setup for the root user +LOG HEALTHY : Supported + +EOT + +return 0 +} + +#****************************************************************************** +# END of script +#****************************************************************************** diff --git a/opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh b/opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh new file mode 100644 index 0000000..9418e95 --- /dev/null +++ b/opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh @@ -0,0 +1,246 @@ +#!/usr/bin/env ksh +#****************************************************************************** +# @(#) check_exadata_cell_celldisks.sh +#****************************************************************************** +# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved. +# +# This program is a free software; you can redistribute it and/or modify +# it under the same terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details
+#******************************************************************************
+#
+# DOCUMENTATION (MAIN)
+# -----------------------------------------------------------------------------
+# @(#) MAIN: check_exadata_cell_celldisks
+# DOES: see _show_usage()
+# EXPECTS: see _show_usage()
+# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config,
+#           data_list_contains_string(), debug(), dump_logs(),
+#           exadata_exec_dcli(), init_hc(), log(), log_hc(), warn()
+#
+# @(#) HISTORY:
+# @(#) 2019-05-14: initial version [Patrick Van der Veken]
+# -----------------------------------------------------------------------------
+# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
+#******************************************************************************
+
+# -----------------------------------------------------------------------------
+# @(#) FUNCTION: check_exadata_cell_celldisks()
+# DOES: runs 'LIST CELLDISK DETAIL' on every configured cell server (through
+#       exadata_exec_dcli), extracts the name/status pair of each cell disk
+#       and reports every disk whose status differs from _TARGET_STATUS
+# EXPECTS: plugin arguments (comma-separated string), only 'help' is handled
+# RETURNS: 0=checks were executed (or usage was shown)
+#          1=configuration could not be read/is incomplete, or the discovery
+#            of cell data failed for at least one cell server
+# REQUIRES: see MAIN
+function check_exadata_cell_celldisks
+{
+# ------------------------- CONFIGURATION starts here -------------------------
+typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
+typeset _VERSION="2019-05-14" # YYYY-MM-DD
+typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
+# cell query command -- DO NOT CHANGE --
+#celadm01:name: CD_00_celadm01
+#celadm01:comment:
+#celadm01:creationTime: 2017-08-29T12:46:36+02:00
+#celadm01:deviceName: /dev/sda
+#celadm01:devicePartition: /dev/sda3
+#celadm01:diskType: HardDisk
+#celadm01:errorCount: 0
+#celadm01:freeSpace: 379.3125G
+#celadm01:id: a6ef7710-6fe7-456e-9571-9ee3c8e53c3f
+#celadm01:physicalDisk: P6KRUV
+#celadm01:size: 7.1194915771484375T
+#celadm01:status: normal
+typeset _CELL_COMMAND="cellcli -e 'LIST CELLDISK DETAIL'"
+typeset _TARGET_STATUS="normal"
+# ------------------------- CONFIGURATION ends here ---------------------------
+
+# set defaults
+(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
+init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
+typeset _ARGS=$(data_comma2space "$*")
+typeset _ARG=""
+typeset _MSG=""
+typeset _STC=0
+typeset _CFG_HEALTHY=""
+typeset _LOG_HEALTHY=0
+typeset _CFG_DCLI_USER=""
+typeset _CFG_CELL_SERVERS=""
+typeset _CFG_CELL_SERVER=""
+typeset _CFG_EXCLUDED_DISKS=""
+typeset _CELL_OUTPUT=""
+typeset _CELL_DATA=""
+# loop variable of the result loop; declared here so that it does not leak
+# out of this function
+typeset _CELL_SERVER=""
+typeset _CELL_DISK=""
+typeset _DISK_STATUS=""
+typeset _CELL_ALL_RC=0
+typeset _CELL_RC=0
+
+# handle arguments (originally comma-separated)
+for _ARG in ${_ARGS}
+do
+    case "${_ARG}" in
+        help)
+            _show_usage $0 ${_VERSION} ${_CONFIG_FILE} && return 0
+            ;;
+    esac
+done
+
+# handle configuration file
+[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
+if [[ ! -r ${_CONFIG_FILE} ]]
+then
+    warn "unable to read configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+# read configuration values
+_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
+case "${_CFG_HEALTHY}" in
+    yes|YES|Yes)
+        _LOG_HEALTHY=1
+        ;;
+    *)
+        # do not override hc_arg
+        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
+        ;;
+esac
+_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
+if [[ -z "${_CFG_DCLI_USER}" ]]
+then
+    # no user configured: fall back to root
+    _CFG_DCLI_USER="root"
+    log "will use DCLI user ${_CFG_DCLI_USER}"
+fi
+_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
+if [[ -z "${_CFG_CELL_SERVERS}" ]]
+then
+    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+_CFG_EXCLUDED_DISKS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_disks')
+if [[ -n "${_CFG_EXCLUDED_DISKS}" ]]
+then
+    log "excluding following cell disk(s) from the check: ${_CFG_EXCLUDED_DISKS}"
+fi
+
+# log_healthy
+(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
+if (( _LOG_HEALTHY > 0 ))
+then
+    if (( ARG_LOG > 0 ))
+    then
+        log "logging/showing passed health checks"
+    else
+        log "showing passed health checks (but not logging)"
+    fi
+else
+    log "not logging/showing passed health checks"
+fi
+
+# gather cell data (serialized way to have better control of output & errors)
+# NOTE(review): _CELL_DATA/_CELL_ALL_RC are set inside a piped while loop and
+# read afterwards; this relies on the shell running the last pipeline stage in
+# the current process (ksh93 does) -- confirm for the ksh in use
+data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
+do
+    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
+    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>${HC_STDERR_LOG})
+    _CELL_RC=$?
+    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
+    then
+        # an empty reply with exit code 0 is a failed discovery as well: make
+        # sure it is counted, otherwise the cell is silently left unchecked
+        (( _CELL_RC > 0 )) || _CELL_RC=1
+        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
+        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
+        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
+        continue
+    else
+        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
+        # shellcheck disable=SC1117
+        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
+    fi
+done
+
+# validate cell data
+if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
+then
+    _MSG="did not discover cell data or one of the discoveries failed"
+    _STC=2
+    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+    then
+        log_hc "$0" ${_STC} "${_MSG}"
+    fi
+    return 1
+fi
+
+# perform checks on cell data
+# awk turns the "<cell>:<attribute>:<value>" lines into one
+# "<cell>|<disk>|<status>" record per disk: a record starts at a 'name' line
+# and is emitted as soon as the matching 'status' line has been seen
+print -R "${_CELL_DATA}" | awk '
+
+    BEGIN { found = 0; cell_disk = ""; disk_status = ""; }
+
+    {
+        # split cell data line
+        split ($0, cell_line, ":");
+
+        if ( cell_line[2] ~ /name/ ) {
+            found = 1;
+            cell_disk = cell_line[3];
+            # strip spaces
+            gsub (/[[:space:]]/, "", cell_disk);
+        }
+        if ( cell_line[2] ~ /status/ ) {
+            disk_status = cell_line[3];
+            # strip spaces
+            gsub (/[[:space:]]/, "", disk_status);
+        };
+        if ( cell_disk != "" && disk_status != "" && found ) {
+            printf "%s|%s|%s\n", cell_line[1], cell_disk, disk_status
+            found = 0; cell_disk = ""; disk_status = "";
+        }
+    }' 2>>${HC_STDERR_LOG} | while IFS='|' read -r _CELL_SERVER _CELL_DISK _DISK_STATUS
+do
+    # check exclusion list (a non-zero return code is treated as "is excluded")
+    data_list_contains_string "${_CFG_EXCLUDED_DISKS}" "${_CELL_DISK}"
+    # shellcheck disable=SC2181
+    if (( $? > 0 ))
+    then
+        (( ARG_DEBUG > 0 )) && debug "ignoring cell disk ${_CELL_DISK}"
+    else
+        if [[ "${_DISK_STATUS}" != "${_TARGET_STATUS}" ]]
+        then
+            _MSG="status of cell disk ${_CELL_SERVER}:/${_CELL_DISK} is NOK (${_DISK_STATUS}!=${_TARGET_STATUS})"
+            _STC=1
+        else
+            _MSG="status of cell disk ${_CELL_SERVER}:/${_CELL_DISK} is OK (${_DISK_STATUS}==${_TARGET_STATUS})"
+            _STC=0
+        fi
+        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+        then
+            log_hc "$0" ${_STC} "${_MSG}" "${_DISK_STATUS}" "${_TARGET_STATUS}"
+        fi
+    fi
+done
+
+# add dcli output to stdout log (raw: do not let print interpret backslashes)
+print "==== {dcli ${_CELL_COMMAND}} ====" >>${HC_STDOUT_LOG}
+print -R "${_CELL_DATA}" >>${HC_STDOUT_LOG}
+
+return 0
+}
+
+# -----------------------------------------------------------------------------
+# @(#) FUNCTION: _show_usage()
+# DOES: prints the usage/help text of this plugin on STDOUT
+# EXPECTS: 1=plugin name [string], 2=plugin version [string],
+#          3=configuration file [string]
+# RETURNS: 0
+function _show_usage
+{
+cat <<- EOT
+NAME : $1
+VERSION : $2
+CONFIG : $3 with parameters:
+ log_healthy=
+ dcli_user=
+ cell_servers=
+ excluded_disks=
+PURPOSE : Checks the status of cell disks on cell servers (via dcli)
+ dcli> cellcli -e 'LIST CELLDISK DETAIL'
+ Target attributes:
+ * Status: normal
+CAVEAT : Requires a working dcli setup for the root user
+LOG HEALTHY : Supported
+
+EOT
+
+return 0
+}
+
+#******************************************************************************
+# END of script
+#******************************************************************************
diff --git a/opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh b/opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh
new file mode 100644
index 0000000..e2b9ca5
--- /dev/null
+++ b/opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh
@@ -0,0 +1,308 @@
+#!/usr/bin/env ksh
+#******************************************************************************
+# @(#) check_exadata_cell_flash.sh
+#******************************************************************************
+# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
+# +# This program is a free software; you can redistribute it and/or modify +# it under the same terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details +#****************************************************************************** +# +# DOCUMENTATION (MAIN) +# ----------------------------------------------------------------------------- +# @(#) MAIN: check_exadata_cell_flash +# DOES: see _show_usage() +# EXPECTS: see _show_usage() +# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config, +# dump_logs(), exadata_exec_dcli(), init_hc(), log_hc(), warn() +# +# @(#) HISTORY: +# @(#) 2019-05-14: initial version [Patrick Van der Veken] +# ----------------------------------------------------------------------------- +# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! 
+#******************************************************************************
+
+# -----------------------------------------------------------------------------
+# @(#) FUNCTION: check_exadata_cell_flash()
+# DOES: runs 'LIST FLASHCACHE DETAIL' and/or 'LIST FLASHLOG DETAIL' on every
+#       configured cell server (through exadata_exec_dcli), extracts the
+#       name/status pair of each flash device and reports every device whose
+#       status differs from _TARGET_STATUS
+# EXPECTS: plugin arguments (comma-separated string), only 'help' is handled
+# RETURNS: 0=checks were executed (or usage was shown)
+#          1=configuration could not be read/is incomplete, or no cell data
+#            was discovered/the discovery failed for at least one cell server
+# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config,
+#           data_list_contains_string(), debug(), dump_logs(),
+#           exadata_exec_dcli(), init_hc(), log(), log_hc(), warn()
+function check_exadata_cell_flash
+{
+# ------------------------- CONFIGURATION starts here -------------------------
+typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
+typeset _VERSION="2019-05-14" # YYYY-MM-DD
+typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
+# cell query command -- DO NOT CHANGE --
+#celadm01: name: celadm01_FLASHCACHE
+#celadm01: cellDisk: FD_00_celadm01,FD_01_#celadm01
+#celadm01: creationTime: 2017-06-07T18:48:54+02:00
+#celadm01: degradedCelldisks:
+#celadm01: effectiveCacheSize: 5.821319580078125T
+#celadm01: id: 42423718-e520-4d14-95df-cefc798f528f
+#celadm01: size: 5.821319580078125T
+#celadm01: status: normal
+typeset _CELL_FLASHCACHE_COMMAND="cellcli -e 'LIST FLASHCACHE DETAIL'"
+# cell query command -- DO NOT CHANGE --
+#celadm01: name: celadm01_FLASHLOG
+#celadm01: cellDisk: FD_00_celadm01,FD_01_#celadm01
+#celadm01: creationTime: 2017-06-07T18:48:52+02:00
+#celadm01: degradedCelldisks:
+#celadm01: effectiveSize: 512M
+#celadm01: efficiency: 99.37209135951484
+#celadm01: id: 40de35b1-84c7-45db-82ec-9eea5f38b40b
+#celadm01: size: 512M
+#celadm01: status: normal
+typeset _CELL_FLASHLOG_COMMAND="cellcli -e 'LIST FLASHLOG DETAIL'"
+typeset _TARGET_STATUS="normal"
+# ------------------------- CONFIGURATION ends here ---------------------------
+
+# set defaults
+(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
+init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
+typeset _ARGS=$(data_comma2space "$*")
+typeset _ARG=""
+typeset _MSG=""
+typeset _STC=0
+typeset _CFG_HEALTHY=""
+typeset _LOG_HEALTHY=0
+typeset _CFG_DCLI_USER=""
+typeset _CFG_CELL_SERVERS=""
+typeset _CFG_CELL_SERVER=""
+# declared here so that the exclusion list does not leak out of this function
+typeset _CFG_EXCLUDED_DEVICES=""
+typeset _CFG_CHECK_FLASHCACHE=""
+typeset _CHECK_FLASHCACHE=0
+typeset _CFG_CHECK_FLASHLOG=""
+typeset _CHECK_FLASHLOG=0
+typeset _CELL_OUTPUT=""
+typeset _CELL_DATA=""
+# loop variable of the result loop; declared here so that it does not leak
+# out of this function
+typeset _CELL_SERVER=""
+typeset _FLASH_DEVICE=""
+typeset _FLASH_STATUS=""
+typeset _CELL_ALL_RC=0
+typeset _CELL_RC=0
+
+# handle arguments (originally comma-separated)
+for _ARG in ${_ARGS}
+do
+    case "${_ARG}" in
+        help)
+            _show_usage $0 ${_VERSION} ${_CONFIG_FILE} && return 0
+            ;;
+    esac
+done
+
+# handle configuration file
+[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
+if [[ ! -r ${_CONFIG_FILE} ]]
+then
+    warn "unable to read configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+# read configuration values
+_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
+case "${_CFG_HEALTHY}" in
+    yes|YES|Yes)
+        _LOG_HEALTHY=1
+        ;;
+    *)
+        # do not override hc_arg
+        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
+        ;;
+esac
+_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
+if [[ -z "${_CFG_DCLI_USER}" ]]
+then
+    # no user configured: fall back to root
+    _CFG_DCLI_USER="root"
+    log "will use DCLI user ${_CFG_DCLI_USER}"
+fi
+_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
+if [[ -z "${_CFG_CELL_SERVERS}" ]]
+then
+    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+_CFG_EXCLUDED_DEVICES=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_devices')
+if [[ -n "${_CFG_EXCLUDED_DEVICES}" ]]
+then
+    log "excluding following flash devices from the check: ${_CFG_EXCLUDED_DEVICES}"
+fi
+# both flash checks are opt-in: anything but an explicit yes disables them
+_CFG_CHECK_FLASHCACHE=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_flashcache')
+case "${_CFG_CHECK_FLASHCACHE}" in
+    yes|YES|Yes)
+        _CHECK_FLASHCACHE=1
+        ;;
+    *)
+        _CHECK_FLASHCACHE=0
+        ;;
+esac
+(( _CHECK_FLASHCACHE > 0 )) || log "checking flash cache has been disabled"
+_CFG_CHECK_FLASHLOG=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_flashlog')
+case "${_CFG_CHECK_FLASHLOG}" in
+    yes|YES|Yes)
+        _CHECK_FLASHLOG=1
+        ;;
+    *)
+        _CHECK_FLASHLOG=0
+        ;;
+esac
+(( _CHECK_FLASHLOG > 0 )) || log "checking flash log has been disabled"
+
+# log_healthy
+(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
+if (( _LOG_HEALTHY > 0 ))
+then
+    if (( ARG_LOG > 0 ))
+    then
+        log "logging/showing passed health checks"
+    else
+        log "showing passed health checks (but not logging)"
+    fi
+else
+    log "not logging/showing passed health checks"
+fi
+
+# gather cell data (serialized way to have better control of output & errors)
+# NOTE(review): _CELL_DATA/_CELL_ALL_RC are set inside a piped while loop and
+# read afterwards; this relies on the shell running the last pipeline stage in
+# the current process (ksh93 does) -- confirm for the ksh in use
+data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
+do
+    # flash cache
+    if (( _CHECK_FLASHCACHE > 0 ))
+    then
+        (( ARG_DEBUG > 0 )) && debug "executing remote cell script (flash cache) on ${_CFG_CELL_SERVER}"
+        _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_FLASHCACHE_COMMAND}" 2>>${HC_STDERR_LOG})
+        _CELL_RC=$?
+        if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
+        then
+            # an empty reply with exit code 0 is a failed discovery as well:
+            # make sure it is counted
+            (( _CELL_RC > 0 )) || _CELL_RC=1
+            _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
+            warn "unable to discover cell data on ${_CFG_CELL_SERVER} (flash cache)"
+            (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
+            # skips the flash log query for this cell server too
+            continue
+        else
+            # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
+            # shellcheck disable=SC1117
+            _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
+        fi
+    fi
+    # flash log
+    if (( _CHECK_FLASHLOG > 0 ))
+    then
+        (( ARG_DEBUG > 0 )) && debug "executing remote cell script (flash log) on ${_CFG_CELL_SERVER}"
+        _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_FLASHLOG_COMMAND}" 2>>${HC_STDERR_LOG})
+        _CELL_RC=$?
+        if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
+        then
+            # an empty reply with exit code 0 is a failed discovery as well:
+            # make sure it is counted
+            (( _CELL_RC > 0 )) || _CELL_RC=1
+            _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
+            warn "unable to discover cell data on ${_CFG_CELL_SERVER} (flash log)"
+            (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
+            continue
+        else
+            # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
+            # shellcheck disable=SC1117
+            _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
+        fi
+    fi
+done
+
+# validate cell data
+if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
+then
+    _MSG="did not discover cell data or one of the discoveries failed"
+    _STC=2
+    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+    then
+        log_hc "$0" ${_STC} "${_MSG}"
+    fi
+    return 1
+fi
+
+# perform checks on cell data
+# awk turns the "<cell>:<attribute>:<value>" lines into one
+# "<cell>|<device>|<status>" record per flash device: a record starts at a
+# 'name' line and is emitted as soon as the matching 'status' line has been seen
+print -R "${_CELL_DATA}" | awk '
+
+    BEGIN { found = 0; flash_device = ""; flash_status = ""; }
+
+    {
+        # split cell data line
+        split ($0, cell_line, ":");
+
+        if ( cell_line[2] ~ /name/ ) {
+            found = 1;
+            flash_device = cell_line[3];
+            # strip spaces
+            gsub (/[[:space:]]/, "", flash_device);
+        }
+        if ( cell_line[2] ~ /status/ ) {
+            flash_status = cell_line[3];
+            # strip spaces
+            gsub (/[[:space:]]/, "", flash_status);
+        };
+        if ( flash_device != "" && flash_status != "" && found ) {
+            printf "%s|%s|%s\n", cell_line[1], flash_device, flash_status
+            found = 0; flash_device = ""; flash_status = "";
+        }
+    }' 2>>${HC_STDERR_LOG} | while IFS='|' read -r _CELL_SERVER _FLASH_DEVICE _FLASH_STATUS
+do
+    # check exclusion list (a non-zero return code is treated as "is excluded")
+    data_list_contains_string "${_CFG_EXCLUDED_DEVICES}" "${_FLASH_DEVICE}"
+    # shellcheck disable=SC2181
+    if (( $? > 0 ))
+    then
+        (( ARG_DEBUG > 0 )) && debug "ignoring flash device ${_FLASH_DEVICE}"
+    else
+        if [[ "${_FLASH_STATUS}" != "${_TARGET_STATUS}" ]]
+        then
+            _MSG="status of flash device ${_CELL_SERVER}:/${_FLASH_DEVICE} is NOK (${_FLASH_STATUS}!=${_TARGET_STATUS})"
+            _STC=1
+        else
+            _MSG="status of flash device ${_CELL_SERVER}:/${_FLASH_DEVICE} is OK (${_FLASH_STATUS}==${_TARGET_STATUS})"
+            _STC=0
+        fi
+        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+        then
+            log_hc "$0" ${_STC} "${_MSG}" "${_FLASH_STATUS}" "${_TARGET_STATUS}"
+        fi
+    fi
+done
+
+# add dcli output to stdout log: _CELL_DATA holds the combined output of all
+# executed queries, so list the executed commands first and write the data
+# once (raw: do not let print interpret backslashes)
+(( _CHECK_FLASHCACHE > 0 )) && print "==== {dcli ${_CELL_FLASHCACHE_COMMAND}} ====" >>${HC_STDOUT_LOG}
+(( _CHECK_FLASHLOG > 0 )) && print "==== {dcli ${_CELL_FLASHLOG_COMMAND}} ====" >>${HC_STDOUT_LOG}
+print -R "${_CELL_DATA}" >>${HC_STDOUT_LOG}
+
+return 0
+}
+
+# -----------------------------------------------------------------------------
+# @(#) FUNCTION: _show_usage()
+# DOES: prints the usage/help text of this plugin on STDOUT
+# EXPECTS: 1=plugin name [string], 2=plugin version [string],
+#          3=configuration file [string]
+# RETURNS: 0
+function _show_usage
+{
+cat <<- EOT
+NAME : $1
+VERSION : $2
+CONFIG : $3 with parameters:
+ log_healthy=
+ dcli_user=
+ cell_servers=
+ excluded_devices=
+ check_flashcache=
+ check_flashlog=
+PURPOSE : Checks the status of the flash cache/log devices on cell servers (via dcli)
+ dcli> cellcli -e 'LIST FLASHCACHE DETAIL'
+ dcli> cellcli -e 'LIST FLASHLOG DETAIL'
+ Target attributes:
+ * Flash cache: normal [optional]
+ * Flash log: normal [optional]
+LOG HEALTHY : Supported
+
+EOT
+
+return 0
+}
+
+#******************************************************************************
+# END of script
+#******************************************************************************
diff --git a/opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh b/opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh
new file mode 100644
index 0000000..30d905a
--- /dev/null
+++ b/opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh
@@ -0,0 +1,288 @@
+#!/usr/bin/env ksh
+#****************************************************************************** +# @(#) check_exadata_cell_griddisks.sh +#****************************************************************************** +# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved. +# +# This program is a free software; you can redistribute it and/or modify +# it under the same terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details +#****************************************************************************** +# +# DOCUMENTATION (MAIN) +# ----------------------------------------------------------------------------- +# @(#) MAIN: check_exadata_cell_griddisks +# DOES: see _show_usage() +# EXPECTS: see _show_usage() +# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config, +# dump_logs(), exadata_exec_dcli(), init_hc(), log_hc(), warn() +# +# @(#) HISTORY: +# @(#) 2019-05-14: initial version [Patrick Van der Veken] +# ----------------------------------------------------------------------------- +# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! 
+#******************************************************************************
+
+# -----------------------------------------------------------------------------
+# @(#) FUNCTION: check_exadata_cell_griddisks()
+# DOES: runs 'LIST GRIDDISK DETAIL' on every configured cell server (through
+#       exadata_exec_dcli), extracts name/errorCount/status of each grid disk
+#       and reports every disk whose status differs from _TARGET_STATUS or
+#       (optionally) whose error count exceeds _TARGET_ERRORS
+# EXPECTS: plugin arguments (comma-separated string), only 'help' is handled
+# RETURNS: 0=checks were executed (or usage was shown)
+#          1=configuration could not be read/is incomplete, or the discovery
+#            of cell data failed for at least one cell server
+# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config,
+#           data_list_contains_string(), debug(), dump_logs(),
+#           exadata_exec_dcli(), init_hc(), log(), log_hc(), warn()
+function check_exadata_cell_griddisks
+{
+# ------------------------- CONFIGURATION starts here -------------------------
+typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
+typeset _VERSION="2019-05-14" # YYYY-MM-DD
+typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
+# cell query command -- DO NOT CHANGE --
+#celadm01: name: RECOC5_CD_03_celadm01
+#celadm01: asmDiskGroupName: RECOC5
+#celadm01: asmDiskName: RECOC5_CD_03_CELADM01
+#celadm01: asmFailGroupName: CELADM01
+#celadm01: availableTo:
+#celadm01: cachedBy:
+#celadm01: cachingPolicy: none
+#celadm01: cellDisk: CD_03_celadm01
+#celadm01: comment: "Cluster DB diskgroup RECOC5"
+#celadm01: creationTime: 2017-09-14T16:21:31+02:00
+#celadm01: diskType: HardDisk
+#celadm01: errorCount: 0
+#celadm01: id: 67c31489-1ab4-4649-85f5-1e65ef9af213
+#celadm01: size: 118G
+#celadm01: status: active
+typeset _CELL_COMMAND="cellcli -e 'LIST GRIDDISK DETAIL'"
+typeset _TARGET_STATUS="active"
+typeset _TARGET_ERRORS=0
+# ------------------------- CONFIGURATION ends here ---------------------------
+
+# set defaults
+(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
+init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
+typeset _ARGS=$(data_comma2space "$*")
+typeset _ARG=""
+typeset _MSG=""
+typeset _STC=0
+typeset _CFG_HEALTHY=""
+typeset _LOG_HEALTHY=0
+typeset _CFG_CHECK_ERRORCOUNT=""
+typeset _CHECK_ERRORCOUNT=0
+typeset _CFG_DCLI_USER=""
+typeset _CFG_CELL_SERVERS=""
+typeset _CFG_CELL_SERVER=""
+typeset _CFG_EXCLUDED_DISKS=""
+typeset _CELL_OUTPUT=""
+typeset _CELL_DATA=""
+# loop variable of the result loop; declared here so that it does not leak
+# out of this function
+typeset _CELL_SERVER=""
+typeset _GRID_DISK=""
+typeset _DISK_ERRORS=0
+typeset _DISK_STATUS=""
+typeset _CELL_ALL_RC=0
+typeset _CELL_RC=0
+
+# handle arguments (originally comma-separated)
+for _ARG in ${_ARGS}
+do
+    case "${_ARG}" in
+        help)
+            _show_usage $0 ${_VERSION} ${_CONFIG_FILE} && return 0
+            ;;
+    esac
+done
+
+# handle configuration file
+[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
+if [[ ! -r ${_CONFIG_FILE} ]]
+then
+    warn "unable to read configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+# read configuration values
+_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
+case "${_CFG_HEALTHY}" in
+    yes|YES|Yes)
+        _LOG_HEALTHY=1
+        ;;
+    *)
+        # do not override hc_arg
+        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
+        ;;
+esac
+_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
+if [[ -z "${_CFG_DCLI_USER}" ]]
+then
+    # no user configured: fall back to root
+    _CFG_DCLI_USER="root"
+    log "will use DCLI user ${_CFG_DCLI_USER}"
+fi
+_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
+if [[ -z "${_CFG_CELL_SERVERS}" ]]
+then
+    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+_CFG_EXCLUDED_DISKS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_disks')
+if [[ -n "${_CFG_EXCLUDED_DISKS}" ]]
+then
+    log "excluding following grid disk(s) from the check: ${_CFG_EXCLUDED_DISKS}"
+fi
+# the errorCount check is opt-out: anything but an explicit no enables it
+_CFG_CHECK_ERRORCOUNT=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_errorcount')
+case "${_CFG_CHECK_ERRORCOUNT}" in
+    no|NO|No)
+        _CHECK_ERRORCOUNT=0
+        ;;
+    *)
+        _CHECK_ERRORCOUNT=1
+        ;;
+esac
+(( _CHECK_ERRORCOUNT > 0 )) || log "checking errorCount has been disabled"
+
+# log_healthy
+(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
+if (( _LOG_HEALTHY > 0 ))
+then
+    if (( ARG_LOG > 0 ))
+    then
+        log "logging/showing passed health checks"
+    else
+        log "showing passed health checks (but not logging)"
+    fi
+else
+    log "not logging/showing passed health checks"
+fi
+
+# gather cell data (serialized way to have better control of output & errors)
+# NOTE(review): _CELL_DATA/_CELL_ALL_RC are set inside a piped while loop and
+# read afterwards; this relies on the shell running the last pipeline stage in
+# the current process (ksh93 does) -- confirm for the ksh in use
+data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
+do
+    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
+    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>${HC_STDERR_LOG})
+    _CELL_RC=$?
+    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
+    then
+        # an empty reply with exit code 0 is a failed discovery as well: make
+        # sure it is counted, otherwise the cell is silently left unchecked
+        (( _CELL_RC > 0 )) || _CELL_RC=1
+        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
+        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
+        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
+        continue
+    else
+        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
+        # shellcheck disable=SC1117
+        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
+    fi
+done
+
+# validate cell data
+if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
+then
+    _MSG="did not discover cell data or one of the discoveries failed"
+    _STC=2
+    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+    then
+        log_hc "$0" ${_STC} "${_MSG}"
+    fi
+    return 1
+fi
+
+# perform checks on cell data
+# awk turns the "<cell>:<attribute>:<value>" lines into one
+# "<cell>|<disk>|<errors>|<status>" record per grid disk: a record starts at a
+# 'name' line and is emitted as soon as the matching 'status' line has been
+# seen (errorCount precedes status in the sample output above)
+print -R "${_CELL_DATA}" | awk '
+
+    BEGIN { found = 0; grid_disk = ""; disk_errors = 0; disk_status = ""; }
+
+    {
+        # split cell data line
+        split ($0, cell_line, ":");
+
+        if ( cell_line[2] ~ /name/ ) {
+            found = 1;
+            grid_disk = cell_line[3];
+            # strip spaces
+            gsub (/[[:space:]]/, "", grid_disk);
+        }
+        if ( cell_line[2] ~ /errorCount/ ) {
+            disk_errors = cell_line[3];
+            # strip spaces
+            gsub (/[[:space:]]/, "", disk_errors);
+        };
+
+        if ( cell_line[2] ~ /status/ ) {
+            disk_status = cell_line[3];
+            # strip spaces
+            gsub (/[[:space:]]/, "", disk_status);
+        };
+        if ( grid_disk != "" && disk_status != "" && found ) {
+            printf "%s|%s|%s|%s\n", cell_line[1], grid_disk, disk_errors, disk_status
+            found = 0; grid_disk = ""; disk_errors = 0; disk_status = "";
+        }
+    }' 2>>${HC_STDERR_LOG} | while IFS='|' read -r _CELL_SERVER _GRID_DISK _DISK_ERRORS _DISK_STATUS
+do
+    # check exclusion list (a non-zero return code is treated as "is excluded")
+    data_list_contains_string "${_CFG_EXCLUDED_DISKS}" "${_GRID_DISK}"
+    # shellcheck disable=SC2181
+    if (( $? > 0 ))
+    then
+        (( ARG_DEBUG > 0 )) && debug "ignoring grid disk ${_GRID_DISK}"
+    else
+        # errorCount
+        if (( _CHECK_ERRORCOUNT > 0 ))
+        then
+            # compare against the configured threshold (_TARGET_ERRORS)
+            if (( _DISK_ERRORS > _TARGET_ERRORS ))
+            then
+                _MSG="error count of grid disk ${_CELL_SERVER}:/${_GRID_DISK} is NOK (${_DISK_ERRORS}!=${_TARGET_ERRORS})"
+                _STC=1
+            else
+                _MSG="error count of grid disk ${_CELL_SERVER}:/${_GRID_DISK} is OK (${_DISK_ERRORS}==${_TARGET_ERRORS})"
+                _STC=0
+            fi
+            if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+            then
+                log_hc "$0" ${_STC} "${_MSG}" "${_DISK_ERRORS}" "${_TARGET_ERRORS}"
+            fi
+        fi
+
+        # status
+        if [[ "${_DISK_STATUS}" != "${_TARGET_STATUS}" ]]
+        then
+            _MSG="status of grid disk ${_CELL_SERVER}:/${_GRID_DISK} is NOK (${_DISK_STATUS}!=${_TARGET_STATUS})"
+            _STC=1
+        else
+            _MSG="status of grid disk ${_CELL_SERVER}:/${_GRID_DISK} is OK (${_DISK_STATUS}==${_TARGET_STATUS})"
+            _STC=0
+        fi
+        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+        then
+            log_hc "$0" ${_STC} "${_MSG}" "${_DISK_STATUS}" "${_TARGET_STATUS}"
+        fi
+    fi
+done
+
+# add dcli output to stdout log (raw: do not let print interpret backslashes)
+print "==== {dcli ${_CELL_COMMAND}} ====" >>${HC_STDOUT_LOG}
+print -R "${_CELL_DATA}" >>${HC_STDOUT_LOG}
+
+return 0
+}
+
+# -----------------------------------------------------------------------------
+# @(#) FUNCTION: _show_usage()
+# DOES: prints the usage/help text of this plugin on STDOUT
+# EXPECTS: 1=plugin name [string], 2=plugin version [string],
+#          3=configuration file [string]
+# RETURNS: 0
+function _show_usage
+{
+cat <<- EOT
+NAME : $1
+VERSION : $2
+CONFIG : $3 with parameters:
+ log_healthy=
+ dcli_user=
+ cell_servers=
+ excluded_disks=
+ check_errorcount=
+PURPOSE : Checks the status of grid disks on cell servers (via dcli)
+ dcli> cellcli -e 'LIST GRIDDISK DETAIL'
+ Target attributes:
+ * Status: active
+ * Error count: 0 [optional]
+LOG HEALTHY : Supported
+
+EOT
+
+return 0
+}
+
+#******************************************************************************
+# END of script
+#******************************************************************************
diff --git a/opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh b/opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh
new file mode 100644
index 0000000..c2d81ce
---
/dev/null +++ b/opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh @@ -0,0 +1,241 @@ +#!/usr/bin/env ksh +#****************************************************************************** +# @(#) check_exadata_cell_luns.sh +#****************************************************************************** +# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved. +# +# This program is a free software; you can redistribute it and/or modify +# it under the same terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details +#****************************************************************************** +# +# DOCUMENTATION (MAIN) +# ----------------------------------------------------------------------------- +# @(#) MAIN: check_exadata_cell_luns +# DOES: see _show_usage() +# EXPECTS: see _show_usage() +# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config, +# dump_logs(), exadata_exec_dcli(), init_hc(), log_hc(), warn() +# +# @(#) HISTORY: +# @(#) 2019-05-14: initial version [Patrick Van der Veken] +# ----------------------------------------------------------------------------- +# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! 
+#******************************************************************************
+
+# -----------------------------------------------------------------------------
+function check_exadata_cell_luns
+{   # health check: every LUN listed by the cell servers must report _TARGET_STATUS
+# ------------------------- CONFIGURATION starts here -------------------------
+typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
+typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
+typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
+# cell query command -- DO NOT CHANGE -- (sample of the dcli output it yields:)
+#celadm01: name: 0_2
+#celadm01: deviceName: /dev/sdc
+#celadm01: diskType: HardDisk
+#celadm01: id: 0_2
+#celadm01: isSystemLun: FALSE
+#celadm01: lunSize: 7.1522655487060546875T
+#celadm01: lunUID: 0_2
+#celadm01: physicalDrives: 8:2
+#celadm01: raidLevel: 0
+#celadm01: lunWriteCacheMode: "WriteBack, ReadAheadNone, Direct, No Write Cache if Bad BBU"
+#celadm01: status: normal
+typeset _CELL_COMMAND="cellcli -e 'LIST LUN DETAIL'"
+typeset _TARGET_STATUS="normal"
+# ------------------------- CONFIGURATION ends here ---------------------------
+
+# set defaults (DEBUG_OPTS is left unquoted on purpose: it may hold several options)
+(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
+init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
+typeset _ARGS=$(data_comma2space "$*")
+typeset _ARG=""
+typeset _MSG=""
+typeset _STC=0
+typeset _CFG_HEALTHY=""
+typeset _LOG_HEALTHY=0
+typeset _CFG_DCLI_USER=""
+typeset _CFG_CELL_SERVERS=""
+typeset _CFG_CELL_SERVER=""
+typeset _CFG_EXCLUDED_LUNS=""
+typeset _CELL_OUTPUT=""
+typeset _CELL_DATA="" _CELL_SERVER=""                   # _CELL_SERVER is set by the result loop; keep it local
+typeset _LUN=""
+typeset _LUN_STATUS=""
+typeset _CELL_ALL_RC=0                                  # >0 as soon as one cell discovery failed
+typeset _CELL_RC=0
+
+# handle arguments (originally comma-separated)
+for _ARG in ${_ARGS}
+do
+    case "${_ARG}" in
+        help)
+            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
+            ;;
+    esac
+done
+
+# handle configuration file (a file given via ARG_CONFIG_FILE overrides the default path)
+[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
+if [[ ! -r ${_CONFIG_FILE} ]]
+then
+    warn "unable to read configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+# read configuration values
+_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
+case "${_CFG_HEALTHY}" in
+    yes|YES|Yes)
+        _LOG_HEALTHY=1
+        ;;
+    *)
+        # do not override hc_arg
+        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
+        ;;
+esac
+_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
+if [[ -z "${_CFG_DCLI_USER}" ]]
+then
+    _CFG_DCLI_USER="root"
+    log "will use DCLI user ${_CFG_DCLI_USER}"
+fi
+_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
+if [[ -z "${_CFG_CELL_SERVERS}" ]]
+then
+    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+_CFG_EXCLUDED_LUNS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_luns')
+[[ -n "${_CFG_EXCLUDED_LUNS}" ]] && log "excluding following LUNs from the check: ${_CFG_EXCLUDED_LUNS}"
+
+# log_healthy (the command-line switch wins over the configuration file)
+(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
+if (( _LOG_HEALTHY > 0 ))
+then
+    if (( ARG_LOG > 0 ))
+    then
+        log "logging/showing passed health checks"
+    else
+        log "showing passed health checks (but not logging)"
+    fi
+else
+    log "not logging/showing passed health checks"
+fi
+
+# gather cell data (serialized way to have better control of output & errors)
+data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
+do
+    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
+    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
+    _CELL_RC=$?
+    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
+    then
+        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC + 1 )) # +1: a zero RC with empty output is a failure too
+        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
+        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
+        continue
+    else
+        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
+        # shellcheck disable=SC1117
+        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
+    fi
+done
+
+# validate cell data (any failed or empty discovery makes the whole check fail)
+if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
+then
+    _MSG="did not discover cell data or one of the discoveries failed"
+    _STC=2
+    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+    then
+        log_hc "$0" ${_STC} "${_MSG}"
+    fi
+    return 1
+fi
+
+# perform checks on cell data: awk reduces it to "<cell>|<lun>|<status>" records
+print -R "${_CELL_DATA}" | awk '
+
+    BEGIN { found = 0; lun = ""; lun_status = ""; }
+
+    {
+        # split cell data line: [1]=cell server, [2]=attribute name, [3]=attribute value
+        split ($0, cell_line, ":");
+
+        if ( cell_line[2] ~ /name/ ) {
+            found = 1;
+            lun = cell_line[3];
+            # strip spaces
+            gsub (/[[:space:]]/, "", lun);
+        }
+        if ( cell_line[2] ~ /status/ ) {
+            lun_status = cell_line[3];
+            # strip spaces
+            gsub (/[[:space:]]/, "", lun_status);
+        };
+        if ( lun != "" && lun_status != "" && found ) {
+            printf "%s|%s|%s\n", cell_line[1], lun, lun_status
+            found = 0; lun = ""; lun_status = "";       # reset for the next LUN record
+        }
+    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _LUN _LUN_STATUS
+do
+    # check exclusion list; NOTE(review): a non-zero return is treated as "listed" - confirm helper contract
+    data_list_contains_string "${_CFG_EXCLUDED_LUNS}" "${_LUN}"
+    # shellcheck disable=SC2181
+    if (( $? > 0 ))
+    then
+        (( ARG_DEBUG > 0 )) && debug "ignoring LUN ${_LUN}"
+    else
+        if [[ "${_LUN_STATUS}" != "${_TARGET_STATUS}" ]]
+        then
+            _MSG="status of LUN ${_CELL_SERVER}:/${_LUN} is NOK (${_LUN_STATUS}!=${_TARGET_STATUS})"
+            _STC=1
+        else
+            _MSG="status of LUN ${_CELL_SERVER}:/${_LUN} is OK (${_LUN_STATUS}==${_TARGET_STATUS})"
+            _STC=0
+        fi
+        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+        then
+            log_hc "$0" ${_STC} "${_MSG}" "${_LUN_STATUS}" "${_TARGET_STATUS}"
+        fi
+    fi
+done
+
+# add dcli output to stdout log (-R: write the cell data verbatim, no escape processing)
+print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
+print -R "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"
+
+return 0
+}
+
+# -----------------------------------------------------------------------------
+function _show_usage
+{   # prints the plugin help text; $1=plugin name, $2=version, $3=configuration file
+cat <<- EOT
+NAME        : $1
+VERSION     : $2
+CONFIG      : $3 with parameters:
+               log_healthy=
+               dcli_user=
+               cell_servers=
+               excluded_luns=
+PURPOSE     : Checks the status of LUNs on cell servers (via dcli)
+               dcli> cellcli -e 'LIST LUN DETAIL'
+              Target attributes:
+               * Status: normal
+LOG HEALTHY : Supported
+
+EOT
+
+return 0
+}
+
+#******************************************************************************
+# END of script
+#******************************************************************************
diff --git a/opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh b/opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh
new file mode 100644
index 0000000..ca51ac0
--- /dev/null
+++ b/opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh
@@ -0,0 +1,392 @@
+#!/usr/bin/env ksh
+#******************************************************************************
+# @(#) check_exadata_cell_megaraid.sh
+#******************************************************************************
+# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
+# +# This program is a free software; you can redistribute it and/or modify +# it under the same terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details +#****************************************************************************** +# +# DOCUMENTATION (MAIN) +# ----------------------------------------------------------------------------- +# @(#) MAIN: check_exadata_cell_megaraid +# DOES: see _show_usage() +# EXPECTS: see _show_usage() +# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config, +# dump_logs(), exadata_exec_dcli(), init_hc(), log_hc(), warn() +# +# @(#) HISTORY: +# @(#) 2019-05-14: initial version [Patrick Van der Veken] +# ----------------------------------------------------------------------------- +# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! 
+#******************************************************************************
+
+# -----------------------------------------------------------------------------
+function check_exadata_cell_megaraid
+{   # health check: MegaRAID controller, BBU, physical and virtual devices on the cell servers
+# ------------------------- CONFIGURATION starts here -------------------------
+typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
+typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
+typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
+# cell query command -- DO NOT CHANGE --
+typeset _CELL_COMMAND="/opt/MegaRAID/MegaCli/MegaCli64 -ShowSummary -aALL"
+# ------------------------- CONFIGURATION ends here ---------------------------
+
+# set defaults (DEBUG_OPTS is left unquoted on purpose: it may hold several options)
+(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
+init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
+typeset _ARGS=$(data_comma2space "$*")
+typeset _ARG=""
+typeset _MSG=""
+typeset _STC=0
+typeset _CFG_HEALTHY=""
+typeset _LOG_HEALTHY=0
+typeset _CFG_DCLI_USER=""
+typeset _CFG_CELL_SERVERS=""
+typeset _CFG_CELL_SERVER=""
+typeset _CFG_CHECK_CONTROLLER=""
+typeset _CHECK_CONTROLLER=0
+typeset _CFG_CHECK_BBU=""
+typeset _CHECK_BBU=0
+typeset _CFG_CHECK_PHYSICAL=""
+typeset _CHECK_PHYSICAL=0
+typeset _CFG_CHECK_VIRTUAL=""
+typeset _CHECK_VIRTUAL=0
+typeset _CELL_OUTPUT=""
+typeset _CELL_DATA="" _CELL_SERVER=""                   # _CELL_SERVER is set by the result loop; keep it local
+typeset _RAID_DEVICE=""
+typeset _RAID_DEVICE_TYPE=""
+typeset _RAID_STATUS="" _TARGET_STATUS=""               # _TARGET_STATUS is chosen per device type below
+typeset _CELL_ALL_RC=0                                  # >0 as soon as one cell discovery failed
+typeset _CELL_RC=0
+
+# handle arguments (originally comma-separated)
+for _ARG in ${_ARGS}
+do
+    case "${_ARG}" in
+        help)
+            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
+            ;;
+    esac
+done
+
+# handle configuration file (a file given via ARG_CONFIG_FILE overrides the default path)
+[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
+if [[ ! -r ${_CONFIG_FILE} ]]
+then
+    warn "unable to read configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+# read configuration values (each check_* switch defaults to enabled unless set to no)
+_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
+case "${_CFG_HEALTHY}" in
+    yes|YES|Yes)
+        _LOG_HEALTHY=1
+        ;;
+    *)
+        # do not override hc_arg
+        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
+        ;;
+esac
+_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
+if [[ -z "${_CFG_DCLI_USER}" ]]
+then
+    _CFG_DCLI_USER="root"
+    log "will use DCLI user ${_CFG_DCLI_USER}"
+fi
+_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
+if [[ -z "${_CFG_CELL_SERVERS}" ]]
+then
+    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+_CFG_CHECK_CONTROLLER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_controller')
+case "${_CFG_CHECK_CONTROLLER}" in
+    no|NO|No)
+        _CHECK_CONTROLLER=0
+        ;;
+    *)
+        _CHECK_CONTROLLER=1
+        ;;
+esac
+(( _CHECK_CONTROLLER > 0 )) || log "checking controller has been disabled"
+_CFG_CHECK_BBU=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_bbu')
+case "${_CFG_CHECK_BBU}" in
+    no|NO|No)
+        _CHECK_BBU=0
+        ;;
+    *)
+        _CHECK_BBU=1
+        ;;
+esac
+(( _CHECK_BBU > 0 )) || log "checking bbu has been disabled"
+_CFG_CHECK_PHYSICAL=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_physical')
+case "${_CFG_CHECK_PHYSICAL}" in
+    no|NO|No)
+        _CHECK_PHYSICAL=0
+        ;;
+    *)
+        _CHECK_PHYSICAL=1
+        ;;
+esac
+(( _CHECK_PHYSICAL > 0 )) || log "checking physical has been disabled"
+_CFG_CHECK_VIRTUAL=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_virtual')
+case "${_CFG_CHECK_VIRTUAL}" in
+    no|NO|No)
+        _CHECK_VIRTUAL=0
+        ;;
+    *)
+        _CHECK_VIRTUAL=1
+        ;;
+esac
+(( _CHECK_VIRTUAL > 0 )) || log "checking virtual has been disabled"
+
+# log_healthy (the command-line switch wins over the configuration file)
+(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
+if (( _LOG_HEALTHY > 0 ))
+then
+    if (( ARG_LOG > 0 ))
+    then
+        log "logging/showing passed health checks"
+    else
+        log "showing passed health checks (but not logging)"
+    fi
+else
+    log "not logging/showing passed health checks"
+fi
+
+# gather cell data (serialized way to have better control of output & errors)
+data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
+do
+    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
+    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
+    _CELL_RC=$?
+    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
+    then
+        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC + 1 )) # +1: a zero RC with empty output is a failure too
+        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
+        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
+        continue
+    else
+        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
+        # shellcheck disable=SC1117
+        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
+    fi
+done
+
+# validate cell data (any failed or empty discovery makes the whole check fail)
+if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
+then
+    _MSG="did not discover cell data or one of the discoveries failed"
+    _STC=2
+    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+    then
+        log_hc "$0" ${_STC} "${_MSG}"
+    fi
+    return 1
+fi
+
+# perform checks on cell data: awk reduces it to "<cell>|<type>|<device>|<status>" records
+print -R "${_CELL_DATA}" | awk '
+
+    BEGIN { found_controller = 0; controller_status = "";
+            found_bbu = 0; bbu_status = "";
+            found_physical = 0; physical_device = ""; physical_status = "";
+            found_virtual = 0; virtual_device = ""; virtual_status = "";
+            status = "";
+    }
+
+    {
+        # split cell data line on ":" ([1]=cell server, [2]=label, [3..]=value parts)
+        split ($0, cell_line, ":");
+
+        # find markers (a marker line opens a section; its status follows on a later line)
+        if ( cell_line[2] ~ /Controller/ ) {
+            found_controller = 1;
+        }
+        if ( cell_line[2] ~ /BBU/ ) {
+            found_bbu = 1;
+        }
+        if ( cell_line[2] ~ /Connector/ ) {
+            found_physical = 1;
+            physical_device = cell_line[4];
+            # strip leading spaces
+            gsub (/^[[:space:]]*/, "", physical_device);
+        }
+        if ( cell_line[2] ~ /Virtual drive/ ) {
+            found_virtual = 1;
+            virtual_device = cell_line[3];
+            # strip leading spaces
+            gsub (/^[[:space:]]*/, "", virtual_device);
+        }
+
+        # find attributes ("Status" belongs to controller/BBU, "State" to physical/virtual)
+        if ( cell_line[2] ~ /Status/ ) {
+            status = cell_line[3];
+            # strip spaces
+            gsub (/[[:space:]]/, "", status);
+            if (found_controller > 0 ) { controller_status = status }
+            if (found_bbu > 0 ) {
+                # delete the PITA "PD" string
+                gsub (/[[:space:]]*PD[[:space:]]*/, "", status);
+                bbu_status = status;
+            }
+        };
+        if ( cell_line[2] ~ /State/ ) {
+            status = cell_line[3];
+            # strip spaces
+            gsub (/[[:space:]]/, "", status);
+            if (found_physical > 0 ) { physical_status = status }
+            if (found_virtual > 0 ) { virtual_status = status }
+        };
+
+        # report results (emit a record once a section has its status, then reset that section)
+        if ( controller_status != "" && found_controller ) {
+            printf "%s|%s|%s|%s\n", cell_line[1], "CONTROLLER", "", controller_status
+            found_controller = 0; controller_status = ""; status = "";
+        }
+        if ( bbu_status != "" && found_bbu ) {
+            printf "%s|%s|%s|%s\n", cell_line[1], "BBU", "", bbu_status
+            found_bbu = 0; bbu_status = ""; status = "";
+        }
+        if ( physical_device != "" && physical_status != "" && found_physical ) {
+            printf "%s|%s|%s|%s\n", cell_line[1], "PHYSICAL", physical_device, physical_status
+            found_physical = 0; physical_device = ""; physical_status = ""; status = "";
+        }
+        if ( virtual_device != "" && virtual_status != "" && found_virtual ) {
+            printf "%s|%s|%s|%s\n", cell_line[1], "VIRTUAL", virtual_device, virtual_status
+            found_virtual = 0; virtual_device = ""; virtual_status = ""; status = "";
+        }
+
+    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _RAID_DEVICE_TYPE _RAID_DEVICE _RAID_STATUS
+do
+    case "${_RAID_DEVICE_TYPE}" in
+        CONTROLLER)
+            if (( _CHECK_CONTROLLER > 0 ))
+            then
+                _TARGET_STATUS="Optimal"
+                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
+                then
+                    _MSG="state of controller on ${_CELL_SERVER} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
+                    _STC=1
+                else
+                    _MSG="state of controller on ${_CELL_SERVER} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
+                    _STC=0
+                fi
+                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+                then
+                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
+                fi
+            else
+                (( ARG_DEBUG > 0 )) && debug "skipping check for controller (disabled)"
+            fi
+            ;;
+        BBU)
+            if (( _CHECK_BBU > 0 ))
+            then
+                _TARGET_STATUS="Healthy"
+                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
+                then
+                    _MSG="state of bbu on ${_CELL_SERVER} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
+                    _STC=1
+                else
+                    _MSG="state of bbu on ${_CELL_SERVER} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
+                    _STC=0
+                fi
+                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+                then
+                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
+                fi
+            else
+                (( ARG_DEBUG > 0 )) && debug "skipping check for bbu (disabled)"
+            fi
+            ;;
+        PHYSICAL)
+            if (( _CHECK_PHYSICAL > 0 ))
+            then
+                _TARGET_STATUS="Online"
+                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
+                then
+                    _MSG="state of physical device ${_CELL_SERVER}:/${_RAID_DEVICE} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
+                    _STC=1
+                else
+                    _MSG="state of physical device on ${_CELL_SERVER}:/${_RAID_DEVICE} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
+                    _STC=0
+                fi
+                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+                then
+                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
+                fi
+            else
+                (( ARG_DEBUG > 0 )) && debug "skipping check for physical device [${_CELL_SERVER}:/${_RAID_DEVICE}] (disabled)"
+            fi
+            ;;
+        VIRTUAL)
+            if (( _CHECK_VIRTUAL > 0 ))
+            then
+                _TARGET_STATUS="Optimal"
+                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
+                then
+                    _MSG="state of virtual device ${_CELL_SERVER}:/${_RAID_DEVICE} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
+                    _STC=1
+                else
+                    _MSG="state of virtual device on ${_CELL_SERVER}:/${_RAID_DEVICE} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
+                    _STC=0
+                fi
+                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+                then
+                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
+                fi
+            else
+                (( ARG_DEBUG > 0 )) && debug "skipping check for virtual device [${_CELL_SERVER}:/${_RAID_DEVICE}] (disabled)"
+            fi
+            ;;
+    esac
+done
+
+# add dcli output to stdout log (-R: write the cell data verbatim, no escape processing)
+print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
+print -R "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"
+
+return 0
+}
+
+# -----------------------------------------------------------------------------
+function _show_usage
+{   # prints the plugin help text; $1=plugin name, $2=version, $3=configuration file
+cat <<- EOT
+NAME        : $1
+VERSION     : $2
+CONFIG      : $3 with parameters:
+               log_healthy=
+               dcli_user=
+               cell_servers=
+               check_controller=
+               check_bbu=
+               check_physical=
+               check_virtual=
+PURPOSE     : Checks the status of MegaRAID device(s) on cell servers (via dcli)
+               dcli> /opt/MegaRAID/MegaCli/MegaCli64 -ShowSummary -aALL
+              Target attributes:
+               * Controller: Optimal [optional]
+               * BBU: Healthy [optional]
+               * Physical devices: Online [optional]
+               * Virtual devices: Optimal [optional]
+CAVEAT      : Requires a working dcli setup for the root user
+LOG HEALTHY : Supported
+
+EOT
+
+return 0
+}
+
+#******************************************************************************
+# END of script
+#******************************************************************************
diff --git a/opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh b/opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh
new file mode 100644
index 0000000..9a0a8bc
--- /dev/null
+++ b/opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh
@@ -0,0 +1,252 @@
+#!/usr/bin/env ksh
+#******************************************************************************
+# @(#) check_exadata_cell_physicaldisks.sh
+#******************************************************************************
+# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
+#
+# This program is a free software; you can redistribute it and/or modify
+# it under the same terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
+#******************************************************************************
+#
+# DOCUMENTATION (MAIN)
+# -----------------------------------------------------------------------------
+# @(#) MAIN: check_exadata_cell_physicaldisks
+# DOES: see _show_usage()
+# EXPECTS: see _show_usage()
+# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config(),
+#           data_list_contains_string(), debug(), dump_logs(), exadata_exec_dcli(), init_hc(), log(), log_hc(), warn()
+#
+# @(#) HISTORY:
+# @(#) 2019-05-14: initial version [Patrick Van der Veken]
+# -----------------------------------------------------------------------------
+# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
+#******************************************************************************
+
+# -----------------------------------------------------------------------------
+function check_exadata_cell_physicaldisks
+{   # health check: every physical disk listed by the cell servers must report _TARGET_STATUS
+# ------------------------- CONFIGURATION starts here -------------------------
+typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
+typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
+typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
+# cell query command -- DO NOT CHANGE -- (sample of the dcli output it yields:)
+#celadm01: name: 8:5
+#celadm01: deviceId: 12
+#celadm01: deviceName: /dev/sdf
+#celadm01: diskType: HardDisk
+#celadm01: enclosureDeviceId: 8
+#celadm01: errOtherCount: 0
+#celadm01: luns: 0_5
+#celadm01: makeModel: "HGST H7280A520SUN8.0T"
+#celadm01: physicalFirmware: PD51
+#celadm01: physicalInsertTime: 2017-06-07T14:24:51+02:00
+#celadm01: physicalInterface: sas
+#celadm01: physicalSerial: P9MG6V
+#celadm01: physicalSize: 7.1536639072000980377197265625T
+#celadm01: slotNumber: 5
+#celadm01: status: normal
+typeset _CELL_COMMAND="cellcli -e 'LIST PHYSICALDISK DETAIL'"
+typeset _TARGET_STATUS="normal"
+# ------------------------- CONFIGURATION ends here ---------------------------
+
+# set defaults (DEBUG_OPTS is left unquoted on purpose: it may hold several options)
+(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
+init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
+typeset _ARGS=$(data_comma2space "$*")
+typeset _ARG=""
+typeset _MSG=""
+typeset _STC=0
+typeset _CFG_HEALTHY=""
+typeset _LOG_HEALTHY=0
+typeset _CFG_DCLI_USER=""
+typeset _CFG_CELL_SERVERS=""
+typeset _CFG_CELL_SERVER=""
+typeset _CFG_EXCLUDED_DISKS=""
+typeset _CELL_OUTPUT=""
+typeset _CELL_DATA="" _CELL_SERVER=""                   # _CELL_SERVER is set by the result loop; keep it local
+typeset _PHYSICAL_DISK=""
+typeset _DISK_STATUS=""
+typeset _CELL_ALL_RC=0                                  # >0 as soon as one cell discovery failed
+typeset _CELL_RC=0
+
+# handle arguments (originally comma-separated)
+for _ARG in ${_ARGS}
+do
+    case "${_ARG}" in
+        help)
+            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
+            ;;
+    esac
+done
+
+# handle configuration file (a file given via ARG_CONFIG_FILE overrides the default path)
+[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
+if [[ ! -r ${_CONFIG_FILE} ]]
+then
+    warn "unable to read configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+# read configuration values
+_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
+case "${_CFG_HEALTHY}" in
+    yes|YES|Yes)
+        _LOG_HEALTHY=1
+        ;;
+    *)
+        # do not override hc_arg
+        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
+        ;;
+esac
+_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
+if [[ -z "${_CFG_DCLI_USER}" ]]
+then
+    _CFG_DCLI_USER="root"
+    log "will use DCLI user ${_CFG_DCLI_USER}"
+fi
+_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
+if [[ -z "${_CFG_CELL_SERVERS}" ]]
+then
+    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
+    return 1
+fi
+_CFG_EXCLUDED_DISKS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_disks')
+if [[ -n "${_CFG_EXCLUDED_DISKS}" ]]
+then
+    log "excluding following physical disk(s) from the check: ${_CFG_EXCLUDED_DISKS}"
+fi
+
+# log_healthy (the command-line switch wins over the configuration file)
+(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
+if (( _LOG_HEALTHY > 0 ))
+then
+    if (( ARG_LOG > 0 ))
+    then
+        log "logging/showing passed health checks"
+    else
+        log "showing passed health checks (but not logging)"
+    fi
+else
+    log "not logging/showing passed health checks"
+fi
+
+# gather cell data (serialized way to have better control of output & errors)
+data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
+do
+    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
+    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
+    _CELL_RC=$?
+    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
+    then
+        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC + 1 )) # +1: a zero RC with empty output is a failure too
+        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
+        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
+        continue
+    else
+        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
+        # shellcheck disable=SC1117
+        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
+    fi
+done
+
+# validate cell data (any failed or empty discovery makes the whole check fail)
+if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
+then
+    _MSG="did not discover cell data or one of the discoveries failed"
+    _STC=2
+    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+    then
+        log_hc "$0" ${_STC} "${_MSG}"
+    fi
+    return 1
+fi
+
+# perform checks on cell data: awk reduces it to "<cell>|<disk>|<status>" records
+print -R "${_CELL_DATA}" | awk '
+
+    BEGIN { found = 0; physical_disk = ""; disk_status = ""; }
+
+    {
+        # split cell data line: [1]=cell server, [2]=attribute name, [3..]=attribute value
+        split ($0, cell_line, ":");
+
+        if ( cell_line[2] ~ /name/ ) {
+            found = 1;
+            physical_disk = cell_line[3];
+            if (cell_line[4] != "") {                   # disk names such as 8:5 contain the separator
+                physical_disk = physical_disk ":" cell_line[4];
+            }
+            # strip spaces
+            gsub (/[[:space:]]/, "", physical_disk);
+        }
+        if ( cell_line[2] ~ /status/ ) {
+            disk_status = cell_line[3];
+            # strip spaces
+            gsub (/[[:space:]]/, "", disk_status);
+        };
+        if ( physical_disk != "" && disk_status != "" && found ) {
+            printf "%s|%s|%s\n", cell_line[1], physical_disk, disk_status
+            found = 0; physical_disk = ""; disk_status = "";   # reset for the next disk record
+        }
+    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _PHYSICAL_DISK _DISK_STATUS
+do
+    # check exclusion list; NOTE(review): a non-zero return is treated as "listed" - confirm helper contract
+    data_list_contains_string "${_CFG_EXCLUDED_DISKS}" "${_PHYSICAL_DISK}"
+    # shellcheck disable=SC2181
+    if (( $? > 0 ))
+    then
+        (( ARG_DEBUG > 0 )) && debug "ignoring physical disk ${_PHYSICAL_DISK}"
+    else
+        # status
+        if [[ "${_DISK_STATUS}" != "${_TARGET_STATUS}" ]]
+        then
+            _MSG="status of physical disk ${_CELL_SERVER}:/${_PHYSICAL_DISK} is NOK (${_DISK_STATUS}!=${_TARGET_STATUS})"
+            _STC=1
+        else
+            _MSG="status of physical disk ${_CELL_SERVER}:/${_PHYSICAL_DISK} is OK (${_DISK_STATUS}==${_TARGET_STATUS})"
+            _STC=0
+        fi
+        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
+        then
+            log_hc "$0" ${_STC} "${_MSG}" "${_DISK_STATUS}" "${_TARGET_STATUS}"
+        fi
+    fi
+done
+
+# add dcli output to stdout log (-R: write the cell data verbatim, no escape processing)
+print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
+print -R "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"
+
+return 0
+}
+
+# -----------------------------------------------------------------------------
+function _show_usage
+{   # prints the plugin help text; $1=plugin name, $2=version, $3=configuration file
+cat <<- EOT
+NAME        : $1
+VERSION     : $2
+CONFIG      : $3 with parameters:
+               log_healthy=
+               dcli_user=
+               cell_servers=
+               excluded_disks=
+PURPOSE     : Checks the status of physical disks on cell servers (via dcli)
+               dcli> cellcli -e 'LIST PHYSICALDISK DETAIL'
+              Target attributes:
+               * Status: normal
+LOG HEALTHY : Supported
+
+EOT
+
+return 0
+}
+
+#******************************************************************************
+# END of script
+#******************************************************************************
diff --git a/opt/hc/lib/platform/exadata/check_exadata_megaraid.sh b/opt/hc/lib/platform/exadata/check_exadata_megaraid.sh
new file mode 100644
index 0000000..cded64c
--- /dev/null
+++ b/opt/hc/lib/platform/exadata/check_exadata_megaraid.sh
@@ -0,0 +1,365 @@
+#!/usr/bin/env ksh
+#******************************************************************************
+# @(#)
check_exadata_megaraid.sh +#****************************************************************************** +# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved. +# +# This program is a free software; you can redistribute it and/or modify +# it under the same terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details +#****************************************************************************** +# +# DOCUMENTATION (MAIN) +# ----------------------------------------------------------------------------- +# @(#) MAIN: check_exadata_megaraid +# DOES: see _show_usage() +# EXPECTS: see _show_usage() +# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config, +# dump_logs(), init_hc(), log_hc(), warn() +# +# @(#) HISTORY: +# @(#) 2019-05-14: initial version [Patrick Van der Veken] +# ----------------------------------------------------------------------------- +# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! 
#******************************************************************************

# -----------------------------------------------------------------------------
# @(#) FUNCTION: check_exadata_megaraid()
# DOES: runs 'MegaCli64 -ShowSummary -aALL', extracts the status of the
#       controller, the BBU and every physical and virtual device from its
#       output and reports each one through log_hc(). Each of the four
#       categories can be switched off in the configuration file.
# EXPECTS: optional comma-separated plugin arguments in $*; only 'help' is
#          recognized (shows the usage text)
# RETURNS: 0 after showing help or after completing the checks
#          1 when the configuration file is unreadable, MegaCLI is not
#            executable or the MegaCLI query fails/returns nothing
# OUTPUTS: HC results via log_hc(); raw MegaCLI output is appended to
#          ${HC_STDOUT_LOG}, stderr of MegaCLI/awk to ${HC_STDERR_LOG}
function check_exadata_megaraid
{
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
typeset _MEGACLI_BIN="/opt/MegaRAID/MegaCli/MegaCli64"
typeset _MEGACLI_COMMAND="-ShowSummary -aALL"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_CHECK_CONTROLLER=""
typeset _CHECK_CONTROLLER=0
typeset _CFG_CHECK_BBU=""
typeset _CHECK_BBU=0
typeset _CFG_CHECK_PHYSICAL=""
typeset _CHECK_PHYSICAL=0
typeset _CFG_CHECK_VIRTUAL=""
typeset _CHECK_VIRTUAL=0
typeset _CLI_OUTPUT=""
typeset _RAID_DEVICE=""
typeset _RAID_DEVICE_TYPE=""
typeset _RAID_STATUS=""
# keep the expected status local instead of leaking it into the global scope
typeset _TARGET_STATUS=""

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage $0 ${_VERSION} ${_CONFIG_FILE} && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
# every check_* switch is enabled unless it is explicitly set to 'no'
_CFG_CHECK_CONTROLLER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_controller')
case "${_CFG_CHECK_CONTROLLER}" in
    no|NO|No)
        _CHECK_CONTROLLER=0
        ;;
    *)
        _CHECK_CONTROLLER=1
        ;;
esac
(( _CHECK_CONTROLLER > 0 )) || log "checking controller has been disabled"
_CFG_CHECK_BBU=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_bbu')
case "${_CFG_CHECK_BBU}" in
    no|NO|No)
        _CHECK_BBU=0
        ;;
    *)
        _CHECK_BBU=1
        ;;
esac
(( _CHECK_BBU > 0 )) || log "checking bbu has been disabled"
_CFG_CHECK_PHYSICAL=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_physical')
case "${_CFG_CHECK_PHYSICAL}" in
    no|NO|No)
        _CHECK_PHYSICAL=0
        ;;
    *)
        _CHECK_PHYSICAL=1
        ;;
esac
(( _CHECK_PHYSICAL > 0 )) || log "checking physical has been disabled"
_CFG_CHECK_VIRTUAL=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_virtual')
case "${_CFG_CHECK_VIRTUAL}" in
    no|NO|No)
        _CHECK_VIRTUAL=0
        ;;
    *)
        _CHECK_VIRTUAL=1
        ;;
esac
(( _CHECK_VIRTUAL > 0 )) || log "checking virtual has been disabled"

# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# check megacli (test for an empty path before testing the file itself)
if [[ -z "${_MEGACLI_BIN}" || ! -x "${_MEGACLI_BIN}" ]]
then
    warn "MegaCLI is not installed here. This is not an Exadata compute node?"
    return 1
fi

# gather MegaCLI data
(( ARG_DEBUG > 0 )) && debug "executing MegaCLI command"
# _MEGACLI_COMMAND holds several options: it must be word-split so that each
# option reaches MegaCLI as a separate argument
# shellcheck disable=SC2086
_CLI_OUTPUT=$(${_MEGACLI_BIN} ${_MEGACLI_COMMAND} 2>>${HC_STDERR_LOG})
# shellcheck disable=SC2181
if (( $? > 0 )) || [[ -z "${_CLI_OUTPUT}" ]]
then
    _MSG="unable to query MegaRAID controller"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
    return 1
fi

# perform checks on MegaCLI data: awk reduces the summary to records of the
# form TYPE|DEVICE|STATUS which are then evaluated one by one
print -R "${_CLI_OUTPUT}" | awk '

    BEGIN { found_controller = 0; controller_status = "";
            found_bbu = 0; bbu_status = "";
            found_physical = 0; physical_device = ""; physical_status = "";
            found_virtual = 0; virtual_device = ""; virtual_status = "";
            status = "";
    }

    {
        # split MegaCLI output line into key (1) and value(s) (2..n)
        split ($0, line_parts, ":");

        # find markers (a marker opens a section, the next matching
        # Status/State attribute closes it again)
        if ( line_parts[1] ~ /Controller/ ) {
            found_controller = 1;
        }
        if ( line_parts[1] ~ /BBU/ ) {
            found_bbu = 1;
        }
        if ( line_parts[1] ~ /Connector/ ) {
            found_physical = 1;
            # device name is taken from the text after the 2nd colon
            physical_device = line_parts[3];
            # strip leading & trailing spaces
            gsub (/^[[:space:]]*/, "", physical_device);
            gsub (/[[:space:]]*$/, "", physical_device);
        }
        if ( line_parts[1] ~ /Virtual drive/ ) {
            found_virtual = 1;
            virtual_device = line_parts[2];
            # strip leading spaces
            gsub (/^[[:space:]]*/, "", virtual_device);
        }

        # find attributes
        if ( line_parts[1] ~ /Status/ ) {
            status = line_parts[2];
            # strip spaces
            gsub (/[[:space:]]/, "", status);
            if (found_controller > 0 ) { controller_status = status }
            if (found_bbu > 0 ) {
                # delete the PITA "PD" string
                gsub (/[[:space:]]*PD[[:space:]]*/, "", status);
                bbu_status = status;
            }
        };
        if ( line_parts[1] ~ /State/ ) {
            status = line_parts[2];
            # strip spaces
            gsub (/[[:space:]]/, "", status);
            if (found_physical > 0 ) { physical_status = status }
            if (found_virtual > 0 ) { virtual_status = status }
        };

        # report results (and reset the section that was just reported)
        if ( controller_status != "" && found_controller ) {
            printf "%s|%s|%s\n", "CONTROLLER", "", controller_status
            found_controller = 0; controller_status = ""; status = "";
        }
        if ( bbu_status != "" && found_bbu ) {
            printf "%s|%s|%s\n", "BBU", "", bbu_status
            found_bbu = 0; bbu_status = ""; status = "";
        }
        if ( physical_device != "" && physical_status != "" && found_physical ) {
            printf "%s|%s|%s\n", "PHYSICAL", physical_device, physical_status
            found_physical = 0; physical_device = ""; physical_status = ""; status = "";
        }
        if ( virtual_device != "" && virtual_status != "" && found_virtual ) {
            printf "%s|%s|%s\n", "VIRTUAL", virtual_device, virtual_status
            found_virtual = 0; virtual_device = ""; virtual_status = ""; status = "";
        }

    }' 2>>${HC_STDERR_LOG} | while IFS='|' read -r _RAID_DEVICE_TYPE _RAID_DEVICE _RAID_STATUS
do
    case "${_RAID_DEVICE_TYPE}" in
        CONTROLLER)
            if (( _CHECK_CONTROLLER > 0 ))
            then
                _TARGET_STATUS="Optimal"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of controller is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of controller is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for controller (disabled)"
            fi
            ;;
        BBU)
            if (( _CHECK_BBU > 0 ))
            then
                _TARGET_STATUS="Healthy"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of bbu is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of bbu is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for bbu (disabled)"
            fi
            ;;
        PHYSICAL)
            if (( _CHECK_PHYSICAL > 0 ))
            then
                _TARGET_STATUS="Online"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of physical device ${_RAID_DEVICE} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of physical device on ${_RAID_DEVICE} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for physical device [${_RAID_DEVICE}] (disabled)"
            fi
            ;;
        VIRTUAL)
            if (( _CHECK_VIRTUAL > 0 ))
            then
                _TARGET_STATUS="Optimal"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of virtual device ${_RAID_DEVICE} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of virtual device on ${_RAID_DEVICE} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for virtual device [${_RAID_DEVICE}] (disabled)"
            fi
            ;;
    esac
done

# add MegaCLI output to stdout log (-R: write the captured text verbatim)
print "==== {${_MEGACLI_COMMAND}} ====" >>${HC_STDOUT_LOG}
print -R "${_CLI_OUTPUT}" >>${HC_STDOUT_LOG}

return 0
}

# -----------------------------------------------------------------------------
# @(#) FUNCTION: _show_usage()
# DOES: prints the help text of the plugin on STDOUT
# EXPECTS: $1=plugin name, $2=plugin version, $3=path of configuration file
# RETURNS: 0
function _show_usage
{
cat <<- EOT
NAME        : $1
VERSION     : $2
CONFIG      : $3 with parameters:
                log_healthy=<yes|no>
                check_controller=<yes|no>
                check_bbu=<yes|no>
                check_physical=<yes|no>
                check_virtual=<yes|no>
PURPOSE     : Checks the status of MegaRAID device(s)
                # /opt/MegaRAID/MegaCli/MegaCli64 -ShowSummary -aALL
              Target attributes:
                * Controller: Optimal [optional]
                * BBU: Healthy [optional]
                * Physical devices: Online [optional]
                * Virtual devices: Optimal [optional]
LOG HEALTHY : Supported

EOT

return 0
}

#******************************************************************************
# END of script
#******************************************************************************
--git a/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh b/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh index 1b85399..e366f1a 100755 --- a/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh +++ b/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh @@ -19,12 +19,13 @@ # @(#) MAIN: check_exadata_zfs_logs # DOES: see _show_usage() # EXPECTS: see _show_usage() -# REQUIRES: data_comma2space(), dump_logs(), init_hc(), linux_exec_ssh(), -# log_hc(), warn() +# REQUIRES: data_comma2space(), data_get_lvalue_from_config(), dump_logs(), +# init_hc(), linux_exec_ssh(), log_hc(), warn() # # @(#) HISTORY: # @(#) 2019-02-18: initial version [Patrick Van der Veken] # @(#) 2019-03-16: replace 'which' [Patrick Van der Veken] +# @(#) 2019-05-14: _STC fix [Patrick Van der Veken] # ----------------------------------------------------------------------------- # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! #****************************************************************************** @@ -34,7 +35,7 @@ function check_exadata_zfs_logs { # ------------------------- CONFIGURATION starts here ------------------------- typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" -typeset _VERSION="2019-03-16" # YYYY-MM-DD +typeset _VERSION="2019-05-14" # YYYY-MM-DD typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match # ------------------------- CONFIGURATION ends here --------------------------- @@ -275,6 +276,7 @@ do fi else : >${_STATE_FILE} + # shellcheck disable=SC2181 (( $? > 0 )) && { warn "failed to create new state file at ${_STATE_FILE}" return 1 @@ -284,6 +286,7 @@ do (( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST} for log ${_ZFS_LOG}" _SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG}) + # shellcheck disable=SC2181 if (( $? 
> 0 )) || [[ -z "${_SSH_OUTPUT}" ]] then warn "unable to discover ${_ZFS_LOG} log data on ${_CFG_ZFS_HOST}" @@ -420,7 +423,7 @@ do ;; esac else - if (( _LOG_HEALTHY > 0 || _STC > 0 )) + if (( _LOG_HEALTHY > 0 )) then _MSG="no (new) messages discovered from ${_CFG_ZFS_HOST}:/${_ZFS_LOG}" log_hc "$0" 0 "${_MSG}" @@ -453,14 +456,14 @@ CONFIG : $3 with parameters: ssh_key_file= and formatted stanzas of: zfs::: -PURPOSE : checks the ZFS logs for (new) entries with particular alert level(s) +PURPOSE : Checks the ZFS logs for (new) entries with particular alert level(s) Following logs are supported (filters in brackets): * alert (critical,major,minor) * fltlog (critical,major,minor) * system (error) * scrk (failed) CLI: zfs > maintenance > logs > select (log) > show -CAVEAT: plugin will use state files to track 'seen' messages. However each +CAVEAT: Plugin will use state files to track 'seen' messages. However each check will only retrieve the default 100 last log entries. So it is possible that log entries are lost between health checks (this can be avoided by scheduling the check quicker than the likely diff --git a/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh b/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh index d61dc81..da9e366 100644 --- a/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh +++ b/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh @@ -19,11 +19,12 @@ # @(#) MAIN: check_exadata_zfs_pool_usage # DOES: see _show_usage() # EXPECTS: see _show_usage() -# REQUIRES: data_comma2space(), dump_logs(), init_hc(), linux_exec_ssh(), -# log_hc(), warn() +# REQUIRES: data_comma2space(), data_get_lvalue_from_config, dump_logs(), +# init_hc(), linux_exec_ssh(), log_hc(), warn() # # @(#) HISTORY: # @(#) 2019-04-12: initial version [Patrick Van der Veken] +# @(#) 2019-05-14: small fixes [Patrick Van der Veken] # ----------------------------------------------------------------------------- # DO NOT CHANGE THIS FILE UNLESS YOU 
KNOW WHAT YOU ARE DOING! #****************************************************************************** @@ -33,7 +34,7 @@ function check_exadata_zfs_pool_usage { # ------------------------- CONFIGURATION starts here ------------------------- typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" -typeset _VERSION="2019-04-12" # YYYY-MM-DD +typeset _VERSION="2019-05-14" # YYYY-MM-DD typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match # usage query script -- DO NOT CHANGE -- # prj1:share1:16 @@ -166,6 +167,7 @@ print "${_CFG_ZFS_HOSTS}" | while read -r _CFG_ZFS_HOST do (( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}" _SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG}) + # shellcheck disable=SC2181 if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]] then warn "unable to discover usage data on ${_CFG_ZFS_HOST}" @@ -180,7 +182,7 @@ do _ZFS_DATA="${_CFG_ZFS_HOST}:${_SSH_LINE}" else # shellcheck disable=SC1117 - _ZFS_DATA="${_ZFS_DATA}\n${_CFG_ZFS_HOST}:${_SSH_LINE}" + _ZFS_DATA=$(printf "%s\n%s:%s" "${_ZFS_DATA}" "${_CFG_ZFS_HOST}" "${_SSH_LINE}") fi done fi @@ -230,6 +232,7 @@ do if [[ -n "${_CFG_SPACE_THRESHOLD}" ]] then data_is_numeric "${_CFG_SPACE_THRESHOLD}" + # shellcheck disable=SC2181 if (( $? 
> 0 )) then warn "value for is not numeric in configuration file ${_CONFIG_FILE}" diff --git a/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh b/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh index 1c04e2a..b6b7a22 100755 --- a/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh +++ b/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh @@ -19,12 +19,13 @@ # @(#) MAIN: check_exadata_zfs_services # DOES: see _show_usage() # EXPECTS: see _show_usage() -# REQUIRES: data_comma2space(), dump_logs(), init_hc(), linux_exec_ssh(), -# log_hc(), warn() +# REQUIRES: data_comma2space(), data_get_lvalue_from_config, dump_logs(), +# init_hc(), linux_exec_ssh(), log_hc(), warn() # # @(#) HISTORY: # @(#) 2019-02-18: initial version [Patrick Van der Veken] # @(#) 2019-03-16: replace 'which' [Patrick Van der Veken] +# @(#) 2019-05-14: small fixes [Patrick Van der Veken] # ----------------------------------------------------------------------------- # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! #****************************************************************************** @@ -34,7 +35,7 @@ function check_exadata_zfs_services { # ------------------------- CONFIGURATION starts here ------------------------- typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" -typeset _VERSION="2019-03-16" # YYYY-MM-DD +typeset _VERSION="2019-05-14" # YYYY-MM-DD typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match # usage query script -- DO NOT CHANGE -- # svc1:online @@ -158,6 +159,7 @@ print "${_CFG_ZFS_HOSTS}" | while read -r _CFG_ZFS_HOST do (( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}" _SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG}) + # shellcheck disable=SC2181 if (( $? 
> 0 )) || [[ -z "${_SSH_OUTPUT}" ]] then warn "unable to discover services data on ${_CFG_ZFS_HOST}" @@ -172,7 +174,7 @@ do _ZFS_DATA="${_CFG_ZFS_HOST}:${_SSH_LINE}" else # shellcheck disable=SC1117 - _ZFS_DATA="${_ZFS_DATA}\n${_CFG_ZFS_HOST}:${_SSH_LINE}" + _ZFS_DATA=$(printf "%s\n%s:%s" "${_ZFS_DATA}" "${_CFG_ZFS_HOST}" "${_SSH_LINE}") fi done fi @@ -213,10 +215,10 @@ do then if [[ $(data_lc "${_SERVICE_STATE}") != $(data_lc "${_CFG_SERVICE_STATE}") ]] then - _MSG="state of ${_CFG_ZFS_HOST}/${_CFG_SERVICE_NAME} is incorrect (${_SERVICE_STATE}!=${_CFG_SERVICE_STATE})" + _MSG="state of ${_CFG_ZFS_HOST}/${_CFG_SERVICE_NAME} is NOK (${_SERVICE_STATE}!=${_CFG_SERVICE_STATE})" _STC=1 else - _MSG="state of ${_CFG_ZFS_HOST}/${_CFG_SERVICE_NAME} is correct (${_SERVICE_STATE}=${_CFG_SERVICE_STATE})" + _MSG="state of ${_CFG_ZFS_HOST}/${_CFG_SERVICE_NAME} is OK (${_SERVICE_STATE}==${_CFG_SERVICE_STATE})" _STC=0 fi if (( _LOG_HEALTHY > 0 || _STC > 0 )) diff --git a/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh b/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh index 3463d34..008d3ec 100755 --- a/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh +++ b/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh @@ -27,6 +27,7 @@ # @(#) 2019-02-19: fix for replication value [Patrick Van der Veken] # @(#) 2019-03-16: replace 'which' [Patrick Van der Veken] # @(#) 2019-04-12: small fixes [Patrick Van der Veken] +# @(#) 2019-05-14: small fixes [Patrick Van der Veken] # ----------------------------------------------------------------------------- # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! 
#****************************************************************************** @@ -36,7 +37,7 @@ function check_exadata_zfs_share_replication { # ------------------------- CONFIGURATION starts here ------------------------- typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" -typeset _VERSION="2019-04-12" # YYYY-MM-DD +typeset _VERSION="2019-05-14" # YYYY-MM-DD typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match # replication query script -- DO NOT CHANGE -- # prj1/share1:true:idle:success:111 @@ -170,6 +171,7 @@ print "${_CFG_ZFS_HOSTS}" | while read -r _CFG_ZFS_HOST do (( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}" _SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG}) + # shellcheck disable=SC2181 if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]] then warn "unable to discover replication data on ${_CFG_ZFS_HOST}" @@ -184,7 +186,7 @@ do _ZFS_DATA="${_CFG_ZFS_HOST}:${_SSH_LINE}" else # shellcheck disable=SC1117 - _ZFS_DATA="${_ZFS_DATA}\n${_CFG_ZFS_HOST}:${_SSH_LINE}" + _ZFS_DATA=$(printf "%s\n%s:%s" "${_ZFS_DATA}" "${_CFG_ZFS_HOST}" "${_SSH_LINE}") fi done fi @@ -240,6 +242,7 @@ do if [[ -n "${_CFG_REPLICATION_LAG}" ]] then data_is_numeric "${_CFG_REPLICATION_LAG}" + # shellcheck disable=SC2181 if (( $? > 0 )) then warn "value for is not numeric in configuration file ${_CONFIG_FILE}" @@ -263,10 +266,10 @@ do # check replication enabled state (active or not?) 
if [[ $(data_lc "${_REPLICATION_ENABLED}") != $(data_lc "${_CFG_REPLICATION_ENABLED}") ]] then - _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is incorrect [${_REPLICATION_ENABLED}!=${_CFG_REPLICATION_ENABLED}]" + _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is NOK [${_REPLICATION_ENABLED}!=${_CFG_REPLICATION_ENABLED}]" _STC=1 else - _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is correct [${_REPLICATION_ENABLED}=${_CFG_REPLICATION_ENABLED}]" + _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_ENABLED}==${_CFG_REPLICATION_ENABLED}]" _STC=0 fi if (( _LOG_HEALTHY > 0 || _STC > 0 )) @@ -276,10 +279,10 @@ do # check replication last result (success or not?) if [[ $(data_lc "${_REPLICATION_RESULT}") != $(data_lc "${_CFG_REPLICATION_RESULT}") ]] then - _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is incorrect [${_REPLICATION_RESULT}!=${_CFG_REPLICATION_RESULT}]" + _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is NOK [${_REPLICATION_RESULT}!=${_CFG_REPLICATION_RESULT}]" _STC=1 else - _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is correct [${_REPLICATION_RESULT}=${_CFG_REPLICATION_RESULT}]" + _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_RESULT}==${_CFG_REPLICATION_RESULT}]" _STC=0 fi if (( _LOG_HEALTHY > 0 || _STC > 0 )) @@ -289,6 +292,7 @@ do # check replication lag # caveat: replication lag is at initial replication data_contains_string "${_REPLICATION_LAG}" "unknown" + # shellcheck disable=SC2181 if (( $? 
> 0 )) then _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is unknown" diff --git a/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh b/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh index c505c0e..88064c8 100755 --- a/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh +++ b/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh @@ -19,14 +19,15 @@ # @(#) MAIN: check_exadata_zfs_share_usage # DOES: see _show_usage() # EXPECTS: see _show_usage() -# REQUIRES: data_comma2space(), dump_logs(), init_hc(), linux_exec_ssh(), -# log_hc(), warn() +# REQUIRES: data_comma2space(), data_get_lvalue_from_config, dump_logs(), +# init_hc(), linux_exec_ssh(), log_hc(), warn() # # @(#) HISTORY: # @(#) 2019-02-18: initial version [Patrick Van der Veken] # @(#) 2019-03-16: replace 'which' [Patrick Van der Veken] # @(#) 2019-04-09: fix bad math in ZFS script & HC message [Patrick Van der Veken] # @(#) 2019-04-12: small fixes [Patrick Van der Veken] +# @(#) 2019-05-14: small fixes [Patrick Van der Veken] # ----------------------------------------------------------------------------- # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! #****************************************************************************** @@ -36,7 +37,7 @@ function check_exadata_zfs_share_usage { # ------------------------- CONFIGURATION starts here ------------------------- typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" -typeset _VERSION="2019-04-12" # YYYY-MM-DD +typeset _VERSION="2019-05-14" # YYYY-MM-DD typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match # usage query script -- DO NOT CHANGE -- # prj1:share1:16 @@ -178,6 +179,7 @@ print "${_CFG_ZFS_HOSTS}" | while read -r _CFG_ZFS_HOST do (( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}" _SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG}) + # shellcheck disable=SC2181 if (( $? 
> 0 )) || [[ -z "${_SSH_OUTPUT}" ]] then warn "unable to discover usage data on ${_CFG_ZFS_HOST}" @@ -192,7 +194,7 @@ do _ZFS_DATA="${_CFG_ZFS_HOST}:${_SSH_LINE}" else # shellcheck disable=SC1117 - _ZFS_DATA="${_ZFS_DATA}\n${_CFG_ZFS_HOST}:${_SSH_LINE}" + _ZFS_DATA=$(printf "%s\n%s:%s" "${_ZFS_DATA}" "${_CFG_ZFS_HOST}" "${_SSH_LINE}") fi done fi @@ -242,6 +244,7 @@ do if [[ -n "${_CFG_SPACE_THRESHOLD}" ]] then data_is_numeric "${_CFG_SPACE_THRESHOLD}" + # shellcheck disable=SC2181 if (( $? > 0 )) then warn "value for is not numeric in configuration file ${_CONFIG_FILE}"