Added Exadata stuff:

- Added include_exadata
- Added plugin check_exadata_cell_alerts
- Added plugin check_exadata_cell_celldisks
- Added plugin check_exadata_cell_flash
- Added plugin check_exadata_cell_griddisks
- Added plugin check_exadata_cell_luns
- Added plugin check_exadata_cell_megaraid
- Added plugin check_exadata_cell_physicaldisks
- Added plugin check_exadata_megaraid
This commit is contained in:
Patrick Van der Veken 2019-05-16 16:23:06 +02:00
parent a6c907f236
commit 62095c510e
27 changed files with 2863 additions and 53 deletions

View File

@ -17,14 +17,32 @@ sha256sums=('SKIP')
package() { package() {
cd "${srcdir}/${_pkgname}" cd "${srcdir}/${_pkgname}"
install -d -m 755 ${pkgdir}/opt/hc/lib install -d -m 755 ${pkgdir}/opt/hc/lib
# core include libraries (include_exadata.sh) live under lib/core
install -d -m 755 ${pkgdir}/opt/hc/lib/core
install -D -m 755 opt/hc/lib/core/include_exadata.sh ${pkgdir}/opt/hc/lib/core/include_exadata.sh
install -d -m 755 ${pkgdir}/opt/hc/lib/platform install -d -m 755 ${pkgdir}/opt/hc/lib/platform
install -d -m 755 ${pkgdir}/opt/hc/lib/platform/exadata install -d -m 755 ${pkgdir}/opt/hc/lib/platform/exadata
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_megaraid.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_megaraid.sh
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh
install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh install -D -m 755 opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh ${pkgdir}/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh
install -d -m 755 ${pkgdir}/etc/opt/hc install -d -m 755 ${pkgdir}/etc/opt/hc
install -D -m 644 etc/opt/hc/check_exadata_cell_alerts.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_alerts.conf.dist
install -D -m 644 etc/opt/hc/check_exadata_cell_celldisks.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_celldisks.conf.dist
install -D -m 644 etc/opt/hc/check_exadata_cell_flash.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_flash.conf.dist
install -D -m 644 etc/opt/hc/check_exadata_cell_griddisks.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_griddisks.conf.dist
install -D -m 644 etc/opt/hc/check_exadata_cell_luns.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_luns.conf.dist
install -D -m 644 etc/opt/hc/check_exadata_cell_megaraid.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_megaraid.conf.dist
install -D -m 644 etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist
install -D -m 644 etc/opt/hc/check_exadata_megaraid.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_megaraid.conf.dist
install -D -m 644 etc/opt/hc/check_exadata_zfs_logs.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_logs.conf.dist install -D -m 644 etc/opt/hc/check_exadata_zfs_logs.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_logs.conf.dist
install -D -m 644 etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist install -D -m 644 etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist
install -D -m 644 etc/opt/hc/check_exadata_zfs_services.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_services.conf.dist install -D -m 644 etc/opt/hc/check_exadata_zfs_services.conf.dist ${pkgdir}/etc/opt/hc/check_exadata_zfs_services.conf.dist

View File

@ -25,8 +25,18 @@ This package contains platform/OS specific plugins.
%install %install
rm -rf $RPM_BUILD_ROOT rm -rf $RPM_BUILD_ROOT
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/core
cp ../SOURCES/opt/hc/lib/core/include_exadata.sh $RPM_BUILD_ROOT/opt/hc/lib/core
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/platform install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/platform
install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata install -d -m 755 $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_megaraid.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_megaraid.sh
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh
@ -34,6 +44,14 @@ cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh
cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh cp ../SOURCES/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh $RPM_BUILD_ROOT/opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh
install -d -m 755 $RPM_BUILD_ROOT/etc/opt/hc install -d -m 755 $RPM_BUILD_ROOT/etc/opt/hc
cp ../SOURCES/etc/opt/hc/check_exadata_cell_alerts.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_alerts.conf.dist
cp ../SOURCES/etc/opt/hc/check_exadata_cell_celldisks.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_celldisks.conf.dist
cp ../SOURCES/etc/opt/hc/check_exadata_cell_flash.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_flash.conf.dist
cp ../SOURCES/etc/opt/hc/check_exadata_cell_griddisks.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_griddisks.conf.dist
cp ../SOURCES/etc/opt/hc/check_exadata_cell_luns.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_luns.conf.dist
cp ../SOURCES/etc/opt/hc/check_exadata_cell_megaraid.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_megaraid.conf.dist
cp ../SOURCES/etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist
cp ../SOURCES/etc/opt/hc/check_exadata_megaraid.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_megaraid.conf.dist
cp ../SOURCES/etc/opt/hc/check_exadata_zfs_logs.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_logs.conf.dist cp ../SOURCES/etc/opt/hc/check_exadata_zfs_logs.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_logs.conf.dist
cp ../SOURCES/etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist cp ../SOURCES/etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist
cp ../SOURCES/etc/opt/hc/check_exadata_zfs_services.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_services.conf.dist cp ../SOURCES/etc/opt/hc/check_exadata_zfs_services.conf.dist $RPM_BUILD_ROOT/etc/opt/hc/check_exadata_zfs_services.conf.dist
@ -74,14 +92,32 @@ echo "INFO: finished post-uninstall script"
%files %files
%defattr(-,root,root,755) %defattr(-,root,root,755)
%dir /opt/hc/lib %dir /opt/hc/lib
%dir /opt/hc/lib/core
%attr(755, root, root) /opt/hc/lib/core/include_exadata.sh
%dir /opt/hc/lib/platform %dir /opt/hc/lib/platform
%dir /opt/hc/lib/platform/exadata %dir /opt/hc/lib/platform/exadata
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_alerts.sh
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_celldisks.sh
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_flash.sh
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_griddisks.sh
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_luns.sh
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_megaraid.sh
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_cell_physicaldisks.sh
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_megaraid.sh
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh %attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_logs.sh
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh %attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_pool_usage.sh
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh %attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_services.sh
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh %attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_share_replication.sh
%attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh %attr(755, root, root) /opt/hc/lib/platform/exadata/check_exadata_zfs_share_usage.sh
%dir /etc/opt/hc %dir /etc/opt/hc
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_alerts.conf.dist
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_celldisks.conf.dist
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_flash.conf.dist
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_griddisks.conf.dist
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_luns.conf.dist
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_megaraid.conf.dist
%attr(644, root, root) /etc/opt/hc/check_exadata_cell_physicaldisks.conf.dist
%attr(644, root, root) /etc/opt/hc/check_exadata_megaraid.conf.dist
%attr(644, root, root) /etc/opt/hc/check_exadata_zfs_logs.conf.dist %attr(644, root, root) /etc/opt/hc/check_exadata_zfs_logs.conf.dist
%attr(644, root, root) /etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist %attr(644, root, root) /etc/opt/hc/check_exadata_zfs_pool_usage.conf.dist
%attr(644, root, root) /etc/opt/hc/check_exadata_zfs_services.conf.dist %attr(644, root, root) /etc/opt/hc/check_exadata_zfs_services.conf.dist
@ -90,6 +126,16 @@ echo "INFO: finished post-uninstall script"
%changelog %changelog
* Tue May 14 2019 <patrick@kudos.be> - 0.3.0
- Added include_exadata
- Added plugin check_exadata_cell_alerts
- Added plugin check_exadata_cell_celldisks
- Added plugin check_exadata_cell_flash
- Added plugin check_exadata_cell_griddisks
- Added plugin check_exadata_cell_luns
- Added plugin check_exadata_cell_megaraid
- Added plugin check_exadata_cell_physicaldisks
- Added plugin check_exadata_megaraid
* Fri Apr 12 2019 <patrick@kudos.be> - 0.2.0 * Fri Apr 12 2019 <patrick@kudos.be> - 0.2.0
- Added plugin check_exadata_zfs_pool_usage - Added plugin check_exadata_zfs_pool_usage
* Tue Mar 26 2019 <patrick@kudos.be> - 0.1.0 * Tue Mar 26 2019 <patrick@kudos.be> - 0.1.0

View File

@ -0,0 +1,30 @@
#******************************************************************************
# @(#) check_exadata_cell_alerts.conf
#******************************************************************************
# This is a configuration file for the check_exadata_cell_alerts HC plugin.
# All lines starting with a '#' are comment lines.
# [default: indicates hardcoded script values if no value is defined here]
#******************************************************************************
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy="yes"
# specify the user account for the dcli session to the cell server
# [default: root]
dcli_user=""
# specify the cell servers to query
# [default: null]
cell_servers=""
# specify the alert severities to report (comma-separated)
# Possible values: critical/warning/info/clear
# [default: critical]
alert_severities="critical"
#******************************************************************************
# End of FILE
#******************************************************************************

View File

@ -0,0 +1,29 @@
#******************************************************************************
# @(#) check_exadata_cell_celldisks.conf
#******************************************************************************
# This is a configuration file for the check_exadata_cell_celldisks HC plugin.
# All lines starting with a '#' are comment lines.
# [default: indicates hardcoded script values if no value is defined here]
#******************************************************************************
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy="yes"
# specify the user account for the dcli session to the cell server
# [default: root]
dcli_user=""
# specify the cell servers to query
# [default: null]
cell_servers=""
# specify which cell disks to exclude from the check (comma-separated)
# [default: null]
excluded_disks=""
#******************************************************************************
# End of FILE
#******************************************************************************

View File

@ -0,0 +1,37 @@
#******************************************************************************
# @(#) check_exadata_cell_flash.conf
#******************************************************************************
# This is a configuration file for the check_exadata_cell_flash HC plugin.
# All lines starting with a '#' are comment lines.
# [default: indicates hardcoded script values if no value is defined here]
#******************************************************************************
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy="yes"
# specify the user account for the dcli session to the cell server
# [default: root]
dcli_user=""
# specify the cell servers to query
# [default: null]
cell_servers=""
# specify which flash devices to exclude from the check (comma-separated)
# [default: null]
excluded_devices=""
# specify whether to check the flash cache status (yes|no)
# [default: yes]
check_flashcache="yes"
# specify whether to check the flash log status (yes|no)
# [default: yes]
check_flashlog="yes"
#******************************************************************************
# End of FILE
#******************************************************************************

View File

@ -0,0 +1,33 @@
#******************************************************************************
# @(#) check_exadata_cell_griddisks.conf
#******************************************************************************
# This is a configuration file for the check_exadata_cell_griddisks HC plugin.
# All lines starting with a '#' are comment lines.
# [default: indicates hardcoded script values if no value is defined here]
#******************************************************************************
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy="yes"
# specify the user account for the dcli session to the cell server
# [default: root]
dcli_user=""
# specify the cell servers to query
# [default: null]
cell_servers=""
# specify which grid disks to exclude from the check (comma-separated)
# [default: null]
excluded_disks=""
# specify whether to check the errorCount of griddisks (yes|no)
# [default: yes]
check_errorcount="yes"
#******************************************************************************
# End of FILE
#******************************************************************************

View File

@ -0,0 +1,29 @@
#******************************************************************************
# @(#) check_exadata_cell_luns.conf
#******************************************************************************
# This is a configuration file for the check_exadata_cell_luns HC plugin.
# All lines starting with a '#' are comment lines.
# [default: indicates hardcoded script values if no value is defined here]
#******************************************************************************
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy="yes"
# specify the user account for the dcli session to the cell server
# [default: root]
dcli_user=""
# specify the cell servers to query
# [default: null]
cell_servers=""
# specify which luns to exclude from the check (comma-separated)
# [default: null]
excluded_luns=""
#******************************************************************************
# End of FILE
#******************************************************************************

View File

@ -0,0 +1,41 @@
#******************************************************************************
# @(#) check_exadata_cell_megaraid.conf
#******************************************************************************
# This is a configuration file for the check_exadata_cell_megaraid HC plugin.
# All lines starting with a '#' are comment lines.
# [default: indicates hardcoded script values if no value is defined here]
#******************************************************************************
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy="yes"
# specify the user account for the dcli session to the cell server
# [default: root]
dcli_user=""
# specify the cell servers to query
# [default: null]
cell_servers=""
# specify whether to check the controller state (yes|no)
# [default: yes]
check_controller="yes"
# specify whether to check the BBU (battery) state (yes|no)
# [default: yes]
check_bbu="yes"
# specify whether to check the physical devices (yes|no)
# [default: yes]
check_physical="yes"
# specify whether to check the virtual devices (yes|no)
# [default: yes]
check_virtual="yes"
#******************************************************************************
# End of FILE
#******************************************************************************

View File

@ -0,0 +1,29 @@
#******************************************************************************
# @(#) check_exadata_cell_physicaldisks.conf
#******************************************************************************
# This is a configuration file for the check_exadata_cell_physicaldisks HC plugin.
# All lines starting with a '#' are comment lines.
# [default: indicates hardcoded script values if no value is defined here]
#******************************************************************************
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy="yes"
# specify the user account for the dcli session to the cell server
# [default: root]
dcli_user=""
# specify the cell servers to query
# [default: null]
cell_servers=""
# specify which physical disks to exclude from the check (comma-separated)
# [default: null]
excluded_disks=""
#******************************************************************************
# End of FILE
#******************************************************************************

View File

@ -0,0 +1,33 @@
#******************************************************************************
# @(#) check_exadata_megaraid.conf
#******************************************************************************
# This is a configuration file for the check_exadata_megaraid HC plugin.
# All lines starting with a '#' are comment lines.
# [default: indicates hardcoded script values if no value is defined here]
#******************************************************************************
# specify whether to also log passed health checks
# (warning: this may rapidly grow the HC log)
# [default: no]
log_healthy="yes"
# specify whether to check the controller state (yes|no)
# [default: yes]
check_controller="yes"
# specify whether to check the BBU (battery) state (yes|no)
# [default: yes]
check_bbu="yes"
# specify whether to check the physical devices (yes|no)
# [default: yes]
check_physical="yes"
# specify whether to check the virtual devices (yes|no)
# [default: yes]
check_virtual="yes"
#******************************************************************************
# End of FILE
#******************************************************************************

View File

@ -25,7 +25,7 @@ ssh_opts=""
# specify the ZFS hostname(s), log name(s) & alert levels. Filters # specify the ZFS hostname(s), log name(s) & alert levels. Filters
# should be comma-separated. Following logs are supported (filters in brackets) # should be comma-separated. Following logs are supported (filters in brackets)
# alert (critical,major,minor # alert (critical,major,minor)
# fltlog (critical,major,minor) # fltlog (critical,major,minor)
# system (error) # system (error)
# scrk (failed) # scrk (failed)

View File

@ -38,7 +38,7 @@
# ------------------------- CONFIGURATION starts here ------------------------- # ------------------------- CONFIGURATION starts here -------------------------
# define the version (YYYY-MM-DD) # define the version (YYYY-MM-DD)
typeset -r SCRIPT_VERSION="2019-04-03" typeset -r SCRIPT_VERSION="2019-05-14"
# location of parent directory containing KSH functions/HC plugins # location of parent directory containing KSH functions/HC plugins
typeset -r FPATH_PARENT="/opt/hc/lib" typeset -r FPATH_PARENT="/opt/hc/lib"
# location of custom HC configuration files # location of custom HC configuration files
@ -235,32 +235,28 @@ return 0
function check_core function check_core
{ {
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}" (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set "${DEBUG_OPTS}"
typeset INCLUDE_FILE=""
# check and include core helper libs # check include_core (MUST be present)
if [[ -r ${FPATH_PARENT}/core/include_core.sh && -h ${FPATH_PARENT}/core/include_core ]] if [[ ! -r ${FPATH_PARENT}/core/include_core.sh || ! -h ${FPATH_PARENT}/core/include_core ]]
then then
# shellcheck source=/dev/null print -u2 "ERROR: library file ${FPATH_PARENT}/core/include_core(.sh) is not present (tip: run --fix-symlinks)"
. ${FPATH_PARENT}/core/include_core.sh
else
print -u2 "ERROR: library file ${FPATH_PARENT}/core/include_core.sh is not present (tip: run --fix-symlinks)"
exit 1 exit 1
fi fi
if [[ -r ${FPATH_PARENT}/core/include_data.sh && -h ${FPATH_PARENT}/core/include_data ]] # include include_*
find ${FPATH_PARENT}/core -name "include_*.sh" -type f -print 2>/dev/null | while read INCLUDE_FILE
do
if [[ -h ${INCLUDE_FILE%%.sh} ]]
then then
# shellcheck source=/dev/null # shellcheck source=/dev/null
. ${FPATH_PARENT}/core/include_data.sh (( ARG_DEBUG > 0 )) && print -u2 "DEBUG: including ${INCLUDE_FILE}"
else
print -u2 "ERROR: library file ${FPATH_PARENT}/core/include_data.sh is not present (tip: run --fix-symlinks)"
exit 1
fi
if [[ -r ${FPATH_PARENT}/core/include_os.sh && -h ${FPATH_PARENT}/core/include_os ]]
then
# shellcheck source=/dev/null # shellcheck source=/dev/null
. ${FPATH_PARENT}/core/include_os.sh . ${INCLUDE_FILE}
else else
print -u2 "ERROR: library file ${FPATH_PARENT}/core/include_os.sh is not present (tip: run --fix-symlinks)" print -u2 "ERROR: library file ${INCLUDE_FILE} exists but has no symlink. Run --fix-symlinks"
exit 1 exit 1
fi fi
done
# check for core directories # check for core directories
[[ -d ${ARCHIVE_DIR} ]] || mkdir -p "${ARCHIVE_DIR}" >/dev/null 2>&1 [[ -d ${ARCHIVE_DIR} ]] || mkdir -p "${ARCHIVE_DIR}" >/dev/null 2>&1
@ -519,9 +515,9 @@ case "${KSH_VERSION}" in
if [[ -z "${ERRNO}" ]] if [[ -z "${ERRNO}" ]]
then then
# shellcheck disable=SC2154 # shellcheck disable=SC2154
(( ARG_DEBUG > 0 )) && print "running ksh: ${.sh.version}" (( ARG_DEBUG > 0 )) && debug "running ksh: ${.sh.version}"
else else
(( ARG_DEBUG > 0 )) && print "running ksh: ksh88 or older" (( ARG_DEBUG > 0 )) && debug "running ksh: ksh88 or older"
fi fi
;; ;;
esac esac

View File

@ -0,0 +1,85 @@
#!/usr/bin/env ksh
#******************************************************************************
# @(#) include_exadata.sh
#******************************************************************************
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# DOCUMENTATION (MAIN)
# -----------------------------------------------------------------------------
# @(#) MAIN: include_exadata
# DOES: helper functions for Exadata related functions
#
# -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#******************************************************************************
# -----------------------------------------------------------------------------
# @(#) FUNCTION: version_include_exadata()
# DOES: dummy function, acts as version placeholder for this include file
# EXPECTS: n/a
# RETURNS: 0
# OUTPUTS: one "INFO: <$0>: <version>" line on STDOUT
function version_include_exadata
{
# release date of this include file (YYYY-MM-DD)
typeset _INCLUDE_VERSION="2019-05-14"

# the prefix strip leaves a plain date string untouched
print "INFO: $0: ${_INCLUDE_VERSION#version_*}"

return 0
}
# -----------------------------------------------------------------------------
# @(#) FUNCTION: exadata_exec_dcli()
# DOES: execute a command via dcli
# EXPECTS: 1=options [string] (optional, split on whitespace into dcli options),
#          2=user [string], 3=host(s) [string],
#          4=SSH options [string] (optional, passed to 'dcli -s' as ONE argument),
#          5=command [string]
# RETURNS: exit code of the dcli call; 255 when user, host(s) or command is
#          empty or when dcli cannot be located
# OUTPUTS: STDOUT from DCLI call (STDERR is appended to ${HC_STDERR_LOG})
# REQUIRES: dcli command-line utility, warn()
function exadata_exec_dcli
{
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
typeset _DCLI_OPTS="${1}"
typeset _DCLI_USER="${2}"
typeset _DCLI_HOSTS="${3}"
typeset _SSH_OPTS="${4}"
typeset _DCLI_COMMAND="${5}"
typeset _DCLI_BIN=""

# user, host(s) and command are mandatory
if [[ -z "${_DCLI_USER}" || -z "${_DCLI_HOSTS}" || -z "${_DCLI_COMMAND}" ]]
then
    return 255
fi

# find dcli
_DCLI_BIN=$(command -v dcli 2>>"${HC_STDERR_LOG}")
if [[ -z "${_DCLI_BIN}" || ! -x "${_DCLI_BIN}" ]]
then
    # don't spoil STDOUT
    ARG_VERBOSE=0 warn "could not determine location for {dcli} (or it is not installed here)"
    return 255
fi

# execute dcli
# - ${_DCLI_OPTS} stays unquoted on purpose: it may hold several options
# - ${_SSH_OPTS} must be quoted: it is the single value of the -s option and
#   would otherwise be split, with the surplus words read as extra arguments
if [[ -z "${_SSH_OPTS}" ]]
then
    "${_DCLI_BIN}" ${_DCLI_OPTS} -l "${_DCLI_USER}" -c "${_DCLI_HOSTS}" "${_DCLI_COMMAND}" 2>>"${HC_STDERR_LOG}" </dev/null
else
    "${_DCLI_BIN}" ${_DCLI_OPTS} -l "${_DCLI_USER}" -c "${_DCLI_HOSTS}" -s "${_SSH_OPTS}" "${_DCLI_COMMAND}" 2>>"${HC_STDERR_LOG}" </dev/null
fi

return $?
}
#******************************************************************************
# END of script
#******************************************************************************

View File

@ -16,7 +16,7 @@
# #
# DOCUMENTATION (MAIN) # DOCUMENTATION (MAIN)
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# @(#) MAIN: include_OS # @(#) MAIN: include_os
# DOES: helper functions for OS related functions # DOES: helper functions for OS related functions
# #
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------

View File

@ -0,0 +1,297 @@
#!/usr/bin/env ksh
#******************************************************************************
# @(#) check_exadata_cell_alerts.sh
#******************************************************************************
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# DOCUMENTATION (MAIN)
# -----------------------------------------------------------------------------
# @(#) MAIN: check_exadata_cell_alerts
# DOES: see _show_usage()
# EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), data_comma2newline(), data_lc(),
# data_list_contains_string(), data_is_numeric(),
# data_get_lvalue_from_config(), dump_logs(), exadata_exec_dcli(),
# init_hc(), log_hc(), warn()
#
# @(#) HISTORY:
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
# -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#******************************************************************************
# -----------------------------------------------------------------------------
function check_exadata_cell_alerts
{
# Reports alert history entries of Exadata cell servers that are newer than
# the sequence ID remembered in a per-server state file
# (${STATE_PERM_DIR}/<cell_server>.alerts).
#   ARGS     : comma-separated plugin arguments; only 'help' is handled here
#   CONFIG   : log_healthy, dcli_user (falls back to root), cell_servers and
#              alert_severities (the last two are mandatory)
#   RETURNS  : 0 when the check ran; 1 when the configuration file is not
#              readable, a mandatory setting is missing or a state file could
#              not be created
#   NOTE(review): the 'while' loops below are the last stage of a pipeline;
#              the 'return' inside them assumes a ksh that runs that stage in
#              the current shell -- confirm when porting to another shell.
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
#celadm03: name: 2
#celadm03: alertMessage: "VD bad block table cleared on Adapter 0 VD Target 2"
#celadm03: alertSequenceID: 2
#celadm03: alertShortName: Hardware
#celadm03: alertType: Stateless
#celadm03: beginTime: 2019-04-21T08:17:44+02:00
#celadm03: endTime:
#celadm03: examinedBy:
#celadm03: notificationState: non-deliverable
#celadm03: sequenceBeginTime: 2019-04-21T08:17:44+02:00
#celadm03: severity: info
#celadm03: alertAction: Informational.
typeset _CELL_COMMAND="cellcli -e 'LIST ALERTHISTORY DETAIL'"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
typeset _CFG_ALERT_SEVERITIES=""
typeset _CELL_OUTPUT=""
typeset _LAST_SEQUENCE=0
typeset _STATE_FILE=""
typeset _ALERT_DESCRIPTION=""
typeset _ALERT_SEQUENCE=""
typeset _ALERT_SEVERITY=""

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
_CFG_ALERT_SEVERITIES=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'alert_severities')
if [[ -z "${_CFG_ALERT_SEVERITIES}" ]]
then
    warn "no alert severities specified in configuration file at ${_CONFIG_FILE}"
    return 1
else
    # severities are compared in lower case (awk lower-cases them as well)
    _CFG_ALERT_SEVERITIES=$(data_lc "${_CFG_ALERT_SEVERITIES}")
fi
# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    # every cell server has its own log pointer: start from 0 so that a server
    # without a (readable) state file never inherits the previous server's value
    _LAST_SEQUENCE=0

    # check state file
    _STATE_FILE="${STATE_PERM_DIR}/${_CFG_CELL_SERVER}.alerts"
    (( ARG_DEBUG > 0 )) && debug "checking/reading state file at ${_STATE_FILE}"
    if [[ -r ${_STATE_FILE} ]]
    then
        _LAST_SEQUENCE=$(<"${_STATE_FILE}")
        if [[ -z "${_LAST_SEQUENCE}" ]]
        then
            (( ARG_DEBUG > 0 )) && debug "no recorded last log entry for ${_CFG_CELL_SERVER}, resetting to 0"
            _LAST_SEQUENCE=0
        else
            (( ARG_DEBUG > 0 )) && debug "recorded last log entry for ${_CFG_CELL_SERVER}: ${_LAST_SEQUENCE}"
        fi
    else
        : >"${_STATE_FILE}"
        # shellcheck disable=SC2181
        (( $? > 0 )) && {
            warn "failed to create new state file at ${_STATE_FILE}"
            return 1
        }
        log "created new state file at ${_STATE_FILE}"
    fi

    # execute remote command
    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
    # empty _CELL_OUTPUT means alert history reset
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        _MSG="did not discover cell data or one of the discoveries failed"
        _STC=2
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}"
        fi
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        continue
    else
        # empty alert history?
        if [[ -z "${_CELL_OUTPUT}" ]]
        then
            # zero the state file
            if (( ARG_LOG > 0 ))
            then
                warn "null resetting the current log pointer for ${_CFG_CELL_SERVER}"
                : >"${_STATE_FILE}" 2>>"${HC_STDERR_LOG}"
            fi
        fi
    fi

    # perform checks on cell data: awk emits one 'description|sequence|severity'
    # record as soon as all three attributes of an alert have been seen
    print -R "${_CELL_OUTPUT}" | awk '
        BEGIN { found = 0; alert_description = ""; alert_sequence = ""; alert_severity = ""; }

        {
            # split cell data line
            split ($0, cell_line, ":");
            if ( cell_line[2] ~ /alertDescription/ ) {
                found = 1;
                alert_description = cell_line[3];
                # strip leading spaces & quotes
                gsub (/^[[:space:]]*/, "", alert_description);
                gsub (/\"/, "", alert_description);
            }
            if ( cell_line[2] ~ /alertSequenceID/ ) {
                alert_sequence = cell_line[3];
                # strip spaces
                gsub (/[[:space:]]/, "", alert_sequence);
            };
            if ( cell_line[2] ~ /severity/ ) {
                alert_severity = cell_line[3];
                # strip spaces
                gsub (/[[:space:]]/, "", alert_severity);
            };
            if ( alert_description != "" && alert_sequence != "" && alert_severity != "" && found ) {
                printf "%s|%s|%s\n", alert_description, alert_sequence, tolower (alert_severity)
                found = 0; alert_description = ""; alert_sequence = ""; alert_severity = "";
            }
        }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _ALERT_DESCRIPTION _ALERT_SEQUENCE _ALERT_SEVERITY
    do
        # check for numeric
        data_is_numeric "${_ALERT_SEQUENCE}"
        # shellcheck disable=SC2181
        if (( $? > 0 ))
        then
            warn "non-numeric sequence ID encountered: [${_CFG_CELL_SERVER}/${_ALERT_SEVERITY}/${_ALERT_SEQUENCE}/${_ALERT_DESCRIPTION}]"
            continue
        fi
        if (( _ALERT_SEQUENCE > _LAST_SEQUENCE ))
        then
            # check severities list
            # NOTE(review): a zero return code is treated here as "severity is
            # not in the configured list" -- confirm against the helper
            data_list_contains_string "${_CFG_ALERT_SEVERITIES}" "${_ALERT_SEVERITY}"
            # shellcheck disable=SC2181
            if (( $? == 0 ))
            then
                (( ARG_DEBUG > 0 )) && debug "ignoring alert because of severity: [${_CFG_CELL_SERVER}/${_ALERT_SEVERITY}/${_ALERT_SEQUENCE}/${_ALERT_DESCRIPTION}]"
                continue
            else
                # a new alert of a monitored severity is a failed check: it must
                # be reported whether or not log_healthy is enabled
                _MSG="ID=${_ALERT_SEQUENCE} (${_ALERT_SEVERITY}) ${_ALERT_DESCRIPTION}"
                _STC=1
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_CFG_CELL_SERVER}: ${_MSG}"
                fi
            fi
        else
            if (( _LOG_HEALTHY > 0 ))
            then
                _MSG="no (new) messages discovered from ${_CFG_CELL_SERVER}"
                log_hc "$0" 0 "${_MSG}"
            fi
        fi

        # rewrite log pointer from the last log entry we discovered
        if (( ARG_LOG > 0 ))
        then
            (( _ALERT_SEQUENCE == 0 )) && _ALERT_SEQUENCE=${_LAST_SEQUENCE}
            (( ARG_DEBUG > 0 )) && debug "updating last log entry for ${_CFG_CELL_SERVER} to ${_ALERT_SEQUENCE}"
            print "${_ALERT_SEQUENCE}" >"${_STATE_FILE}" 2>>"${HC_STDERR_LOG}"
        fi
    done

    # add dcli output of this cell server to stdout log
    print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
    print "${_CELL_OUTPUT}" >>"${HC_STDOUT_LOG}"
done

return 0
}
# -----------------------------------------------------------------------------
function _show_usage
{
# Prints the help text of the check_exadata_cell_alerts plugin on stdout.
#   $1 : plugin name
#   $2 : plugin version
#   $3 : path of the plugin configuration file
# Always returns 0.
# The parameter is spelled 'dcli_user' because that is the key the plugin
# reads from its configuration file.
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
alert_severities=<list_of_severities_to_report_on>
PURPOSE : Checks the alert history on cell servers (via dcli)
dcli> cellcli -e 'LIST ALERTHISTORY DETAIL'
CAVEAT : Requires a working dcli setup for the root user
LOG HEALTHY : Supported
EOT
return 0
}
#******************************************************************************
# END of script
#******************************************************************************

View File

@ -0,0 +1,246 @@
#!/usr/bin/env ksh
#******************************************************************************
# @(#) check_exadata_cell_celldisks.sh
#******************************************************************************
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# DOCUMENTATION (MAIN)
# -----------------------------------------------------------------------------
# @(#) MAIN: check_exadata_cell_celldisks
# DOES: see _show_usage()
# EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config(),
#           data_list_contains_string(), debug(), dump_logs(), exadata_exec_dcli(),
#           init_hc(), log(), log_hc(), warn()
#
# @(#) HISTORY:
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
# -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#******************************************************************************
# -----------------------------------------------------------------------------
function check_exadata_cell_celldisks
{
# Checks that every cell disk reported by the configured Exadata cell servers
# has the status held in _TARGET_STATUS.
#   ARGS     : comma-separated plugin arguments; only 'help' is handled here
#   CONFIG   : log_healthy, dcli_user (falls back to root), cell_servers
#              (mandatory), excluded_disks (optional)
#   RETURNS  : 0 when the check ran; 1 when the configuration file is not
#              readable, no cell servers are configured or no usable cell data
#              was collected
#   NOTE(review): _CELL_DATA and _CELL_ALL_RC are filled inside a 'while' loop
#              fed by a pipeline; this assumes a ksh that runs the last pipeline
#              stage in the current shell -- confirm when porting.
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
#celadm01:name: CD_00_celadm01
#celadm01:comment:
#celadm01:creationTime: 2017-08-29T12:46:36+02:00
#celadm01:deviceName: /dev/sda
#celadm01:devicePartition: /dev/sda3
#celadm01:diskType: HardDisk
#celadm01:errorCount: 0
#celadm01:freeSpace: 379.3125G
#celadm01:id: a6ef7710-6fe7-456e-9571-9ee3c8e53c3f
#celadm01:physicalDisk: P6KRUV
#celadm01:size: 7.1194915771484375T
#celadm01:status: normal
typeset _CELL_COMMAND="cellcli -e 'LIST CELLDISK DETAIL'"
typeset _TARGET_STATUS="normal"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
typeset _CFG_EXCLUDED_DISKS=""
typeset _CELL_OUTPUT=""
typeset _CELL_DATA=""
# target of the 'read' in the check loop; declared so it stays local
typeset _CELL_SERVER=""
typeset _CELL_DISK=""
typeset _DISK_STATUS=""
typeset _CELL_ALL_RC=0
typeset _CELL_RC=0

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
_CFG_EXCLUDED_DISKS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_disks')
if [[ -n "${_CFG_EXCLUDED_DISKS}" ]]
then
    log "excluding following cell disk(s) from the check: ${_CFG_EXCLUDED_DISKS}"
fi
# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
    _CELL_RC=$?
    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
    then
        # remember the failure; it is evaluated once all servers were queried
        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        continue
    else
        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
        # shellcheck disable=SC1117
        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
    fi
done

# validate cell data
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
then
    _MSG="did not discover cell data or one of the discoveries failed"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 1
fi

# perform checks on cell data: awk emits one 'server|disk|status' record per
# cell disk once both its name and its status have been seen
print -R "${_CELL_DATA}" | awk '
    BEGIN { found = 0; cell_disk = ""; disk_status = ""; }

    {
        # split cell data line
        split ($0, cell_line, ":");
        if ( cell_line[2] ~ /name/ ) {
            found = 1;
            cell_disk = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", cell_disk);
        }
        if ( cell_line[2] ~ /status/ ) {
            disk_status = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", disk_status);
        };
        if ( cell_disk != "" && disk_status != "" && found ) {
            printf "%s|%s|%s\n", cell_line[1], cell_disk, disk_status
            found = 0; cell_disk = ""; disk_status = "";
        }
    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _CELL_DISK _DISK_STATUS
do
    # check exclusion list
    # NOTE(review): a non-zero return code is treated here as "disk is in the
    # exclusion list" -- confirm against the helper
    data_list_contains_string "${_CFG_EXCLUDED_DISKS}" "${_CELL_DISK}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "ignoring cell disk ${_CELL_DISK}"
    else
        if [[ "${_DISK_STATUS}" != "${_TARGET_STATUS}" ]]
        then
            _MSG="status of cell disk ${_CELL_SERVER}:/${_CELL_DISK} is NOK (${_DISK_STATUS}!=${_TARGET_STATUS})"
            _STC=1
        else
            _MSG="status of cell disk ${_CELL_SERVER}:/${_CELL_DISK} is OK (${_DISK_STATUS}==${_TARGET_STATUS})"
            _STC=0
        fi
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}" "${_DISK_STATUS}" "${_TARGET_STATUS}"
        fi
    fi
done

# add dcli output to stdout log
print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
print "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"

return 0
}
# -----------------------------------------------------------------------------
function _show_usage
{
# Prints the help text of the check_exadata_cell_celldisks plugin on stdout.
#   $1 : plugin name
#   $2 : plugin version
#   $3 : path of the plugin configuration file
# Always returns 0.
# The parameter is spelled 'dcli_user' because that is the key the plugin
# reads from its configuration file.
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
excluded_disks=<list_of_cell_disks_to_exclude>
PURPOSE : Checks the status of cell disks on cell servers (via dcli)
dcli> cellcli -e 'LIST CELLDISK DETAIL'
Target attributes:
* Status: normal
CAVEAT : Requires a working dcli setup for the root user
LOG HEALTHY : Supported
EOT
return 0
}
#******************************************************************************
# END of script
#******************************************************************************

View File

@ -0,0 +1,308 @@
#!/usr/bin/env ksh
#******************************************************************************
# @(#) check_exadata_cell_flash.sh
#******************************************************************************
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# DOCUMENTATION (MAIN)
# -----------------------------------------------------------------------------
# @(#) MAIN: check_exadata_cell_flash
# DOES: see _show_usage()
# EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config(),
#           data_list_contains_string(), debug(), dump_logs(), exadata_exec_dcli(),
#           init_hc(), log(), log_hc(), warn()
#
# @(#) HISTORY:
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
# -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#******************************************************************************
# -----------------------------------------------------------------------------
function check_exadata_cell_flash
{
# Checks that the flash cache and/or flash log devices reported by the
# configured Exadata cell servers have the status held in _TARGET_STATUS.
#   ARGS     : comma-separated plugin arguments; only 'help' is handled here
#   CONFIG   : log_healthy, dcli_user (falls back to root), cell_servers
#              (mandatory), excluded_devices (optional), check_flashcache and
#              check_flashlog (each check runs only when set to yes)
#   RETURNS  : 0 when the check ran; 1 when the configuration file is not
#              readable, no cell servers are configured or no usable cell data
#              was collected
#   NOTE(review): _CELL_DATA and _CELL_ALL_RC are filled inside a 'while' loop
#              fed by a pipeline; this assumes a ksh that runs the last pipeline
#              stage in the current shell -- confirm when porting.
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
#celadm01: name: celadm01_FLASHCACHE
#celadm01: cellDisk: FD_00_celadm01,FD_01_#celadm01
#celadm01: creationTime: 2017-06-07T18:48:54+02:00
#celadm01: degradedCelldisks:
#celadm01: effectiveCacheSize: 5.821319580078125T
#celadm01: id: 42423718-e520-4d14-95df-cefc798f528f
#celadm01: size: 5.821319580078125T
#celadm01: status: normal
typeset _CELL_FLASHCACHE_COMMAND="cellcli -e 'LIST FLASHCACHE DETAIL'"
# cell query command -- DO NOT CHANGE --
#celadm01: name: celadm01_FLASHLOG
#celadm01: cellDisk: FD_00_celadm01,FD_01_#celadm01
#celadm01: creationTime: 2017-06-07T18:48:52+02:00
#celadm01: degradedCelldisks:
#celadm01: effectiveSize: 512M
#celadm01: efficiency: 99.37209135951484
#celadm01: id: 40de35b1-84c7-45db-82ec-9eea5f38b40b
#celadm01: size: 512M
#celadm01: status: normal
typeset _CELL_FLASHLOG_COMMAND="cellcli -e 'LIST FLASHLOG DETAIL'"
typeset _TARGET_STATUS="normal"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
# declared so the exclusion list stays local to this function
typeset _CFG_EXCLUDED_DEVICES=""
typeset _CFG_CHECK_FLASHCACHE=""
typeset _CHECK_FLASHCACHE=0
typeset _CFG_CHECK_FLASHLOG=""
typeset _CHECK_FLASHLOG=0
typeset _CELL_OUTPUT=""
typeset _CELL_DATA=""
# target of the 'read' in the check loop; declared so it stays local
typeset _CELL_SERVER=""
typeset _FLASH_DEVICE=""
typeset _FLASH_STATUS=""
typeset _CELL_ALL_RC=0
typeset _CELL_RC=0

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
_CFG_EXCLUDED_DEVICES=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_devices')
if [[ -n "${_CFG_EXCLUDED_DEVICES}" ]]
then
    log "excluding following flash devices from the check: ${_CFG_EXCLUDED_DEVICES}"
fi
# both flash checks are opt-in: anything but yes/YES/Yes disables them
_CFG_CHECK_FLASHCACHE=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_flashcache')
case "${_CFG_CHECK_FLASHCACHE}" in
    yes|YES|Yes)
        _CHECK_FLASHCACHE=1
        ;;
    *)
        _CHECK_FLASHCACHE=0
        ;;
esac
(( _CHECK_FLASHCACHE > 0 )) || log "checking flash cache has been disabled"
_CFG_CHECK_FLASHLOG=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_flashlog')
case "${_CFG_CHECK_FLASHLOG}" in
    yes|YES|Yes)
        _CHECK_FLASHLOG=1
        ;;
    *)
        _CHECK_FLASHLOG=0
        ;;
esac
(( _CHECK_FLASHLOG > 0 )) || log "checking flash log has been disabled"
# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    # flash cache
    if (( _CHECK_FLASHCACHE > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "executing remote cell script (flash cache) on ${_CFG_CELL_SERVER}"
        _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_FLASHCACHE_COMMAND}" 2>>"${HC_STDERR_LOG}")
        _CELL_RC=$?
        if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
        then
            _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
            warn "unable to discover cell data on ${_CFG_CELL_SERVER} (flash cache)"
            (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
            # skips the flash log query of this cell server as well
            continue
        else
            # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
            # shellcheck disable=SC1117
            _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
        fi
    fi
    # flash log
    if (( _CHECK_FLASHLOG > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "executing remote cell script (flash log) on ${_CFG_CELL_SERVER}"
        _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_FLASHLOG_COMMAND}" 2>>"${HC_STDERR_LOG}")
        _CELL_RC=$?
        if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
        then
            _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
            warn "unable to discover cell data on ${_CFG_CELL_SERVER} (flash log)"
            (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
            continue
        else
            # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
            # shellcheck disable=SC1117
            _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
        fi
    fi
done

# validate cell data
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
then
    _MSG="did not discover cell data or one of the discoveries failed"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 1
fi

# perform checks on cell data: awk emits one 'server|device|status' record per
# flash device once both its name and its status have been seen
print -R "${_CELL_DATA}" | awk '
    BEGIN { found = 0; flash_device = ""; flash_status = ""; }

    {
        # split cell data line
        split ($0, cell_line, ":");
        if ( cell_line[2] ~ /name/ ) {
            found = 1;
            flash_device = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", flash_device);
        }
        if ( cell_line[2] ~ /status/ ) {
            flash_status = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", flash_status);
        };
        if ( flash_device != "" && flash_status != "" && found ) {
            printf "%s|%s|%s\n", cell_line[1], flash_device, flash_status
            found = 0; flash_device = ""; flash_status = "";
        }
    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _FLASH_DEVICE _FLASH_STATUS
do
    # check exclusion list
    # NOTE(review): a non-zero return code is treated here as "device is in the
    # exclusion list" -- confirm against the helper
    data_list_contains_string "${_CFG_EXCLUDED_DEVICES}" "${_FLASH_DEVICE}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "ignoring flash device ${_FLASH_DEVICE}"
    else
        if [[ "${_FLASH_STATUS}" != "${_TARGET_STATUS}" ]]
        then
            _MSG="status of flash device ${_CELL_SERVER}:/${_FLASH_DEVICE} is NOK (${_FLASH_STATUS}!=${_TARGET_STATUS})"
            _STC=1
        else
            _MSG="status of flash device ${_CELL_SERVER}:/${_FLASH_DEVICE} is OK (${_FLASH_STATUS}==${_TARGET_STATUS})"
            _STC=0
        fi
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}" "${_FLASH_STATUS}" "${_TARGET_STATUS}"
        fi
    fi
done

# add dcli output to stdout log: _CELL_DATA holds the combined output of all
# enabled queries, so name every query that ran but write the data only once
if (( _CHECK_FLASHCACHE > 0 ))
then
    print "==== {dcli ${_CELL_FLASHCACHE_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
fi
if (( _CHECK_FLASHLOG > 0 ))
then
    print "==== {dcli ${_CELL_FLASHLOG_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
fi
if (( _CHECK_FLASHCACHE > 0 || _CHECK_FLASHLOG > 0 ))
then
    print "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"
fi

return 0
}
# -----------------------------------------------------------------------------
function _show_usage
{
# Prints the help text of the check_exadata_cell_flash plugin on stdout.
#   $1 : plugin name
#   $2 : plugin version
#   $3 : path of the plugin configuration file
# Always returns 0.
# The parameter is spelled 'dcli_user' because that is the key the plugin
# reads from its configuration file; the listed commands are the DETAIL
# queries the plugin actually runs.
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
excluded_devices=<list_of_flash_devices_to_exclude>
check_flashcache=<yes|no>
check_flashlog=<yes|no>
PURPOSE : Checks the status of the flash cache/log devices on cell servers (via dcli)
dcli> cellcli -e 'LIST FLASHCACHE DETAIL'
dcli> cellcli -e 'LIST FLASHLOG DETAIL'
Target attributes:
* Flash cache: normal [optional]
* Flash log: normal [optional]
LOG HEALTHY : Supported
EOT
return 0
}
#******************************************************************************
# END of script
#******************************************************************************

View File

@ -0,0 +1,288 @@
#!/usr/bin/env ksh
#******************************************************************************
# @(#) check_exadata_cell_griddisks.sh
#******************************************************************************
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# DOCUMENTATION (MAIN)
# -----------------------------------------------------------------------------
# @(#) MAIN: check_exadata_cell_griddisks
# DOES: see _show_usage()
# EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config(),
#           data_list_contains_string(), debug(), dump_logs(), exadata_exec_dcli(),
#           init_hc(), log(), log_hc(), warn()
#
# @(#) HISTORY:
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
# -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#******************************************************************************
# -----------------------------------------------------------------------------
function check_exadata_cell_griddisks
{
# Checks that every grid disk reported by the configured Exadata cell servers
# has the status held in _TARGET_STATUS and, unless disabled, an error count
# that does not exceed _TARGET_ERRORS.
#   ARGS     : comma-separated plugin arguments; only 'help' is handled here
#   CONFIG   : log_healthy, dcli_user (falls back to root), cell_servers
#              (mandatory), excluded_disks (optional), check_errorcount
#              (enabled unless set to no)
#   RETURNS  : 0 when the check ran; 1 when the configuration file is not
#              readable, no cell servers are configured or no usable cell data
#              was collected
#   NOTE(review): _CELL_DATA and _CELL_ALL_RC are filled inside a 'while' loop
#              fed by a pipeline; this assumes a ksh that runs the last pipeline
#              stage in the current shell -- confirm when porting.
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
#celadm01: name: RECOC5_CD_03_celadm01
#celadm01: asmDiskGroupName: RECOC5
#celadm01: asmDiskName: RECOC5_CD_03_CELADM01
#celadm01: asmFailGroupName: CELADM01
#celadm01: availableTo:
#celadm01: cachedBy:
#celadm01: cachingPolicy: none
#celadm01: cellDisk: CD_03_celadm01
#celadm01: comment: "Cluster DB diskgroup RECOC5"
#celadm01: creationTime: 2017-09-14T16:21:31+02:00
#celadm01: diskType: HardDisk
#celadm01: errorCount: 0
#celadm01: id: 67c31489-1ab4-4649-85f5-1e65ef9af213
#celadm01: size: 118G
#celadm01: status: active
typeset _CELL_COMMAND="cellcli -e 'LIST GRIDDISK DETAIL'"
typeset _TARGET_STATUS="active"
typeset _TARGET_ERRORS=0
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_CHECK_ERRORCOUNT=""
typeset _CHECK_ERRORCOUNT=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
typeset _CFG_EXCLUDED_DISKS=""
typeset _CELL_OUTPUT=""
typeset _CELL_DATA=""
# target of the 'read' in the check loop; declared so it stays local
typeset _CELL_SERVER=""
typeset _GRID_DISK=""
typeset _DISK_ERRORS=0
typeset _DISK_STATUS=""
typeset _CELL_ALL_RC=0
typeset _CELL_RC=0

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
_CFG_EXCLUDED_DISKS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_disks')
if [[ -n "${_CFG_EXCLUDED_DISKS}" ]]
then
    log "excluding following grid disk(s) from the check: ${_CFG_EXCLUDED_DISKS}"
fi
# the error count check is opt-out: only no/NO/No disables it
_CFG_CHECK_ERRORCOUNT=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_errorcount')
case "${_CFG_CHECK_ERRORCOUNT}" in
    no|NO|No)
        _CHECK_ERRORCOUNT=0
        ;;
    *)
        _CHECK_ERRORCOUNT=1
        ;;
esac
(( _CHECK_ERRORCOUNT > 0 )) || log "checking errorCount has been disabled"
# log_healthy
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
    _CELL_RC=$?
    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
    then
        # remember the failure; it is evaluated once all servers were queried
        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        continue
    else
        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
        # shellcheck disable=SC1117
        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
    fi
done

# validate cell data
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
then
    _MSG="did not discover cell data or one of the discoveries failed"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 1
fi

# perform checks on cell data: awk emits one 'server|disk|errors|status' record
# per grid disk once both its name and its status have been seen
print -R "${_CELL_DATA}" | awk '
    BEGIN { found = 0; grid_disk = ""; disk_errors = 0; disk_status = ""; }

    {
        # split cell data line
        split ($0, cell_line, ":");
        if ( cell_line[2] ~ /name/ ) {
            found = 1;
            grid_disk = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", grid_disk);
        }
        if ( cell_line[2] ~ /errorCount/ ) {
            disk_errors = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", disk_errors);
        };
        if ( cell_line[2] ~ /status/ ) {
            disk_status = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", disk_status);
        };
        if ( grid_disk != "" && disk_status != "" && found ) {
            printf "%s|%s|%s|%s\n", cell_line[1], grid_disk, disk_errors, disk_status
            found = 0; grid_disk = ""; disk_errors = 0; disk_status = "";
        }
    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _GRID_DISK _DISK_ERRORS _DISK_STATUS
do
    # check exclusion list
    # NOTE(review): a non-zero return code is treated here as "disk is in the
    # exclusion list" -- confirm against the helper
    data_list_contains_string "${_CFG_EXCLUDED_DISKS}" "${_GRID_DISK}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "ignoring grid disk ${_GRID_DISK}"
    else
        # errorCount
        if (( _CHECK_ERRORCOUNT > 0 ))
        then
            # compare against the configured threshold (_TARGET_ERRORS)
            if (( _DISK_ERRORS > _TARGET_ERRORS ))
            then
                _MSG="error count of grid disk ${_CELL_SERVER}:/${_GRID_DISK} is NOK (${_DISK_ERRORS}!=${_TARGET_ERRORS})"
                _STC=1
            else
                _MSG="error count of grid disk ${_CELL_SERVER}:/${_GRID_DISK} is OK (${_DISK_ERRORS}==${_TARGET_ERRORS})"
                _STC=0
            fi
            if (( _LOG_HEALTHY > 0 || _STC > 0 ))
            then
                log_hc "$0" ${_STC} "${_MSG}" "${_DISK_ERRORS}" "${_TARGET_ERRORS}"
            fi
        fi
        # status
        if [[ "${_DISK_STATUS}" != "${_TARGET_STATUS}" ]]
        then
            _MSG="status of grid disk ${_CELL_SERVER}:/${_GRID_DISK} is NOK (${_DISK_STATUS}!=${_TARGET_STATUS})"
            _STC=1
        else
            _MSG="status of grid disk ${_CELL_SERVER}:/${_GRID_DISK} is OK (${_DISK_STATUS}==${_TARGET_STATUS})"
            _STC=0
        fi
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}" "${_DISK_STATUS}" "${_TARGET_STATUS}"
        fi
    fi
done

# add dcli output to stdout log
print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
print "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"

return 0
}
# -----------------------------------------------------------------------------
# @(#) FUNCTION: _show_usage()
# DOES: writes the help text of the grid disks plugin to stdout
# EXPECTS: $1=plugin name; $2=plugin version; $3=path of the configuration file
# RETURNS: 0
function _show_usage
{
# the here-document delimiter is unquoted, so $1, $2 and $3 are expanded
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
excluded_disks=<list_of_grid_disks_to_exclude>
check_errorcount=<yes|no>
PURPOSE : Checks the status of grid disks on cell servers (via dcli)
dcli> cellcli -e 'LIST GRIDDISK DETAIL'
Target attributes:
* Status: normal
* Error count: 0 [optional]
LOG HEALTHY : Supported
EOT
return 0
}
#******************************************************************************
# END of script
#******************************************************************************

View File

@ -0,0 +1,241 @@
#!/usr/bin/env ksh
#******************************************************************************
# @(#) check_exadata_cell_luns.sh
#******************************************************************************
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# DOCUMENTATION (MAIN)
# -----------------------------------------------------------------------------
# @(#) MAIN: check_exadata_cell_luns
# DOES: see _show_usage()
# EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config(),
# data_list_contains_string(), debug(), dump_logs(), exadata_exec_dcli(),
# init_hc(), log(), log_hc(), warn()
#
# @(#) HISTORY:
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
# -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#******************************************************************************
# -----------------------------------------------------------------------------
# @(#) FUNCTION: check_exadata_cell_luns()
# DOES: health check plugin (user documentation: see _show_usage()).
#       Hands the cell query command to exadata_exec_dcli() for every
#       configured cell server, extracts the name/status attribute pairs from
#       the collected output and reports through log_hc() whether each LUN
#       that is not excluded has the target status.
# EXPECTS: plugin arguments as one comma-separated list ("help" shows usage)
# RETURNS: 0=checks were performed (or usage was shown)
#          1=configuration file unreadable, no cell servers configured or
#            the discovery of cell data failed
# NOTE: the while loops below are fed by a pipeline yet set variables that are
#       read after the loop; this depends on the shell running the last element
#       of a pipeline in the current shell environment (as ksh93 does).
function check_exadata_cell_luns
{
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
# sample of the output that gets parsed (cell server name is the prefix):
#celadm01: name: 0_2
#celadm01: deviceName: /dev/sdc
#celadm01: diskType: HardDisk
#celadm01: id: 0_2
#celadm01: isSystemLun: FALSE
#celadm01: lunSize: 7.1522655487060546875T
#celadm01: lunUID: 0_2
#celadm01: physicalDrives: 8:2
#celadm01: raidLevel: 0
#celadm01: lunWriteCacheMode: "WriteBack, ReadAheadNone, Direct, No Write Cache if Bad BBU"
#celadm01: status: normal
typeset _CELL_COMMAND="cellcli -e 'LIST LUN DETAIL'"
typeset _TARGET_STATUS="normal"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
typeset _CFG_EXCLUDED_LUNS=""
typeset _CELL_OUTPUT=""
typeset _CELL_DATA=""
# read target of the check loop; declared so it does not leak out of the plugin
typeset _CELL_SERVER=""
typeset _LUN=""
typeset _LUN_STATUS=""
typeset _CELL_ALL_RC=0
typeset _CELL_RC=0

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    # no user configured: fall back to root
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
_CFG_EXCLUDED_LUNS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_luns')
[[ -n "${_CFG_EXCLUDED_LUNS}" ]] && log "excluding following LUNs from the check: ${_CFG_EXCLUDED_LUNS}"

# log_healthy (command-line setting wins over the configuration file)
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
    _CELL_RC=$?
    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
    then
        # empty output with a zero return code is a failed discovery as well:
        # make sure it is counted so the validation below can catch it
        (( _CELL_RC > 0 )) || _CELL_RC=1
        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        continue
    else
        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
        # shellcheck disable=SC1117
        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
    fi
done

# validate cell data
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
then
    _MSG="did not discover cell data or one of the discoveries failed"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 1
fi

# perform checks on cell data: awk turns each name/status attribute pair into
# one "cell_server|lun|status" record for the shell loop
print -R "${_CELL_DATA}" | awk '
    BEGIN { found = 0; lun = ""; lun_status = ""; }

    {
        # split cell data line (1=cell server, 2=attribute, 3=value)
        split ($0, cell_line, ":");
        if ( cell_line[2] ~ /name/ ) {
            found = 1;
            lun = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", lun);
        }
        if ( cell_line[2] ~ /status/ ) {
            lun_status = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", lun_status);
        };
        # emit the record once both attributes are known, then start over
        if ( lun != "" && lun_status != "" && found ) {
            printf "%s|%s|%s\n", cell_line[1], lun, lun_status
            found = 0; lun = ""; lun_status = "";
        }
    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _LUN _LUN_STATUS
do
    # check exclusion list (a non-zero return code is treated as "excluded")
    data_list_contains_string "${_CFG_EXCLUDED_LUNS}" "${_LUN}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "ignoring LUN ${_LUN}"
    else
        if [[ "${_LUN_STATUS}" != "${_TARGET_STATUS}" ]]
        then
            _MSG="status of LUN ${_CELL_SERVER}:/${_LUN} is NOK (${_LUN_STATUS}!=${_TARGET_STATUS})"
            _STC=1
        else
            _MSG="status of LUN ${_CELL_SERVER}:/${_LUN} is OK (${_LUN_STATUS}==${_TARGET_STATUS})"
            _STC=0
        fi
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}" "${_LUN_STATUS}" "${_TARGET_STATUS}"
        fi
    fi
done

# add dcli output to stdout log (-R: write the raw data, no escape processing)
print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
print -R "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"

return 0
}
# -----------------------------------------------------------------------------
# @(#) FUNCTION: _show_usage()
# DOES: writes the help text of the LUN plugin to stdout
# EXPECTS: $1=plugin name; $2=plugin version; $3=path of the configuration file
# RETURNS: 0
function _show_usage
{
# the here-document delimiter is unquoted, so $1, $2 and $3 are expanded
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
excluded_luns=<list_of_luns_to_exclude>
PURPOSE : Checks the status of LUNs on cell servers (via dcli)
dcli> cellcli -e 'LIST LUN DETAIL'
Target attributes:
* Status: normal
LOG HEALTHY : Supported
EOT
return 0
}
#******************************************************************************
# END of script
#******************************************************************************

View File

@ -0,0 +1,392 @@
#!/usr/bin/env ksh
#******************************************************************************
# @(#) check_exadata_cell_megaraid.sh
#******************************************************************************
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# DOCUMENTATION (MAIN)
# -----------------------------------------------------------------------------
# @(#) MAIN: check_exadata_cell_megaraid
# DOES: see _show_usage()
# EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config(),
# debug(), dump_logs(), exadata_exec_dcli(), init_hc(), log(), log_hc(), warn()
#
# @(#) HISTORY:
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
# -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#******************************************************************************
# -----------------------------------------------------------------------------
# @(#) FUNCTION: check_exadata_cell_megaraid()
# DOES: health check plugin (user documentation: see _show_usage()).
#       Hands the MegaCli summary command to exadata_exec_dcli() for every
#       configured cell server, extracts controller, BBU, physical device and
#       virtual device states from the collected output and reports through
#       log_hc() whether each state equals its target value.
# EXPECTS: plugin arguments as one comma-separated list ("help" shows usage)
# RETURNS: 0=checks were performed (or usage was shown)
#          1=configuration file unreadable, no cell servers configured or
#            the discovery of cell data failed
# NOTE: the while loops below are fed by a pipeline yet set variables that are
#       read after the loop; this depends on the shell running the last element
#       of a pipeline in the current shell environment (as ksh93 does).
function check_exadata_cell_megaraid
{
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
typeset _CELL_COMMAND="/opt/MegaRAID/MegaCli/MegaCli64 -ShowSummary -aALL"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
typeset _CFG_CHECK_CONTROLLER=""
typeset _CHECK_CONTROLLER=0
typeset _CFG_CHECK_BBU=""
typeset _CHECK_BBU=0
typeset _CFG_CHECK_PHYSICAL=""
typeset _CHECK_PHYSICAL=0
typeset _CFG_CHECK_VIRTUAL=""
typeset _CHECK_VIRTUAL=0
typeset _CELL_OUTPUT=""
typeset _CELL_DATA=""
# read target and per-device target of the check loop; declared so they do not
# leak out of the plugin
typeset _CELL_SERVER=""
typeset _TARGET_STATUS=""
typeset _RAID_DEVICE=""
typeset _RAID_DEVICE_TYPE=""
typeset _RAID_STATUS=""
typeset _CELL_ALL_RC=0
typeset _CELL_RC=0

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    # no user configured: fall back to root
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
# the four device checks are enabled unless explicitly set to no
_CFG_CHECK_CONTROLLER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_controller')
case "${_CFG_CHECK_CONTROLLER}" in
    no|NO|No)
        _CHECK_CONTROLLER=0
        ;;
    *)
        _CHECK_CONTROLLER=1
        ;;
esac
(( _CHECK_CONTROLLER > 0 )) || log "checking controller has been disabled"
_CFG_CHECK_BBU=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_bbu')
case "${_CFG_CHECK_BBU}" in
    no|NO|No)
        _CHECK_BBU=0
        ;;
    *)
        _CHECK_BBU=1
        ;;
esac
(( _CHECK_BBU > 0 )) || log "checking bbu has been disabled"
_CFG_CHECK_PHYSICAL=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_physical')
case "${_CFG_CHECK_PHYSICAL}" in
    no|NO|No)
        _CHECK_PHYSICAL=0
        ;;
    *)
        _CHECK_PHYSICAL=1
        ;;
esac
(( _CHECK_PHYSICAL > 0 )) || log "checking physical has been disabled"
_CFG_CHECK_VIRTUAL=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_virtual')
case "${_CFG_CHECK_VIRTUAL}" in
    no|NO|No)
        _CHECK_VIRTUAL=0
        ;;
    *)
        _CHECK_VIRTUAL=1
        ;;
esac
(( _CHECK_VIRTUAL > 0 )) || log "checking virtual has been disabled"

# log_healthy (command-line setting wins over the configuration file)
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
    _CELL_RC=$?
    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
    then
        # empty output with a zero return code is a failed discovery as well:
        # make sure it is counted so the validation below can catch it
        (( _CELL_RC > 0 )) || _CELL_RC=1
        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        continue
    else
        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
        # shellcheck disable=SC1117
        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
    fi
done

# validate cell data
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
then
    _MSG="did not discover cell data or one of the discoveries failed"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 1
fi

# perform checks on cell data: awk emits one
# "cell_server|device_type|device|status" record per device for the shell loop
print -R "${_CELL_DATA}" | awk '
    BEGIN { found_controller = 0; controller_status = "";
            found_bbu = 0; bbu_status = "";
            found_physical = 0; physical_device = ""; physical_status = "";
            found_virtual = 0; virtual_device = ""; virtual_status = "";
            status = "";
    }

    {
        # split cell data line (1=cell server, 2=attribute, 3..=value parts)
        split ($0, cell_line, ":");
        # find markers
        if ( cell_line[2] ~ /Controller/ ) {
            found_controller = 1;
        }
        if ( cell_line[2] ~ /BBU/ ) {
            found_bbu = 1;
        }
        if ( cell_line[2] ~ /Connector/ ) {
            found_physical = 1;
            # the device is the part after the second colon of the value
            physical_device = cell_line[4];
            # strip leading spaces
            gsub (/^[[:space:]]*/, "", physical_device);
        }
        if ( cell_line[2] ~ /Virtual drive/ ) {
            found_virtual = 1;
            virtual_device = cell_line[3];
            # strip leading spaces
            gsub (/^[[:space:]]*/, "", virtual_device);
        }
        # find attributes
        if ( cell_line[2] ~ /Status/ ) {
            status = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", status);
            if (found_controller > 0 ) { controller_status = status }
            if (found_bbu > 0 ) {
                # delete the PITA "PD" string
                gsub (/[[:space:]]*PD[[:space:]]*/, "", status);
                bbu_status = status;
            }
        };
        if ( cell_line[2] ~ /State/ ) {
            status = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", status);
            if (found_physical > 0 ) { physical_status = status }
            if (found_virtual > 0 ) { virtual_status = status }
        };
        # report results (and reset the state of the reported device type)
        if ( controller_status != "" && found_controller ) {
            printf "%s|%s|%s|%s\n", cell_line[1], "CONTROLLER", "", controller_status
            found_controller = 0; controller_status = ""; status = "";
        }
        if ( bbu_status != "" && found_bbu ) {
            printf "%s|%s|%s|%s\n", cell_line[1], "BBU", "", bbu_status
            found_bbu = 0; bbu_status = ""; status = "";
        }
        if ( physical_device != "" && physical_status != "" && found_physical ) {
            printf "%s|%s|%s|%s\n", cell_line[1], "PHYSICAL", physical_device, physical_status
            found_physical = 0; physical_device = ""; physical_status = ""; status = "";
        }
        if ( virtual_device != "" && virtual_status != "" && found_virtual ) {
            printf "%s|%s|%s|%s\n", cell_line[1], "VIRTUAL", virtual_device, virtual_status
            found_virtual = 0; virtual_device = ""; virtual_status = ""; status = "";
        }
    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _RAID_DEVICE_TYPE _RAID_DEVICE _RAID_STATUS
do
    case "${_RAID_DEVICE_TYPE}" in
        CONTROLLER)
            if (( _CHECK_CONTROLLER > 0 ))
            then
                _TARGET_STATUS="Optimal"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of controller on ${_CELL_SERVER} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of controller on ${_CELL_SERVER} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for controller (disabled)"
            fi
            ;;
        BBU)
            if (( _CHECK_BBU > 0 ))
            then
                _TARGET_STATUS="Healthy"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of bbu on ${_CELL_SERVER} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of bbu on ${_CELL_SERVER} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for bbu (disabled)"
            fi
            ;;
        PHYSICAL)
            if (( _CHECK_PHYSICAL > 0 ))
            then
                _TARGET_STATUS="Online"
                # OK and NOK messages share the same "<cell>:/<device>" wording
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of physical device ${_CELL_SERVER}:/${_RAID_DEVICE} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of physical device ${_CELL_SERVER}:/${_RAID_DEVICE} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for physical device [${_CELL_SERVER}:/${_RAID_DEVICE}] (disabled)"
            fi
            ;;
        VIRTUAL)
            if (( _CHECK_VIRTUAL > 0 ))
            then
                _TARGET_STATUS="Optimal"
                # OK and NOK messages share the same "<cell>:/<device>" wording
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of virtual device ${_CELL_SERVER}:/${_RAID_DEVICE} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of virtual device ${_CELL_SERVER}:/${_RAID_DEVICE} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for virtual device [${_CELL_SERVER}:/${_RAID_DEVICE}] (disabled)"
            fi
            ;;
    esac
done

# add dcli output to stdout log (-R: write the raw data, no escape processing)
print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
print -R "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"

return 0
}
# -----------------------------------------------------------------------------
# @(#) FUNCTION: _show_usage()
# DOES: writes the help text of the cell MegaRAID plugin to stdout
# EXPECTS: $1=plugin name; $2=plugin version; $3=path of the configuration file
# RETURNS: 0
function _show_usage
{
# the here-document delimiter is unquoted, so $1, $2 and $3 are expanded
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
check_controller=<yes|no>
check_bbu=<yes|no>
check_physical=<yes|no>
check_virtual=<yes|no>
PURPOSE : Checks the status of MegaRAID device(s) on cell servers (via dcli)
dcli> /opt/MegaRAID/MegaCli/MegaCli64 -ShowSummary -aALL
Target attributes:
* Controller: Optimal [optional]
* BBU: Healthy [optional]
* Physical devices: Online [optional]
* Virtual devices: Optimal [optional]
CAVEAT : Requires a working dcli setup for the root user
LOG HEALTHY : Supported
EOT
return 0
}
#******************************************************************************
# END of script
#******************************************************************************

View File

@ -0,0 +1,252 @@
#!/usr/bin/env ksh
#******************************************************************************
# @(#) check_exadata_cell_physicaldisks.sh
#******************************************************************************
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# DOCUMENTATION (MAIN)
# -----------------------------------------------------------------------------
# @(#) MAIN: check_exadata_cell_physicaldisks
# DOES: see _show_usage()
# EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config(),
# data_list_contains_string(), debug(), dump_logs(), exadata_exec_dcli(),
# init_hc(), log(), log_hc(), warn()
#
# @(#) HISTORY:
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
# -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#******************************************************************************
# -----------------------------------------------------------------------------
# @(#) FUNCTION: check_exadata_cell_physicaldisks()
# DOES: health check plugin (user documentation: see _show_usage()).
#       Hands the cell query command to exadata_exec_dcli() for every
#       configured cell server, extracts the name/status attribute pairs from
#       the collected output and reports through log_hc() whether each
#       physical disk that is not excluded has the target status.
# EXPECTS: plugin arguments as one comma-separated list ("help" shows usage)
# RETURNS: 0=checks were performed (or usage was shown)
#          1=configuration file unreadable, no cell servers configured or
#            the discovery of cell data failed
# NOTE: the while loops below are fed by a pipeline yet set variables that are
#       read after the loop; this depends on the shell running the last element
#       of a pipeline in the current shell environment (as ksh93 does).
function check_exadata_cell_physicaldisks
{
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
# cell query command -- DO NOT CHANGE --
# sample of the output that gets parsed (cell server name is the prefix):
#celadm01: name: 8:5
#celadm01: deviceId: 12
#celadm01: deviceName: /dev/sdf
#celadm01: diskType: HardDisk
#celadm01: enclosureDeviceId: 8
#celadm01: errOtherCount: 0
#celadm01: luns: 0_5
#celadm01: makeModel: "HGST H7280A520SUN8.0T"
#celadm01: physicalFirmware: PD51
#celadm01: physicalInsertTime: 2017-06-07T14:24:51+02:00
#celadm01: physicalInterface: sas
#celadm01: physicalSerial: P9MG6V
#celadm01: physicalSize: 7.1536639072000980377197265625T
#celadm01: slotNumber: 5
#celadm01: status: normal
typeset _CELL_COMMAND="cellcli -e 'LIST PHYSICALDISK DETAIL'"
typeset _TARGET_STATUS="normal"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_DCLI_USER=""
typeset _CFG_CELL_SERVERS=""
typeset _CFG_CELL_SERVER=""
typeset _CFG_EXCLUDED_DISKS=""
typeset _CELL_OUTPUT=""
typeset _CELL_DATA=""
# read target of the check loop; declared so it does not leak out of the plugin
typeset _CELL_SERVER=""
typeset _PHYSICAL_DISK=""
typeset _DISK_STATUS=""
typeset _CELL_ALL_RC=0
typeset _CELL_RC=0

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r ${_CONFIG_FILE} ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi
# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
_CFG_DCLI_USER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'dcli_user')
if [[ -z "${_CFG_DCLI_USER}" ]]
then
    # no user configured: fall back to root
    _CFG_DCLI_USER="root"
    log "will use DCLI user ${_CFG_DCLI_USER}"
fi
_CFG_CELL_SERVERS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'cell_servers')
if [[ -z "${_CFG_CELL_SERVERS}" ]]
then
    warn "no cell servers specified in configuration file at ${_CONFIG_FILE}"
    return 1
fi
_CFG_EXCLUDED_DISKS=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'excluded_disks')
if [[ -n "${_CFG_EXCLUDED_DISKS}" ]]
then
    log "excluding following physical disk(s) from the check: ${_CFG_EXCLUDED_DISKS}"
fi

# log_healthy (command-line setting wins over the configuration file)
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# gather cell data (serialized way to have better control of output & errors)
data_comma2newline "${_CFG_CELL_SERVERS}" | while read -r _CFG_CELL_SERVER
do
    (( ARG_DEBUG > 0 )) && debug "executing remote cell script on ${_CFG_CELL_SERVER}"
    _CELL_OUTPUT=$(exadata_exec_dcli "" "${_CFG_DCLI_USER}" "${_CFG_CELL_SERVER}" "" "${_CELL_COMMAND}" 2>>"${HC_STDERR_LOG}")
    _CELL_RC=$?
    if (( _CELL_RC > 0 )) || [[ -z "${_CELL_OUTPUT}" ]]
    then
        # empty output with a zero return code is a failed discovery as well:
        # make sure it is counted so the validation below can catch it
        (( _CELL_RC > 0 )) || _CELL_RC=1
        _CELL_ALL_RC=$(( _CELL_ALL_RC + _CELL_RC ))
        warn "unable to discover cell data on ${_CFG_CELL_SERVER}"
        (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
        continue
    else
        # _CELL_OUTPUT is always prefixed by cell server name, so no mangling needed
        # shellcheck disable=SC1117
        _CELL_DATA=$(printf "%s\n%s\n" "${_CELL_DATA}" "${_CELL_OUTPUT}")
    fi
done

# validate cell data
if (( _CELL_ALL_RC > 0 )) || [[ -z "${_CELL_DATA}" ]]
then
    _MSG="did not discover cell data or one of the discoveries failed"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    return 1
fi

# perform checks on cell data: awk turns each name/status attribute pair into
# one "cell_server|physical_disk|status" record for the shell loop
print -R "${_CELL_DATA}" | awk '
    BEGIN { found = 0; physical_disk = ""; disk_status = ""; }

    {
        # split cell data line (1=cell server, 2=attribute, 3..=value parts)
        split ($0, cell_line, ":");
        if ( cell_line[2] ~ /name/ ) {
            found = 1;
            physical_disk = cell_line[3];
            # a disk name such as 8:5 contains the separator: glue it back
            if (cell_line[4] != "") {
                physical_disk = physical_disk ":" cell_line[4];
            }
            # strip spaces
            gsub (/[[:space:]]/, "", physical_disk);
        }
        if ( cell_line[2] ~ /status/ ) {
            disk_status = cell_line[3];
            # strip spaces
            gsub (/[[:space:]]/, "", disk_status);
        };
        # emit the record once both attributes are known, then start over
        if ( physical_disk != "" && disk_status != "" && found ) {
            printf "%s|%s|%s\n", cell_line[1], physical_disk, disk_status
            found = 0; physical_disk = ""; disk_status = "";
        }
    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _CELL_SERVER _PHYSICAL_DISK _DISK_STATUS
do
    # check exclusion list (a non-zero return code is treated as "excluded")
    data_list_contains_string "${_CFG_EXCLUDED_DISKS}" "${_PHYSICAL_DISK}"
    # shellcheck disable=SC2181
    if (( $? > 0 ))
    then
        (( ARG_DEBUG > 0 )) && debug "ignoring physical disk ${_PHYSICAL_DISK}"
    else
        # status
        if [[ "${_DISK_STATUS}" != "${_TARGET_STATUS}" ]]
        then
            _MSG="status of physical disk ${_CELL_SERVER}:/${_PHYSICAL_DISK} is NOK (${_DISK_STATUS}!=${_TARGET_STATUS})"
            _STC=1
        else
            _MSG="status of physical disk ${_CELL_SERVER}:/${_PHYSICAL_DISK} is OK (${_DISK_STATUS}==${_TARGET_STATUS})"
            _STC=0
        fi
        if (( _LOG_HEALTHY > 0 || _STC > 0 ))
        then
            log_hc "$0" ${_STC} "${_MSG}" "${_DISK_STATUS}" "${_TARGET_STATUS}"
        fi
    fi
done

# add dcli output to stdout log (-R: write the raw data, no escape processing)
print "==== {dcli ${_CELL_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
print -R "${_CELL_DATA}" >>"${HC_STDOUT_LOG}"

return 0
}
# -----------------------------------------------------------------------------
# @(#) FUNCTION: _show_usage()
# DOES: writes the help text of the physical disks plugin to stdout
# EXPECTS: $1=plugin name; $2=plugin version; $3=path of the configuration file
# RETURNS: 0
function _show_usage
{
# the here-document delimiter is unquoted, so $1, $2 and $3 are expanded
cat <<- EOT
NAME : $1
VERSION : $2
CONFIG : $3 with parameters:
log_healthy=<yes|no>
dcli_user=<dcli_user_account>
cell_servers=<list_of_cell_servers>
excluded_disks=<list_of_physical_disks_to_exclude>
PURPOSE : Checks the status of physical disks on cell servers (via dcli)
dcli> cellcli -e 'LIST PHYSICALDISK DETAIL'
Target attributes:
* Status: normal
LOG HEALTHY : Supported
EOT
return 0
}
#******************************************************************************
# END of script
#******************************************************************************

View File

@ -0,0 +1,365 @@
#!/usr/bin/env ksh
#******************************************************************************
# @(#) check_exadata_megaraid.sh
#******************************************************************************
# @(#) Copyright (C) 2019 by KUDOS BVBA (info@kudos.be). All rights reserved.
#
# This program is a free software; you can redistribute it and/or modify
# it under the same terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#******************************************************************************
#
# DOCUMENTATION (MAIN)
# -----------------------------------------------------------------------------
# @(#) MAIN: check_exadata_megaraid
# DOES: see _show_usage()
# EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), data_comma2newline(), data_get_lvalue_from_config,
# dump_logs(), init_hc(), log_hc(), warn()
#
# @(#) HISTORY:
# @(#) 2019-05-14: initial version [Patrick Van der Veken]
# -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#******************************************************************************
# -----------------------------------------------------------------------------
function check_exadata_megaraid
{
# -----------------------------------------------------------------------------
# Health check plugin: runs MegaCLI on the local host and reports the state of
# the MegaRAID controller, BBU, physical devices and virtual devices.
# ARGUMENTS: $* - comma-separated plugin arguments; only 'help' is handled
#                 (shows the usage text and returns)
# GLOBALS  : reads CONFIG_DIR, ARG_CONFIG_FILE, ARG_DEBUG, ARG_DEBUG_LEVEL,
#            DEBUG_OPTS, ARG_LOG, ARG_LOG_HEALTHY, HC_STDERR_LOG, HC_STDOUT_LOG
# OUTPUTS  : check results through log_hc(); raw MegaCLI output is appended to
#            HC_STDOUT_LOG
# RETURNS  : 0 when the checks were executed (or usage was shown),
#            1 when the configuration file is unreadable, MegaCLI is missing
#              or the MegaCLI query failed/returned nothing
# -----------------------------------------------------------------------------
# ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-05-14"                           # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux"                    # uname -s match
typeset _MEGACLI_BIN="/opt/MegaRAID/MegaCli/MegaCli64"
typeset _MEGACLI_COMMAND="-ShowSummary -aALL"
# ------------------------- CONFIGURATION ends here ---------------------------

# set defaults
(( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && set ${DEBUG_OPTS}
init_hc "$0" "${_SUPPORTED_PLATFORMS}" "${_VERSION}"
typeset _ARGS=$(data_comma2space "$*")
typeset _ARG=""
typeset _MSG=""
typeset _STC=0
typeset _CFG_HEALTHY=""
typeset _LOG_HEALTHY=0
typeset _CFG_CHECK_CONTROLLER=""
typeset _CHECK_CONTROLLER=0
typeset _CFG_CHECK_BBU=""
typeset _CHECK_BBU=0
typeset _CFG_CHECK_PHYSICAL=""
typeset _CHECK_PHYSICAL=0
typeset _CFG_CHECK_VIRTUAL=""
typeset _CHECK_VIRTUAL=0
typeset _CLI_OUTPUT=""
typeset _RAID_DEVICE=""
typeset _RAID_DEVICE_TYPE=""
typeset _RAID_STATUS=""
# expected state of the device being evaluated (kept local to this function)
typeset _TARGET_STATUS=""

# handle arguments (originally comma-separated)
for _ARG in ${_ARGS}
do
    case "${_ARG}" in
        help)
            _show_usage "$0" "${_VERSION}" "${_CONFIG_FILE}" && return 0
            ;;
    esac
done

# handle configuration file
[[ -n "${ARG_CONFIG_FILE}" ]] && _CONFIG_FILE="${ARG_CONFIG_FILE}"
if [[ ! -r "${_CONFIG_FILE}" ]]
then
    warn "unable to read configuration file at ${_CONFIG_FILE}"
    return 1
fi

# read configuration values
_CFG_HEALTHY=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'log_healthy')
case "${_CFG_HEALTHY}" in
    yes|YES|Yes)
        _LOG_HEALTHY=1
        ;;
    *)
        # do not override hc_arg
        (( _LOG_HEALTHY > 0 )) || _LOG_HEALTHY=0
        ;;
esac
# each check_* setting is enabled unless explicitly set to 'no'
_CFG_CHECK_CONTROLLER=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_controller')
case "${_CFG_CHECK_CONTROLLER}" in
    no|NO|No)
        _CHECK_CONTROLLER=0
        ;;
    *)
        _CHECK_CONTROLLER=1
        ;;
esac
(( _CHECK_CONTROLLER > 0 )) || log "checking controller has been disabled"
_CFG_CHECK_BBU=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_bbu')
case "${_CFG_CHECK_BBU}" in
    no|NO|No)
        _CHECK_BBU=0
        ;;
    *)
        _CHECK_BBU=1
        ;;
esac
(( _CHECK_BBU > 0 )) || log "checking bbu has been disabled"
_CFG_CHECK_PHYSICAL=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_physical')
case "${_CFG_CHECK_PHYSICAL}" in
    no|NO|No)
        _CHECK_PHYSICAL=0
        ;;
    *)
        _CHECK_PHYSICAL=1
        ;;
esac
(( _CHECK_PHYSICAL > 0 )) || log "checking physical has been disabled"
_CFG_CHECK_VIRTUAL=$(_CONFIG_FILE="${_CONFIG_FILE}" data_get_lvalue_from_config 'check_virtual')
case "${_CFG_CHECK_VIRTUAL}" in
    no|NO|No)
        _CHECK_VIRTUAL=0
        ;;
    *)
        _CHECK_VIRTUAL=1
        ;;
esac
(( _CHECK_VIRTUAL > 0 )) || log "checking virtual has been disabled"

# log_healthy (command-line setting wins over the configuration file)
(( ARG_LOG_HEALTHY > 0 )) && _LOG_HEALTHY=1
if (( _LOG_HEALTHY > 0 ))
then
    if (( ARG_LOG > 0 ))
    then
        log "logging/showing passed health checks"
    else
        log "showing passed health checks (but not logging)"
    fi
else
    log "not logging/showing passed health checks"
fi

# check megacli
if [[ -z "${_MEGACLI_BIN}" || ! -x "${_MEGACLI_BIN}" ]]
then
    warn "MegaCLI is not installed here. This is not an Exadata compute node?"
    return 1
fi

# gather MegaCLI data
(( ARG_DEBUG > 0 )) && debug "executing MegaCLI command"
# _MEGACLI_COMMAND holds several options and must be word-split, otherwise
# MegaCLI receives "-ShowSummary -aALL" as one single argument
# shellcheck disable=SC2086
_CLI_OUTPUT=$("${_MEGACLI_BIN}" ${_MEGACLI_COMMAND} 2>>"${HC_STDERR_LOG}")
# shellcheck disable=SC2181
if (( $? > 0 )) || [[ -z "${_CLI_OUTPUT}" ]]
then
    _MSG="unable to query MegaRAID controller"
    _STC=2
    if (( _LOG_HEALTHY > 0 || _STC > 0 ))
    then
        log_hc "$0" ${_STC} "${_MSG}"
    fi
    (( ARG_DEBUG > 0 && ARG_DEBUG_LEVEL > 0 )) && dump_logs
    return 1
fi

# perform checks on MegaCLI data: awk reduces the summary output to records of
# the form <TYPE>|<device>|<status> which are then evaluated one by one
print -R "${_CLI_OUTPUT}" | awk '
    BEGIN { found_controller = 0; controller_status = "";
            found_bbu = 0; bbu_status = "";
            found_physical = 0; physical_device = ""; physical_status = "";
            found_virtual = 0; virtual_device = ""; virtual_status = "";
            status = "";
    }

    {
        # split MegaCLI data line
        split ($0, cell_line, ":");

        # find markers (a marker opens a section, a later Status/State line closes it)
        if ( cell_line[1] ~ /Controller/ ) {
            found_controller = 1;
        }
        if ( cell_line[1] ~ /BBU/ ) {
            found_bbu = 1;
        }
        if ( cell_line[1] ~ /Connector/ ) {
            found_physical = 1;
            physical_device = cell_line[3];
            # strip leading & trailing spaces
            gsub (/^[[:space:]]*/, "", physical_device);
            gsub (/[[:space:]]*$/, "", physical_device);
        }
        if ( cell_line[1] ~ /Virtual drive/ ) {
            found_virtual = 1;
            virtual_device = cell_line[2];
            # strip leading spaces
            gsub (/^[[:space:]]*/, "", virtual_device);
        }

        # find attributes
        if ( cell_line[1] ~ /Status/ ) {
            status = cell_line[2];
            # strip spaces
            gsub (/[[:space:]]/, "", status);
            if (found_controller > 0 ) { controller_status = status }
            if (found_bbu > 0 ) {
                # delete the PITA "PD" string
                gsub (/[[:space:]]*PD[[:space:]]*/, "", status);
                bbu_status = status;
            }
        };
        if ( cell_line[1] ~ /State/ ) {
            status = cell_line[2];
            # strip spaces
            gsub (/[[:space:]]/, "", status);
            if (found_physical > 0 ) { physical_status = status }
            if (found_virtual > 0 ) { virtual_status = status }
        };

        # report results (and reset the section that was just reported)
        if ( controller_status != "" && found_controller ) {
            printf "%s|%s|%s\n", "CONTROLLER", "", controller_status
            found_controller = 0; controller_status = ""; status = "";
        }
        if ( bbu_status != "" && found_bbu ) {
            printf "%s|%s|%s\n", "BBU", "", bbu_status
            found_bbu = 0; bbu_status = ""; status = "";
        }
        if ( physical_device != "" && physical_status != "" && found_physical ) {
            printf "%s|%s|%s\n", "PHYSICAL", physical_device, physical_status
            found_physical = 0; physical_device = ""; physical_status = ""; status = "";
        }
        if ( virtual_device != "" && virtual_status != "" && found_virtual ) {
            printf "%s|%s|%s\n", "VIRTUAL", virtual_device, virtual_status
            found_virtual = 0; virtual_device = ""; virtual_status = ""; status = "";
        }
    }' 2>>"${HC_STDERR_LOG}" | while IFS='|' read -r _RAID_DEVICE_TYPE _RAID_DEVICE _RAID_STATUS
do
    case "${_RAID_DEVICE_TYPE}" in
        CONTROLLER)
            if (( _CHECK_CONTROLLER > 0 ))
            then
                _TARGET_STATUS="Optimal"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of controller is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of controller is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for controller (disabled)"
            fi
            ;;
        BBU)
            if (( _CHECK_BBU > 0 ))
            then
                _TARGET_STATUS="Healthy"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of bbu is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of bbu is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for bbu (disabled)"
            fi
            ;;
        PHYSICAL)
            if (( _CHECK_PHYSICAL > 0 ))
            then
                _TARGET_STATUS="Online"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of physical device ${_RAID_DEVICE} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of physical device on ${_RAID_DEVICE} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for physical device [${_RAID_DEVICE}] (disabled)"
            fi
            ;;
        VIRTUAL)
            if (( _CHECK_VIRTUAL > 0 ))
            then
                _TARGET_STATUS="Optimal"
                if [[ "${_RAID_STATUS}" != "${_TARGET_STATUS}" ]]
                then
                    _MSG="state of virtual device ${_RAID_DEVICE} is NOK (${_RAID_STATUS}!=${_TARGET_STATUS})"
                    _STC=1
                else
                    _MSG="state of virtual device on ${_RAID_DEVICE} is OK (${_RAID_STATUS}==${_TARGET_STATUS})"
                    _STC=0
                fi
                if (( _LOG_HEALTHY > 0 || _STC > 0 ))
                then
                    log_hc "$0" ${_STC} "${_MSG}" "${_RAID_STATUS}" "${_TARGET_STATUS}"
                fi
            else
                (( ARG_DEBUG > 0 )) && debug "skipping check for virtual device [${_RAID_DEVICE}] (disabled)"
            fi
            ;;
    esac
done

# add MegaCLI output to stdout log (raw, without escape interpretation)
print "==== {${_MEGACLI_COMMAND}} ====" >>"${HC_STDOUT_LOG}"
print -R "${_CLI_OUTPUT}" >>"${HC_STDOUT_LOG}"

return 0
}
# -----------------------------------------------------------------------------
function _show_usage
{
# Prints the usage/help text of the plugin on stdout.
#   $1 - plugin name, $2 - plugin version, $3 - path of the configuration file
# Always returns 0.
typeset _USAGE_NAME="$1"
typeset _USAGE_VERSION="$2"
typeset _USAGE_CONFIG="$3"

cat <<- EOT
NAME : ${_USAGE_NAME}
VERSION : ${_USAGE_VERSION}
CONFIG : ${_USAGE_CONFIG} with parameters:
log_healthy=<yes|no>
check_controller=<yes|no>
check_bbu=<yes|no>
check_physical=<yes|no>
check_virtual=<yes|no>
PURPOSE : Checks the status of MegaRAID device(s)
# /opt/MegaRAID/MegaCli/MegaCli64 -ShowSummary -aALL
Target attributes:
* Controller: Optimal [optional]
* BBU: Healthy [optional]
* Physical devices: Online [optional]
* Virtual devices: Optimal [optional]
LOG HEALTHY : Supported
EOT

return 0
}
#******************************************************************************
# END of script
#******************************************************************************

View File

@ -19,12 +19,13 @@
# @(#) MAIN: check_exadata_zfs_logs # @(#) MAIN: check_exadata_zfs_logs
# DOES: see _show_usage() # DOES: see _show_usage()
# EXPECTS: see _show_usage() # EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), dump_logs(), init_hc(), linux_exec_ssh(), # REQUIRES: data_comma2space(), data_get_lvalue_from_config(), dump_logs(),
# log_hc(), warn() # init_hc(), linux_exec_ssh(), log_hc(), warn()
# #
# @(#) HISTORY: # @(#) HISTORY:
# @(#) 2019-02-18: initial version [Patrick Van der Veken] # @(#) 2019-02-18: initial version [Patrick Van der Veken]
# @(#) 2019-03-16: replace 'which' [Patrick Van der Veken] # @(#) 2019-03-16: replace 'which' [Patrick Van der Veken]
# @(#) 2019-05-14: _STC fix [Patrick Van der Veken]
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#****************************************************************************** #******************************************************************************
@ -34,7 +35,7 @@ function check_exadata_zfs_logs
{ {
# ------------------------- CONFIGURATION starts here ------------------------- # ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-03-16" # YYYY-MM-DD typeset _VERSION="2019-05-14" # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
# ------------------------- CONFIGURATION ends here --------------------------- # ------------------------- CONFIGURATION ends here ---------------------------
@ -275,6 +276,7 @@ do
fi fi
else else
: >${_STATE_FILE} : >${_STATE_FILE}
# shellcheck disable=SC2181
(( $? > 0 )) && { (( $? > 0 )) && {
warn "failed to create new state file at ${_STATE_FILE}" warn "failed to create new state file at ${_STATE_FILE}"
return 1 return 1
@ -284,6 +286,7 @@ do
(( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST} for log ${_ZFS_LOG}" (( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST} for log ${_ZFS_LOG}"
_SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG}) _SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG})
# shellcheck disable=SC2181
if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]] if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]]
then then
warn "unable to discover ${_ZFS_LOG} log data on ${_CFG_ZFS_HOST}" warn "unable to discover ${_ZFS_LOG} log data on ${_CFG_ZFS_HOST}"
@ -420,7 +423,7 @@ do
;; ;;
esac esac
else else
if (( _LOG_HEALTHY > 0 || _STC > 0 )) if (( _LOG_HEALTHY > 0 ))
then then
_MSG="no (new) messages discovered from ${_CFG_ZFS_HOST}:/${_ZFS_LOG}" _MSG="no (new) messages discovered from ${_CFG_ZFS_HOST}:/${_ZFS_LOG}"
log_hc "$0" 0 "${_MSG}" log_hc "$0" 0 "${_MSG}"
@ -453,14 +456,14 @@ CONFIG : $3 with parameters:
ssh_key_file=<ssh_private_key_file> ssh_key_file=<ssh_private_key_file>
and formatted stanzas of: and formatted stanzas of:
zfs:<host_name>:<alert|fltlog|scrk|system>:<filters> zfs:<host_name>:<alert|fltlog|scrk|system>:<filters>
PURPOSE : checks the ZFS logs for (new) entries with particular alert level(s) PURPOSE : Checks the ZFS logs for (new) entries with particular alert level(s)
Following logs are supported (filters in brackets): Following logs are supported (filters in brackets):
* alert (critical,major,minor) * alert (critical,major,minor)
* fltlog (critical,major,minor) * fltlog (critical,major,minor)
* system (error) * system (error)
* scrk (failed) * scrk (failed)
CLI: zfs > maintenance > logs > select (log) > show CLI: zfs > maintenance > logs > select (log) > show
CAVEAT: plugin will use state files to track 'seen' messages. However each CAVEAT: Plugin will use state files to track 'seen' messages. However each
check will only retrieve the default 100 last log entries. So it check will only retrieve the default 100 last log entries. So it
is possible that log entries are lost between health checks (this is possible that log entries are lost between health checks (this
can be avoided by scheduling the check quicker than the likely can be avoided by scheduling the check quicker than the likely

View File

@ -19,11 +19,12 @@
# @(#) MAIN: check_exadata_zfs_pool_usage # @(#) MAIN: check_exadata_zfs_pool_usage
# DOES: see _show_usage() # DOES: see _show_usage()
# EXPECTS: see _show_usage() # EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), dump_logs(), init_hc(), linux_exec_ssh(), # REQUIRES: data_comma2space(), data_get_lvalue_from_config, dump_logs(),
# log_hc(), warn() # init_hc(), linux_exec_ssh(), log_hc(), warn()
# #
# @(#) HISTORY: # @(#) HISTORY:
# @(#) 2019-04-12: initial version [Patrick Van der Veken] # @(#) 2019-04-12: initial version [Patrick Van der Veken]
# @(#) 2019-05-14: small fixes [Patrick Van der Veken]
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#****************************************************************************** #******************************************************************************
@ -33,7 +34,7 @@ function check_exadata_zfs_pool_usage
{ {
# ------------------------- CONFIGURATION starts here ------------------------- # ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-04-12" # YYYY-MM-DD typeset _VERSION="2019-05-14" # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
# usage query script -- DO NOT CHANGE -- # usage query script -- DO NOT CHANGE --
# prj1:share1:16 # prj1:share1:16
@ -166,6 +167,7 @@ print "${_CFG_ZFS_HOSTS}" | while read -r _CFG_ZFS_HOST
do do
(( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}" (( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}"
_SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG}) _SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG})
# shellcheck disable=SC2181
if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]] if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]]
then then
warn "unable to discover usage data on ${_CFG_ZFS_HOST}" warn "unable to discover usage data on ${_CFG_ZFS_HOST}"
@ -180,7 +182,7 @@ do
_ZFS_DATA="${_CFG_ZFS_HOST}:${_SSH_LINE}" _ZFS_DATA="${_CFG_ZFS_HOST}:${_SSH_LINE}"
else else
# shellcheck disable=SC1117 # shellcheck disable=SC1117
_ZFS_DATA="${_ZFS_DATA}\n${_CFG_ZFS_HOST}:${_SSH_LINE}" _ZFS_DATA=$(printf "%s\n%s:%s" "${_ZFS_DATA}" "${_CFG_ZFS_HOST}" "${_SSH_LINE}")
fi fi
done done
fi fi
@ -230,6 +232,7 @@ do
if [[ -n "${_CFG_SPACE_THRESHOLD}" ]] if [[ -n "${_CFG_SPACE_THRESHOLD}" ]]
then then
data_is_numeric "${_CFG_SPACE_THRESHOLD}" data_is_numeric "${_CFG_SPACE_THRESHOLD}"
# shellcheck disable=SC2181
if (( $? > 0 )) if (( $? > 0 ))
then then
warn "value for <max_space_threshold> is not numeric in configuration file ${_CONFIG_FILE}" warn "value for <max_space_threshold> is not numeric in configuration file ${_CONFIG_FILE}"

View File

@ -19,12 +19,13 @@
# @(#) MAIN: check_exadata_zfs_services # @(#) MAIN: check_exadata_zfs_services
# DOES: see _show_usage() # DOES: see _show_usage()
# EXPECTS: see _show_usage() # EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), dump_logs(), init_hc(), linux_exec_ssh(), # REQUIRES: data_comma2space(), data_get_lvalue_from_config, dump_logs(),
# log_hc(), warn() # init_hc(), linux_exec_ssh(), log_hc(), warn()
# #
# @(#) HISTORY: # @(#) HISTORY:
# @(#) 2019-02-18: initial version [Patrick Van der Veken] # @(#) 2019-02-18: initial version [Patrick Van der Veken]
# @(#) 2019-03-16: replace 'which' [Patrick Van der Veken] # @(#) 2019-03-16: replace 'which' [Patrick Van der Veken]
# @(#) 2019-05-14: small fixes [Patrick Van der Veken]
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#****************************************************************************** #******************************************************************************
@ -34,7 +35,7 @@ function check_exadata_zfs_services
{ {
# ------------------------- CONFIGURATION starts here ------------------------- # ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-03-16" # YYYY-MM-DD typeset _VERSION="2019-05-14" # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
# usage query script -- DO NOT CHANGE -- # usage query script -- DO NOT CHANGE --
# svc1:online # svc1:online
@ -158,6 +159,7 @@ print "${_CFG_ZFS_HOSTS}" | while read -r _CFG_ZFS_HOST
do do
(( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}" (( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}"
_SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG}) _SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG})
# shellcheck disable=SC2181
if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]] if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]]
then then
warn "unable to discover services data on ${_CFG_ZFS_HOST}" warn "unable to discover services data on ${_CFG_ZFS_HOST}"
@ -172,7 +174,7 @@ do
_ZFS_DATA="${_CFG_ZFS_HOST}:${_SSH_LINE}" _ZFS_DATA="${_CFG_ZFS_HOST}:${_SSH_LINE}"
else else
# shellcheck disable=SC1117 # shellcheck disable=SC1117
_ZFS_DATA="${_ZFS_DATA}\n${_CFG_ZFS_HOST}:${_SSH_LINE}" _ZFS_DATA=$(printf "%s\n%s:%s" "${_ZFS_DATA}" "${_CFG_ZFS_HOST}" "${_SSH_LINE}")
fi fi
done done
fi fi
@ -213,10 +215,10 @@ do
then then
if [[ $(data_lc "${_SERVICE_STATE}") != $(data_lc "${_CFG_SERVICE_STATE}") ]] if [[ $(data_lc "${_SERVICE_STATE}") != $(data_lc "${_CFG_SERVICE_STATE}") ]]
then then
_MSG="state of ${_CFG_ZFS_HOST}/${_CFG_SERVICE_NAME} is incorrect (${_SERVICE_STATE}!=${_CFG_SERVICE_STATE})" _MSG="state of ${_CFG_ZFS_HOST}/${_CFG_SERVICE_NAME} is NOK (${_SERVICE_STATE}!=${_CFG_SERVICE_STATE})"
_STC=1 _STC=1
else else
_MSG="state of ${_CFG_ZFS_HOST}/${_CFG_SERVICE_NAME} is correct (${_SERVICE_STATE}=${_CFG_SERVICE_STATE})" _MSG="state of ${_CFG_ZFS_HOST}/${_CFG_SERVICE_NAME} is OK (${_SERVICE_STATE}==${_CFG_SERVICE_STATE})"
_STC=0 _STC=0
fi fi
if (( _LOG_HEALTHY > 0 || _STC > 0 )) if (( _LOG_HEALTHY > 0 || _STC > 0 ))

View File

@ -27,6 +27,7 @@
# @(#) 2019-02-19: fix for <unknown> replication value [Patrick Van der Veken] # @(#) 2019-02-19: fix for <unknown> replication value [Patrick Van der Veken]
# @(#) 2019-03-16: replace 'which' [Patrick Van der Veken] # @(#) 2019-03-16: replace 'which' [Patrick Van der Veken]
# @(#) 2019-04-12: small fixes [Patrick Van der Veken] # @(#) 2019-04-12: small fixes [Patrick Van der Veken]
# @(#) 2019-05-14: small fixes [Patrick Van der Veken]
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#****************************************************************************** #******************************************************************************
@ -36,7 +37,7 @@ function check_exadata_zfs_share_replication
{ {
# ------------------------- CONFIGURATION starts here ------------------------- # ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-04-12" # YYYY-MM-DD typeset _VERSION="2019-05-14" # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
# replication query script -- DO NOT CHANGE -- # replication query script -- DO NOT CHANGE --
# prj1/share1:true:idle:success:111 # prj1/share1:true:idle:success:111
@ -170,6 +171,7 @@ print "${_CFG_ZFS_HOSTS}" | while read -r _CFG_ZFS_HOST
do do
(( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}" (( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}"
_SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG}) _SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG})
# shellcheck disable=SC2181
if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]] if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]]
then then
warn "unable to discover replication data on ${_CFG_ZFS_HOST}" warn "unable to discover replication data on ${_CFG_ZFS_HOST}"
@ -184,7 +186,7 @@ do
_ZFS_DATA="${_CFG_ZFS_HOST}:${_SSH_LINE}" _ZFS_DATA="${_CFG_ZFS_HOST}:${_SSH_LINE}"
else else
# shellcheck disable=SC1117 # shellcheck disable=SC1117
_ZFS_DATA="${_ZFS_DATA}\n${_CFG_ZFS_HOST}:${_SSH_LINE}" _ZFS_DATA=$(printf "%s\n%s:%s" "${_ZFS_DATA}" "${_CFG_ZFS_HOST}" "${_SSH_LINE}")
fi fi
done done
fi fi
@ -240,6 +242,7 @@ do
if [[ -n "${_CFG_REPLICATION_LAG}" ]] if [[ -n "${_CFG_REPLICATION_LAG}" ]]
then then
data_is_numeric "${_CFG_REPLICATION_LAG}" data_is_numeric "${_CFG_REPLICATION_LAG}"
# shellcheck disable=SC2181
if (( $? > 0 )) if (( $? > 0 ))
then then
warn "value for <max_replication_lag> is not numeric in configuration file ${_CONFIG_FILE}" warn "value for <max_replication_lag> is not numeric in configuration file ${_CONFIG_FILE}"
@ -263,10 +266,10 @@ do
# check replication enabled state (active or not?) # check replication enabled state (active or not?)
if [[ $(data_lc "${_REPLICATION_ENABLED}") != $(data_lc "${_CFG_REPLICATION_ENABLED}") ]] if [[ $(data_lc "${_REPLICATION_ENABLED}") != $(data_lc "${_CFG_REPLICATION_ENABLED}") ]]
then then
_MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is incorrect [${_REPLICATION_ENABLED}!=${_CFG_REPLICATION_ENABLED}]" _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is NOK [${_REPLICATION_ENABLED}!=${_CFG_REPLICATION_ENABLED}]"
_STC=1 _STC=1
else else
_MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is correct [${_REPLICATION_ENABLED}=${_CFG_REPLICATION_ENABLED}]" _MSG="state for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_ENABLED}==${_CFG_REPLICATION_ENABLED}]"
_STC=0 _STC=0
fi fi
if (( _LOG_HEALTHY > 0 || _STC > 0 )) if (( _LOG_HEALTHY > 0 || _STC > 0 ))
@ -276,10 +279,10 @@ do
# check replication last result (success or not?) # check replication last result (success or not?)
if [[ $(data_lc "${_REPLICATION_RESULT}") != $(data_lc "${_CFG_REPLICATION_RESULT}") ]] if [[ $(data_lc "${_REPLICATION_RESULT}") != $(data_lc "${_CFG_REPLICATION_RESULT}") ]]
then then
_MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is incorrect [${_REPLICATION_RESULT}!=${_CFG_REPLICATION_RESULT}]" _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is NOK [${_REPLICATION_RESULT}!=${_CFG_REPLICATION_RESULT}]"
_STC=1 _STC=1
else else
_MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is correct [${_REPLICATION_RESULT}=${_CFG_REPLICATION_RESULT}]" _MSG="result for ${_ZFS_HOST}:${_REPLICATION_NAME} is OK [${_REPLICATION_RESULT}==${_CFG_REPLICATION_RESULT}]"
_STC=0 _STC=0
fi fi
if (( _LOG_HEALTHY > 0 || _STC > 0 )) if (( _LOG_HEALTHY > 0 || _STC > 0 ))
@ -289,6 +292,7 @@ do
# check replication lag # check replication lag
# caveat: replication lag is <unknown> at initial replication # caveat: replication lag is <unknown> at initial replication
data_contains_string "${_REPLICATION_LAG}" "unknown" data_contains_string "${_REPLICATION_LAG}" "unknown"
# shellcheck disable=SC2181
if (( $? > 0 )) if (( $? > 0 ))
then then
_MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is unknown" _MSG="lag for ${_ZFS_HOST}:${_REPLICATION_NAME} is unknown"

View File

@ -19,14 +19,15 @@
# @(#) MAIN: check_exadata_zfs_share_usage # @(#) MAIN: check_exadata_zfs_share_usage
# DOES: see _show_usage() # DOES: see _show_usage()
# EXPECTS: see _show_usage() # EXPECTS: see _show_usage()
# REQUIRES: data_comma2space(), dump_logs(), init_hc(), linux_exec_ssh(), # REQUIRES: data_comma2space(), data_get_lvalue_from_config, dump_logs(),
# log_hc(), warn() # init_hc(), linux_exec_ssh(), log_hc(), warn()
# #
# @(#) HISTORY: # @(#) HISTORY:
# @(#) 2019-02-18: initial version [Patrick Van der Veken] # @(#) 2019-02-18: initial version [Patrick Van der Veken]
# @(#) 2019-03-16: replace 'which' [Patrick Van der Veken] # @(#) 2019-03-16: replace 'which' [Patrick Van der Veken]
# @(#) 2019-04-09: fix bad math in ZFS script & HC message [Patrick Van der Veken] # @(#) 2019-04-09: fix bad math in ZFS script & HC message [Patrick Van der Veken]
# @(#) 2019-04-12: small fixes [Patrick Van der Veken] # @(#) 2019-04-12: small fixes [Patrick Van der Veken]
# @(#) 2019-05-14: small fixes [Patrick Van der Veken]
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING! # DO NOT CHANGE THIS FILE UNLESS YOU KNOW WHAT YOU ARE DOING!
#****************************************************************************** #******************************************************************************
@ -36,7 +37,7 @@ function check_exadata_zfs_share_usage
{ {
# ------------------------- CONFIGURATION starts here ------------------------- # ------------------------- CONFIGURATION starts here -------------------------
typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf" typeset _CONFIG_FILE="${CONFIG_DIR}/$0.conf"
typeset _VERSION="2019-04-12" # YYYY-MM-DD typeset _VERSION="2019-05-14" # YYYY-MM-DD
typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match typeset _SUPPORTED_PLATFORMS="Linux" # uname -s match
# usage query script -- DO NOT CHANGE -- # usage query script -- DO NOT CHANGE --
# prj1:share1:16 # prj1:share1:16
@ -178,6 +179,7 @@ print "${_CFG_ZFS_HOSTS}" | while read -r _CFG_ZFS_HOST
do do
(( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}" (( ARG_DEBUG > 0 )) && debug "executing remote ZFS script on ${_CFG_ZFS_HOST}"
_SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG}) _SSH_OUTPUT=$(linux_exec_ssh "${_CFG_SSH_OPTS}" "${_CFG_SSH_USER}" "${_CFG_ZFS_HOST}" "${_ZFS_SCRIPT}" 2>>${HC_STDERR_LOG})
# shellcheck disable=SC2181
if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]] if (( $? > 0 )) || [[ -z "${_SSH_OUTPUT}" ]]
then then
warn "unable to discover usage data on ${_CFG_ZFS_HOST}" warn "unable to discover usage data on ${_CFG_ZFS_HOST}"
@ -192,7 +194,7 @@ do
_ZFS_DATA="${_CFG_ZFS_HOST}:${_SSH_LINE}" _ZFS_DATA="${_CFG_ZFS_HOST}:${_SSH_LINE}"
else else
# shellcheck disable=SC1117 # shellcheck disable=SC1117
_ZFS_DATA="${_ZFS_DATA}\n${_CFG_ZFS_HOST}:${_SSH_LINE}" _ZFS_DATA=$(printf "%s\n%s:%s" "${_ZFS_DATA}" "${_CFG_ZFS_HOST}" "${_SSH_LINE}")
fi fi
done done
fi fi
@ -242,6 +244,7 @@ do
if [[ -n "${_CFG_SPACE_THRESHOLD}" ]] if [[ -n "${_CFG_SPACE_THRESHOLD}" ]]
then then
data_is_numeric "${_CFG_SPACE_THRESHOLD}" data_is_numeric "${_CFG_SPACE_THRESHOLD}"
# shellcheck disable=SC2181
if (( $? > 0 )) if (( $? > 0 ))
then then
warn "value for <max_space_threshold> is not numeric in configuration file ${_CONFIG_FILE}" warn "value for <max_space_threshold> is not numeric in configuration file ${_CONFIG_FILE}"