aboutsummaryrefslogtreecommitdiffstats
path: root/net-mgmt
diff options
context:
space:
mode:
authorpgollucci <pgollucci@FreeBSD.org>2010-12-10 12:51:19 +0800
committerpgollucci <pgollucci@FreeBSD.org>2010-12-10 12:51:19 +0800
commit0d5ed470858d6c2e09dd39eb501b2d25e5c541db (patch)
tree61f726cd55847aa6a6479c8f22ec5eff026922cc /net-mgmt
parent11d5fb3f38ff655c13ad1f161f65254f4a986604 (diff)
downloadfreebsd-ports-gnome-0d5ed470858d6c2e09dd39eb501b2d25e5c541db.tar.gz
freebsd-ports-gnome-0d5ed470858d6c2e09dd39eb501b2d25e5c541db.tar.zst
freebsd-ports-gnome-0d5ed470858d6c2e09dd39eb501b2d25e5c541db.zip
check_hdd_health is a Nagios plug-in written in shell to check HDD health.
This script checks the following S.M.A.R.T. values of the HDD: - Spin Retry Count - Reallocated Sector Ct - Reallocated Event Count - Current Pending Sector - Offline Uncorrectable - Total health test PR: ports/152916 Submitted by: jamrich.majo at gmail.com
Diffstat (limited to 'net-mgmt')
-rw-r--r--net-mgmt/Makefile1
-rw-r--r--net-mgmt/nagios-check_hdd_health/Makefile24
-rw-r--r--net-mgmt/nagios-check_hdd_health/distinfo2
-rw-r--r--net-mgmt/nagios-check_hdd_health/pkg-descr8
-rw-r--r--net-mgmt/nagios-check_hdd_health/pkg-plist2
-rw-r--r--net-mgmt/nagios-check_hdd_health/src/check_hdd_health172
6 files changed, 209 insertions, 0 deletions
diff --git a/net-mgmt/Makefile b/net-mgmt/Makefile
index 1a0407acd6a6..627487d5367c 100644
--- a/net-mgmt/Makefile
+++ b/net-mgmt/Makefile
@@ -116,6 +116,7 @@
SUBDIR += nagios-check_bacula
SUBDIR += nagios-check_clamav
SUBDIR += nagios-check_cpu_usage
+ SUBDIR += nagios-check_hdd_health
SUBDIR += nagios-check_ice
SUBDIR += nagios-check_kumofs
SUBDIR += nagios-check_memcached_paranoid
diff --git a/net-mgmt/nagios-check_hdd_health/Makefile b/net-mgmt/nagios-check_hdd_health/Makefile
new file mode 100644
index 000000000000..5b3cb4482c28
--- /dev/null
+++ b/net-mgmt/nagios-check_hdd_health/Makefile
@@ -0,0 +1,24 @@
+# New ports collection makefile for: nagios-check_hdd_health
+# Date created: 2010-12-02
+# Whom: jamrich.majo@gmail.com
+#
+# $FreeBSD$
+#
+
+# Port identity and where the distribution file is fetched from.
+PORTNAME= nagios-check_hdd_health
+PORTVERSION= 1.0
+CATEGORIES= net-mgmt
+MASTER_SITES= http://www.bwelectronics.sk/jamrich/ports/
+
+MAINTAINER= jamrich.majo@gmail.com
+COMMENT= Nagios plug-in to check HDD health from S.M.A.R.T
+
+# The installed script invokes smartctl at run time.
+RUN_DEPENDS= smartmontools>=0:${PORTSDIR}/sysutils/smartmontools
+
+# The plug-in is a shell script, so there is no build step.
+NO_BUILD= yes
+
+# Install the script kept in this port's src/ directory into the Nagios
+# plug-in directory under ${PREFIX}.
+do-install:
+ @${MKDIR} ${PREFIX}/libexec/nagios
+ @${INSTALL_SCRIPT} ${.CURDIR}/src/check_hdd_health ${PREFIX}/libexec/nagios
+
+.include <bsd.port.mk>
diff --git a/net-mgmt/nagios-check_hdd_health/distinfo b/net-mgmt/nagios-check_hdd_health/distinfo
new file mode 100644
index 000000000000..a08d6df098de
--- /dev/null
+++ b/net-mgmt/nagios-check_hdd_health/distinfo
@@ -0,0 +1,2 @@
+SHA256 (nagios-check_hdd_health-1.0.tar.gz) = e3dcad96d451bbc978d165682bfb9f1669fedf197fc96af971fe7d026fe47d1c
+SIZE (nagios-check_hdd_health-1.0.tar.gz) = 3445
diff --git a/net-mgmt/nagios-check_hdd_health/pkg-descr b/net-mgmt/nagios-check_hdd_health/pkg-descr
new file mode 100644
index 000000000000..45a144ce23b7
--- /dev/null
+++ b/net-mgmt/nagios-check_hdd_health/pkg-descr
@@ -0,0 +1,8 @@
+check_hdd_health is a Nagios plug-in written in shell to check HDD health.
+This script checks the following S.M.A.R.T. values of the HDD:
+- Spin Retry Count
+- Reallocated Sector Ct
+- Reallocated Event Count
+- Current Pending Sector
+- Offline Uncorrectable
+- Total health test
diff --git a/net-mgmt/nagios-check_hdd_health/pkg-plist b/net-mgmt/nagios-check_hdd_health/pkg-plist
new file mode 100644
index 000000000000..b7dc77b9d42b
--- /dev/null
+++ b/net-mgmt/nagios-check_hdd_health/pkg-plist
@@ -0,0 +1,2 @@
+libexec/nagios/check_hdd_health
+@dirrmtry libexec/nagios
diff --git a/net-mgmt/nagios-check_hdd_health/src/check_hdd_health b/net-mgmt/nagios-check_hdd_health/src/check_hdd_health
new file mode 100644
index 000000000000..cb8e7d29819a
--- /dev/null
+++ b/net-mgmt/nagios-check_hdd_health/src/check_hdd_health
@@ -0,0 +1,172 @@
+#!/bin/sh
+#
+# check_hdd_health - Nagios plug-in that reports HDD health from the
+# S.M.A.R.T. report produced by smartctl (Smartmontools).
+#
+PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/sbin:/usr/local/bin
+
+# Exit codes handed back to Nagios.
+ST_OK=0
+ST_WR=1
+ST_CR=2
+ST_UN=3
+
+# Full path of smartctl, or empty when it is not installed.  "command -v" is
+# the POSIX way to look a command up; which(1) is an external helper whose
+# output on failure is not standardised.
+smartctl=$(command -v smartctl)
+
+## Smartmontools
+SMT=Smartmontools
+
+# Plugin name
+PROGNAME=$(basename "$0")
+
+# Version
+VERSION="Version 1.0"
+
+# Author
+AUTHOR="Marian Jamrich"
+
+# Scratch file that receives the smartctl report.  It is created with
+# mktemp(1) so the name is unpredictable and the file is created exclusively;
+# a fixed /tmp/<name>.$$ path could be pre-created as a symlink by another
+# local user.
+TMPFILE=$(mktemp /tmp/smart.nagios.XXXXXX) || {
+  echo "Cannot create temporary file in /tmp."
+  exit $ST_UN
+}
+
+# Clean up when done or when aborting
+trap 'rm -f "${TMPFILE}"' 0 1 2 3 15
+
+#print_version() {
+# echo "$PROGNAME $VERSION $1"
+#}
+
+# Print the one-line usage hint that follows an "attribute not supported"
+# error.  Reads the global $device filled in by the option parser.
+mini_help() {
+  printf '%s\n' "Usage $0 --device $device --without [src rsc rec cps ou]"
+}
+
+# Print the full help screen and terminate the plug-in.
+# Clears the terminal, prints a banner built from $PROGNAME, $VERSION, $1 and
+# $AUTHOR, a description of every checked S.M.A.R.T. attribute, the mapping of
+# values to Nagios states and a usage example, then exits with $ST_UN.
+# Arguments: $1 - optional text placed in the banner right after the version
+#                 (the caller visible in this script passes nothing).
+# Outputs:   help text on stdout.
+# Returns:   never returns; the function ends with "exit $ST_UN".
+print_help() {
+ clear;
+ echo "*********************************************************************************"
+ echo "* $PROGNAME $VERSION $1""($AUTHOR) <jamrich.majo@gmail.com> (2010) *"
+ echo "*********************************************************************************"
+ echo "This is Nagios plugin to check HDD health from S.M.A.R.T. by Smartmontools."
+ echo '
+The S.M.A.R.T. attributes are specific properties (parameters) of various parts of a disk.
+S.M.A.R.T. uses attributes to monitor the disk condition and to analyze its reliability.
+
+Script check HDD from S.M.A.R.T with the following properties (if your HDD supports it):
+
+** Spin Retry Count (src) **
+Count of retry of spin start attempts. This attribute stores a total count of the spin start attempts to reach the fully operational speed (under the
+condition that the first attempt was unsuccessful). A decrease of this attribute value is a sign of problems in the hard disk mechanical subsystem.
+
+** Reallocated Sector Count (rsc) **
+Count of reallocated sectors. When the hard drive finds a read/write/verification error, it marks this sector as "reallocated" and transfers data to a
+special reserved area (spare area). This process is also known as remapping and "reallocated" sectors are called remaps. This is why, on a modern hard
+disks, you can not see "bad blocks" while testing the surface - all bad blocks are hidden in reallocated sectors.
+
+** Reallocated Event Count (rec) **
+Count of remap operations (transferring data from a bad sector to a special reserved disk area - spare area). The raw value of this attribute shows the
+total number of attempts to transfer data from reallocated sectors to a spare area. Unsuccessful attempts are counted as well as successful.
+
+** Current Pending Sector (cps) **
+Current count of unstable sectors (waiting for remapping). The raw value of this attribute indicates the total number of sectors waiting for remapping.
+Later, when some of these sectors are read successfully, the value is decreased. If errors still occur when reading some sector, the hard drive will try
+to restore the data, transfer it to the reserved disk area (spare area) and mark this sector as remapped. If this attribute value remains at zero, it
+indicates that the quality of the corresponding surface area is low.
+
+** Offline Uncorrectable (ou) **
+Quantity of uncorrectable errors. The raw value of this attribute indicates the total number of uncorrectable errors when reading/writing a sector.
+A rise in the value of this attribute indicates that there are evident defects of the disk surface and/or there are problems in the hard disk drive
+mechanical subsystem.
+
+** Total health test (pass) **
+This is test provided by Smartmontools. If total disk state is "health", Smartmontools marked as "PASSED".
+ '
+ echo "Nagios states:"
+ echo
+ echo "OK - if all values are \"0\"."
+ echo "Warning - if one or both values \"Spin Retry Count\" and \"Reallocated Event Count\" is between the values 1 to 9."
+ echo "Critical - if some value is greater than \"0\" except \"Spin Retry Count (>=10)\" and \"Reallocated Event Count (>=10)\"."
+ echo -e "\n---------------------------------------------------------------------"
+ echo "Usage:"
+ echo "$0 --device /dev/ad0 [ --without [src rsc rec cps ou]]"
+ echo "---------------------------------------------------------------------"
+ # Showing help is not a check result, so the script stops here with $ST_UN.
+ exit $ST_UN
+}
+
+# Parse the leading command-line word.  Exactly one option is recognised here;
+# everything after it is left in the positional parameters for later code.
+case "$1" in
+  --help|-h|--usage|-u)
+    # print_help ends the script itself; the exit below is only a safety net.
+    print_help
+    exit $ST_UN
+    ;;
+  -d | --device)
+    device=$2
+    ;;
+  -V)
+    # The print_version helper is commented out in this script, so calling it
+    # produced "print_version: not found" and exit status 127.  Print the
+    # version line directly and leave with the same status as --help.
+    echo "$PROGNAME $VERSION"
+    exit $ST_UN
+    ;;
+  *)
+    echo "Unknown argument: $1"
+    echo "For more information please try -h or --help!"
+    exit $ST_UN
+    ;;
+esac
+# Drop the option word; the positional parameters now start with the device.
+shift
+
+# ---- Preflight checks: every failure here ends the plug-in with $ST_UN ----
+
+# A device operand is mandatory.
+if [ -z "$device" ]; then
+  echo -e "\nYou forgot to define device! Please try \"-h or --help\" to help."
+  exit $ST_UN
+fi
+
+# The plug-in is written for FreeBSD only.
+if [ "$(uname)" != "FreeBSD" ]; then
+  echo "This plugin is only for FreeBSD."
+  exit $ST_UN
+fi
+
+# The device node has to exist.  This branch used to run "exit $ST_UK"; that
+# variable is never defined, so the script left with the status of the
+# preceding echo (0) and Nagios recorded a missing disk as OK.
+if [ ! -e "$device" ]; then
+  echo
+  echo "Unknown device \"$device\"!"
+  exit $ST_UN
+fi
+
+# smartctl must have been found on PATH.
+if [ -z "$smartctl" ]; then
+  echo -e "\nYou don't have installed $SMT. Please install it at http://smartmontools.sourceforge.net or pkg_add -r \"smartmontools\"..."
+  exit $ST_UN
+fi
+
+# Collect the complete report once; all later parsing reads this file.
+"$smartctl" -a "$device" > "${TMPFILE}"
+
+# smartctl may print several "SMART support is:" lines; the last one carries
+# the current state, so keep the fourth word of the last match.
+SMART_SUPPORT=$(awk '/SMART support is/ {print $4}' "${TMPFILE}" | tail -n 1)
+
+if [ "${SMART_SUPPORT}" = "Unavailable" ]; then
+  echo -e "\nS.M.A.R.T support is Unavailable for $device !!! You should enable it \"smartctl -s on $device\"."
+  exit $ST_UN
+elif [ "${SMART_SUPPORT}" != "Enabled" ]; then
+  echo -e "\nMaybe you don't have enabled S.M.A.R.T support in $SMT! Please type \"smartctl -s on $device\" that you have it turned on. Or device does not support S.M.A.R.T function."
+  exit $ST_UN
+fi
+
+## start S.M.A.R.T test and set variables
+# Each value is the 10th whitespace-separated field of the matching line in
+# the smartctl report (the raw-value column of the attribute table).  A
+# variable stays empty when the drive does not report that attribute.
+src=$(awk '/Spin_Retry_Count/ {print $10}' "${TMPFILE}")
+rsc=$(awk '/Reallocated_Sector_Ct/ {print $10}' "${TMPFILE}")
+rec=$(awk '/Reallocated_Event_Count/ {print $10}' "${TMPFILE}")
+cps=$(awk '/Current_Pending_Sector/ {print $10}' "${TMPFILE}")
+ou=$(awk '/Offline_Uncorrectable/ {print $10}' "${TMPFILE}")
+# Overall self-assessment: PASSED only when the text after the colon is
+# exactly " PASSED", FAILED for any other "test result" line.
+pass=$(awk -F: '/test result/ { if ( $2 == " PASSED") print "PASSED"; else print "FAILED" }' "${TMPFILE}")
+
+## if one or more S.M.A.R.T function is not supported by your HDD, then you define --without variable and then value is set to "0"
+# Every remaining command-line word that names an attribute forces that
+# attribute to 0; other words (the device path, "--without") fall through the
+# case untouched.  A getopt(1) call used to precede this loop, but its result
+# was never read, so it has been dropped.
+for arg; do
+  case "$arg" in
+    src) src=0 ;;
+    rsc) rsc=0 ;;
+    rec) rec=0 ;;
+    cps) cps=0 ;;
+    ou) ou=0 ;;
+  esac
+done
+
+# test if your HDD support all parameters:
+# An empty value means the attribute was not found in the smartctl report and
+# was not disabled on the command line; stop with $ST_UN and a usage hint.
+[ -z "$src" ] && echo -e "***********\n** ERROR **\n***********\n${device} don't support Spin_Retry_Count. Please try \"--without src\"." && mini_help && exit $ST_UN
+[ -z "$rsc" ] && echo -e "***********\n** ERROR **\n***********\n${device} don't support Reallocated_Sector_Ct. Please try \"--without rsc\"." && mini_help && exit $ST_UN
+[ -z "$rec" ] && echo -e "***********\n** ERROR **\n***********\n${device} don't support Reallocated_Event_Count. Please try --without rec." && mini_help && exit $ST_UN
+[ -z "$cps" ] && echo -e "***********\n** ERROR **\n***********\n${device} don't support Current_Pending_Sector. Please try --without cps." && mini_help && exit $ST_UN
+[ -z "$ou" ] && echo -e "***********\n** ERROR **\n***********\n${device} don't support Offline_Uncorrectable. Please try \"--without ou\"." && mini_help && exit $ST_UN
+
+perfdata="smart=src=$src; rsc=$rsc; rec=$rec; cps=$cps; ou=$ou; pass=$pass"
+
+##### finally run test, print result and set exit code #####
+# OK:       every counter is 0 and the overall test PASSED.
+# WARNING:  Spin Retry Count and/or Reallocated Event Count is between 1 and
+#           9 (neither reaches 10), no pending or uncorrectable sectors, and
+#           the overall test PASSED.  This follows the state table printed by
+#           the help screen; the earlier test used "-gt 1" (so a count of
+#           exactly 1 was CRITICAL) and demanded that src, rsc and rec were
+#           all non-zero at once.  As before, a non-zero Reallocated Sector
+#           Ct does not by itself rule out WARNING.
+# CRITICAL: everything else.
+if [ "$src" -eq 0 ] && [ "$rsc" -eq 0 ] && [ "$rec" -eq 0 ] && [ "$cps" -eq 0 ] && [ "$ou" -eq 0 ] && [ "$pass" = "PASSED" ]; then
+  echo "OK - HDD S.M.A.R.T health: src=$src, rsc=$rsc, rec=$rec, cps=$cps, ou=$ou, HEALTH_STATUS=$pass for $device. |${perfdata}"
+  exit $ST_OK
+elif [ "$src" -lt 10 ] && [ "$rec" -lt 10 ] && { [ "$src" -gt 0 ] || [ "$rec" -gt 0 ]; } && [ "$cps" -eq 0 ] && [ "$ou" -eq 0 ] && [ "$pass" = "PASSED" ]; then
+  echo "WARNING - HDD S.M.A.R.T health: src=$src, rsc=$rsc, rec=$rec, cps=$cps, ou=$ou, HEALTH_STATUS=$pass for $device. |${perfdata}"
+  exit $ST_WR
+else
+  # Label corrected from "HEALT_STATUS" to match the OK and WARNING lines.
+  echo "CRITICAL - HDD S.M.A.R.T health: src=$src, rsc=$rsc, rec=$rec, cps=$cps, ou=$ou, HEALTH_STATUS=$pass for $device. |${perfdata}"
+  exit $ST_CR
+fi