diff options
author | pgollucci <pgollucci@FreeBSD.org> | 2010-12-10 12:51:19 +0800 |
---|---|---|
committer | pgollucci <pgollucci@FreeBSD.org> | 2010-12-10 12:51:19 +0800 |
commit | 0d5ed470858d6c2e09dd39eb501b2d25e5c541db (patch) | |
tree | 61f726cd55847aa6a6479c8f22ec5eff026922cc /net-mgmt | |
parent | 11d5fb3f38ff655c13ad1f161f65254f4a986604 (diff) | |
download | freebsd-ports-gnome-0d5ed470858d6c2e09dd39eb501b2d25e5c541db.tar.gz freebsd-ports-gnome-0d5ed470858d6c2e09dd39eb501b2d25e5c541db.tar.zst freebsd-ports-gnome-0d5ed470858d6c2e09dd39eb501b2d25e5c541db.zip |
check_hdd_health is a Nagios plug-in written in shell to check HDD health.
This script check HDD from S.M.A.R.T this values:
- Spin Retry Count
- Reallocated Sector Ct
- Reallocated Event Count
- Current Pending Sector
- Offline Uncorrectable
- Total health test
PR: ports/152916
Submitted by: jamrich.majo at gmail.com
Diffstat (limited to 'net-mgmt')
-rw-r--r-- | net-mgmt/Makefile | 1 | ||||
-rw-r--r-- | net-mgmt/nagios-check_hdd_health/Makefile | 24 | ||||
-rw-r--r-- | net-mgmt/nagios-check_hdd_health/distinfo | 2 | ||||
-rw-r--r-- | net-mgmt/nagios-check_hdd_health/pkg-descr | 8 | ||||
-rw-r--r-- | net-mgmt/nagios-check_hdd_health/pkg-plist | 2 | ||||
-rw-r--r-- | net-mgmt/nagios-check_hdd_health/src/check_hdd_health | 172 |
6 files changed, 209 insertions, 0 deletions
diff --git a/net-mgmt/Makefile b/net-mgmt/Makefile index 1a0407acd6a6..627487d5367c 100644 --- a/net-mgmt/Makefile +++ b/net-mgmt/Makefile @@ -116,6 +116,7 @@ SUBDIR += nagios-check_bacula SUBDIR += nagios-check_clamav SUBDIR += nagios-check_cpu_usage + SUBDIR += nagios-check_hdd_health SUBDIR += nagios-check_ice SUBDIR += nagios-check_kumofs SUBDIR += nagios-check_memcached_paranoid diff --git a/net-mgmt/nagios-check_hdd_health/Makefile b/net-mgmt/nagios-check_hdd_health/Makefile new file mode 100644 index 000000000000..5b3cb4482c28 --- /dev/null +++ b/net-mgmt/nagios-check_hdd_health/Makefile @@ -0,0 +1,24 @@ +# New ports collection makefile for: nagios-check_hdd_health +# Date created: 2010-12-02 +# Whom: jamrich.majo@gmail.com +# +# $FreeBSD$ +# + +PORTNAME= nagios-check_hdd_health +PORTVERSION= 1.0 +CATEGORIES= net-mgmt +MASTER_SITES= http://www.bwelectronics.sk/jamrich/ports/ + +MAINTAINER= jamrich.majo@gmail.com +COMMENT= Nagios plug-in to check HDD health from S.M.A.R.T + +RUN_DEPENDS= smartmontools>=0:${PORTSDIR}/sysutils/smartmontools + +NO_BUILD= yes + +do-install: + @${MKDIR} ${PREFIX}/libexec/nagios + @${INSTALL_SCRIPT} ${.CURDIR}/src/check_hdd_health ${PREFIX}/libexec/nagios + +.include <bsd.port.mk> diff --git a/net-mgmt/nagios-check_hdd_health/distinfo b/net-mgmt/nagios-check_hdd_health/distinfo new file mode 100644 index 000000000000..a08d6df098de --- /dev/null +++ b/net-mgmt/nagios-check_hdd_health/distinfo @@ -0,0 +1,2 @@ +SHA256 (nagios-check_hdd_health-1.0.tar.gz) = e3dcad96d451bbc978d165682bfb9f1669fedf197fc96af971fe7d026fe47d1c +SIZE (nagios-check_hdd_health-1.0.tar.gz) = 3445 diff --git a/net-mgmt/nagios-check_hdd_health/pkg-descr b/net-mgmt/nagios-check_hdd_health/pkg-descr new file mode 100644 index 000000000000..45a144ce23b7 --- /dev/null +++ b/net-mgmt/nagios-check_hdd_health/pkg-descr @@ -0,0 +1,8 @@ +check_hdd_health is a Nagios plug-in written in shell to check HDD health. +This script check HDD from S.M.A.R.T this values: +- Spin Retry Count +- Reallocated Sector Ct +- Reallocated Event Count +- Current Pending Sector +- Offline Uncorrectable +- Total health test diff --git a/net-mgmt/nagios-check_hdd_health/pkg-plist b/net-mgmt/nagios-check_hdd_health/pkg-plist new file mode 100644 index 000000000000..b7dc77b9d42b --- /dev/null +++ b/net-mgmt/nagios-check_hdd_health/pkg-plist @@ -0,0 +1,2 @@ +libexec/nagios/check_hdd_health +@dirrmtry libexec/nagios diff --git a/net-mgmt/nagios-check_hdd_health/src/check_hdd_health b/net-mgmt/nagios-check_hdd_health/src/check_hdd_health new file mode 100644 index 000000000000..cb8e7d29819a --- /dev/null +++ b/net-mgmt/nagios-check_hdd_health/src/check_hdd_health @@ -0,0 +1,172 @@ +#!/bin/sh +# +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/sbin:/usr/local/bin + +ST_OK=0 +ST_WR=1 +ST_CR=2 +ST_UN=3 + +smartctl=$(which smartctl) + +## Smartmontools +SMT=Smartmontools + +# Plugin name +PROGNAME=`basename $0` + +# Version +VERSION="Version 1.0" + +# Author +AUTHOR="Marian Jamrich" + +TMPFILE=/tmp/smart.nagios.$$ + +# Clean up when done or when aborting +trap "rm -f ${TMPFILE}" 0 1 2 3 15 + +#print_version() { +# echo "$PROGNAME $VERSION $1" +#} + +mini_help() { + echo "Usage $0 --device $device --without [src rsc rec cps ou]" +} + +print_help() { + clear; + echo "*********************************************************************************" + echo "* $PROGNAME $VERSION $1""($AUTHOR) <jamrich.majo@gmail.com> (2010) *" + echo "*********************************************************************************" + echo "This is Nagios plugin to check HDD health from S.M.A.R.T. by Smartmontools." + echo ' +The S.M.A.R.T. attributes are specific properties (parameters) of various parts of a disk. +S.M.A.R.T. uses attributes to monitor the disk condition and to analyze its reliability. + +Script check HDD from S.M.A.R.T with the following properties (if your HDD supports it): + +** Spin Retry Count (src) ** +Count of retry of spin start attempts. This attribute stores a total count of the spin start attempts to reach the fully operational speed (under the +condition that the first attempt was unsuccessful). A decrease of this attribute value is a sign of problems in the hard disk mechanical subsystem. + +** Reallocated Sector Count (rsc) ** +Count of reallocated sectors. When the hard drive finds a read/write/verification error, it marks this sector as "reallocated" and transfers data to a +special reserved area (spare area). This process is also known as remapping and "reallocated" sectors are called remaps. This is why, on a modern hard +disks, you can not see "bad blocks" while testing the surface - all bad blocks are hidden in reallocated sectors. + +** Reallocated Event Count (rec) ** +Count of remap operations (transferring data from a bad sector to a special reserved disk area - spare area). The raw value of this attribute shows the +total number of attempts to transfer data from reallocated sectors to a spare area. Unsuccessful attempts are counted as well as successful. + +** Current Pending Sector (cps) ** +Current count of unstable sectors (waiting for remapping). The raw value of this attribute indicates the total number of sectors waiting for remapping. +Later, when some of these sectors are read successfully, the value is decreased. If errors still occur when reading some sector, the hard drive will try +to restore the data, transfer it to the reserved disk area (spare area) and mark this sector as remapped. If this attribute value remains at zero, it +indicates that the quality of the corresponding surface area is low. + +** Offline Uncorrectable (ou) ** +Quantity of uncorrectable errors. The raw value of this attribute indicates the total number of uncorrectable errors when reading/writing a sector. +A rise in the value of this attribute indicates that there are evident defects of the disk surface and/or there are problems in the hard disk drive +mechanical subsystem. + +** Total health test (pass) ** +This is test provided by Smartmontools. If total disk state is "health", Smartmontools marked as "PASSED". + ' + echo "Nagios states:" + echo + echo "OK - if all values are \"0\"." + echo "Warning - if one or both values \"Spin Retry Count\" and \"Reallocated Event Count\" is between the values 1 to 9." + echo "Critical - if some value is greater than \"0\" except \"Spin Retry Count (>=10)\" and \"Reallocated Event Count (>=10)\"." + echo -e "\n---------------------------------------------------------------------" + echo "Usage:" + echo "$0 --device /dev/ad0 [ --without [src rsc rec cps ou]]" + echo "---------------------------------------------------------------------" + exit $ST_UN +} + +case "$1" in + --help|-h|--usage|-u) + print_help + exit $ST_UN + ;; + -d | --device) + device=$2 + ;; + -V) + print_version + exit + ;; + *) + echo "Unknown argument: $1" + echo "For more information please try -h or --help!" + exit $ST_UN + ;; +esac +shift + +test -z $device && echo -e "\nYou forgot to define device! Please try \"-h or --help\" to help." && exit $ST_UN +test `uname` != "FreeBSD" && echo "This plugin is only for FreeBSD." && exit $ST_UN + +if [ ! -e $device ]; then + echo + echo "Unknown device \"$device\"!" + exit $ST_UK +fi + +if [ -z $smartctl ]; then + echo -e "\nYou don't have installed $SMT. Please install it at http://smartmontools.sourceforge.net or pkg_add -r \"smartmontools\"..." + exit $ST_UN +fi + +$smartctl -a $device > ${TMPFILE} +SMART_SUPPORT=`awk '/SMART support is/ {print $4}' ${TMPFILE} | tail -n 1` + +if [ "${SMART_SUPPORT}" = "Unavailable" ]; then + echo -e "\nS.M.A.R.T support is Unavailable for $device !!! You should enable it \"smartctl -s on $device\"." + exit $ST_UN +elif [ "${SMART_SUPPORT}" != "Enabled" ]; then + echo -e "\nMaybe you don't have enabled S.M.A.R.T support in $SMT! Please type \"smartctl -s on $device\" that you have it turned on. Or device does not support S.M.A.R.T function." + exit $ST_UN +fi + +## start S.M.A.R.T test and set variables +src=`awk '/Spin_Retry_Count/ {print $10}' ${TMPFILE} ` +rsc=`awk '/Reallocated_Sector_Ct/ {print $10}' ${TMPFILE} ` +rec=`awk '/Reallocated_Event_Count/ {print $10}' ${TMPFILE} ` +cps=`awk '/Current_Pending_Sector/ {print $10}' ${TMPFILE} ` +ou=`awk '/Offline_Uncorrectable/ {print $10}' ${TMPFILE} ` +pass=`awk -F\: '/test result/ { if ( $2 == " PASSED") print "PASSED"; else print "FAILED" }' ${TMPFILE} ` + +## if one or more S.M.A.R.T function is not supported by your HDD, then you define --without variable and then value is set to "0" +args=`getopt w:without: $*` +for arg; do + case "$arg" in + src) src=0;; + rsc) rsc=0;; + rec) rec=0;; + cps) cps=0;; + ou) ou=0;; + esac +done + +# test if your HDD support all parameters: +[ -z "$src" ] && echo -e "***********\n** ERROR **\n***********\n${device} don't support Spin_Retry_Count. Please try \"--without src\"." && mini_help && exit $ST_UN +[ -z "$rsc" ] && echo -e "***********\n** ERROR **\n***********\n${device} don't support Reallocated_Sector_Ct. Please try \"--without rsc\"." && mini_help && exit $ST_UN +[ -z "$rec" ] && echo -e "***********\n** ERROR **\n***********\n${device} don't support Reallocated_Event_Count. Please try --without rec." && mini_help && exit $ST_UN +[ -z "$cps" ] && echo -e "***********\n** ERROR **\n***********\n${device} don't support Current_Pending_Sector. Please try --without cps." && mini_help && exit $ST_UN +[ -z "$ou" ] && echo -e "***********\n** ERROR **\n***********\n${device} don't support Offline_Uncorrectable. Please try \"--without ou\"." && mini_help && exit $ST_UN + +perfdata="smart=src=$src; rsc=$rsc; rec=$rec; cps=$cps; ou=$ou; pass=$pass" + +##### finally run test, print result and set exit code ##### +if [ $src -eq 0 ] && [ $rsc -eq 0 ] && [ $rec -eq 0 ] && [ $cps -eq 0 ] && [ $ou -eq 0 ] && [ "$pass" = "PASSED" ]; then + echo "OK - HDD S.M.A.R.T health: src=$src, rsc=$rsc, rec=$rec, cps=$cps, ou=$ou, HEALTH_STATUS=$pass for $device. |${perfdata}" + exit $ST_OK +elif [ $src -gt 1 -a $src -lt 10 ] && [ $rsc -gt 0 ] && [ $rec -gt 1 -a $rec -lt 10 ] && [ $cps -eq 0 ] && [ $ou -eq 0 ] && [ "$pass" = "PASSED" ]; then + echo "WARNING - HDD S.M.A.R.T health: src=$src, rsc=$rsc, rec=$rec, cps=$cps, ou=$ou, HEALTH_STATUS=$pass for $device. |${perfdata}" + exit $ST_WR +else + echo "CRITICAL - HDD S.M.A.R.T health: src=$src, rsc=$rsc, rec=$rec, cps=$cps, ou=$ou, HEALT_STATUS=$pass for $device. |${perfdata}" + exit $ST_CR +fi |