Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions check_zpools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
# Copyright (c) 2022 @waoki - Trap zpool command errors (2022-03-01)
# Copyright (c) 2022 @mrdsam - Improvement (2022-05-24)
# Copyright (c) 2023 @kresike - Improvement (2023-02-22)
# Copyright (c) 2025 @amahr - Add soft fail option (2025-11-25)
#########################################################################
# History/Changelog:
# 2006-09-01 Original first version
Expand All @@ -43,20 +44,24 @@
# 2023-02-15 Bugfix in single pool CRITICAL output (issue #13)
# 2023-02-22 Improve message consistency and display all issues found in pool
# 2023-09-28 Add license
# 2025-11-25 Add soft fail option
#########################################################################
### Begin vars
# Exit codes returned by this script
STATE_OK=0 # define the exit code if status is OK
STATE_WARNING=1 # define the exit code if status is Warning
STATE_CRITICAL=2 # define the exit code if status is Critical (replaced by STATE_WARNING when soft fail is on)
STATE_UNKNOWN=3 # define the exit code if status is Unknown
SOFT_FAIL=0 # default for soft fail: 1 = report critical as warning, 0 = off
# The -s option and the override check work on the lowercase soft_fail.
# Seed it from the default above so SOFT_FAIL actually takes effect and the
# value can never be inherited from the caller's environment.
soft_fail=$SOFT_FAIL
# Set path
PATH=$PATH:/usr/sbin:/sbin
export PATH
### End vars
#########################################################################
help="check_zpools.sh (c) 2006-2023 multiple authors\n
Usage: $0 -p (poolname|ALL) [-w warnpercent] [-c critpercent]\n
Example: $0 -p ALL -w 80 -c 90"
Usage: $0 -p (poolname|ALL) [-w warnpercent] [-c critpercent] [-s]\n
-s: Soft fail - report critical errors as warnings (exit code 1 instead of 2)\n
Example: $0 -p ALL -w 80 -c 90\n
Example: $0 -p ALL -w 80 -c 90 -s"
#########################################################################
# Check necessary commands are available
for cmd in zpool [
Expand All @@ -76,12 +81,13 @@ if [ "${1}" = "--help" ] || [ "${#}" = "0" ];
fi
#########################################################################
# Get user-given variables
while getopts "p:w:c:" Input;
while getopts "p:w:c:s" Input;
do
case ${Input} in
p) pool=${OPTARG};;
w) warn=${OPTARG};;
c) crit=${OPTARG};;
s) soft_fail=1;;
*) echo -e "$help"
exit $STATE_UNKNOWN
;;
Expand All @@ -96,6 +102,11 @@ if [[ -n $warn ]] && [[ -z $crit ]]; then echo "Both warning and critical thresh
# A critical threshold without a warning threshold is rejected: print the
# reason and leave with the UNKNOWN exit code.
if [[ -z $warn ]] && [[ -n $crit ]]; then echo "Both warning and critical thresholds must be set"; exit $STATE_UNKNOWN; fi
# Inverted thresholds are rejected as well. -gt compares numerically; when both
# values are empty the test is false, so running without thresholds passes.
if [[ $warn -gt $crit ]]; then echo "Warning threshold cannot be greater than critical"; exit $STATE_UNKNOWN; fi
#########################################################################
# Soft fail (-s): from here on, every use of STATE_CRITICAL yields the
# warning exit code instead
if (( soft_fail == 1 ))
then
  STATE_CRITICAL=${STATE_WARNING}
fi
#########################################################################
# What needs to be checked?
## Check all pools
if [ "$pool" = "ALL" ]
Expand All @@ -116,13 +127,14 @@ then
# Check with thresholds
if [[ -n $warn ]] && [[ -n $crit ]]
then
  # This block fills slot p of error[] for the current pool, so it runs once
  # per pool (the enclosing loop is presumably just outside this view).
  # fcrit becomes the script's exit code when any error was recorded, so it
  # may only be raised here, never reset: assigning STATE_WARNING on every
  # pass would let a later healthy pool hide a critical finding from an
  # earlier one. Default it once, then escalate.
  : "${fcrit:=$STATE_WARNING}"
  if [ "$HEALTH" != "ONLINE" ]; then error[${p}]="$POOL health is $HEALTH // "; fcrit=$STATE_CRITICAL; fi
  if [[ $CAPACITY -ge $crit ]]; then error[${p}]+="POOL $POOL usage is CRITICAL (${CAPACITY}%) // "; fcrit=$STATE_CRITICAL; fi
  # Usage between the two thresholds is reported but keeps the current severity
  if [[ $CAPACITY -ge $warn && $CAPACITY -lt $crit ]]; then error[$p]+="POOL $POOL usage is WARNING (${CAPACITY}%)"; fi
# Check without thresholds
else
  # Only the pool health can raise an error in this mode, and it is always critical
  if [ "$HEALTH" != "ONLINE" ]
  then error[${p}]="$POOL health is $HEALTH"; fcrit=$STATE_CRITICAL
  fi
fi
perfdata[$p]="$POOL=${CAPACITY}% "
Expand All @@ -131,9 +143,8 @@ then

if [[ ${#error[*]} -gt 0 ]]
then
if [[ $fcrit -eq 1 ]]; then exit_code=2; else exit_code=1; fi
echo "ZFS POOL ALARM: ${error[*]}|${perfdata[*]}"; exit ${exit_code}
else echo "ALL ZFS POOLS OK (${POOLS[*]})|${perfdata[*]}"; exit 0
echo "ZFS POOL ALARM: ${error[*]}|${perfdata[*]}"; exit ${fcrit}
else echo "ALL ZFS POOLS OK (${POOLS[*]})|${perfdata[*]}"; exit ${STATE_OK}
fi

## Check single pool
Expand Down Expand Up @@ -171,3 +182,4 @@ fi

echo "UNKNOWN - Should never reach this part"
exit ${STATE_UNKNOWN}