@@ -33,7 +33,6 @@ usage() {
3333 - Unbound Persistent Volumes in unboundpvs mode; default is 5
3434 - Job failed count in jobs mode; default is 2
3535 - Pvc storage utilization; default is 90%
36- -b Brief mode (more suitable for Zabbix)
3736 -M EXIT_CODE Exit code when resource is missing; default is 2 (CRITICAL)
3837 -h Show this help and exit
3938
@@ -49,25 +48,20 @@ usage() {
4948 tls Check for tls secrets expiration dates
5049 pvc Check for pvc utilization
5150 unboundpvs Check for unbound persistent volumes
52- components Check for health of k8s components (deprecated in K8s 1.19+)
5351 EOF
5452
5553 exit 2
5654}
5755
58- BRIEF=0
5956TIMEOUT=15
57+ unset NAME
6058
6159die () {
62- if [ " $BRIEF " = 1 ]; then
63- echo " -1"
64- else
6560 echo " $1 "
66- fi
6761 exit " ${2:- 2} "
6862}
6963
70- while getopts " :m:M:H:T:t:K:N:n:o:c:w:bh " arg; do
64+ while getopts " :m:M:H:T:t:K:N:n:o:c:w:h " arg; do
7165 case $arg in
7266 h) usage ;;
7367 m) MODE=" $OPTARG " ;;
@@ -81,7 +75,6 @@ while getopts ":m:M:H:T:t:K:N:n:o:c:w:bh" arg; do
8175 n) NAME=" $OPTARG " ;;
8276 w) WARN=" $OPTARG " ;;
8377 c) CRIT=" $OPTARG " ;;
84- b) BRIEF=1 ;;
8578 * ) usage ;;
8679 esac
8780done
@@ -151,7 +144,7 @@ mode_apiserver() {
151144 data=$( getJSON " " " healthz" )
152145 [ $? -gt 0 ] && die " $data "
153146 if [ " $data " = ok ]; then
154- OUTPUT=" OK. Kubernetes apiserver health is OK "
147+ OUTPUT=" OK. Kubernetes apiserver is healthy "
155148 EXITCODE=0
156149 else
157150 data=$( echo " $data " | grep " \[\-\]" )
@@ -171,15 +164,15 @@ mode_nodes() {
171164 .status" ) "
172165 if [ " $ready " != True ]; then
173166 EXITCODE=2
174- OUTPUT=" ${OUTPUT} Node $node not ready. "
167+ OUTPUT=" ERROR. ${OUTPUT} Node $node not ready\n "
175168 fi
176169 for condition in OutOfDisk MemoryPressure DiskPressure; do
177170 state=" $( echo " $data " | jq -r " .items[] | select(.metadata.name==\" $node \" ) | \
178171 .status.conditions[] | select(.type==\" $condition \" ) | \
179172 .status" ) "
180173 if [ " $state " = True ]; then
181174 [ $EXITCODE -lt 1 ] && EXITCODE=1
182- OUTPUT=" $ OUTPUT $node $condition . "
175+ OUTPUT=" WARN. ${ OUTPUT} $node $condition \n "
183176 fi
184177 done
185178 done
@@ -189,43 +182,8 @@ mode_nodes() {
189182 OUTPUT=" No nodes found"
190183 EXITCODE=" $MISSING_EXITCODE "
191184 else
192- OUTPUT=" OK. ${# nodes[@]} nodes are Ready"
193- BRIEF_OUTPUT=" ${# nodes[@]} "
185+ OUTPUT=" OK. ${# nodes[@]} nodes are ready"
194186 fi
195- else
196- BRIEF_OUTPUT=" -1"
197- fi
198- }
199-
200- mode_components () {
201- healthy_comps=" "
202- unhealthy_comps=" "
203- data=" $( getJSON " get cs" " api/v1/componentstatuses" ) "
204- [ $? -gt 0 ] && die " $data "
205- components=($( echo " $data " | jq -r " .items[].metadata.name" ) )
206-
207- for comp in " ${components[@]} " ; do
208- healthy=$( echo " $data " | jq -r " .items[] | select(.metadata.name==\" $comp \" ) | \
209- .conditions[] | select(.type==\" Healthy\" ) | \
210- .status" )
211- if [ " $healthy " != True ]; then
212- EXITCODE=2
213- unhealthy_comps=" $unhealthy_comps $comp "
214- else
215- healthy_comps=" $healthy_comps $comp "
216- fi
217- done
218-
219- BRIEF_OUTPUT=" $healthy_comps "
220- if [ $EXITCODE = 0 ]; then
221- if [ -z " ${components[*]} " ]; then
222- OUTPUT=" No components found"
223- EXITCODE=" $MISSING_EXITCODE "
224- else
225- OUTPUT=" OK. Healthy: $healthy_comps "
226- fi
227- else
228- OUTPUT=" CRITICAL. Unhealthy: $unhealthy_comps ; Healthy: $healthy_comps "
229187 fi
230188}
231189
@@ -246,9 +204,7 @@ mode_unboundpvs() {
246204 select(.status.phase!=\" Bound\" ) | \
247205 \" \(.metadata.name):\(.status.phase):\(.spec.claimRef.uid)\" " )
248206
249- BRIEF_OUTPUT=" ${# pvsArr[*]} "
250207 if [ ${# unboundPvsArr[*]} -gt 0 ]; then
251- BRIEF_OUTPUT=" -${# unboundPvsArr[*]} "
252208 if [ ${# unboundPvsArr[*]} -ge " $CRIT " ]; then
253209 OUTPUT=" CRITICAL. Unbound persistentvolumes:\n$OUTPUT "
254210 EXITCODE=2
@@ -387,7 +343,6 @@ mode_tls() {
387343 done
388344 done
389345
390- BRIEF_OUTPUT=" $count_ok "
391346 if [ $EXITCODE = 0 ]; then
392347 if [ -z " $ns " ]; then
393348 OUTPUT=" No TLS certs found"
@@ -465,28 +420,30 @@ mode_pods() {
465420 else
466421 (( count_failed++ ))
467422 fi
423+ if [ " $restart_count " -ge " $WARN " ]; then
424+ OUTPUT=" ${OUTPUT} Container $bad_container : $restart_count restarts.\n"
425+ EXITCODE=1
426+ if [ " $restart_count " -ge " $CRIT " ]; then
427+ EXITCODE=2
428+ fi
429+ fi
468430 done
469431 done
470432
471- if [ " $max_restart_count " -ge " $WARN " ]; then
472- BRIEF_OUTPUT=" -$max_restart_count "
473- else
474- BRIEF_OUTPUT=" $count_ready "
475- fi
476-
433+ if [ $EXITCODE = 0 ]; then
477434 if [ -z " $ns " ]; then
478435 OUTPUT=" No pods found"
479436 EXITCODE=" $MISSING_EXITCODE "
480437 else
481- if [ " $max_restart_count " -ge " $WARN " ]; then
482- OUTPUT=" Container $bad_container : $max_restart_count restarts. "
483- EXITCODE=1
484- if [ " $max_restart_count " -ge " $CRIT " ]; then
485- EXITCODE=2
438+ OUTPUT=" OK. $count_ready pods ready, $count_succeeded pods succeeded, $count_failed pods not ready\n${OUTPUT} "
439+ fi
440+ else
441+ if [ $EXITCODE = 1 ]; then
442+ OUTPUT=" WARNING. $count_ready pods ready, $count_succeeded pods succeeded, $count_failed pods not ready\n${OUTPUT} "
443+ else
444+ OUTPUT=" ERROR. $count_ready pods ready, $count_succeeded pods succeeded, $count_failed pods not ready\n${OUTPUT} "
486445 fi
487446 fi
488- OUTPUT=" $OUTPUT$count_ready pods ready, $count_succeeded pods succeeded, $count_failed pods not ready"
489- fi
490447}
491448
492449mode_deployments () {
@@ -525,7 +482,6 @@ mode_deployments() {
525482 done
526483 done
527484
528- BRIEF_OUTPUT=" $count_avail "
529485 if [ $EXITCODE = 0 ]; then
530486 if [ -z " $ns " ]; then
531487 OUTPUT=" No deployments found"
@@ -586,7 +542,6 @@ mode_daemonsets() {
586542 done
587543 done
588544
589- BRIEF_OUTPUT=" $count_avail "
590545 if [ $EXITCODE = 0 ]; then
591546 if [ -z " $ns " ]; then
592547 OUTPUT=" No daemonsets found"
@@ -648,7 +603,6 @@ mode_replicasets() {
648603 done
649604 done
650605
651- BRIEF_OUTPUT=" $count_avail "
652606 if [ $EXITCODE = 0 ]; then
653607 if [ -z " $ns " ]; then
654608 OUTPUT=" No replicasets found"
@@ -701,7 +655,7 @@ mode_statefulsets() {
701655 done < <( echo " $data " | \
702656 jq -r " .items[] | select(.metadata.namespace==\" $ns \" and .metadata.name==\" $rs \" ) | \
703657 .status | to_entries | map(\" \(.key)=\(.value)\" ) | .[]" )
704- OUTPUT=" Statefulset $ns /$rs ${statusArr[readyReplicas]} /${statusArr[currentReplicas]} ready"
658+ OUTPUT=" ${OUTPUT} Statefulset $ns /$rs ${statusArr[readyReplicas]} /${statusArr[currentReplicas]} ready\n "
705659 if [ " ${statusArr[readyReplicas]} " != " ${statusArr[currentReplicas]} " ]; then
706660 (( count_failed++ ))
707661 EXITCODE=2
@@ -711,7 +665,6 @@ mode_statefulsets() {
711665 done
712666 done
713667
714- BRIEF_OUTPUT=" $count_avail "
715668 if [ $EXITCODE = 0 ]; then
716669 if [ -z " $ns " ]; then
717670 OUTPUT=" No statefulsets found"
@@ -766,7 +719,7 @@ mode_jobs() {
766719 job_fail_count=$( echo " $data " | jq -r " .items[] | select(.status.failed and .metadata.name==\" $job \" ) | .status.failed" )
767720 total_failed_count=" $(( total_failed_count+ job_fail_count)) "
768721 if [ " $job_fail_count " -ge " ${WARN} " ]; then
769- OUTPUT=" ${OUTPUT} Job $job has $job_fail_count failures. "
722+ OUTPUT=" ${OUTPUT} Job $job has $job_fail_count failures\n "
770723 EXITCODE=1
771724 elif [ " $job_fail_count " -ge " ${CRIT} " ]; then
772725 EXITCODE=2
@@ -783,7 +736,7 @@ mode_jobs() {
783736 if [ -z " $ns " ]; then
784737 OUTPUT=" No jobs found"
785738 else
786- OUTPUT=" OK. $total_jobs checked. ${total_failed_count} failed jobs is below threshold"
739+ OUTPUT=" OK. $total_jobs checked. ${total_failed_count} failed jobs is below threshold\n "
787740 fi
788741 else
789742 if [ " $EXITCODE " -eq 1 ] ; then
@@ -792,14 +745,13 @@ mode_jobs() {
792745 OUTPUT=" CRITICAL. ${OUTPUT} "
793746 fi
794747 if [ -z " $NAME " ] && [ " $EXITCODE " -ge 1 ] ; then
795- OUTPUT=" ${OUTPUT}${total_failed_count} jobs in total have failed"
748+ OUTPUT=" ${OUTPUT}${total_failed_count} jobs have failed"
796749 fi
797750 fi
798751}
799752
800753case " $MODE " in
801754 (apiserver) mode_apiserver ;;
802- (components) mode_components ;;
803755 (daemonsets) mode_daemonsets ;;
804756 (deployments) mode_deployments ;;
805757 (nodes) mode_nodes ;;
@@ -813,16 +765,6 @@ case "$MODE" in
813765 (* ) usage ;;
814766esac
815767
816- if [ " $BRIEF " = 1 ]; then
817- if [ " $EXITCODE " = 0 ]; then
818- echo " ${BRIEF_OUTPUT:- 1} "
819- elif [ -z " $BRIEF_FAIL_OUTPUT " ]; then
820- echo " ${BRIEF_OUTPUT:- 0} "
821- else
822- echo " ${BRIEF_FAIL_OUTPUT} "
823- fi
824- else
825- echo " $OUTPUT "
826- fi
768+ printf " $OUTPUT "
827769
828770exit $EXITCODE
0 commit comments