-
Notifications
You must be signed in to change notification settings - Fork 57
/
check_ceph_health
executable file
·200 lines (169 loc) · 6.65 KB
/
check_ceph_health
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013-2016 SWITCH http://www.switch.ch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function
import argparse
import os
import subprocess
import sys
import re
import json
__version__ = '1.7.0'
# default ceph values
CEPH_ADM_COMMAND = '/usr/sbin/cephadm'
CEPH_COMMAND = '/usr/bin/ceph'
# nagios exit code
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_ERROR = 2
STATUS_UNKNOWN = 3
def main():
# parse args
parser = argparse.ArgumentParser(description="'ceph health' nagios plugin.")
parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_COMMAND)
parser.add_argument('-A','--admexe', help='cephadm executable [%s]' % CEPH_ADM_COMMAND)
parser.add_argument('--cluster', help='ceph cluster name')
parser.add_argument('-c','--conf', help='alternative ceph conf file')
parser.add_argument('-m','--monaddress', help='ceph monitor address[:port]')
parser.add_argument('-i','--id', help='ceph client id')
parser.add_argument('-n','--name', help='ceph client name')
parser.add_argument('-k','--keyring', help='ceph client keyring file')
parser.add_argument('--check', help='regexp of which check(s) to check (luminous+) '
"Can be inverted, e.g. '^((?!(PG_DEGRADED|OBJECT_MISPLACED)$).)*$'")
parser.add_argument('-w','--whitelist', help='whitelist regexp for ceph health warnings')
parser.add_argument('-d','--detail', help="exec 'ceph health detail'", action='store_true')
parser.add_argument('-V','--version', help='show version and exit', action='store_true')
parser.add_argument('-a','--cephadm', help='uses cephadm to execute the command', action='store_true')
parser.add_argument('-s','--skip-muted', help='skip muted checks', action='store_true')
args = parser.parse_args()
# validate args
cephadm_exec = args.admexe if args.admexe else CEPH_ADM_COMMAND
ceph_exec = args.exe if args.exe else CEPH_COMMAND
if args.cephadm:
if not os.path.exists(cephadm_exec):
print("ERROR: cephadm executable '%s' doesn't exist" % cephadm_exec)
return STATUS_UNKNOWN
else:
if not os.path.exists(ceph_exec):
print("ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
return STATUS_UNKNOWN
if args.version:
print('version %s' % __version__)
return STATUS_OK
if args.conf and not os.path.exists(args.conf):
print("ERROR: ceph conf file '%s' doesn't exist" % args.conf)
return STATUS_UNKNOWN
if args.keyring and not os.path.exists(args.keyring):
print("ERROR: keyring file '%s' doesn't exist" % args.keyring)
return STATUS_UNKNOWN
# build command
ceph_health = [ceph_exec]
if args.cephadm:
# Prepend the command with the cephadm binary and the shell command
ceph_health = [cephadm_exec, 'shell'] + ceph_health
if args.monaddress:
ceph_health.append('-m')
ceph_health.append(args.monaddress)
if args.cluster:
ceph_health.append('--cluster')
ceph_health.append(args.cluster)
if args.conf:
ceph_health.append('-c')
ceph_health.append(args.conf)
if args.id:
ceph_health.append('--id')
ceph_health.append(args.id)
if args.name:
ceph_health.append('--name')
ceph_health.append(args.name)
if args.keyring:
ceph_health.append('--keyring')
ceph_health.append(args.keyring)
ceph_health.append('health')
if args.detail:
ceph_health.append('detail')
ceph_health.append('--format')
ceph_health.append('json')
#print(ceph_health)
# exec command
p = subprocess.Popen(ceph_health,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
output, err = p.communicate()
try:
output = json.loads(output)
except ValueError:
output = dict()
# parse output
# print "output:", output
#print "err:", err
if output:
ret = STATUS_OK
msg = ""
extended = []
if 'checks' in output:
#luminous
for check,status in output['checks'].items():
# skip check if not selected
if args.check and not re.search(args.check, check):
continue
if args.skip_muted and ('muted' in status and status['muted']):
continue
check_detail = "%s( %s )" % (check, status['summary']['message'])
if status["severity"] == "HEALTH_ERR":
extended.append(msg)
msg = "CRITICAL: %s" % check_detail
ret = STATUS_ERROR
continue
if args.whitelist and re.search(args.whitelist,status['summary']['message']):
continue
check_msg = "CRITICAL: %s" % check_detail
if not msg:
msg = check_msg
ret = STATUS_ERROR
else:
extended.append(check_msg)
else:
#pre-luminous
for status in output["summary"]:
if status != "HEALTH_OK":
if status == "HEALTH_ERROR":
msg = "CRITICAL: %s" % status['summary']
ret = STATUS_ERROR
continue
if args.whitelist and re.search(args.whitelist,status['summary']):
continue
if not msg:
msg = "CRITICAL: %s" % status['summary']
ret = STATUS_ERROR
else:
extended.append("CRITICAL: %s" % status['summary'])
if msg:
print(msg)
else:
print("HEALTH OK")
if extended: print('\n'.join(extended))
return ret
elif err:
# read only first line of error
one_line = err.split('\n')[0]
if '-1 ' in one_line:
idx = one_line.rfind('-1 ')
print('ERROR: %s: %s' % (ceph_exec, one_line[idx+len('-1 '):]))
else:
print(one_line)
return STATUS_UNKNOWN
if __name__ == "__main__":
sys.exit(main())