Skip to content

Commit f1f018c

Browse files
[Ceph] Add Integration Package with Cluster Status data stream (#5216)
* add cluster status data stream * update changelog.yml
1 parent c9f3c19 commit f1f018c

File tree

18 files changed

+966
-19
lines changed

18 files changed

+966
-19
lines changed

packages/ceph/_dev/build/docs/README.md

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,20 @@
66

77
Use the Ceph integration to:
88

9-
- Collect metrics related to the cluster disk, cluster health, Object Storage Daemons (OSD) performance, Object Storage Daemons (OSD) pool stats, Object Storage Daemons (OSD) tree and pool disk.
9+
- Collect metrics related to the cluster disk, cluster health, cluster status, Object Storage Daemons (OSD) performance, Object Storage Daemons (OSD) pool stats, Object Storage Daemons (OSD) tree and pool disk.
1010
- Create visualizations to monitor, measure and analyze the usage trend and key data, and derive business insights.
1111
- Create alerts to reduce the MTTD and also the MTTR by referencing relevant logs when troubleshooting an issue.
1212

1313
## Data streams
1414

1515
The Ceph integration collects metrics data.
1616

17-
Metrics give you insight into the statistics of the Ceph. The Metric data streams collected by the Ceph integration are `cluster_disk`, `cluster_health`, `osd_performance`, `osd_pool_stats`, `osd_tree` and `pool_disk`, so that the user can monitor and troubleshoot the performance of the Ceph instance.
17+
Metrics give you insight into the statistics of Ceph. The metric data streams collected by the Ceph integration are `cluster_disk`, `cluster_health`, `cluster_status`, `osd_performance`, `osd_pool_stats`, `osd_tree` and `pool_disk`, which allow users to monitor and troubleshoot the performance of the Ceph instance.
1818

19-
Data stream:
19+
Data streams:
2020
- `cluster_disk`: Collects information related to overall storage of the cluster.
2121
- `cluster_health`: Collects information related to health of the cluster.
22+
- `cluster_status`: Collects information related to status of the cluster.
2223
- `osd_performance`: Collects information related to Object Storage Daemons (OSD) performance.
2324
- `osd_pool_stats`: Collects information related to client I/O rates.
2425
- `osd_tree`: Collects information related to structure of the Object Storage Daemons (OSD) tree.
@@ -104,6 +105,14 @@ This is the `cluster_health` data stream. This data stream collects metrics rela
104105

105106
{{fields "cluster_health"}}
106107

108+
### Cluster Status
109+
110+
This is the `cluster_status` data stream. This data stream collects metrics related to cluster health status, number of monitors in the cluster, cluster version, cluster placement group (PG) count, cluster OSD states and cluster storage.
111+
112+
{{event "cluster_status"}}
113+
114+
{{fields "cluster_status"}}
115+
107116
### OSD Performance
108117

109118
This is the `osd_performance` data stream. This data stream collects metrics related to Object Storage Daemon (OSD) id, commit latency and apply latency.

packages/ceph/_dev/deploy/docker/files/config.yml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,53 +3,53 @@ rules:
33
query_params:
44
wait: 1
55
methods: ['POST']
6-
request_body: '{"format":"json","prefix":"time-sync-status"}'
6+
request_body: '{"format":"json","prefix":"osd perf"}'
77
responses:
88
- status_code: 200
99
body: |-
10-
{ "failed":[],"finished":[{"command":"time-sync-status format=json","outb":"{\"timechecks\":{\"epoch\":7,\"round\":0,\"round_status\":\"finished\"}}","outs":""}],"has_failed":false,"id":"140055401700304","is_finished":true,"is_waiting":false,"running":[],"state":"success","waiting":[]}
10+
{"failed":[],"finished":[{"command":"osd perf format=json","outb":"{\"pg_ready\":true,\"osdstats\":{\"osd_perf_infos\":[{\"id\":1,\"perf_stats\":{\"commit_latency_ms\":5.621,\"apply_latency_ms\":3.495,\"commit_latency_ns\":5621000,\"apply_latency_ns\":3495000}},{\"id\":0,\"perf_stats\":{\"commit_latency_ms\":0,\"apply_latency_ms\":0,\"commit_latency_ns\":0,\"apply_latency_ns\":0}}]}\n}","outs":""}],"has_failed":false,"id":"140008221633504","is_finished":true,"is_waiting":false,"running":[],"state":"success","waiting":[]}
1111
- path: /request
1212
query_params:
1313
wait: 1
1414
methods: ['POST']
15-
request_body: '{"format":"json","prefix":"osd perf"}'
15+
request_body: '{"format":"json","prefix":"time-sync-status"}'
1616
responses:
1717
- status_code: 200
1818
body: |-
19-
{"failed":[],"finished":[{"command":"osd perf format=json","outb":"{\"pg_ready\":true,\"osdstats\":{\"osd_perf_infos\":[{\"id\":1,\"perf_stats\":{\"commit_latency_ms\":5.621,\"apply_latency_ms\":3.495,\"commit_latency_ns\":5621000,\"apply_latency_ns\":3495000}},{\"id\":0,\"perf_stats\":{\"commit_latency_ms\":0,\"apply_latency_ms\":0,\"commit_latency_ns\":0,\"apply_latency_ns\":0}}]}\n}","outs":""}],"has_failed":false,"id":"140008221633504","is_finished":true,"is_waiting":false,"running":[],"state":"success","waiting":[]}
19+
{"failed":[],"finished":[{"command":"time-sync-status format=json","outb":"{\"timechecks\":{\"epoch\":7,\"round\":0,\"round_status\":\"finished\"}}","outs":""}],"has_failed":false,"id":"140055401700304","is_finished":true,"is_waiting":false,"running":[],"state":"success","waiting":[]}
2020
- path: /request
2121
query_params:
2222
wait: 1
2323
methods: ['POST']
24-
request_body: '{"format":"json","prefix":"osd pool stats"}'
24+
request_body: '{"format":"json","prefix":"df"}'
2525
responses:
2626
- status_code: 200
2727
body: |-
28-
{"failed":[],"finished":[{"command":"osd pool stats format=json","outb":"[{\"pool_name\":\"device_health_metrics\",\"pool_id\":1,\"recovery\":{},\"recovery_rate\":{},\"client_io_rate\":{\"read_bytes_sec\":6622518,\"write_bytes_sec\":6622518,\"read_op_per_sec\":11,\"write_op_per_sec\":11}},{\"pool_name\":\"elastic\",\"pool_id\":9,\"recovery\":{},\"recovery_rate\":{},\"client_io_rate\":{}}]","outs":""}],"has_failed":false,"id":"140222622573952","is_finished":true,"is_waiting":false,"running":[],"state":"success","waiting":[]}
28+
{"failed":[],"finished":[{"command":"df format=json","outb":"{\"stats\":{\"total_bytes\":85882568704,\"total_avail_bytes\":81199562752,\"total_used_bytes\":388038656,\"total_used_raw_bytes\":4683005952,\"total_used_raw_ratio\":0.054528012871742249,\"num_osds\":4,\"num_per_pool_osds\":4,\"num_per_pool_omap_osds\":4},\"stats_by_class\":{\"hdd\":{\"total_bytes\":85882568704,\"total_avail_bytes\":81199562752,\"total_used_bytes\":388038656,\"total_used_raw_bytes\":4683005952,\"total_used_raw_ratio\":0.054528012871742249}},\"pools\":[{\"name\":\"device_health_metrics\",\"id\":1,\"stats\":{\"stored\":2142673,\"objects\":4,\"kb_used\":6336,\"bytes_used\":6488064,\"percent_used\":8.4362458437681198e-05,\"max_avail\":25633505280}},{\"name\":\"elk\",\"id\":4,\"stats\":{\"stored\":1176572,\"objects\":3,\"kb_used\":3648,\"bytes_used\":3735552,\"percent_used\":4.8574063839623705e-05,\"max_avail\":25633505280}},{\"name\":\"elastic\",\"id\":9,\"stats\":{\"stored\":1349210,\"objects\":5,\"kb_used\":4224,\"bytes_used\":4325376,\"percent_used\":5.6243221479235217e-05,\"max_avail\":25633505280}}]}\n","outs":""}],"has_failed":false,"id":"140222991325696","is_finished":true,"is_waiting":false,"running":[],"state":"success","waiting":[]}
2929
- path: /request
3030
query_params:
3131
wait: 1
3232
methods: ['POST']
33-
request_body: '{"format":"json","prefix":"df"}'
33+
request_body: '{"format":"json","prefix":"osd tree"}'
3434
responses:
3535
- status_code: 200
3636
body: |-
37-
{"failed":[],"finished":[{"command":"df format=json","outb":"{\"stats\":{\"total_bytes\":85882568704,\"total_avail_bytes\":81199562752,\"total_used_bytes\":388038656,\"total_used_raw_bytes\":4683005952,\"total_used_raw_ratio\":0.054528012871742249,\"num_osds\":4,\"num_per_pool_osds\":4,\"num_per_pool_omap_osds\":4},\"stats_by_class\":{\"hdd\":{\"total_bytes\":85882568704,\"total_avail_bytes\":81199562752,\"total_used_bytes\":388038656,\"total_used_raw_bytes\":4683005952,\"total_used_raw_ratio\":0.054528012871742249}},\"pools\":[{\"name\":\"device_health_metrics\",\"id\":1,\"stats\":{\"stored\":2142673,\"objects\":4,\"kb_used\":6336,\"bytes_used\":6488064,\"percent_used\":8.4362458437681198e-05,\"max_avail\":25633505280}},{\"name\":\"elk\",\"id\":4,\"stats\":{\"stored\":1176572,\"objects\":3,\"kb_used\":3648,\"bytes_used\":3735552,\"percent_used\":4.8574063839623705e-05,\"max_avail\":25633505280}},{\"name\":\"elastic\",\"id\":9,\"stats\":{\"stored\":1349210,\"objects\":5,\"kb_used\":4224,\"bytes_used\":4325376,\"percent_used\":5.6243221479235217e-05,\"max_avail\":25633505280}}]}\n","outs":""}],"has_failed":false,"id":"140222991325696","is_finished":true,"is_waiting":false,"running":[],"state":"success","waiting":[]}
37+
{"failed":[],"finished":[{"command":"osd tree format=json","outb":"{\"nodes\":[{\"id\":0,\"device_class\":\"hdd\",\"name\":\"osd.0\",\"type\":\"osd\",\"type_id\":0,\"crush_weight\":0.0194854736328125,\"depth\":2,\"pool_weights\":{},\"exists\":1,\"status\":\"up\",\"reweight\":1,\"primary_affinity\":1},{\"id\":-7,\"name\":\"node02\",\"type\":\"host\",\"type_id\":1,\"pool_weights\":{},\"children\":[2]},{\"id\":2,\"device_class\":\"hdd\",\"name\":\"osd.2\",\"type\":\"osd\",\"type_id\":0,\"crush_weight\":0.0194854736328125,\"depth\":2,\"pool_weights\":{},\"exists\":1,\"status\":\"up\",\"reweight\":1,\"primary_affinity\":1},{\"id\":-9,\"name\":\"node03\",\"type\":\"host\",\"type_id\":1,\"pool_weights\":{},\"children\":[3]},{\"id\":3,\"device_class\":\"hdd\",\"name\":\"osd.3\",\"type\":\"osd\",\"type_id\":0,\"crush_weight\":0.0194854736328125,\"depth\":2,\"pool_weights\":{},\"exists\":1,\"status\":\"up\",\"reweight\":1,\"primary_affinity\":1},{\"id\":-5,\"name\":\"node04\",\"type\":\"host\",\"type_id\":1,\"pool_weights\":{},\"children\":[1]},{\"id\":1,\"device_class\":\"hdd\",\"name\":\"osd.1\",\"type\":\"osd\",\"type_id\":0,\"crush_weight\":0.0194854736328125,\"depth\":2,\"pool_weights\":{},\"exists\":1,\"status\":\"up\",\"reweight\":1,\"primary_affinity\":1}],\"stray\":[{\"id\":4,\"name\":\"osd.4\",\"type\":\"osd\",\"type_id\":0,\"crush_weight\":0,\"depth\":0,\"exists\":1,\"status\":\"destroyed\",\"reweight\":0,\"primary_affinity\":1},{\"id\":5,\"name\":\"osd.5\",\"type\":\"osd\",\"type_id\":0,\"crush_weight\":0,\"depth\":0,\"exists\":1,\"status\":\"destroyed\",\"reweight\":0,\"primary_affinity\":1}]}\n","outs":""}],"has_failed":false,"id":"140222986406304","is_finished":true,"is_waiting":false,"running":[],"state":"success","waiting":[]}
3838
- path: /request
3939
query_params:
4040
wait: 1
4141
methods: ['POST']
42-
request_body: '{"format":"json","prefix":"osd tree"}'
42+
request_body: '{"format":"json","prefix":"osd pool stats"}'
4343
responses:
4444
- status_code: 200
4545
body: |-
46-
{"failed":[],"finished":[{"command":"osd tree format=json","outb":"{\"nodes\":[{\"id\":0,\"device_class\":\"hdd\",\"name\":\"osd.0\",\"type\":\"osd\",\"type_id\":0,\"crush_weight\":0.0194854736328125,\"depth\":2,\"pool_weights\":{},\"exists\":1,\"status\":\"up\",\"reweight\":1,\"primary_affinity\":1},{\"id\":-7,\"name\":\"node02\",\"type\":\"host\",\"type_id\":1,\"pool_weights\":{},\"children\":[2]},{\"id\":2,\"device_class\":\"hdd\",\"name\":\"osd.2\",\"type\":\"osd\",\"type_id\":0,\"crush_weight\":0.0194854736328125,\"depth\":2,\"pool_weights\":{},\"exists\":1,\"status\":\"up\",\"reweight\":1,\"primary_affinity\":1},{\"id\":-9,\"name\":\"node03\",\"type\":\"host\",\"type_id\":1,\"pool_weights\":{},\"children\":[3]},{\"id\":3,\"device_class\":\"hdd\",\"name\":\"osd.3\",\"type\":\"osd\",\"type_id\":0,\"crush_weight\":0.0194854736328125,\"depth\":2,\"pool_weights\":{},\"exists\":1,\"status\":\"up\",\"reweight\":1,\"primary_affinity\":1},{\"id\":-5,\"name\":\"node04\",\"type\":\"host\",\"type_id\":1,\"pool_weights\":{},\"children\":[1]},{\"id\":1,\"device_class\":\"hdd\",\"name\":\"osd.1\",\"type\":\"osd\",\"type_id\":0,\"crush_weight\":0.0194854736328125,\"depth\":2,\"pool_weights\":{},\"exists\":1,\"status\":\"up\",\"reweight\":1,\"primary_affinity\":1}],\"stray\":[{\"id\":4,\"name\":\"osd.4\",\"type\":\"osd\",\"type_id\":0,\"crush_weight\":0,\"depth\":0,\"exists\":1,\"status\":\"destroyed\",\"reweight\":0,\"primary_affinity\":1},{\"id\":5,\"name\":\"osd.5\",\"type\":\"osd\",\"type_id\":0,\"crush_weight\":0,\"depth\":0,\"exists\":1,\"status\":\"destroyed\",\"reweight\":0,\"primary_affinity\":1}]}\n","outs":""}],"has_failed":false,"id":"140222986406304","is_finished":true,"is_waiting":false,"running":[],"state":"success","waiting":[]}
46+
{"failed":[],"finished":[{"command":"osd pool stats format=json","outb":"[{\"pool_name\":\"device_health_metrics\",\"pool_id\":1,\"recovery\":{},\"recovery_rate\":{},\"client_io_rate\":{\"read_bytes_sec\":6622518,\"write_bytes_sec\":6622518,\"read_op_per_sec\":11,\"write_op_per_sec\":11}},{\"pool_name\":\"elastic\",\"pool_id\":9,\"recovery\":{},\"recovery_rate\":{},\"client_io_rate\":{}}]","outs":""}],"has_failed":false,"id":"140222622573952","is_finished":true,"is_waiting":false,"running":[],"state":"success","waiting":[]}
4747
- path: /request
4848
query_params:
4949
wait: 1
5050
methods: ['POST']
51-
request_body: '{"format":"json","prefix":"df"}'
51+
request_body: '{"format":"json","prefix":"status"}'
5252
responses:
5353
- status_code: 200
5454
body: |-
55-
{"failed":[],"finished":[{"command":"df format=json","outb":"{\"stats\":{\"total_bytes\":85882568704,\"total_avail_bytes\":81199562752,\"total_used_bytes\":388038656,\"total_used_raw_bytes\":4683005952,\"total_used_raw_ratio\":0.054528012871742249,\"num_osds\":4,\"num_per_pool_osds\":4,\"num_per_pool_omap_osds\":4},\"stats_by_class\":{\"hdd\":{\"total_bytes\":85882568704,\"total_avail_bytes\":81199562752,\"total_used_bytes\":388038656,\"total_used_raw_bytes\":4683005952,\"total_used_raw_ratio\":0.054528012871742249}},\"pools\":[{\"name\":\"device_health_metrics\",\"id\":1,\"stats\":{\"stored\":2142673,\"objects\":4,\"kb_used\":6336,\"bytes_used\":6488064,\"percent_used\":8.4362458437681198e-05,\"max_avail\":25633505280}},{\"name\":\"elk\",\"id\":4,\"stats\":{\"stored\":1176572,\"objects\":3,\"kb_used\":3648,\"bytes_used\":3735552,\"percent_used\":4.8574063839623705e-05,\"max_avail\":25633505280}},{\"name\":\"elastic\",\"id\":9,\"stats\":{\"stored\":1349210,\"objects\":5,\"kb_used\":4224,\"bytes_used\":4325376,\"percent_used\":5.6243221479235217e-05,\"max_avail\":25633505280}}]}\n","outs":""}],"has_failed":false,"id":"140222991325696","is_finished":true,"is_waiting":false,"running":[],"state":"success","waiting":[]}
55+
{"failed":[],"finished":[{"command":"status format=json","outb":"{\"fsid\":\"72840c24-3a82-4e28-be87-cf9f905918fb\",\"health\":{\"status\":\"HEALTH_WARN\",\"checks\":{\"OSD_DOWN\":{\"severity\":\"HEALTH_WARN\",\"summary\":{\"message\":\"1 osds down\",\"count\":1},\"muted\":false},\"OSD_HOST_DOWN\":{\"severity\":\"HEALTH_WARN\",\"summary\":{\"message\":\"1 host (1 osds) down\",\"count\":1},\"muted\":false},\"PG_DEGRADED\":{\"severity\":\"HEALTH_WARN\",\"summary\":{\"message\":\"Degraded data redundancy: 9/36 objects degraded (25.000%), 9 pgs degraded, 65 pgs undersized\",\"count\":74},\"muted\":false}},\"mutes\":[]},\"election_epoch\":9,\"quorum\":[0],\"quorum_names\":[\"node01\"],\"quorum_age\":2395803,\"monmap\":{\"epoch\":2,\"min_mon_release_name\":\"octopus\",\"num_mons\":1},\"osdmap\":{\"epoch\":958,\"num_osds\":6,\"num_up_osds\":3,\"osd_up_since\":1674808261,\"num_in_osds\":4,\"osd_in_since\":1672393287,\"num_remapped_pgs\":0},\"pgmap\":{\"pgs_by_state\":[{\"state_name\":\"active+undersized\",\"count\":56},{\"state_name\":\"active+clean\",\"count\":31},{\"state_name\":\"active+undersized+degraded\",\"count\":9}],\"num_pgs\":96,\"num_pools\":3,\"num_objects\":12,\"data_bytes\":134217728,\"bytes_used\":3775201280,\"bytes_avail\":60636725248,\"bytes_total\":64411926528,\"degraded_objects\":9,\"degraded_total\":36,\"degraded_ratio\":0.25,\"read_bytes_sec\":0,\"write_bytes_sec\":0,\"read_op_per_sec\":50,\"write_op_per_sec\":55},\"fsmap\":{\"epoch\":1,\"by_rank\":[],\"up:standby\":0},\"mgrmap\":{\"available\":true,\"num_standbys\":0,\"modules\":[\"cli_api\",\"dashboard\",\"iostat\",\"prometheus\",\"restful\",\"rook\"],\"services\":{\"dashboard\":\"https://node01.cheftest.local:8443/\",\"prometheus\":\"http://node01.cheftest.local:9283/\",\"restful\":\"https://10.50.3.155:8003/\"}},\"servicemap\":{\"epoch\":9675,\"modified\":\"2023-02-06T06:30:50.727008+0000\",\"services\":{}},\"progress_events\":{}}\n","outs":""}],"has_failed":false,"id":"140314752352112","is_finished":true,"is_waiting":false,"running":[],"state":"success","waiting":[]}

packages/ceph/changelog.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
# newer versions go on top
2+
- version: "0.7.0"
3+
changes:
4+
- description: Ceph integration package with "cluster_status" data stream.
5+
type: enhancement
6+
link: https://github.com/elastic/integrations/pull/5216
27
- version: "0.6.0"
38
changes:
49
- description: Ceph integration package with "cluster_disk" data stream.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{ "command": "status format=json", "outb": {"fsid":"71bb7c52-360c-410a-9847-717e1483a05f","health":{"checks":{"OSD_DOWN":{"severity":"HEALTH_WARN","summary":{"message":"1 osds down"}},"OSD_HOST_DOWN":{"severity":"HEALTH_WARN","summary":{"message":"1 host (1 osds) down"}},"PG_DEGRADED":{"severity":"HEALTH_WARN","summary":{"message":"Degraded data redundancy: 148/222 objects degraded (66.667%), 33 pgs degraded, 328 pgs undersized"}},"POOL_TOO_FEW_PGS":{"severity":"HEALTH_WARN","summary":{"message":"1 pools have too few placement groups"}},"POOL_TOO_MANY_PGS":{"severity":"HEALTH_WARN","summary":{"message":"2 pools have too many placement groups"}}},"status":"HEALTH_WARN"},"election_epoch":19,"quorum":[0],"quorum_names":["master"],"quorum_age":184768,"monmap":{"epoch":2,"min_mon_release_name":"14","num_mons":1},"osdmap":{"osdmap":{"epoch":114,"num_osds":4,"num_up_osds":1,"num_in_osds":2,"num_remapped_pgs":0}},"pgmap":{"pgs_by_state":[{"state_name":"active+undersized","count":295},{"state_name":"active+undersized+degraded","count":33}],"num_pgs":328,"num_pools":5,"num_objects":74,"data_bytes":145064680,"bytes_used":1239285760,"bytes_avail":20231356416,"bytes_total":21470642176,"degraded_objects":148,"degraded_total":222,"degraded_ratio":0.66666666666666652,"read_bytes_sec":1738015,"write_bytes_sec":2543437,"read_op_per_sec":3,"write_op_per_sec":4},"fsmap":{"epoch":1,"by_rank":[],"up:standby":0},"services":{"dashboard":"https://master.29053.local:8443/","restful":"https://master.29053.local:8003/"},"always_on_modules":{"nautilus":["balancer","crash","devicehealth","orchestrator_cli","progress","rbd_support","status","volumes"]}},"servicemap":{"epoch":6072,"modified":"2023-02-08 16:02:26.086357","services":{}},"progress_events":{}}, "outs": "" }

0 commit comments

Comments
 (0)