From 36a0a300703992c763f6403df8ca98aa786a4658 Mon Sep 17 00:00:00 2001
From: Maha Benzekri
Date: Tue, 8 Oct 2024 12:26:14 +0200
Subject: [PATCH] fixup post review

---
Reviewer note (this notes area is ignored when the patch is applied):
line breaks and indentation were restored from a whitespace-collapsed copy
of this patch. Hunk line counts match the hunk headers and the diffstat,
but the leading whitespace of context and removed lines is assumed from
conventional promtool / Prometheus rule-file layout and is NOT verified.
TODO: confirm against the repository, or apply with
`git apply --ignore-whitespace`.

 monitoring/mongodb/alerts.test.yaml | 43 +++++++++++++++--------------
 monitoring/mongodb/alerts.yaml      |  4 +--
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/monitoring/mongodb/alerts.test.yaml b/monitoring/mongodb/alerts.test.yaml
index c6d3bf337..1a6c3423c 100644
--- a/monitoring/mongodb/alerts.test.yaml
+++ b/monitoring/mongodb/alerts.test.yaml
@@ -329,28 +329,31 @@ tests:
               description: "MongoDB pod `data-db-mongodb-sharded-mongos-0` has been in the 'STARTUP2' state for more than 1 hour. Please ensure that the instance is running properly."
               summary: MongoDB node in STARTUP2 state for too long

-
   - name: MongoDbRSNotSynced
     interval: 1m
     input_series:
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-0.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 1x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-1.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 2x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-2.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 2x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-0.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 1x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-1.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 2x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-2.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 2x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-0.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 1x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-1.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 2x10
-      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="(not reachable/healthy)", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-2.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"}
-        values: 2 _ _ _ _ _ _ _ _ _
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-0"}
+        values: 1x10 1x10
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-1"}
+        values: 2x10 2x10
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-2"}
+        values: 2x10 stale
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="(not reachable/healthy)", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-2"}
+        values: _x10 8x10
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-0"}
+        values: 1x10 1x10
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-1"}
+        values: 2x10 2x10
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-2"}
+        values: 2x10 stale
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="(not reachable/healthy)", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-2"}
+        values: _x10 8x10
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-0"}
+        values: 1x10 stale
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-1"}
+        values: 2x10 stale
+      - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="shard0-data-2"}
+        values: 2x10 stale

     alert_rule_test:
       - alertname: MongoDbRSNotSynced
@@ -360,5 +363,5 @@ tests:
               severity: warning
               rs_nm: data-db-mongodb-sharded-shard-0
             exp_annotations:
-              description: "MongoDB replica set `data-db-mongodb-sharded-shard-0` is not in the expected state. It currently has `1` SECONDARY members instead of the expected number. Please ensure that all instances are running properly."
+              description: "MongoDB replica set `data-db-mongodb-sharded-shard-0` is not in the expected state. It does not have the expected number of SECONDARY members. Please ensure that all instances are running properly."
               summary: MongoDB replica set out of sync
diff --git a/monitoring/mongodb/alerts.yaml b/monitoring/mongodb/alerts.yaml
index f5acde22f..cdbe2b3c8 100644
--- a/monitoring/mongodb/alerts.yaml
+++ b/monitoring/mongodb/alerts.yaml
@@ -183,10 +183,10 @@ groups:

   - alert: MongoDbRSNotSynced
     expr: |
-      floor(avg by(rs_nm)(count by (rs_nm, pod)(mongodb_rs_members_state{namespace="${namespace}", pod=~"${service}.*", member_state="SECONDARY"}))) != (${replicas} - 1)
+      group by(rs_nm) ( count by(rs_nm, pod) (mongodb_rs_members_state{namespace="${namespace}", pod=~"${service}.*", member_state="SECONDARY"}) != (${replicas} - 1) )
     for: 10m
     labels:
       severity: warning
     annotations:
-      description: "MongoDB replica set `{{ $labels.rs_nm }}` is not in the expected state. It currently has `{{ $value }}` SECONDARY members instead of the expected number. Please ensure that all instances are running properly."
+      description: "MongoDB replica set `{{ $labels.rs_nm }}` is not in the expected state. It does not have the expected number of SECONDARY members. Please ensure that all instances are running properly."
       summary: MongoDB replica set out of sync