Skip to content

Commit eeafb43

Browse files
committed
Add AM aggregation group metrics
Signed-off-by: Emmanuel Lodovice <lodovice@amazon.com>
1 parent 7812330 commit eeafb43

File tree

2 files changed

+36
-0
lines changed

2 files changed

+36
-0
lines changed

pkg/alertmanager/alertmanager_metrics.go

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,8 @@ type alertmanagerMetrics struct {
6060
persistFailed *prometheus.Desc
6161

6262
notificationRateLimited *prometheus.Desc
63+
dispatcherAggregationGroups *prometheus.Desc
64+
dispatcherProcessingDuration *prometheus.Desc
6365
dispatcherAggregationGroupsLimitReached *prometheus.Desc
6466
insertAlertFailures *prometheus.Desc
6567
alertsLimiterAlertsCount *prometheus.Desc
@@ -217,6 +219,14 @@ func newAlertmanagerMetrics() *alertmanagerMetrics {
217219
"cortex_alertmanager_dispatcher_aggregation_group_limit_reached_total",
218220
"Number of times when dispatcher failed to create new aggregation group due to limit.",
219221
[]string{"user"}, nil),
222+
dispatcherAggregationGroups: prometheus.NewDesc(
223+
"cortex_alertmanager_dispatcher_aggregation_groups",
224+
"Number of active aggregation groups.",
225+
[]string{"user"}, nil),
226+
dispatcherProcessingDuration: prometheus.NewDesc(
227+
"cortex_alertmanager_dispatcher_alert_processing_duration_seconds",
228+
"Summary of latencies for the processing of alerts.",
229+
[]string{"user"}, nil),
220230
insertAlertFailures: prometheus.NewDesc(
221231
"cortex_alertmanager_alerts_insert_limited_total",
222232
"Total number of failures to store alert due to hitting alertmanager limits.",
@@ -279,6 +289,8 @@ func (m *alertmanagerMetrics) Describe(out chan<- *prometheus.Desc) {
279289
out <- m.persistTotal
280290
out <- m.persistFailed
281291
out <- m.notificationRateLimited
292+
out <- m.dispatcherAggregationGroups
293+
out <- m.dispatcherProcessingDuration
282294
out <- m.dispatcherAggregationGroupsLimitReached
283295
out <- m.insertAlertFailures
284296
out <- m.alertsLimiterAlertsCount
@@ -330,6 +342,8 @@ func (m *alertmanagerMetrics) Collect(out chan<- prometheus.Metric) {
330342
data.SendSumOfCounters(out, m.persistFailed, "alertmanager_state_persist_failed_total")
331343

332344
data.SendSumOfCountersPerUserWithLabels(out, m.notificationRateLimited, "alertmanager_notification_rate_limited_total", "integration")
345+
data.SendSumOfGaugesPerUser(out, m.dispatcherAggregationGroups, "alertmanager_dispatcher_aggregation_groups")
346+
data.SendSumOfSummariesPerUser(out, m.dispatcherProcessingDuration, "alertmanager_dispatcher_alert_processing_duration_seconds")
333347
data.SendSumOfCountersPerUser(out, m.dispatcherAggregationGroupsLimitReached, "alertmanager_dispatcher_aggregation_group_limit_reached_total")
334348
data.SendSumOfCountersPerUser(out, m.insertAlertFailures, "alertmanager_alerts_insert_limited_total")
335349
data.SendSumOfGaugesPerUser(out, m.alertsLimiterAlertsCount, "alertmanager_alerts_limiter_current_alerts")

pkg/alertmanager/alertmanager_metrics_test.go

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,14 @@ func TestAlertmanagerMetricsStore(t *testing.T) {
6060
cortex_alertmanager_config_hash{user="user1"} 0
6161
cortex_alertmanager_config_hash{user="user2"} 0
6262
cortex_alertmanager_config_hash{user="user3"} 0
63+
# HELP cortex_alertmanager_dispatcher_alert_processing_duration_seconds Summary of latencies for the processing of alerts.
64+
# TYPE cortex_alertmanager_dispatcher_alert_processing_duration_seconds summary
65+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_sum{user="user1"} 0
66+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_count{user="user1"} 0
67+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_sum{user="user2"} 0
68+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_count{user="user2"} 0
69+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_sum{user="user3"} 0
70+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_count{user="user3"} 0
6371
# HELP cortex_alertmanager_nflog_gc_duration_seconds Duration of the last notification log garbage collection cycle.
6472
# TYPE cortex_alertmanager_nflog_gc_duration_seconds summary
6573
cortex_alertmanager_nflog_gc_duration_seconds_sum 111
@@ -354,6 +362,14 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
354362
cortex_alertmanager_config_hash{user="user1"} 0
355363
cortex_alertmanager_config_hash{user="user2"} 0
356364
cortex_alertmanager_config_hash{user="user3"} 0
365+
# HELP cortex_alertmanager_dispatcher_alert_processing_duration_seconds Summary of latencies for the processing of alerts.
366+
# TYPE cortex_alertmanager_dispatcher_alert_processing_duration_seconds summary
367+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_sum{user="user1"} 0
368+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_count{user="user1"} 0
369+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_sum{user="user2"} 0
370+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_count{user="user2"} 0
371+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_sum{user="user3"} 0
372+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_count{user="user3"} 0
357373
358374
# HELP cortex_alertmanager_nflog_gc_duration_seconds Duration of the last notification log garbage collection cycle.
359375
# TYPE cortex_alertmanager_nflog_gc_duration_seconds summary
@@ -649,6 +665,12 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
649665
# TYPE cortex_alertmanager_config_hash gauge
650666
cortex_alertmanager_config_hash{user="user1"} 0
651667
cortex_alertmanager_config_hash{user="user2"} 0
668+
# HELP cortex_alertmanager_dispatcher_alert_processing_duration_seconds Summary of latencies for the processing of alerts.
669+
# TYPE cortex_alertmanager_dispatcher_alert_processing_duration_seconds summary
670+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_sum{user="user1"} 0
671+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_count{user="user1"} 0
672+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_sum{user="user2"} 0
673+
cortex_alertmanager_dispatcher_alert_processing_duration_seconds_count{user="user2"} 0
652674
653675
# HELP cortex_alertmanager_nflog_gc_duration_seconds Duration of the last notification log garbage collection cycle.
654676
# TYPE cortex_alertmanager_nflog_gc_duration_seconds summary

0 commit comments

Comments
 (0)