Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion api/v2/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/go-openapi/runtime"
"github.com/go-openapi/runtime/middleware"
"github.com/go-openapi/strfmt"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -84,7 +85,7 @@ var (
)

func newSilences(t *testing.T) *silence.Silences {
silences, err := silence.New(silence.Options{})
silences, err := silence.New(silence.Options{Metrics: prometheus.NewRegistry()})
require.NoError(t, err)

return silences
Expand Down
30 changes: 11 additions & 19 deletions inhibit/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package inhibit

import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

// InhibitorMetrics represents metrics associated with an inhibitor.
Expand All @@ -35,49 +36,52 @@ type InhibitorMetrics struct {

// NewInhibitorMetrics returns a new InhibitorMetrics.
func NewInhibitorMetrics(reg prometheus.Registerer) *InhibitorMetrics {
if reg == nil {
return nil
}
metrics := &InhibitorMetrics{
sourceAlertsCacheItems: prometheus.NewGauge(
sourceAlertsCacheItems: promauto.With(reg).NewGauge(
prometheus.GaugeOpts{
Name: "alertmanager_inhibitor_source_alerts_cache_items",
Help: "Number of source alerts cached in inhibition rules.",
},
),
sourceAlertsIndexItems: prometheus.NewGauge(
sourceAlertsIndexItems: promauto.With(reg).NewGauge(
prometheus.GaugeOpts{
Name: "alertmanager_inhibitor_source_alerts_index_items",
Help: "Number of source alerts indexed in inhibition rules.",
},
),
mutesDuration: prometheus.NewSummaryVec(
mutesDuration: promauto.With(reg).NewSummaryVec(
prometheus.SummaryOpts{
Name: "alertmanager_inhibitor_mutes_duration_seconds",
Help: "Summary of latencies for the muting of alerts by inhibition rules.",
},
[]string{"muted"},
),

ruleSourceAlertsCacheItems: prometheus.NewGaugeVec(
ruleSourceAlertsCacheItems: promauto.With(reg).NewGaugeVec(
prometheus.GaugeOpts{
Name: "alertmanager_inhibit_rule_source_alerts_cache_items",
Help: "Number of source alerts cached in inhibition rules.",
},
[]string{"rule"},
),
ruleSourceAlertsIndexItems: prometheus.NewGaugeVec(
ruleSourceAlertsIndexItems: promauto.With(reg).NewGaugeVec(
prometheus.GaugeOpts{
Name: "alertmanager_inhibit_rule_source_alerts_index_items",
Help: "Number of source alerts indexed in inhibition rules.",
},
[]string{"rule"},
),
ruleMatchesDuration: prometheus.NewSummaryVec(
ruleMatchesDuration: promauto.With(reg).NewSummaryVec(
prometheus.SummaryOpts{
Name: "alertmanager_inhibit_rule_matches_duration_seconds",
Help: "Summary of latencies for the matching of alerts by inhibition rules.",
},
[]string{"rule", "matched"},
),
ruleMutesDuration: prometheus.NewSummaryVec(
ruleMutesDuration: promauto.With(reg).NewSummaryVec(
prometheus.SummaryOpts{
Name: "alertmanager_inhibit_rule_mutes_duration_seconds",
Help: "Summary of latencies for the muting of alerts by inhibition rules.",
Expand All @@ -89,18 +93,6 @@ func NewInhibitorMetrics(reg prometheus.Registerer) *InhibitorMetrics {
metrics.mutesDurationMuted = metrics.mutesDuration.With(prometheus.Labels{"muted": "true"})
metrics.mutesDurationNotMuted = metrics.mutesDuration.With(prometheus.Labels{"muted": "false"})

if reg != nil {
reg.MustRegister(
metrics.sourceAlertsCacheItems,
metrics.sourceAlertsIndexItems,
metrics.mutesDuration,
metrics.ruleSourceAlertsCacheItems,
metrics.ruleSourceAlertsIndexItems,
metrics.ruleMatchesDuration,
metrics.ruleMutesDuration,
)
}

metrics.sourceAlertsCacheItems.Set(0)
metrics.sourceAlertsIndexItems.Set(0)

Expand Down
2 changes: 1 addition & 1 deletion notify/notify_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ func TestMuteStage(t *testing.T) {
}

func TestMuteStageWithSilences(t *testing.T) {
silences, err := silence.New(silence.Options{Retention: time.Hour})
silences, err := silence.New(silence.Options{Metrics: prometheus.NewRegistry(), Retention: time.Hour})
if err != nil {
t.Fatal(err)
}
Expand Down
41 changes: 14 additions & 27 deletions silence/silence.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
uuid "github.com/gofrs/uuid"
"github.com/matttproud/golang_protobuf_extensions/pbutil"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"

Expand Down Expand Up @@ -251,29 +252,29 @@ func newSilenceMetricByState(s *Silences, st types.SilenceState) prometheus.Gaug
func newMetrics(r prometheus.Registerer, s *Silences) *metrics {
m := &metrics{}

m.gcDuration = prometheus.NewSummary(prometheus.SummaryOpts{
m.gcDuration = promauto.With(r).NewSummary(prometheus.SummaryOpts{
Name: "alertmanager_silences_gc_duration_seconds",
Help: "Duration of the last silence garbage collection cycle.",
Objectives: map[float64]float64{},
})
m.snapshotDuration = prometheus.NewSummary(prometheus.SummaryOpts{
m.snapshotDuration = promauto.With(r).NewSummary(prometheus.SummaryOpts{
Name: "alertmanager_silences_snapshot_duration_seconds",
Help: "Duration of the last silence snapshot.",
Objectives: map[float64]float64{},
})
m.snapshotSize = prometheus.NewGauge(prometheus.GaugeOpts{
m.snapshotSize = promauto.With(r).NewGauge(prometheus.GaugeOpts{
Name: "alertmanager_silences_snapshot_size_bytes",
Help: "Size of the last silence snapshot in bytes.",
})
m.maintenanceTotal = prometheus.NewCounter(prometheus.CounterOpts{
m.maintenanceTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{
Name: "alertmanager_silences_maintenance_total",
Help: "How many maintenances were executed for silences.",
})
m.maintenanceErrorsTotal = prometheus.NewCounter(prometheus.CounterOpts{
m.maintenanceErrorsTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{
Name: "alertmanager_silences_maintenance_errors_total",
Help: "How many maintenances were executed for silences that failed.",
})
matcherCompileErrorsTotal := prometheus.NewCounterVec(
matcherCompileErrorsTotal := promauto.With(r).NewCounterVec(
prometheus.CounterOpts{
Name: "alertmanager_silences_matcher_compile_errors_total",
Help: "How many silence matcher compilations failed.",
Expand All @@ -282,23 +283,23 @@ func newMetrics(r prometheus.Registerer, s *Silences) *metrics {
)
m.matcherCompileCacheSilenceErrorsTotal = matcherCompileErrorsTotal.WithLabelValues("cache_silence")
m.matcherCompileLoadSnapshotErrorsTotal = matcherCompileErrorsTotal.WithLabelValues("load_snapshot")
m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{
m.queriesTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{
Name: "alertmanager_silences_queries_total",
Help: "How many silence queries were received.",
})
m.queryErrorsTotal = prometheus.NewCounter(prometheus.CounterOpts{
m.queryErrorsTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{
Name: "alertmanager_silences_query_errors_total",
Help: "How many silence received queries did not succeed.",
})
m.queryDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
m.queryDuration = promauto.With(r).NewHistogram(prometheus.HistogramOpts{
Name: "alertmanager_silences_query_duration_seconds",
Help: "Duration of silence query evaluation.",
Buckets: prometheus.DefBuckets,
NativeHistogramBucketFactor: 1.1,
NativeHistogramMaxBucketNumber: 100,
NativeHistogramMinResetDuration: 1 * time.Hour,
})
m.propagatedMessagesTotal = prometheus.NewCounter(prometheus.CounterOpts{
m.propagatedMessagesTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{
Name: "alertmanager_silences_gossip_messages_propagated_total",
Help: "Number of received gossip messages that have been further gossiped.",
})
Expand All @@ -308,23 +309,6 @@ func newMetrics(r prometheus.Registerer, s *Silences) *metrics {
m.silencesExpired = newSilenceMetricByState(s, types.SilenceStateExpired)
}

if r != nil {
r.MustRegister(
m.gcDuration,
m.snapshotDuration,
m.snapshotSize,
m.queriesTotal,
m.queryErrorsTotal,
m.queryDuration,
m.silencesActive,
m.silencesPending,
m.silencesExpired,
m.propagatedMessagesTotal,
m.maintenanceTotal,
m.maintenanceErrorsTotal,
matcherCompileErrorsTotal,
)
}
return m
}

Expand Down Expand Up @@ -368,6 +352,9 @@ func New(o Options) (*Silences, error) {
broadcast: func([]byte) {},
st: state{},
}
if o.Metrics == nil {
return nil, errors.New("Options.Metrics is nil")
}
s.metrics = newMetrics(o.Metrics, s)

if o.Logger != nil {
Expand Down
30 changes: 19 additions & 11 deletions silence/silence_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,16 +62,19 @@ func TestOptionsValidate(t *testing.T) {
}{
{
options: &Options{
Metrics: prometheus.NewRegistry(),
SnapshotReader: &bytes.Buffer{},
},
},
{
options: &Options{
Metrics: prometheus.NewRegistry(),
SnapshotFile: "test.bkp",
},
},
{
options: &Options{
Metrics: prometheus.NewRegistry(),
SnapshotFile: "test bkp",
SnapshotReader: &bytes.Buffer{},
},
Expand All @@ -86,7 +89,7 @@ func TestOptionsValidate(t *testing.T) {

func TestSilenceGCOverTime(t *testing.T) {
t.Run("GC does not remove active silences", func(t *testing.T) {
s, err := New(Options{})
s, err := New(Options{Metrics: prometheus.NewRegistry()})
require.NoError(t, err)
s.clock = quartz.NewMock(t)
now := s.nowUTC()
Expand All @@ -109,7 +112,7 @@ func TestSilenceGCOverTime(t *testing.T) {
})

t.Run("GC does not leak cache entries", func(t *testing.T) {
s, err := New(Options{})
s, err := New(Options{Metrics: prometheus.NewRegistry()})
require.NoError(t, err)
clock := quartz.NewMock(t)
s.clock = clock
Expand All @@ -136,7 +139,7 @@ func TestSilenceGCOverTime(t *testing.T) {
})

t.Run("replacing a silences does not leak cache entries", func(t *testing.T) {
s, err := New(Options{})
s, err := New(Options{Metrics: prometheus.NewRegistry()})
require.NoError(t, err)
clock := quartz.NewMock(t)
s.clock = clock
Expand Down Expand Up @@ -175,7 +178,7 @@ func TestSilenceGCOverTime(t *testing.T) {
// This test checks for a memory leak that occurred in the matcher cache when
// updating an existing silence.
t.Run("updating a silence does not leak cache entries", func(t *testing.T) {
s, err := New(Options{})
s, err := New(Options{Metrics: prometheus.NewRegistry()})
require.NoError(t, err)
clock := quartz.NewMock(t)
s.clock = clock
Expand Down Expand Up @@ -360,6 +363,7 @@ alertmanager_silences_maintenance_total 2

func TestSilencesSetSilence(t *testing.T) {
s, err := New(Options{
Metrics: prometheus.NewRegistry(),
Retention: time.Minute,
})
require.NoError(t, err)
Expand Down Expand Up @@ -411,6 +415,7 @@ func TestSilencesSetSilence(t *testing.T) {

func TestSilenceSet(t *testing.T) {
s, err := New(Options{
Metrics: prometheus.NewRegistry(),
Retention: time.Hour,
})
require.NoError(t, err)
Expand Down Expand Up @@ -608,6 +613,7 @@ func TestSilenceLimits(t *testing.T) {
MaxSilences: func() int { return 1 },
MaxSilenceSizeBytes: func() int { return 2 << 11 }, // 4KB
},
Metrics: prometheus.NewRegistry(),
})
require.NoError(t, err)

Expand Down Expand Up @@ -725,7 +731,8 @@ func TestSilenceLimits(t *testing.T) {

func TestSilenceNoLimits(t *testing.T) {
s, err := New(Options{
Limits: Limits{},
Limits: Limits{},
Metrics: prometheus.NewRegistry(),
})
require.NoError(t, err)

Expand All @@ -742,6 +749,7 @@ func TestSilenceNoLimits(t *testing.T) {

func TestSetActiveSilence(t *testing.T) {
s, err := New(Options{
Metrics: prometheus.NewRegistry(),
Retention: time.Hour,
})
require.NoError(t, err)
Expand Down Expand Up @@ -791,7 +799,7 @@ func TestSetActiveSilence(t *testing.T) {
}

func TestSilencesSetFail(t *testing.T) {
s, err := New(Options{})
s, err := New(Options{Metrics: prometheus.NewRegistry()})
require.NoError(t, err)

clock := quartz.NewMock(t)
Expand Down Expand Up @@ -948,7 +956,7 @@ func TestQMatches(t *testing.T) {
}

func TestSilencesQuery(t *testing.T) {
s, err := New(Options{})
s, err := New(Options{Metrics: prometheus.NewRegistry()})
require.NoError(t, err)

s.st = state{
Expand Down Expand Up @@ -1158,7 +1166,7 @@ func TestSilenceCanUpdate(t *testing.T) {
}

func TestSilenceExpire(t *testing.T) {
s, err := New(Options{Retention: time.Hour})
s, err := New(Options{Metrics: prometheus.NewRegistry(), Retention: time.Hour})
require.NoError(t, err)

clock := quartz.NewMock(t)
Expand Down Expand Up @@ -1255,7 +1263,7 @@ func TestSilenceExpire(t *testing.T) {
// retention time, a silence explicitly set to expired will also immediately
// expire from the silence storage.
func TestSilenceExpireWithZeroRetention(t *testing.T) {
s, err := New(Options{Retention: 0})
s, err := New(Options{Metrics: prometheus.NewRegistry(), Retention: 0})
require.NoError(t, err)

clock := quartz.NewMock(t)
Expand Down Expand Up @@ -1330,7 +1338,7 @@ func TestSilenceExpireWithZeroRetention(t *testing.T) {

// This test checks that invalid silences can be expired.
func TestSilenceExpireInvalid(t *testing.T) {
s, err := New(Options{Retention: time.Hour})
s, err := New(Options{Metrics: prometheus.NewRegistry(), Retention: time.Hour})
require.NoError(t, err)

clock := quartz.NewMock(t)
Expand Down Expand Up @@ -1369,7 +1377,7 @@ func TestSilenceExpireInvalid(t *testing.T) {
}

func TestSilencer(t *testing.T) {
ss, err := New(Options{Retention: time.Hour})
ss, err := New(Options{Metrics: prometheus.NewRegistry(), Retention: time.Hour})
require.NoError(t, err)

clock := quartz.NewMock(t)
Expand Down
Loading