@@ -215,18 +215,19 @@ type Limits struct {
// MaintenanceFunc is a function type that takes no arguments and returns an
// int64 together with an error.
// NOTE(review): the meaning of the int64 result is not visible in this hunk,
// and an existing doc comment may sit just above the hunk boundary — confirm
// before keeping this one.
215215type MaintenanceFunc func () (int64 , error )
216216
// metrics groups the Prometheus collectors built by newMetrics for a Silences
// instance: summaries, a gauge, counters, a histogram, and three GaugeFuncs
// for the active, pending and expired silence counts.
217217type metrics struct {
218- gcDuration prometheus.Summary
219- snapshotDuration prometheus.Summary
220- snapshotSize prometheus.Gauge
221- queriesTotal prometheus.Counter
222- queryErrorsTotal prometheus.Counter
223- queryDuration prometheus.Histogram
224- silencesActive prometheus.GaugeFunc
225- silencesPending prometheus.GaugeFunc
226- silencesExpired prometheus.GaugeFunc
227- propagatedMessagesTotal prometheus.Counter
228- maintenanceTotal prometheus.Counter
229- maintenanceErrorsTotal prometheus.Counter
218+ gcDuration prometheus.Summary
219+ snapshotDuration prometheus.Summary
220+ snapshotSize prometheus.Gauge
221+ queriesTotal prometheus.Counter
222+ queryErrorsTotal prometheus.Counter
223+ queryDuration prometheus.Histogram
224+ silencesActive prometheus.GaugeFunc
225+ silencesPending prometheus.GaugeFunc
226+ silencesExpired prometheus.GaugeFunc
227+ propagatedMessagesTotal prometheus.Counter
228+ maintenanceTotal prometheus.Counter
229+ maintenanceErrorsTotal prometheus.Counter
// matcherCompileErrorsTotal is a counter vector with a single "stage" label.
// The stages incremented in the visible code are "silence_added" and
// "load_snapshot".
230+ matcherCompileErrorsTotal * prometheus.CounterVec
230231}
231232
232233func newSilenceMetricByState (s * Silences , st types.SilenceState ) prometheus.GaugeFunc {
@@ -271,6 +272,13 @@ func newMetrics(r prometheus.Registerer, s *Silences) *metrics {
271272 Name : "alertmanager_silences_maintenance_errors_total" ,
272273 Help : "How many maintenances were executed for silences that failed." ,
273274 })
275+ m .matcherCompileErrorsTotal = prometheus .NewCounterVec (
276+ prometheus.CounterOpts {
277+ Name : "alertmanager_silences_matcher_compile_errors_total" ,
278+ Help : "How many silence matcher compilations failed." ,
279+ },
280+ []string {"stage" },
281+ )
274282 m .queriesTotal = prometheus .NewCounter (prometheus.CounterOpts {
275283 Name : "alertmanager_silences_queries_total" ,
276284 Help : "How many silence queries were received." ,
@@ -311,6 +319,7 @@ func newMetrics(r prometheus.Registerer, s *Silences) *metrics {
311319 m .propagatedMessagesTotal ,
312320 m .maintenanceTotal ,
313321 m .maintenanceErrorsTotal ,
322+ m .matcherCompileErrorsTotal ,
314323 )
315324 }
316325 return m
@@ -562,7 +571,11 @@ func (s *Silences) checkSizeLimits(msil *pb.MeshSilence) error {
562571
// silenceAdded increments s.version and adds sil to the matcher cache s.mc.
// If the add returns an error, the matcherCompileErrorsTotal counter is
// incremented with the "silence_added" label value and the error is logged
// together with the silence ID. The error is not returned to the caller.
// NOTE(review): loadSnapshot skips storing a silence whose add fails, whereas
// this function only logs and counts the failure — confirm that callers do
// not need to know the silence's matchers failed to compile.
563573func (s * Silences ) silenceAdded (sil * pb.Silence ) {
564573 s .version ++
565- s .mc .add (sil )
574+ _ , err := s .mc .add (sil )
575+ if err != nil {
576+ s .metrics .matcherCompileErrorsTotal .WithLabelValues ("silence_added" ).Inc ()
577+ s .logger .Error ("Failed to compile silence matchers" , "silence_id" , sil .Id , "err" , err )
578+ }
566579}
567580
568581func (s * Silences ) getSilence (id string ) (* pb.Silence , bool ) {
@@ -886,8 +899,13 @@ func (s *Silences) loadSnapshot(r io.Reader) error {
886899 e .Silence .CreatedBy = e .Silence .Comments [0 ].Author
887900 e .Silence .Comments = nil
888901 }
889- st [e .Silence .Id ] = e
890- s .mc .add (e .Silence )
902+ // Add to matcher cache, and only if successful, to the new state.
903+ if _ , err := s .mc .add (e .Silence ); err != nil {
904+ s .metrics .matcherCompileErrorsTotal .WithLabelValues ("load_snapshot" ).Inc ()
905+ s .logger .Error ("Failed to compile silence matchers during snapshot load" , "silence_id" , e .Silence .Id , "err" , err )
906+ } else {
907+ st [e .Silence .Id ] = e
908+ }
891909 }
892910 s .mtx .Lock ()
893911 s .st = st
0 commit comments