Skip to content

Commit 32e49b3

Browse files
committed
Add per-tenant silences limits
Signed-off-by: SungJin1212 <tjdwls1201@gmail.com>
1 parent fc6c40d commit 32e49b3

File tree

7 files changed

+129
-4
lines changed

7 files changed

+129
-4
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* [FEATURE] Query Frontend: Add dynamic interval size for query splitting. This is enabled by configuring experimental flags `querier.max-shards-per-query` and/or `querier.max-fetched-data-duration-per-query`. The split interval size is dynamically increased to maintain a number of shards and total duration fetched below the configured values. #6458
66
* [FEATURE] Querier/Ruler: Add `query_partial_data` and `rules_partial_data` limits to allow queries/rules to be evaluated with data from a single zone, if other zones are not available. #6526
77
* [FEATURE] Update prometheus alertmanager version to v0.28.0 and add new integration msteamsv2, jira, and rocketchat. #6590
8+
* [ENHANCEMENT] Alertmanager: Add new limits `-alertmanager.max-silences-count` and `-alertmanager.max-silences-size-bytes` for limiting silences per tenant.
89
* [ENHANCEMENT] Add `compactor.auto-forget-delay` for compactor to auto forget compactors after X minutes without heartbeat. #6533
910
* [ENHANCEMENT] StoreGateway: Emit more histogram buckets on the `cortex_querier_storegateway_refetches_per_query` metric. #6570
1011
* [ENHANCEMENT] Querier: Apply bytes limiter to LabelNames and LabelValuesForLabelNames. #6568

docs/configuration/config-file-reference.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3751,6 +3751,15 @@ query_rejection:
37513751
# CLI flag: -alertmanager.max-alerts-size-bytes
37523752
[alertmanager_max_alerts_size_bytes: <int> | default = 0]
37533753

3754+
# Maximum number of silences that a single user can have, including expired
3755+
# silences. 0 = no limit.
3756+
# CLI flag: -alertmanager.max-silences-count
3757+
[alertmanager_max_silences_count: <int> | default = 0]
3758+
3759+
# Maximum size of individual silences that a single user can have. 0 = no limit.
3760+
# CLI flag: -alertmanager.max-silences-size-bytes
3761+
[alertmanager_max_silences_size_bytes: <int> | default = 0]
3762+
37543763
# list of rule groups to disable
37553764
[disabled_rule_groups: <list of DisabledRuleGroup> | default = []]
37563765
```

pkg/alertmanager/alertmanager.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,11 +228,16 @@ func New(cfg *Config, reg *prometheus.Registry) (*Alertmanager, error) {
228228
am.groupMarker = memMarker
229229

230230
silencesFile := filepath.Join(cfg.TenantDataDir, silencesSnapshot)
231+
231232
am.silences, err = silence.New(silence.Options{
232233
SnapshotFile: silencesFile,
233234
Retention: cfg.Retention,
234-
Logger: util_log.GoKitLogToSlog(log.With(am.logger, "component", "silences")),
235-
Metrics: am.registry,
235+
Limits: silence.Limits{
236+
MaxSilences: func() int { return cfg.Limits.AlertmanagerMaxSilencesCount(cfg.UserID) },
237+
MaxSilenceSizeBytes: func() int { return cfg.Limits.AlertmanagerMaxSilenceSizeBytes(cfg.UserID) },
238+
},
239+
Logger: util_log.GoKitLogToSlog(log.With(am.logger, "component", "silences")),
240+
Metrics: am.registry,
236241
})
237242
if err != nil {
238243
return nil, fmt.Errorf("failed to create silences: %v", err)

pkg/alertmanager/alertmanager_test.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
"github.com/go-kit/log"
1111
"github.com/prometheus/alertmanager/config"
12+
"github.com/prometheus/alertmanager/silence/silencepb"
1213
"github.com/prometheus/alertmanager/types"
1314
"github.com/prometheus/client_golang/prometheus"
1415
"github.com/prometheus/client_golang/prometheus/testutil"
@@ -19,6 +20,75 @@ import (
1920
"github.com/cortexproject/cortex/pkg/util/test"
2021
)
2122

23+
func TestSilencesLimits(t *testing.T) {
24+
user := "test"
25+
26+
reg := prometheus.NewPedanticRegistry()
27+
maxSilencesCount := 3
28+
maxSilencesSizeBytes := 500
29+
am, err := New(&Config{
30+
UserID: user,
31+
Logger: log.NewNopLogger(),
32+
Limits: &mockAlertManagerLimits{maxSilencesCount: maxSilencesCount, maxSilencesSizeBytes: maxSilencesSizeBytes},
33+
TenantDataDir: t.TempDir(),
34+
ExternalURL: &url.URL{Path: "/am"},
35+
ShardingEnabled: false,
36+
GCInterval: 30 * time.Minute,
37+
}, reg)
38+
require.NoError(t, err)
39+
defer am.StopAndWait()
40+
41+
t.Run("Test maxSilencesCount", func(t *testing.T) {
42+
createSilences := func() *silencepb.Silence {
43+
return &silencepb.Silence{
44+
Matchers: []*silencepb.Matcher{{Name: "name", Pattern: "pattern"}},
45+
StartsAt: time.Now(),
46+
EndsAt: time.Now().Add(time.Minute * 30),
47+
}
48+
}
49+
50+
// create silences up to maxSilencesCount
51+
for i := 0; i < maxSilencesCount; i++ {
52+
err := am.silences.Set(createSilences())
53+
require.NoError(t, err)
54+
}
55+
56+
// exceeds limit
57+
err = am.silences.Set(createSilences())
58+
require.Error(t, err)
59+
require.Equal(t, fmt.Sprintf("exceeded maximum number of silences: %d (limit: %d)", maxSilencesCount, maxSilencesCount), err.Error())
60+
61+
// expire all silences
62+
silences, _, err := am.silences.Query()
63+
require.NoError(t, err)
64+
for _, s := range silences {
65+
err := am.silences.Expire(s.Id)
66+
require.NoError(t, err)
67+
}
68+
69+
// check maxSilencesCount includes expired silences
70+
err = am.silences.Set(createSilences())
71+
require.Error(t, err)
72+
require.Equal(t, fmt.Sprintf("exceeded maximum number of silences: %d (limit: %d)", maxSilencesCount, maxSilencesCount), err.Error())
73+
74+
// GC
75+
n, err := am.silences.GC()
76+
require.NoError(t, err)
77+
require.Equal(t, maxSilencesCount, n)
78+
})
79+
t.Run("Test maxSilencesSizeBytes", func(t *testing.T) {
80+
bigSilences := &silencepb.Silence{
81+
Matchers: []*silencepb.Matcher{{Name: strings.Repeat("a", maxSilencesSizeBytes/2+1), Pattern: strings.Repeat("b", maxSilencesSizeBytes/2+1)}},
82+
StartsAt: time.Now(),
83+
EndsAt: time.Now().Add(time.Minute * 30),
84+
}
85+
86+
err = am.silences.Set(bigSilences)
87+
require.Error(t, err)
88+
require.True(t, strings.Contains(err.Error(), "silence exceeded maximum size"))
89+
})
90+
}
91+
2292
func TestDispatcherGroupLimits(t *testing.T) {
2393
for name, tc := range map[string]struct {
2494
groups int

pkg/alertmanager/multitenant.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,12 @@ type Limits interface {
228228
// AlertmanagerMaxAlertsSizeBytes returns total max size of alerts that tenant can have active at the same time. 0 = no limit.
229229
// Size of the alert is computed from alert labels, annotations and generator URL.
230230
AlertmanagerMaxAlertsSizeBytes(tenant string) int
231+
232+
// AlertmanagerMaxSilencesCount returns max number of silences that tenant can have, including expired silences. 0 = no limit.
233+
AlertmanagerMaxSilencesCount(tenant string) int
234+
235+
// AlertmanagerMaxSilenceSizeBytes returns the maximum size of an individual silence. 0 = no limit.
236+
AlertmanagerMaxSilenceSizeBytes(tenant string) int
231237
}
232238

233239
// A MultitenantAlertmanager manages Alertmanager instances for multiple

pkg/alertmanager/multitenant_test.go

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1777,8 +1777,14 @@ func TestAlertmanager_StateReplicationWithSharding(t *testing.T) {
17771777
amConfig.ShardingEnabled = true
17781778
}
17791779

1780+
var limits validation.Limits
1781+
flagext.DefaultValues(&limits)
1782+
1783+
overrides, err := validation.NewOverrides(limits, nil)
1784+
require.NoError(t, err)
1785+
17801786
reg := prometheus.NewPedanticRegistry()
1781-
am, err := createMultitenantAlertmanager(amConfig, nil, nil, mockStore, ringStore, nil, log.NewNopLogger(), reg)
1787+
am, err := createMultitenantAlertmanager(amConfig, nil, nil, mockStore, ringStore, overrides, log.NewNopLogger(), reg)
17821788
require.NoError(t, err)
17831789
defer services.StopAndAwaitTerminated(ctx, am) //nolint:errcheck
17841790

@@ -1969,8 +1975,14 @@ func TestAlertmanager_StateReplicationWithSharding_InitialSyncFromPeers(t *testi
19691975

19701976
amConfig.ShardingEnabled = true
19711977

1978+
var limits validation.Limits
1979+
flagext.DefaultValues(&limits)
1980+
1981+
overrides, err := validation.NewOverrides(limits, nil)
1982+
require.NoError(t, err)
1983+
19721984
reg := prometheus.NewPedanticRegistry()
1973-
am, err := createMultitenantAlertmanager(amConfig, nil, nil, mockStore, ringStore, nil, log.NewNopLogger(), reg)
1985+
am, err := createMultitenantAlertmanager(amConfig, nil, nil, mockStore, ringStore, overrides, log.NewNopLogger(), reg)
19741986
require.NoError(t, err)
19751987

19761988
clientPool.setServer(amConfig.ShardingRing.InstanceAddr+":0", am)
@@ -2285,6 +2297,8 @@ type mockAlertManagerLimits struct {
22852297
maxDispatcherAggregationGroups int
22862298
maxAlertsCount int
22872299
maxAlertsSizeBytes int
2300+
maxSilencesCount int
2301+
maxSilencesSizeBytes int
22882302
}
22892303

22902304
func (m *mockAlertManagerLimits) AlertmanagerMaxConfigSize(tenant string) int {
@@ -2326,3 +2340,11 @@ func (m *mockAlertManagerLimits) AlertmanagerMaxAlertsCount(_ string) int {
23262340
func (m *mockAlertManagerLimits) AlertmanagerMaxAlertsSizeBytes(_ string) int {
23272341
return m.maxAlertsSizeBytes
23282342
}
2343+
2344+
func (m *mockAlertManagerLimits) AlertmanagerMaxSilencesCount(_ string) int {
2345+
return m.maxSilencesCount
2346+
}
2347+
2348+
func (m *mockAlertManagerLimits) AlertmanagerMaxSilenceSizeBytes(_ string) int {
2349+
return m.maxSilencesSizeBytes
2350+
}

pkg/util/validation/limits.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,8 @@ type Limits struct {
218218
AlertmanagerMaxDispatcherAggregationGroups int `yaml:"alertmanager_max_dispatcher_aggregation_groups" json:"alertmanager_max_dispatcher_aggregation_groups"`
219219
AlertmanagerMaxAlertsCount int `yaml:"alertmanager_max_alerts_count" json:"alertmanager_max_alerts_count"`
220220
AlertmanagerMaxAlertsSizeBytes int `yaml:"alertmanager_max_alerts_size_bytes" json:"alertmanager_max_alerts_size_bytes"`
221+
AlertmanagerMaxSilencesCount int `yaml:"alertmanager_max_silences_count" json:"alertmanager_max_silences_count"`
222+
AlertmanagerMaxSilencesSizeBytes int `yaml:"alertmanager_max_silences_size_bytes" json:"alertmanager_max_silences_size_bytes"`
221223
DisabledRuleGroups DisabledRuleGroups `yaml:"disabled_rule_groups" json:"disabled_rule_groups" doc:"nocli|description=list of rule groups to disable"`
222224
}
223225

@@ -310,6 +312,8 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
310312
f.IntVar(&l.AlertmanagerMaxDispatcherAggregationGroups, "alertmanager.max-dispatcher-aggregation-groups", 0, "Maximum number of aggregation groups in Alertmanager's dispatcher that a tenant can have. Each active aggregation group uses single goroutine. When the limit is reached, dispatcher will not dispatch alerts that belong to additional aggregation groups, but existing groups will keep working properly. 0 = no limit.")
311313
f.IntVar(&l.AlertmanagerMaxAlertsCount, "alertmanager.max-alerts-count", 0, "Maximum number of alerts that a single user can have. Inserting more alerts will fail with a log message and metric increment. 0 = no limit.")
312314
f.IntVar(&l.AlertmanagerMaxAlertsSizeBytes, "alertmanager.max-alerts-size-bytes", 0, "Maximum total size of alerts that a single user can have, alert size is the sum of the bytes of its labels, annotations and generatorURL. Inserting more alerts will fail with a log message and metric increment. 0 = no limit.")
315+
f.IntVar(&l.AlertmanagerMaxSilencesCount, "alertmanager.max-silences-count", 0, "Maximum number of silences that a single user can have, including expired silences. 0 = no limit.")
316+
f.IntVar(&l.AlertmanagerMaxSilencesSizeBytes, "alertmanager.max-silences-size-bytes", 0, "Maximum size of individual silences that a single user can have. 0 = no limit.")
313317
}
314318

315319
// Validate the limits config and returns an error if the validation
@@ -971,6 +975,14 @@ func (o *Overrides) AlertmanagerMaxAlertsSizeBytes(userID string) int {
971975
return o.GetOverridesForUser(userID).AlertmanagerMaxAlertsSizeBytes
972976
}
973977

978+
func (o *Overrides) AlertmanagerMaxSilencesCount(userID string) int {
979+
return o.GetOverridesForUser(userID).AlertmanagerMaxSilencesCount
980+
}
981+
982+
func (o *Overrides) AlertmanagerMaxSilenceSizeBytes(userID string) int {
983+
return o.GetOverridesForUser(userID).AlertmanagerMaxSilencesSizeBytes
984+
}
985+
974986
func (o *Overrides) DisabledRuleGroups(userID string) DisabledRuleGroups {
975987
if o.tenantLimits != nil {
976988
l := o.tenantLimits.ByUserID(userID)

0 commit comments

Comments
 (0)