Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions cmd/alertmanager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,14 @@ func run() int {
}

var (
configFile = kingpin.Flag("config.file", "Alertmanager configuration file name.").Default("alertmanager.yml").String()
dataDir = kingpin.Flag("storage.path", "Base path for data storage.").Default("data/").String()
retention = kingpin.Flag("data.retention", "How long to keep data for.").Default("120h").Duration()
maintenanceInterval = kingpin.Flag("data.maintenance-interval", "Interval between garbage collection and snapshotting to disk of the silences and the notification logs.").Default("15m").Duration()
maxSilences = kingpin.Flag("silences.max-silences", "Maximum number of silences, including expired silences. If negative or zero, no limit is set.").Default("0").Int()
maxSilenceSizeBytes = kingpin.Flag("silences.max-silence-size-bytes", "Maximum silence size in bytes. If negative or zero, no limit is set.").Default("0").Int()
alertGCInterval = kingpin.Flag("alerts.gc-interval", "Interval between alert GC.").Default("30m").Duration()
configFile = kingpin.Flag("config.file", "Alertmanager configuration file name.").Default("alertmanager.yml").String()
dataDir = kingpin.Flag("storage.path", "Base path for data storage.").Default("data/").String()
retention = kingpin.Flag("data.retention", "How long to keep data for.").Default("120h").Duration()
maintenanceInterval = kingpin.Flag("data.maintenance-interval", "Interval between garbage collection and snapshotting to disk of the silences and the notification logs.").Default("15m").Duration()
maxSilences = kingpin.Flag("silences.max-silences", "Maximum number of silences, including expired silences. If negative or zero, no limit is set.").Default("0").Int()
maxSilenceSizeBytes = kingpin.Flag("silences.max-silence-size-bytes", "Maximum silence size in bytes. If negative or zero, no limit is set.").Default("0").Int()
alertGCInterval = kingpin.Flag("alerts.gc-interval", "Interval between alert GC.").Default("30m").Duration()
dispatchMaintenanceInterval = kingpin.Flag("dispatch.maintenance-interval", "Interval between maintenance of aggregation groups in the dispatcher.").Default("30s").Duration()

webConfig = webflag.AddFlags(kingpin.CommandLine, ":9093")
externalURL = kingpin.Flag("web.external-url", "The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Alertmanager. If omitted, relevant URL components will be derived automatically.").String()
Expand Down Expand Up @@ -492,7 +493,7 @@ func run() int {
silencer.Mutes(labels)
})

disp = dispatch.NewDispatcher(alerts, routes, pipeline, marker, timeoutFunc, nil, logger, dispMetrics)
disp = dispatch.NewDispatcher(alerts, routes, pipeline, marker, timeoutFunc, *dispatchMaintenanceInterval, nil, logger, dispMetrics)
routes.Walk(func(r *dispatch.Route) {
if r.RouteOpts.RepeatInterval > *retention {
configLogger.Warn(
Expand Down
27 changes: 15 additions & 12 deletions dispatch/dispatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,10 @@ type Dispatcher struct {
aggrGroupsPerRoute map[*Route]map[model.Fingerprint]*aggrGroup
aggrGroupsNum int

done chan struct{}
ctx context.Context
cancel func()
maintenanceInterval time.Duration
done chan struct{}
ctx context.Context
cancel func()

logger *slog.Logger
}
Expand All @@ -109,6 +110,7 @@ func NewDispatcher(
s notify.Stage,
mk types.GroupMarker,
to func(time.Duration) time.Duration,
mi time.Duration,
lim Limits,
l *slog.Logger,
m *DispatcherMetrics,
Expand All @@ -118,14 +120,15 @@ func NewDispatcher(
}

disp := &Dispatcher{
alerts: ap,
stage: s,
route: r,
marker: mk,
timeout: to,
logger: l.With("component", "dispatcher"),
metrics: m,
limits: lim,
alerts: ap,
stage: s,
route: r,
marker: mk,
timeout: to,
maintenanceInterval: mi,
logger: l.With("component", "dispatcher"),
metrics: m,
limits: lim,
}
return disp
}
Expand All @@ -146,7 +149,7 @@ func (d *Dispatcher) Run() {
}

func (d *Dispatcher) run(it provider.AlertIterator) {
maintenance := time.NewTicker(30 * time.Second)
maintenance := time.NewTicker(d.maintenanceInterval)
defer maintenance.Stop()

defer it.Close()
Expand Down
12 changes: 7 additions & 5 deletions dispatch/dispatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ import (
"github.com/prometheus/alertmanager/types"
)

// testMaintenanceInterval is the maintenance interval that the tests in this
// file pass to NewDispatcher.
const testMaintenanceInterval = 30 * time.Second

func TestAggrGroup(t *testing.T) {
lset := model.LabelSet{
"a": "v1",
Expand Down Expand Up @@ -399,7 +401,7 @@ route:

timeout := func(d time.Duration) time.Duration { return time.Duration(0) }
recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*types.Alert)}
dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, nil, logger, NewDispatcherMetrics(false, prometheus.NewRegistry()))
dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, testMaintenanceInterval, nil, logger, NewDispatcherMetrics(false, prometheus.NewRegistry()))
go dispatcher.Run()
defer dispatcher.Stop()

Expand Down Expand Up @@ -551,7 +553,7 @@ route:
recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*types.Alert)}
lim := limits{groups: 6}
m := NewDispatcherMetrics(true, prometheus.NewRegistry())
dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, lim, logger, m)
dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, testMaintenanceInterval, lim, logger, m)
go dispatcher.Run()
defer dispatcher.Stop()

Expand Down Expand Up @@ -669,7 +671,7 @@ func TestDispatcherRace(t *testing.T) {
defer alerts.Close()

timeout := func(d time.Duration) time.Duration { return time.Duration(0) }
dispatcher := NewDispatcher(alerts, nil, nil, marker, timeout, nil, logger, NewDispatcherMetrics(false, prometheus.NewRegistry()))
dispatcher := NewDispatcher(alerts, nil, nil, marker, timeout, testMaintenanceInterval, nil, logger, NewDispatcherMetrics(false, prometheus.NewRegistry()))
go dispatcher.Run()
dispatcher.Stop()
}
Expand Down Expand Up @@ -697,7 +699,7 @@ func TestDispatcherRaceOnFirstAlertNotDeliveredWhenGroupWaitIsZero(t *testing.T)

timeout := func(d time.Duration) time.Duration { return d }
recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*types.Alert)}
dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, nil, logger, NewDispatcherMetrics(false, prometheus.NewRegistry()))
dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, testMaintenanceInterval, nil, logger, NewDispatcherMetrics(false, prometheus.NewRegistry()))
go dispatcher.Run()
defer dispatcher.Stop()

Expand Down Expand Up @@ -749,7 +751,7 @@ func TestDispatcher_DoMaintenance(t *testing.T) {
recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*types.Alert)}

ctx := context.Background()
dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, nil, promslog.NewNopLogger(), NewDispatcherMetrics(false, r))
dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, testMaintenanceInterval, nil, promslog.NewNopLogger(), NewDispatcherMetrics(false, r))
aggrGroups := make(map[*Route]map[model.Fingerprint]*aggrGroup)
aggrGroups[route] = make(map[model.Fingerprint]*aggrGroup)

Expand Down