Skip to content

Commit

Permalink
Add missing rule manager metrics
Browse files Browse the repository at this point in the history
In the 2.10 base upgrade, the API for the registry had changed and the update
was missed. While fixing that, I've gone ahead and added the new CLI flags
for outageTolerance, gracePeriod, and resendDelay.
  • Loading branch information
jacksontj committed Sep 27, 2019
1 parent f1ec778 commit d09f33b
Showing 1 changed file with 16 additions and 9 deletions.
25 changes: 16 additions & 9 deletions cmd/promxy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,11 @@ type cliOpts struct {
QueryMaxSamples int `long:"query.max-samples" description:"Maximum number of samples a single query can load into memory. Note that queries will fail if they would load more samples than this into memory, so this also limits the number of samples a query can return." default:"50000000"`
QueryLookbackDelta time.Duration `long:"query.lookback-delta" description:"The maximum lookback duration for retrieving metrics during expression evaluations." default:"5m"`

NotificationQueueCapacity int `long:"alertmanager.notification-queue-capacity" description:"The capacity of the queue for pending alert manager notifications." default:"10000"`
AccessLogDestination string `long:"access-log-destination" description:"where to log access logs, options (none, stderr, stdout)" default:"stdout"`
NotificationQueueCapacity int `long:"alertmanager.notification-queue-capacity" description:"The capacity of the queue for pending alert manager notifications." default:"10000"`
AccessLogDestination string `long:"access-log-destination" description:"where to log access logs, options (none, stderr, stdout)" default:"stdout"`
ForOutageTolerance time.Duration `long:"rules.alert.for-outage-tolerance" description:"Max time to tolerate prometheus outage for restoring for state of alert." default:"1h"`
ForGracePeriod time.Duration `long:"rules.alert.for-grace-period" description:"Minimum duration between alert and restored for state. This is maintained only for alerts with configured for time greater than grace period." default:"10m"`
ResendDelay time.Duration `long:"rules.alert.resend-delay" description:"Minimum amount of time to wait before resending an alert to Alertmanager." default:"1m"`

ShutdownDelay time.Duration `long:"http.shutdown-delay" description:"time to wait before shutting down the http server, this allows for a grace period for upstreams (e.g. LoadBalancers) to discover the new stopping status through healthchecks" default:"10s"`
ShutdownTimeout time.Duration `long:"http.shutdown-timeout" description:"max time to wait for a graceful shutdown of the HTTP server" default:"60s"`
Expand Down Expand Up @@ -235,13 +238,17 @@ func main() {
}()

ruleManager := rules.NewManager(&rules.ManagerOptions{
Context: ctx, // base context for all background tasks
ExternalURL: externalUrl, // URL listed as URL for "who fired this alert"
QueryFunc: rules.EngineQueryFunc(engine, proxyStorage),
NotifyFunc: sendAlerts(notifierManager, externalUrl.String()),
TSDB: noop.NewNoopStorage(), // TODO: use remote_read?
Appendable: proxyStorage,
Logger: logger,
Context: ctx, // base context for all background tasks
ExternalURL: externalUrl, // URL listed as URL for "who fired this alert"
QueryFunc: rules.EngineQueryFunc(engine, proxyStorage),
NotifyFunc: sendAlerts(notifierManager, externalUrl.String()),
TSDB: noop.NewNoopStorage(), // TODO: use remote_read?
Appendable: proxyStorage,
Logger: logger,
Registerer: prometheus.DefaultRegisterer,
OutageTolerance: opts.ForOutageTolerance,
ForGracePeriod: opts.ForGracePeriod,
ResendDelay: opts.ResendDelay,
})
go ruleManager.Run()

Expand Down

0 comments on commit d09f33b

Please sign in to comment.