Skip to content

Commit 9bd180c

Browse files
committed
httpmonitorscanner: add retry scanner
1 parent a4d72d8 commit 9bd180c

File tree

1 file changed

+27
-2
lines changed

1 file changed

+27
-2
lines changed

cmd/alertmanager/httpmonitorscanner.go

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ func httpMonitorScanAndAlertFailures(ctx context.Context, app *amstate.App) erro
2525
failures := scanMonitors(
2626
ctx,
2727
app.State.HttpMonitors(),
28-
newScanner(),
28+
newRetryScanner(newScanner()),
2929
logex.Prefix("httpscanner", app.Logger))
3030

3131
// convert monitor failures into alerts
@@ -56,7 +56,7 @@ func scanMonitors(
5656
failedMu := sync.Mutex{}
5757

5858
checkOne := func(monitor amstate.HttpMonitor) {
59-
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
59+
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
6060
defer cancel()
6161

6262
started := time.Now()
@@ -101,6 +101,31 @@ type HttpMonitorScanner interface {
101101
Scan(context.Context, amstate.HttpMonitor) error
102102
}
103103

104+
// retryScanner is an HttpMonitorScanner that delegates to another scanner.
// Its Scan method gives the wrapped scanner a shorter first attempt and
// may call it a second time with the caller's context.
type retryScanner struct {
105+
// actualScanner is the wrapped scanner that performs the real check.
actualScanner HttpMonitorScanner
106+
}
107+
108+
// retries once, but only if it looks retryable
109+
func newRetryScanner(actual HttpMonitorScanner) HttpMonitorScanner {
110+
return &retryScanner{actual}
111+
}
112+
113+
func (r *retryScanner) Scan(ctx context.Context, monitor amstate.HttpMonitor) error {
114+
firstTryCtx, cancel := context.WithTimeout(ctx, 15*time.Second)
115+
defer cancel()
116+
117+
if err := r.actualScanner.Scan(firstTryCtx, monitor); err != nil {
118+
if err != context.DeadlineExceeded { // non-retryable error
119+
return err
120+
}
121+
122+
// now use the longer context
123+
return r.actualScanner.Scan(ctx, monitor)
124+
}
125+
126+
return nil
127+
}
128+
104129
// scanner holds the HTTP client used for monitor checks.
// NOTE(review): presumably this is the HttpMonitorScanner implementation
// returned by newScanner — confirm against its definition.
type scanner struct {
105130
// noRedirects is the HTTP client; judging by the name it is presumably
// configured not to follow redirects — TODO confirm in newScanner.
noRedirects *http.Client
106131
}

0 commit comments

Comments
 (0)