@@ -25,7 +25,7 @@ func httpMonitorScanAndAlertFailures(ctx context.Context, app *amstate.App) erro
25
25
failures := scanMonitors (
26
26
ctx ,
27
27
app .State .HttpMonitors (),
28
- newScanner (),
28
+ newRetryScanner ( newScanner () ),
29
29
logex .Prefix ("httpscanner" , app .Logger ))
30
30
31
31
// convert monitor failures into alerts
@@ -56,7 +56,7 @@ func scanMonitors(
56
56
failedMu := sync.Mutex {}
57
57
58
58
checkOne := func (monitor amstate.HttpMonitor ) {
59
- ctx , cancel := context .WithTimeout (ctx , 10 * time .Second )
59
+ ctx , cancel := context .WithTimeout (ctx , 30 * time .Second )
60
60
defer cancel ()
61
61
62
62
started := time .Now ()
@@ -101,6 +101,31 @@ type HttpMonitorScanner interface {
101
101
Scan (context.Context , amstate.HttpMonitor ) error
102
102
}
103
103
104
+ type retryScanner struct {
105
+ actualScanner HttpMonitorScanner
106
+ }
107
+
108
+ // retries once, but only if it looks retryable
109
+ func newRetryScanner (actual HttpMonitorScanner ) HttpMonitorScanner {
110
+ return & retryScanner {actual }
111
+ }
112
+
113
+ func (r * retryScanner ) Scan (ctx context.Context , monitor amstate.HttpMonitor ) error {
114
+ firstTryCtx , cancel := context .WithTimeout (ctx , 15 * time .Second )
115
+ defer cancel ()
116
+
117
+ if err := r .actualScanner .Scan (firstTryCtx , monitor ); err != nil {
118
+ if err != context .DeadlineExceeded { // non-retryable error
119
+ return err
120
+ }
121
+
122
+ // now use the longer context
123
+ return r .actualScanner .Scan (ctx , monitor )
124
+ }
125
+
126
+ return nil
127
+ }
128
+
104
129
// scanner holds a single HTTP client.
// NOTE(review): presumably this is the HttpMonitorScanner implementation
// returned by newScanner, and the field name suggests the client is set up
// not to follow redirects — confirm against the constructor.
type scanner struct {
105
130
noRedirects * http.Client
106
131
}
0 commit comments