diff --git a/notify/notify.go b/notify/notify.go index 33d499af30..56dec33c5a 100644 --- a/notify/notify.go +++ b/notify/notify.go @@ -790,6 +790,11 @@ func (r RetryStage) exec(ctx context.Context, l log.Logger, alerts ...*types.Ale case <-ctx.Done(): if iErr == nil { iErr = ctx.Err() + if errors.Is(iErr, context.Canceled) { + iErr = NewErrorWithReason(ContextCanceledReason, iErr) + } else if errors.Is(iErr, context.DeadlineExceeded) { + iErr = NewErrorWithReason(ContextDeadlineExceededReason, iErr) + } } return ctx, nil, errors.Wrapf(iErr, "%s/%s: notify retry canceled after %d attempts", r.groupName, r.integration.String(), i) @@ -808,14 +813,15 @@ func (r RetryStage) exec(ctx context.Context, l log.Logger, alerts ...*types.Ale if !retry { return ctx, alerts, errors.Wrapf(err, "%s/%s: notify retry canceled due to unrecoverable error after %d attempts", r.groupName, r.integration.String(), i) } - if ctx.Err() == nil && (iErr == nil || err.Error() != iErr.Error()) { - // Log the error if the context isn't done and the error isn't the same as before. - level.Warn(l).Log("msg", "Notify attempt failed, will retry later", "attempts", i, "err", err) + if ctx.Err() == nil { + if iErr == nil || err.Error() != iErr.Error() { + // Log the error if the context isn't done and the error isn't the same as before. + level.Warn(l).Log("msg", "Notify attempt failed, will retry later", "attempts", i, "err", err) + } + // Save this error to be able to return the last seen error by an + // integration upon context timeout. + iErr = err } - - // Save this error to be able to return the last seen error by an - // integration upon context timeout. 
- iErr = err } else { lvl := level.Info(l) if i <= 1 { @@ -827,7 +833,12 @@ func (r RetryStage) exec(ctx context.Context, l log.Logger, alerts ...*types.Ale } case <-ctx.Done(): if iErr == nil { - iErr = ctx.Err() + iErr = ctx.Err() + if errors.Is(iErr, context.Canceled) { + iErr = NewErrorWithReason(ContextCanceledReason, iErr) + } else if errors.Is(iErr, context.DeadlineExceeded) { + iErr = NewErrorWithReason(ContextDeadlineExceededReason, iErr) + } } return ctx, nil, errors.Wrapf(iErr, "%s/%s: notify retry canceled after %d attempts", r.groupName, r.integration.String(), i) diff --git a/notify/notify_test.go b/notify/notify_test.go index d3eeb4670a..ead6e17b94 100644 --- a/notify/notify_test.go +++ b/notify/notify_test.go @@ -469,6 +469,39 @@ func TestRetryStageWithErrorCode(t *testing.T) { } } +func TestRetryStageWithContextCanceled(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + + i := Integration{ + name: "test", + notifier: notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) { + cancel() + return true, errors.New("request failed: context canceled") + }), + rs: sendResolved(false), + } + r := NewRetryStage(i, "", NewMetrics(prometheus.NewRegistry(), featurecontrol.NoopFlags{})) + + alerts := []*types.Alert{ + { + Alert: model.Alert{ + EndsAt: time.Now().Add(time.Hour), + }, + }, + } + + ctx = WithFiringAlerts(ctx, []uint64{0}) + + // Notify with a retryable error after the context has been canceled. + resctx, _, err := r.Exec(ctx, log.NewNopLogger(), alerts...) 
+ counter := r.metrics.numTotalFailedNotifications + + require.Equal(t, 1, int(prom_testutil.ToFloat64(counter.WithLabelValues(r.integration.Name(), ContextCanceledReason.String())))) + + require.NotNil(t, err) + require.NotNil(t, resctx) +} + func TestRetryStageNoResolved(t *testing.T) { sent := []*types.Alert{} i := Integration{ diff --git a/notify/util.go b/notify/util.go index 706856c160..b5110f63d5 100644 --- a/notify/util.go +++ b/notify/util.go @@ -270,6 +270,8 @@ const ( DefaultReason Reason = iota ClientErrorReason ServerErrorReason + ContextCanceledReason + ContextDeadlineExceededReason ) func (s Reason) String() string { @@ -280,13 +282,17 @@ func (s Reason) String() string { return "clientError" case ServerErrorReason: return "serverError" + case ContextCanceledReason: + return "contextCanceled" + case ContextDeadlineExceededReason: + return "contextDeadlineExceeded" default: panic(fmt.Sprintf("unknown Reason: %d", s)) } } // possibleFailureReasonCategory is a list of possible failure reason. -var possibleFailureReasonCategory = []string{DefaultReason.String(), ClientErrorReason.String(), ServerErrorReason.String()} +var possibleFailureReasonCategory = []string{DefaultReason.String(), ClientErrorReason.String(), ServerErrorReason.String(), ContextCanceledReason.String(), ContextDeadlineExceededReason.String()} // GetFailureReasonFromStatusCode returns the reason for the failure based on the status code provided. func GetFailureReasonFromStatusCode(statusCode int) Reason {