Skip to content

Commit d2bf952

Browse files
santihernandezc and gotjosh
authored and committed
Capture and expose notification delivery errors (#31)
This PR makes it possible to store the last error for each receiver in case of a notification delivery failure. These errors are exposed via the `/api/v2/receivers` endpoint.

Co-authored-by: gotjosh <josue.abreu@gmail.com>
1 parent 1da134a commit d2bf952

File tree

15 files changed

+590
-52
lines changed

15 files changed

+590
-52
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,7 @@
2121
!/.promu.yml
2222
!/api/v2/openapi.yaml
2323
!.github/workflows/*.yml
24+
25+
# Editor
26+
.vscode
27+
.DS_Store

api/api.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/prometheus/alertmanager/cluster"
3030
"github.com/prometheus/alertmanager/config"
3131
"github.com/prometheus/alertmanager/dispatch"
32+
"github.com/prometheus/alertmanager/notify"
3233
"github.com/prometheus/alertmanager/provider"
3334
"github.com/prometheus/alertmanager/silence"
3435
"github.com/prometheus/alertmanager/types"
@@ -186,8 +187,8 @@ func (api *API) Register(r *route.Router, routePrefix string) *http.ServeMux {
186187

187188
// Update config and resolve timeout of each API. APIv2 also needs
188189
// setAlertStatus to be updated.
189-
func (api *API) Update(cfg *config.Config, setAlertStatus func(model.LabelSet)) {
190-
api.v2.Update(cfg, setAlertStatus)
190+
func (api *API) Update(cfg *config.Config, receivers []*notify.Receiver, setAlertStatus func(model.LabelSet)) {
191+
api.v2.Update(cfg, setAlertStatus, receivers)
191192
}
192193

193194
func (api *API) limitHandler(h http.Handler) http.Handler {

api/v2/api.go

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ import (
3333
"github.com/prometheus/common/version"
3434
"github.com/rs/cors"
3535

36-
"github.com/prometheus/alertmanager/api/metrics"
3736
open_api_models "github.com/prometheus/alertmanager/api/v2/models"
3837
"github.com/prometheus/alertmanager/api/v2/restapi"
3938
"github.com/prometheus/alertmanager/api/v2/restapi/operations"
@@ -42,10 +41,13 @@ import (
4241
general_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/general"
4342
receiver_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/receiver"
4443
silence_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/silence"
44+
45+
"github.com/prometheus/alertmanager/api/metrics"
4546
"github.com/prometheus/alertmanager/cluster"
4647
"github.com/prometheus/alertmanager/config"
4748
"github.com/prometheus/alertmanager/dispatch"
4849
"github.com/prometheus/alertmanager/matchers/compat"
50+
"github.com/prometheus/alertmanager/notify"
4951
"github.com/prometheus/alertmanager/pkg/labels"
5052
"github.com/prometheus/alertmanager/provider"
5153
"github.com/prometheus/alertmanager/silence"
@@ -73,7 +75,8 @@ type API struct {
7375
logger log.Logger
7476
m *metrics.Alerts
7577

76-
Handler http.Handler
78+
Handler http.Handler
79+
receivers []*notify.Receiver
7780
}
7881

7982
type (
@@ -155,13 +158,14 @@ func (api *API) requestLogger(req *http.Request) log.Logger {
155158
}
156159

157160
// Update sets the API struct members that may change between reloads of alertmanager.
158-
func (api *API) Update(cfg *config.Config, setAlertStatus setAlertStatusFn) {
161+
func (api *API) Update(cfg *config.Config, setAlertStatus setAlertStatusFn, receivers []*notify.Receiver) {
159162
api.mtx.Lock()
160163
defer api.mtx.Unlock()
161164

162165
api.alertmanagerConfig = cfg
163166
api.route = dispatch.NewRoute(cfg.Route, nil)
164167
api.setAlertStatus = setAlertStatus
168+
api.receivers = receivers
165169
}
166170

167171
func (api *API) getStatusHandler(params general_ops.GetStatusParams) middleware.Responder {
@@ -222,11 +226,40 @@ func (api *API) getStatusHandler(params general_ops.GetStatusParams) middleware.
222226

223227
func (api *API) getReceiversHandler(params receiver_ops.GetReceiversParams) middleware.Responder {
224228
api.mtx.RLock()
225-
defer api.mtx.RUnlock()
229+
configReceivers := api.receivers
230+
api.mtx.RUnlock()
231+
232+
receivers := make([]*open_api_models.Receiver, 0, len(configReceivers))
233+
for _, r := range configReceivers {
234+
integrations := make([]*open_api_models.Integration, 0, len(r.Integrations()))
235+
236+
for _, integration := range r.Integrations() {
237+
notify, duration, err := integration.GetReport()
238+
iname := integration.String()
239+
sendResolved := integration.SendResolved()
240+
integrations = append(integrations, &open_api_models.Integration{
241+
Name: &iname,
242+
SendResolved: &sendResolved,
243+
LastNotifyAttempt: strfmt.DateTime(notify.UTC()),
244+
LastNotifyAttemptDuration: duration.String(),
245+
LastNotifyAttemptError: func() string {
246+
if err != nil {
247+
return err.Error()
248+
}
249+
return ""
250+
}(),
251+
})
252+
}
253+
254+
rName := r.Name()
255+
active := r.Active()
256+
model := &open_api_models.Receiver{
257+
Name: &rName,
258+
Active: &active,
259+
Integrations: integrations,
260+
}
226261

227-
receivers := make([]*open_api_models.Receiver, 0, len(api.alertmanagerConfig.Receivers))
228-
for i := range api.alertmanagerConfig.Receivers {
229-
receivers = append(receivers, &open_api_models.Receiver{Name: &api.alertmanagerConfig.Receivers[i].Name})
262+
receivers = append(receivers, model)
230263
}
231264

232265
return receiver_ops.NewGetReceiversOK().WithPayload(receivers)

api/v2/api_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
receiver_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/receiver"
3434
silence_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/silence"
3535
"github.com/prometheus/alertmanager/config"
36+
"github.com/prometheus/alertmanager/notify"
3637
"github.com/prometheus/alertmanager/pkg/labels"
3738
"github.com/prometheus/alertmanager/silence"
3839
"github.com/prometheus/alertmanager/silence/silencepb"
@@ -484,14 +485,18 @@ receivers:
484485
uptime: time.Now(),
485486
logger: log.NewNopLogger(),
486487
alertmanagerConfig: cfg,
488+
receivers: []*notify.Receiver{
489+
notify.NewReceiver("team-X", true, nil),
490+
notify.NewReceiver("team-Y", true, nil),
491+
},
487492
}
488493

489494
for _, tc := range []struct {
490495
body string
491496
expectedCode int
492497
}{
493498
{
494-
`[{"name":"team-X"},{"name":"team-Y"}]`,
499+
`[{"active":true,"integrations":[],"name":"team-X"},{"active":true,"integrations":[],"name":"team-Y"}]`,
495500
200,
496501
},
497502
} {

api/v2/models/integration.go

Lines changed: 122 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v2/models/receiver.go

Lines changed: 51 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v2/openapi.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,8 +510,36 @@ definitions:
510510
properties:
511511
name:
512512
type: string
513+
active:
514+
type: boolean
515+
integrations:
516+
type: array
517+
items:
518+
$ref: '#/definitions/integration'
519+
required:
520+
- name
521+
- active
522+
- integrations
523+
integration:
524+
type: object
525+
properties:
526+
name:
527+
type: string
528+
sendResolved:
529+
type: boolean
530+
lastNotifyAttempt:
531+
description: A timestamp indicating the last attempt to deliver a notification regardless of the outcome.
532+
type: string
533+
format: date-time
534+
lastNotifyAttemptDuration:
535+
description: Duration of the last attempt to deliver a notification in humanized format (`1s` or `15ms`, etc).
536+
type: string
537+
lastNotifyAttemptError:
538+
description: Error string for the last attempt to deliver a notification. Empty if the last attempt was successful.
539+
type: string
513540
required:
514541
- name
542+
- sendResolved
515543
labelSet:
516544
type: object
517545
additionalProperties:

0 commit comments

Comments
 (0)