Skip to content

Commit b2a2eff

Browse files
santihernandezc authored and yuri-tceretian committed
Capture and expose notification delivery errors (#31)
This PR makes it possible to store the last error for each receiver in case of notification delivery failure. These errors are exposed via the `/api/v2/receivers` endpoint.

Co-authored-by: gotjosh <josue.abreu@gmail.com>

^ Conflicts:
^ cmd/alertmanager/main.go
^ config/receiver/receiver.go
^ notify/notify.go
^ Conflicts:
^ notify/notify.go
1 parent 2c9daaa commit b2a2eff

File tree

14 files changed

+444
-50
lines changed

14 files changed

+444
-50
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,7 @@
2121
!/.promu.yml
2222
!/api/v2/openapi.yaml
2323
!.github/workflows/*.yml
24+
25+
# Editor
26+
.vscode
27+
.DS_Store

api/api.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"github.com/prometheus/alertmanager/cluster"
3131
"github.com/prometheus/alertmanager/config"
3232
"github.com/prometheus/alertmanager/dispatch"
33+
"github.com/prometheus/alertmanager/notify"
3334
"github.com/prometheus/alertmanager/provider"
3435
"github.com/prometheus/alertmanager/silence"
3536
"github.com/prometheus/alertmanager/types"
@@ -195,8 +196,8 @@ func (api *API) Register(r *route.Router, routePrefix string) *http.ServeMux {
195196

196197
// Update config and resolve timeout of each API. APIv2 also needs
197198
// setAlertStatus to be updated.
198-
func (api *API) Update(cfg *config.Config, setAlertStatus func(model.LabelSet)) {
199-
api.v2.Update(cfg, setAlertStatus)
199+
func (api *API) Update(cfg *config.Config, receivers []*notify.Receiver, setAlertStatus func(model.LabelSet)) {
200+
api.v2.Update(cfg, setAlertStatus, receivers)
200201
}
201202

202203
func (api *API) limitHandler(h http.Handler) http.Handler {

api/v2/api.go

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ import (
3232
"github.com/prometheus/common/version"
3333
"github.com/rs/cors"
3434

35-
"github.com/prometheus/alertmanager/api/metrics"
3635
open_api_models "github.com/prometheus/alertmanager/api/v2/models"
3736
"github.com/prometheus/alertmanager/api/v2/restapi"
3837
"github.com/prometheus/alertmanager/api/v2/restapi/operations"
@@ -41,10 +40,13 @@ import (
4140
general_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/general"
4241
receiver_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/receiver"
4342
silence_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/silence"
43+
44+
"github.com/prometheus/alertmanager/api/metrics"
4445
"github.com/prometheus/alertmanager/cluster"
4546
"github.com/prometheus/alertmanager/config"
4647
"github.com/prometheus/alertmanager/dispatch"
4748
"github.com/prometheus/alertmanager/matcher/compat"
49+
"github.com/prometheus/alertmanager/notify"
4850
"github.com/prometheus/alertmanager/pkg/labels"
4951
"github.com/prometheus/alertmanager/provider"
5052
"github.com/prometheus/alertmanager/silence"
@@ -73,7 +75,8 @@ type API struct {
7375
logger *slog.Logger
7476
m *metrics.Alerts
7577

76-
Handler http.Handler
78+
Handler http.Handler
79+
receivers []*notify.Receiver
7780
}
7881

7982
type (
@@ -158,13 +161,14 @@ func (api *API) requestLogger(req *http.Request) *slog.Logger {
158161
}
159162

160163
// Update sets the API struct members that may change between reloads of alertmanager.
161-
func (api *API) Update(cfg *config.Config, setAlertStatus setAlertStatusFn) {
164+
func (api *API) Update(cfg *config.Config, setAlertStatus setAlertStatusFn, receivers []*notify.Receiver) {
162165
api.mtx.Lock()
163166
defer api.mtx.Unlock()
164167

165168
api.alertmanagerConfig = cfg
166169
api.route = dispatch.NewRoute(cfg.Route, nil)
167170
api.setAlertStatus = setAlertStatus
171+
api.receivers = receivers
168172
}
169173

170174
func (api *API) getStatusHandler(params general_ops.GetStatusParams) middleware.Responder {
@@ -225,11 +229,40 @@ func (api *API) getStatusHandler(params general_ops.GetStatusParams) middleware.
225229

226230
func (api *API) getReceiversHandler(params receiver_ops.GetReceiversParams) middleware.Responder {
227231
api.mtx.RLock()
228-
defer api.mtx.RUnlock()
232+
configReceivers := api.receivers
233+
api.mtx.RUnlock()
234+
235+
receivers := make([]*open_api_models.Receiver, 0, len(configReceivers))
236+
for _, r := range configReceivers {
237+
integrations := make([]*open_api_models.Integration, 0, len(r.Integrations()))
238+
239+
for _, integration := range r.Integrations() {
240+
notify, duration, err := integration.GetReport()
241+
iname := integration.String()
242+
sendResolved := integration.SendResolved()
243+
integrations = append(integrations, &open_api_models.Integration{
244+
Name: &iname,
245+
SendResolved: &sendResolved,
246+
LastNotifyAttempt: strfmt.DateTime(notify.UTC()),
247+
LastNotifyAttemptDuration: duration.String(),
248+
LastNotifyAttemptError: func() string {
249+
if err != nil {
250+
return err.Error()
251+
}
252+
return ""
253+
}(),
254+
})
255+
}
256+
257+
rName := r.Name()
258+
active := r.Active()
259+
model := &open_api_models.Receiver{
260+
Name: &rName,
261+
Active: &active,
262+
Integrations: integrations,
263+
}
229264

230-
receivers := make([]*open_api_models.Receiver, 0, len(api.alertmanagerConfig.Receivers))
231-
for i := range api.alertmanagerConfig.Receivers {
232-
receivers = append(receivers, &open_api_models.Receiver{Name: &api.alertmanagerConfig.Receivers[i].Name})
265+
receivers = append(receivers, model)
233266
}
234267

235268
return receiver_ops.NewGetReceiversOK().WithPayload(receivers)

api/v2/api_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535
receiver_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/receiver"
3636
silence_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/silence"
3737
"github.com/prometheus/alertmanager/config"
38+
"github.com/prometheus/alertmanager/notify"
3839
"github.com/prometheus/alertmanager/pkg/labels"
3940
"github.com/prometheus/alertmanager/silence"
4041
"github.com/prometheus/alertmanager/silence/silencepb"
@@ -557,14 +558,18 @@ receivers:
557558
uptime: time.Now(),
558559
logger: promslog.NewNopLogger(),
559560
alertmanagerConfig: cfg,
561+
receivers: []*notify.Receiver{
562+
notify.NewReceiver("team-X", true, nil),
563+
notify.NewReceiver("team-Y", true, nil),
564+
},
560565
}
561566

562567
for _, tc := range []struct {
563568
body string
564569
expectedCode int
565570
}{
566571
{
567-
`[{"name":"team-X"},{"name":"team-Y"}]`,
572+
`[{"active":true,"integrations":[],"name":"team-X"},{"active":true,"integrations":[],"name":"team-Y"}]`,
568573
200,
569574
},
570575
} {

api/v2/models/integration.go

Lines changed: 122 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v2/models/receiver.go

Lines changed: 51 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v2/openapi.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,8 +520,36 @@ definitions:
520520
properties:
521521
name:
522522
type: string
523+
active:
524+
type: boolean
525+
integrations:
526+
type: array
527+
items:
528+
$ref: '#/definitions/integration'
529+
required:
530+
- name
531+
- active
532+
- integrations
533+
integration:
534+
type: object
535+
properties:
536+
name:
537+
type: string
538+
sendResolved:
539+
type: boolean
540+
lastNotifyAttempt:
541+
description: A timestamp indicating the last attempt to deliver a notification regardless of the outcome.
542+
type: string
543+
format: date-time
544+
lastNotifyAttemptDuration:
545+
description: Duration of the last attempt to deliver a notification in humanized format (`1s` or `15ms`, etc).
546+
type: string
547+
lastNotifyAttemptError:
548+
description: Error string for the last attempt to deliver a notification. Empty if the last attempt was successful.
549+
type: string
523550
required:
524551
- name
552+
- sendResolved
525553
labelSet:
526554
type: object
527555
additionalProperties:

0 commit comments

Comments
 (0)