Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow muting alerts via inline keyboard #42

Merged
merged 8 commits into from
Nov 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions config.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,27 @@ grafana:
# Customize your plot width/height
width: "1000"
height: "500"
# A set of mute durations offered when creating a new silence via the inline
# keyboard attached to the result of the /firing command.
# Used for silencing Grafana alerts only.
# Defaults to: 1h, 8h, 48h, 168h, 99999h
mutes_durations:
- 1h
- 8h
- 24h
- 168h
- 99999h
# Optional config if you use external Alertmanager, used for getting silences list and creating new ones.
alertmanager:
# URL of the remote Alertmanager to do queries against. Defaults to http://localhost:9093
url: http://localhost:9093
# Alertmanager credentials
user: admin
password: admin
# Same as grafana.mutes_durations, but for Prometheus alerts. Defaults are the same.
mutes_durations:
- 1h
- 8h
- 24h
- 168h
- 99999h
133 changes: 117 additions & 16 deletions pkg/app/alerts_firing.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ package app

import (
"fmt"
"main/pkg/constants"
"main/pkg/types"
"main/pkg/types/render"
"main/pkg/utils"

tele "gopkg.in/telebot.v3"
)
Expand All @@ -25,21 +25,122 @@ func (a *App) HandleListFiringAlerts(c tele.Context) error {
return c.Reply(fmt.Sprintf("Error querying alerts: %s", err))
}

grafanaGroups = utils.FilterFiringOrPendingAlertGroups(grafanaGroups)
prometheusGroups = utils.FilterFiringOrPendingAlertGroups(prometheusGroups)

template, err := a.TemplateManager.Render("alerts_firing", render.RenderStruct{
Grafana: a.Grafana,
Alertmanager: a.Alertmanager,
Data: types.AlertsListStruct{
GrafanaGroups: grafanaGroups,
PrometheusGroups: prometheusGroups,
},
})
if err != nil {
a.Logger.Error().Err(err).Msg("Error rendering alerts_firing template")
return c.Reply(fmt.Sprintf("Error rendering template: %s", err))
grafanaGroups = grafanaGroups.FilterFiringOrPendingAlertGroups()
prometheusGroups = prometheusGroups.FilterFiringOrPendingAlertGroups()

batches := []types.FiringAlertsListStruct{}
batchToAdd := types.FiringAlertsListStruct{
GrafanaAlerts: make([]types.FiringAlert, 0),
PrometheusAlerts: make([]types.FiringAlert, 0),
GrafanaAlertsCount: len(grafanaGroups),
PrometheusAlertsCount: len(prometheusGroups),
ShowGrafanaHeader: true,
}
batchIndex := 0

for _, grafanaGroup := range grafanaGroups {
for ruleIndex, grafanaRule := range grafanaGroup.Rules {
for _, grafanaAlert := range grafanaRule.Alerts {
batchToAdd.GrafanaAlerts = append(batchToAdd.GrafanaAlerts, types.FiringAlert{
GroupName: grafanaGroup.Name,
GroupAlertsCount: len(grafanaGroup.Rules),
AlertName: grafanaRule.Name,
Alert: grafanaAlert,
ShowAlertName: ruleIndex == 0,
})
batchIndex++

if len(batchToAdd.GrafanaAlerts) >= constants.AlertsInOneMessage {
batches = append(batches, batchToAdd)
batchToAdd = types.FiringAlertsListStruct{
GrafanaAlerts: make([]types.FiringAlert, 0),
PrometheusAlerts: make([]types.FiringAlert, 0),
GrafanaAlertsCount: len(grafanaGroups),
PrometheusAlertsCount: len(prometheusGroups),
}
batchIndex = 0
}
}
}
}

batchToAdd.ShowPrometheusHeader = true

for _, prometheusGroup := range prometheusGroups {
for _, prometheusRule := range prometheusGroup.Rules {
for alertIndex, prometheusAlert := range prometheusRule.Alerts {
batchToAdd.PrometheusAlerts = append(batchToAdd.PrometheusAlerts, types.FiringAlert{
GroupName: prometheusGroup.Name,
GroupAlertsCount: len(prometheusGroup.Rules),
AlertName: prometheusRule.Name,
Alert: prometheusAlert,
ShowAlertName: alertIndex == 0,
})
batchIndex++

if len(batchToAdd.PrometheusAlerts) >= constants.AlertsInOneMessage {
batches = append(batches, batchToAdd)
batchToAdd = types.FiringAlertsListStruct{
GrafanaAlerts: make([]types.FiringAlert, 0),
PrometheusAlerts: make([]types.FiringAlert, 0),
GrafanaAlertsCount: len(grafanaGroups),
PrometheusAlertsCount: len(prometheusGroups),
}
batchIndex = 0
}
}
}
}

if len(batches) == 0 {
batches = append(batches, batchToAdd)
}

// Render and send every batch as its own message, attaching one inline
// "silence" button per alert contained in that batch.
for _, batch := range batches {
	template, renderErr := a.TemplateManager.Render("alerts_firing", render.RenderStruct{
		Grafana:      a.Grafana,
		Alertmanager: a.Alertmanager,
		Data:         batch,
	})
	if renderErr != nil {
		a.Logger.Error().Err(renderErr).Msg("Error rendering alerts_firing template")
		return c.Reply(fmt.Sprintf("Error rendering template: %s", renderErr))
	}

	menu := &tele.ReplyMarkup{ResizeKeyboard: true}

	// One keyboard row per alert; the final size is known up front.
	rows := make([]tele.Row, 0, len(batch.GrafanaAlerts)+len(batch.PrometheusAlerts))

	// index numbers the buttons continuously across both alert sources.
	index := 0

	for _, alert := range batch.GrafanaAlerts {
		button := menu.Data(
			fmt.Sprintf("🔇Silence alert #%d", index+1),
			constants.AlertmanagerPrepareSilencePrefix,
			alert.Alert.GetCallbackHash(),
		)

		rows = append(rows, menu.Row(button))
		index++
	}

	for _, alert := range batch.PrometheusAlerts {
		button := menu.Data(
			fmt.Sprintf("🔇Silence alert #%d", index+1),
			constants.AlertmanagerPrepareSilencePrefix,
			alert.Alert.GetCallbackHash(),
		)

		rows = append(rows, menu.Row(button))
		index++
	}

	menu.Inline(rows...)

	if sendErr := a.BotReply(c, template, menu); sendErr != nil {
		// Propagate the send failure itself. Returning the outer err here
		// (the variable from the earlier alerts query) would report a
		// failed send as success.
		return sendErr
	}
}

return a.BotReply(c, template)
return nil
}
6 changes: 5 additions & 1 deletion pkg/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
const MaxMessageSize = 4096

type App struct {
Config configPkg.Config
Config *configPkg.Config
Grafana *grafanaPkg.Grafana
Alertmanager *alertmanagerPkg.Alertmanager
TemplateManager *templates.TemplateManager
Expand Down Expand Up @@ -52,6 +52,7 @@ func NewApp(config *configPkg.Config, version string) *App {
}

return &App{
Config: config,
Logger: logger,
Grafana: grafana,
Alertmanager: alertmanager,
Expand Down Expand Up @@ -81,6 +82,9 @@ func (a *App) Start() {
// Callbacks
a.Bot.Handle("\f"+constants.GrafanaUnsilencePrefix, a.HandleGrafanaCallbackDeleteSilence)
a.Bot.Handle("\f"+constants.AlertmanagerUnsilencePrefix, a.HandleAlertmanagerCallbackDeleteSilence)
a.Bot.Handle("\f"+constants.AlertmanagerPrepareSilencePrefix, a.HandleAlertmanagerPrepareNewSilenceFromCallback)
a.Bot.Handle("\f"+constants.GrafanaSilencePrefix, a.HandleGrafanaCallbackNewSilence)
a.Bot.Handle("\f"+constants.AlertmanagerSilencePrefix, a.HandleAlertmanagerCallbackNewSilence)

a.Logger.Info().Msg("Telegram bot listening")

Expand Down
167 changes: 160 additions & 7 deletions pkg/app/silences_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"main/pkg/types"
"main/pkg/types/render"
"main/pkg/utils"
"strings"

tele "gopkg.in/telebot.v3"
)
Expand All @@ -16,7 +17,12 @@ func (a *App) HandleGrafanaNewSilence(c tele.Context) error {
Str("text", c.Text()).
Msg("Got new silence query")

return a.HandleNewSilence(c, a.Grafana, constants.GrafanaUnsilencePrefix)
silenceInfo, err := utils.ParseSilenceFromCommand(c.Text(), c.Sender().FirstName)
if err != "" {
return c.Reply(fmt.Sprintf("Error parsing silence option: %s\n", err))
}

return a.HandleNewSilence(c, a.Grafana, constants.GrafanaUnsilencePrefix, silenceInfo)
}

func (a *App) HandleAlertmanagerNewSilence(c tele.Context) error {
Expand All @@ -29,19 +35,166 @@ func (a *App) HandleAlertmanagerNewSilence(c tele.Context) error {
return c.Reply("Alertmanager is disabled.")
}

return a.HandleNewSilence(c, a.Alertmanager, constants.AlertmanagerUnsilencePrefix)
silenceInfo, err := utils.ParseSilenceFromCommand(c.Text(), c.Sender().FirstName)
if err != "" {
return c.Reply(fmt.Sprintf("Error parsing silence option: %s\n", err))
}

return a.HandleNewSilence(c, a.Alertmanager, constants.AlertmanagerUnsilencePrefix, silenceInfo)
}

// HandleGrafanaPrepareNewSilenceFromCallback handles an inline-keyboard press
// asking to silence a Grafana alert. It resolves the alert by the hash carried
// in the callback data among the firing or pending Grafana rule groups, then
// replies with the rendered "silence_prepare_create" template and a keyboard
// holding one button per entry of Config.Grafana.MutesDurations.
//
// NOTE(review): no registration of this handler is visible in the Start hunk
// of this change — confirm it is wired to a callback prefix.
func (a *App) HandleGrafanaPrepareNewSilenceFromCallback(c tele.Context) error {
	a.Logger.Info().
		Str("sender", c.Sender().Username).
		Msg("Got new prepare Grafana silence callback via button")

	cb := c.Callback()
	a.RemoveKeyboardItemByCallback(c, cb)

	ruleGroups, queryErr := a.Grafana.GetGrafanaAlertingRules()
	if queryErr != nil {
		return c.Reply(fmt.Sprintf("Error querying alerts: %s", queryErr))
	}

	// Only alerts that are currently firing or pending can be silenced.
	ruleGroups = ruleGroups.FilterFiringOrPendingAlertGroups()

	alertLabels, ok := ruleGroups.FindLabelsByHash(cb.Data)
	if !ok {
		return c.Reply("Alert was not found!")
	}

	text, renderErr := a.TemplateManager.Render("silence_prepare_create", render.RenderStruct{
		Grafana:      a.Grafana,
		Alertmanager: a.Alertmanager,
		Data:         types.QueryMatcherFromKeyValueMap(alertLabels),
	})
	if renderErr != nil {
		a.Logger.Error().Err(renderErr).Msg("Error rendering silence_prepare_create template")
		return c.Reply(fmt.Sprintf("Error rendering template: %s", renderErr))
	}

	durations := a.Config.Grafana.MutesDurations
	keyboard := &tele.ReplyMarkup{ResizeKeyboard: true}
	buttonRows := make([]tele.Row, 0, len(durations))

	for _, duration := range durations {
		// The button payload is "<duration> <alert hash>".
		button := keyboard.Data(
			fmt.Sprintf("⌛ Silence for %s", duration),
			constants.GrafanaSilencePrefix,
			duration+" "+cb.Data,
		)
		buttonRows = append(buttonRows, keyboard.Row(button))
	}

	keyboard.Inline(buttonRows...)

	return a.BotReply(c, text, keyboard)
}

// HandleAlertmanagerPrepareNewSilenceFromCallback handles an inline-keyboard
// press asking to silence a Prometheus alert. It resolves the alert by the
// hash carried in the callback data among the firing or pending Prometheus
// rule groups (queried through a.Grafana), then replies with the rendered
// "silence_prepare_create" template and a keyboard holding one button per
// entry of Config.Alertmanager.MutesDurations.
func (a *App) HandleAlertmanagerPrepareNewSilenceFromCallback(c tele.Context) error {
	a.Logger.Info().
		Str("sender", c.Sender().Username).
		Msg("Got new prepare Alertmanager silence callback via button")

	cb := c.Callback()
	a.RemoveKeyboardItemByCallback(c, cb)

	ruleGroups, queryErr := a.Grafana.GetPrometheusAlertingRules()
	if queryErr != nil {
		return c.Reply(fmt.Sprintf("Error querying alerts: %s", queryErr))
	}

	// Only alerts that are currently firing or pending can be silenced.
	ruleGroups = ruleGroups.FilterFiringOrPendingAlertGroups()

	alertLabels, ok := ruleGroups.FindLabelsByHash(cb.Data)
	if !ok {
		return c.Reply("Alert was not found!")
	}

	text, renderErr := a.TemplateManager.Render("silence_prepare_create", render.RenderStruct{
		Grafana:      a.Grafana,
		Alertmanager: a.Alertmanager,
		Data:         types.QueryMatcherFromKeyValueMap(alertLabels),
	})
	if renderErr != nil {
		a.Logger.Error().Err(renderErr).Msg("Error rendering silence_prepare_create template")
		return c.Reply(fmt.Sprintf("Error rendering template: %s", renderErr))
	}

	durations := a.Config.Alertmanager.MutesDurations
	keyboard := &tele.ReplyMarkup{ResizeKeyboard: true}
	buttonRows := make([]tele.Row, 0, len(durations))

	for _, duration := range durations {
		// The button payload is "<duration> <alert hash>".
		button := keyboard.Data(
			fmt.Sprintf("⌛ Silence for %s", duration),
			constants.AlertmanagerSilencePrefix,
			duration+" "+cb.Data,
		)
		buttonRows = append(buttonRows, keyboard.Row(button))
	}

	keyboard.Inline(buttonRows...)

	return a.BotReply(c, text, keyboard)
}

// HandleGrafanaCallbackNewSilence handles the inline-keyboard press that
// confirms silencing a Grafana alert for a chosen duration. The callback data
// must have the form "<duration> <alert hash>"; anything else is rejected with
// a reply. The silence is built by GenerateSilenceForAlert from the current
// Grafana alerting rules and then created through HandleNewSilence.
func (a *App) HandleGrafanaCallbackNewSilence(c tele.Context) error {
	a.Logger.Info().
		Str("sender", c.Sender().Username).
		Msg("Got new create Grafana silence callback via button")

	cb := c.Callback()
	a.RemoveKeyboardItemByCallback(c, cb)

	// Split on the first space only, so the hash part is kept intact.
	parts := strings.SplitN(cb.Data, " ", 2)
	if len(parts) != 2 {
		return c.Reply("Invalid callback provided!")
	}

	duration, alertHash := parts[0], parts[1]

	ruleGroups, err := a.Grafana.GetGrafanaAlertingRules()
	if err != nil {
		return c.Reply(fmt.Sprintf("Error querying alerts: %s", err))
	}

	silence, err := a.GenerateSilenceForAlert(c, ruleGroups, alertHash, duration)
	if err != nil {
		return c.Reply(err.Error())
	}

	return a.HandleNewSilence(c, a.Grafana, constants.GrafanaUnsilencePrefix, silence)
}

// HandleAlertmanagerCallbackNewSilence handles the inline-keyboard press that
// confirms silencing a Prometheus alert for a chosen duration. The callback
// data must have the form "<duration> <alert hash>"; anything else is rejected
// with a reply. The silence is built by GenerateSilenceForAlert from the
// current Prometheus alerting rules (queried through a.Grafana) and then
// created in Alertmanager through HandleNewSilence.
func (a *App) HandleAlertmanagerCallbackNewSilence(c tele.Context) error {
	a.Logger.Info().
		Str("sender", c.Sender().Username).
		Msg("Got new create Alertmanager silence callback via button")

	cb := c.Callback()
	a.RemoveKeyboardItemByCallback(c, cb)

	// Split on the first space only, so the hash part is kept intact.
	parts := strings.SplitN(cb.Data, " ", 2)
	if len(parts) != 2 {
		return c.Reply("Invalid callback provided!")
	}

	duration, alertHash := parts[0], parts[1]

	ruleGroups, err := a.Grafana.GetPrometheusAlertingRules()
	if err != nil {
		return c.Reply(fmt.Sprintf("Error querying alerts: %s", err))
	}

	silence, err := a.GenerateSilenceForAlert(c, ruleGroups, alertHash, duration)
	if err != nil {
		return c.Reply(err.Error())
	}

	return a.HandleNewSilence(c, a.Alertmanager, constants.AlertmanagerUnsilencePrefix, silence)
}

func (a *App) HandleNewSilence(
c tele.Context,
silenceManager types.SilenceManager,
unsilencePrefix string,
silenceInfo *types.Silence,
) error {
silenceInfo, err := utils.ParseSilenceOptions(c.Text(), c)
if err != "" {
return c.Reply(fmt.Sprintf("Error parsing silence option: %s\n", err))
}

silenceResponse, silenceErr := silenceManager.CreateSilence(*silenceInfo)
if silenceErr != nil {
return c.Reply(fmt.Sprintf("Error creating silence: %s", silenceErr))
Expand Down
Loading
Loading