@@ -3,7 +3,7 @@ kind: HelmRelease
33metadata :
44 name : kube-prometheus-stack
55spec :
6- interval : 1h
6+ interval : 5m
77 chartRef :
88 kind : OCIRepository
99 name : kube-prometheus-stack
@@ -29,18 +29,129 @@ spec:
2929 # https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml
3030 values :
# Alertmanager: enable the chart-managed instance and deliver alerts to Slack.
alertmanager:
  enabled: true
  alertmanagerSpec:
    priorityClassName: spectrum-monitoring
    # These secrets are read below from /etc/alertmanager/secrets/<name>/.
    secrets:
      - slack-api-url
      - slack-templates

  config:
    route:
      group_by:
        - alertname
        - namespace
        - severity
      group_wait: 30s
      group_interval: 5m
      repeat_interval: 7d
      # Default receiver has no notifier configured, so anything not matched
      # by a child route below is dropped.
      receiver: blackhole
      routes:
        # Receiver names must match the `receivers[].name` entries exactly
        # (no surrounding whitespace), otherwise the config is rejected.
        - receiver: slack-all
          matchers:
            - 'service != "fluxcd"'
            - 'severity =~ "warning|critical"'
        - receiver: slack-fluxcd
          matchers:
            - 'service = "fluxcd"'
    receivers:
      - name: blackhole
      - name: slack-all
        slack_configs:
          - channel: '#{{- template "slack_channel_main" . -}}'
            api_url_file: /etc/alertmanager/secrets/slack-api-url/slack-all
            send_resolved: true
            title: '{{ template "slack.main.title" . }}'
            text: '{{ template "slack.main.text" . }}'
            icon_url: https://upload.wikimedia.org/wikipedia/commons/thumb/3/38/Prometheus_software_logo.svg/500px-Prometheus_software_logo.svg.png
      - name: slack-fluxcd
        slack_configs:
          - channel: '#{{- template "slack_channel_flux" . -}}'
            api_url_file: /etc/alertmanager/secrets/slack-api-url/slack-fluxcd
            send_resolved: true
            title: '{{ template "slack.main.title" . }}'
            text: '{{ template "slack.main.text" . }}'
            icon_url: https://upload.wikimedia.org/wikipedia/commons/thumb/3/38/Prometheus_software_logo.svg/500px-Prometheus_software_logo.svg.png
    templates:
      - '/etc/alertmanager/config/*.tmpl'
      - '/etc/alertmanager/secrets/slack-templates/*.tmpl'

  # Slack message templates.
  # NOTE(review): "provider", "cluster_name", "ip_address",
  # "slack_channel_main", "slack_channel_flux" and the "__main_dashboard",
  # "__main_link", "__main_silence", "__main_explore" templates are not
  # defined in this file; presumably they come from the *.tmpl files in the
  # slack-templates secret - confirm they exist there.
  templateFiles:
    template_1.tmpl: |-
      {{ define "__main_title" }}
      [{{ .Status | toUpper }}
      {{- if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{- template "provider" . -}}-{{- template "cluster_name" . -}}-{{- template "ip_address" . -}} {{ with .GroupLabels }} {{- .Values | join " " }} {{- end -}}
      {{ end }}

      {{ define "__main_body_firing" }}
      {{ range .Alerts }}
      *Cluster:* {{ template "provider" . }}-{{- template "cluster_name" . -}}-{{- template "ip_address" . }}
      *Summary:* {{ .Annotations.summary }}
      *Description:* {{ .Annotations.description }}
      *Since:* {{ .StartsAt.UTC.Format "02/01/06 15:04 UTC" }}
      *Details:*
      {{ range .Labels.SortedPairs }}• {{ .Name }}: `{{ .Value }}`
      {{ end }}{{ end }}
      {{ end }}

      {{ define "__main_body_resolved" }}
      {{ range .Alerts }}
      *Cluster:* {{ template "provider" . }}-{{- template "cluster_name" . -}}-{{- template "ip_address" . }}
      *Message:* {{ if .Annotations.resolved }}{{ .Annotations.resolved }}{{ else }}{{ .Annotations.summary }}{{ end }}
      *Description:* {{ if .Annotations.resolved }}{{ .Annotations.resolved }}{{ else }}{{ .Annotations.description }}{{ end }}
      *Started at:* {{ .StartsAt.UTC.Format "02/01/06 15:04 UTC" }}
      *Ended at:* {{ .EndsAt.UTC.Format "02/01/06 15:04 UTC" }}
      *Details:*
      {{ range .Labels.SortedPairs }}• {{ .Name }}: `{{ .Value }}`
      {{ end }}{{ end }}
      {{ end }}

      {{ define "slack.main.title" }}{{ template "__main_title" . }}{{ end }}

      {{ define "slack.main.text" }}
      {{ if eq (len .Alerts.Firing) 1 -}}
      {{ template "__main_body_firing" . }}
      {{- else if gt (len .Alerts.Firing) 1 -}}
      *Alerts:* {{ template "__main_body_firing" . }}
      {{- else -}}
      {{ template "__main_body_resolved" . }}
      {{- end -}}
      {{ end }}

      {{ define "slack.main.dashboard" }}{{ template "__main_dashboard" . }}{{ end }}
      {{ define "slack.main.link" }}{{ template "__main_link" . }}{{ end }}
      {{ define "slack.main.silence" }}{{ template "__main_silence" . }}{{ end }}
      {{ define "slack.main.explore" }}{{ template "__main_explore" . }}{{ end }}
# Per-rule overrides: each listed alert is set to severity `info`.
# The Slack route only matches `warning|critical`, so these alerts are not
# sent to the main channel.
# NOTE(review): assumed to sit at the top level of `values`, next to
# `alertmanager`, as in the chart's values.yaml - confirm the nesting.
customRules:
  KubeStateMetricsListErrors:
    severity: info
  KubeClientCertificateExpiration:
    severity: info
  KubeControllerManagerDown:
    severity: info
  KubeSchedulerDown:
    severity: info
  PrometheusNotConnectedToAlertmanagers:
    severity: info
  PrometheusDuplicateTimestamps:
    severity: info
  PrometheusRuleFailures:
    severity: info
  KubeProxyDown:
    severity: info
142+
# Operator pods use the same priority class as the other monitoring workloads.
prometheusOperator:
  priorityClassName: spectrum-monitoring
35145 prometheus :
36146 prometheusSpec :
37147 priorityClassName : spectrum-monitoring
38- retention : 168h
148+ retention : 200h
39149 resources :
40150 requests :
41151 cpu : 200m
42152 memory : 200Mi
43153
154+ ruleSelectorNilUsesHelmValues : false
44155 serviceMonitorNamespaceSelector : {}
45156 serviceMonitorSelector :
46157 matchExpressions :
0 commit comments