資料夾結構
prometheus
├── docker-compose.yml
├── golang
│ └── Dockerfile
├── promApp
│ ├── go.mod
│ ├── go.sum
│ └── main.go
└── prometheus.yml
#docker-compose.yml
# Compose stack: a Prometheus server plus the Go demo exporter it scrapes.
version: '3.2'
services:
  # Prometheus server; its config file is bind-mounted read-only from the host.
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    ports:
      - '9090:9090'
    command:
      - --config.file=/etc/prometheus/prometheus.yml
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
    environment:
      TZ: Asia/Taipei
  # Go demo exporter built from ./golang/Dockerfile with the repository root
  # as build context.
  golang:
    build:
      context: ./
      dockerfile: ./golang/Dockerfile
    # The image sets no ENTRYPOINT; ./exec is resolved against its WORKDIR.
    entrypoint: ./exec
    ports:
      - '2112:2112'
#/golang/Dockerfile
# Build stage: compile the promApp sources into a single binary named exec.
FROM golang:alpine AS builder
WORKDIR /app
COPY ./promApp .
RUN go mod download
RUN go build -o exec

# Runtime stage: only the compiled binary is copied onto a plain Alpine base.
# No ENTRYPOINT is declared here; docker-compose starts the binary via
# `entrypoint: ./exec`, resolved against this WORKDIR.
FROM alpine
WORKDIR /usr/bin
COPY --from=builder /app/exec .
# --no-cache avoids leaving the apk package index in the image layer.
RUN apk add --no-cache tzdata
//prometheus.yml
# Scrape targets for Prometheus; both jobs are polled once per second.
scrape_configs:
  # The Go demo exporter, published on port 2112.
  - job_name: myapp
    scrape_interval: 1s
    static_configs:
      - targets:
          - 192.168.10.100:2112
  # NOTE(review): presumably the Docker engine metrics endpoint - confirm.
  - job_name: docker desktop
    scrape_interval: 1s
    static_configs:
      - targets:
          - 192.168.10.101:9323
// /promApp/main.go
package main
import (
"log"
"net/http"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// recordMetrics launches a background goroutine that increments the
// opsProcessed counter and then sleeps for two seconds, repeating forever.
// It returns immediately; the goroutine is never stopped.
func recordMetrics() {
	const pause = 2 * time.Second

	// One iteration of the loop: count an operation, then wait.
	step := func() {
		opsProcessed.Inc()
		time.Sleep(pause)
	}

	go func() {
		for {
			step()
		}
	}()
}
// switchFlip launches a background goroutine that sets the sw gauge to the
// parity of a step counter every two seconds. The counter runs 0..9 and then
// wraps to 0, so the gauge keeps alternating 0, 1, 0, 1, ...
// It returns immediately; the goroutine is never stopped.
func switchFlip() {
	const (
		period = 10
		pause  = 2 * time.Second
	)

	go func() {
		for step := 0; ; step = (step + 1) % period {
			sw.Set(float64(step % 2))
			time.Sleep(pause)
		}
	}()
}
// opsProcessed is the counter incremented by recordMetrics.
var opsProcessed = promauto.NewCounter(prometheus.CounterOpts{
	Name: "myapp_processed_ops_total",
	Help: "The total number of processed events",
})

// sw is the gauge that switchFlip sets alternately to 0 and 1.
// It carries an explicitly empty set of constant labels.
var sw = promauto.NewGauge(prometheus.GaugeOpts{
	Name:        "on_off_flip",
	Help:        "開開關關翻翻樂",
	ConstLabels: prometheus.Labels{},
})
// main starts the two background metric producers and then serves the
// Prometheus scrape endpoint at :2112/metrics. It only returns by way of
// log.Fatal when the HTTP server stops with an error.
func main() {
	recordMetrics()
	switchFlip()

	// A dedicated mux keeps the exposed surface to /metrics only, instead of
	// whatever other packages may have registered on http.DefaultServeMux.
	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.Handler())

	// Explicit timeouts stop slow or stalled clients from holding
	// connections open indefinitely; the zero values mean "no timeout".
	srv := &http.Server{
		Addr:              ":2112",
		Handler:           mux,
		ReadHeaderTimeout: 5 * time.Second,
		ReadTimeout:       10 * time.Second,
		WriteTimeout:      10 * time.Second,
		IdleTimeout:       60 * time.Second,
	}
	log.Fatal(srv.ListenAndServe())
}
資料夾結構
.
├── alertmanager
│ └── config.yml <- add
├── config.monitoring
├── docker-compose.yml
├── golang
│ └── Dockerfile
├── grafana
│ ├── dashboards
│ └── provisioning
├── notifyHook <- api http://192.168.10.100:8001/notify
│ ├── api
│ ├── go.mod
│ ├── jsonWrap
│ ├── line
│ ├── main.go
│ ├── test
│ └── tools
├── promApp <- alertname: goApp_flip, goApp_processed_ops_total
│ ├── go.mod
│ ├── go.sum
│ └── main.go
└── prometheus
    ├── alert.rules <- add
    └── prometheus.yml
alert.rules
# Prometheus alerting rules for the demo stack.
groups:
  - name: example
    rules:
      # Fires once a scrape target has reported up == 0 for 1s.
      - alert: goApp-down
        expr: up == 0
        for: 1s
        labels:
          severity: critical
        annotations:
          title: Node {{ $labels.instance }} is down
          # The wording matches the `for: 1s` hold time above.
          description: Failed to scrape {{ $labels.job }} on {{ $labels.instance }} for more than 1 second. Node seems down.
      # Fires as soon as goApp_flip exceeds 5 (no `for` hold time).
      - alert: goApp-flip
        expr: goApp_flip > 5
        labels:
          # NOTE(review): "warm" is probably meant to be "warning"; left
          # unchanged because downstream receivers may depend on this value.
          severity: warm
        annotations:
          title: flip {{ $labels.instance }} is more than 5 value is {{ $value }}
          description: system make {{ $labels.instance }} some error for test {{ $labels.job }}
config.yml
global:
  resolve_timeout: 5s # If no update for an alert is received, it is marked as resolved after this timeout.
route:
  receiver: default-receiver
  group_wait: 10s # Wait time after an alert group is created.
  group_interval: 10s # Interval between two successive notifications for a group.
  repeat_interval: 20s # Interval before the same alert notification is sent again.
  group_by: [cluster, alertname]
  routes:
    - receiver: "Line-Notfiy" # Route to a receiver by alertname.
      group_wait: 1s
      match:
        alertname: goApp-flip
      #continue: true
    - receiver: "Line-Logger"
      group_wait: 1s
      match:
        alertname: goApp-logger
      #continue: true
# Notification channels
receivers:
  - name: Line-Notfiy
    webhook_configs:
      - url: "http://192.168.10.100:8001/notify" #notifyHook app
        http_config:
          authorization:
            type: Bearer
            credentials: "XXXX"
        #send_resolved: true
  - name: Line-Logger
    webhook_configs:
      - url: "http://192.168.10.100:8001/logger" #notifyHook app
        http_config:
          authorization:
            type: Bearer
            credentials: "XXXX"
        #send_resolved: true
  - name: default-receiver
    webhook_configs:
      - url: "http://192.168.10.100:8001/XXX"
        http_config:
          authorization:
            type: Bearer
            credentials: "XXXX"
        #send_resolved: true
用curl測試Alertmanager
# POST one hand-written alert (alertname goApp_api) as a JSON array to the
# /api/v2/alerts endpoint on 192.168.10.101:9093.
curl --location --request POST 'http://192.168.10.101:9093/api/v2/alerts' \
--header 'Content-Type: application/json' \
--data-raw '[
{
"status": "resolve",
"labels": {
"alertname": "goApp_api",
"instance": "localhost:8080",
"job": "node",
"severity": "critical"
},
"annotations": {
"summary": "測試"
},"generatorURL": "http://localhost:9090/graph"
}
]'
在alertmanager:9093看到的alert
notify上面看到的alert
問題: Alertmanager 提供的通知方式總共有以下這幾種
# The unique name of the receiver.
name: <string>
# Configurations for several notification integrations.
email_configs:
[ - <email_config>, ... ]
opsgenie_configs:
[ - <opsgenie_config>, ... ]
pagerduty_configs:
[ - <pagerduty_config>, ... ]
pushover_configs:
[ - <pushover_config>, ... ]
slack_configs:
[ - <slack_config>, ... ]
sns_configs:
[ - <sns_config>, ... ]
victorops_configs:
[ - <victorops_config>, ... ]
webhook_configs:
[ - <webhook_config>, ... ]
wechat_configs:
[ - <wechat_config>, ... ]
telegram_configs:
[ - <telegram_config>, ... ]
webex_configs:
[ - <webex_config>, ... ]
最符合我使用的應該只有webhook
receivers:
  - name: Line-Notfiy
    webhook_configs:
      # Calling LINE Notify directly: the message is fixed in the URL, so every
      # call sends the same value, which does not fit this use case. Forcing it
      # would probably require several routes and receivers.
      - url: "https://notify-api.line.me/api/notify?message=test"
        http_config:
          authorization:
            type: Bearer
            credentials: "XXXX"
        #send_resolved: true
所以只能自己寫API來處理
資料夾結構
.
├── alertmanager
│ └── config.yml
├── config.monitoring
├── docker-compose.yml
├── grafana
│ ├── dashboards
│ └── provisioning
├── notifyHook #src
│ ├── api
│ ├── go.mod
│ ├── jsonWrap
│ ├── line
│ ├── main.go
│ ├── test
│ └── tools
├── notifyHookD #Dockerfile
│ └── Dockerfile
├── promApp
│ ├── go.mod
│ ├── go.sum
│ └── main.go
├── promAppD
│ └── Dockerfile
└── prometheus
    ├── alert.rules
    └── prometheus.yml
Dockerfile
# Build stage: compile the notifyHook sources into a single binary named exec.
FROM golang:alpine AS builder
WORKDIR /app
COPY ./notifyHook .
RUN go mod download
RUN go build -o exec

# Runtime stage: only the compiled binary is copied onto a plain Alpine base.
# No ENTRYPOINT is declared here; docker-compose starts the binary via
# `entrypoint: ./exec`, resolved against this WORKDIR.
FROM alpine
WORKDIR /usr/bin
COPY --from=builder /app/exec .
# --no-cache avoids leaving the apk package index in the image layer.
RUN apk add --no-cache tzdata
docker-compose
# Service fragment; place it under the `services:` key of docker-compose.yml.
notifyHook:
  build: # Required form when the Dockerfile is not in the root directory.
    context: ./
    dockerfile: ./notifyHookD/Dockerfile
  # NOTE(review): "notfiyHook" looks like a typo for "notifyHook"; left
  # unchanged in case scripts refer to the container by this name.
  container_name: notfiyHook
  entrypoint: ./exec
  ports:
    - '8001:8001'
用curl測試notifyHook
# POST a sample grouped-alert JSON payload (one firing alert, alertname
# "goApp flip") straight to the notifyHook /notify endpoint on port 8001.
# NOTE(review): the Authorization header has no token after "Bearer" -
# presumably redacted; confirm what the endpoint expects.
curl --location --request POST '192.168.10.101:8001/notify' \
--header 'Authorization: Bearer' \
--header 'Content-Type: application/json' \
--data-raw '{
"receiver": "api-receiver",
"status": "firing",
"alerts": [
{
"status": "firing",
"labels": {
"alertname": "goApp flip",
"instance": "golang:9001",
"job": "goApp",
"severity": "warm"
},
"annotations": {
"description": "Failed to scrape goApp on golang:9001 for more than 3 minutes. Node seems down.",
"title": "flip golang:9001 is more 5"
},
"startsAt": "2023-01-12T08:09:31.656Z",
"endsAt": "2023-01-12T08:10:01.656Z",
"generatorURL": "http://07712047c817:9090/graph?g0.expr=goApp_flip+%3E+5\u0026g0.tab=1",
"fingerprint": "989b0a76f5d22d7f"
}
],
"groupLabels": {
"alertname": "goApp flip"
},
"commonLabels": {
"alertname": "goApp flip",
"instance": "golang:9001",
"job": "goApp",
"severity": "warm"
},
"commonAnnotations": {
"description": "Failed to scrape goApp on golang:9001 for more than 3 minutes. Node seems down.",
"title": "flip golang:9001 is more 5"
},
"externalURL": "http://28fdaf41f3ea:9093",
"version": "4",
"groupKey": "{}/{alertname=\"goApp flip\"}:{alertname=\"goApp flip\"}",
"truncatedAlerts": 0
}'
#docker-compose relationship