-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
alert.rules
70 lines (63 loc) · 2.35 KB
/
alert.rules
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
groups:
- name: targets
rules:
- alert: monitor_service_down
expr: up == 0
for: 30s
labels:
severity: critical
annotations:
summary: "Monitor service non-operational"
description: "Service {{ $labels.instance }} is down."
- name: host
rules:
- alert: high_cpu_load
expr: node_load1 > 1.5
for: 30s
labels:
severity: warning
annotations:
summary: "Server under high load"
description: "Docker host is under high load, the avg load 1m is at {{ $value}}. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
- alert: high_memory_load
expr: (sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100 > 85
for: 30s
labels:
severity: warning
annotations:
summary: "Server memory is almost full"
description: "Docker host memory usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
- alert: high_storage_load
expr: (node_filesystem_size_bytes{fstype="aufs"} - node_filesystem_free_bytes{fstype="aufs"}) / node_filesystem_size_bytes{fstype="aufs"} * 100 > 85
for: 30s
labels:
severity: warning
annotations:
summary: "Server storage is almost full"
description: "Docker host storage usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
- name: containers
rules:
- alert: jenkins_down
expr: absent(container_memory_usage_bytes{name="jenkins"})
for: 30s
labels:
severity: critical
annotations:
summary: "Jenkins down"
description: "Jenkins container is down for more than 30 seconds."
- alert: jenkins_high_cpu
expr: sum(rate(container_cpu_usage_seconds_total{name="jenkins"}[1m])) / count(node_cpu_seconds_total{mode="system"}) * 100 > 10
for: 30s
labels:
severity: warning
annotations:
summary: "Jenkins high CPU usage"
description: "Jenkins CPU usage is {{ humanize $value}}%."
- alert: jenkins_high_memory
expr: sum(container_memory_usage_bytes{name="jenkins"}) > 1200000000
for: 30s
labels:
severity: warning
annotations:
summary: "Jenkins high memory usage"
description: "Jenkins memory consumption is at {{ humanize $value}}."