forked from jaegertracing/jaeger
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dashboards.libsonnet
102 lines (100 loc) · 3.33 KB
/
dashboards.libsonnet
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// grafana-builder library extended with a Jaeger-specific QPS panel helper.
local g = (import 'grafana-builder/grafana.libsonnet') + {
  // Panel mixin with two targets: the rate of failed operations (legend
  // 'error') and the rate of the remaining ones (legend 'success'), computed
  // as total minus errors. The result is merged with the library's `stack`
  // mixin.
  //
  // selectorErr:   PromQL selector for the counter of failed operations.
  // selectorTotal: PromQL selector for the counter of all operations.
  qpsPanelErrTotal(selectorErr, selectorTotal):: {
    // Aggregated per-second rate over a 1m window for the given selector.
    local expr(selector) = 'sum(rate(' + selector + '[1m]))',

    // Series colours, keyed by the legendFormat values used below.
    aliasColors: {
      success: '#7EB26D',
      // `error` is a Jsonnet keyword, so this key has to be quoted.
      'error': '#E24D42',
    },
    targets: [
      {
        expr: expr(selectorErr),
        format: 'time_series',
        intervalFactor: 2,
        legendFormat: 'error',
        refId: 'A',
        step: 10,
      },
      {
        // If the error selector matches no series, sum() yields an empty
        // vector and `total - <empty>` would be empty as well, hiding the
        // success series exactly when nothing is failing. Fall back to 0 so
        // that success == total in that case.
        expr: expr(selectorTotal) + ' - (' + expr(selectorErr) + ' or vector(0))',
        format: 'time_series',
        intervalFactor: 2,
        legendFormat: 'success',
        refId: 'B',
        step: 10,
      },
    ],
  } + $.stack,
};
// Y-axis override shared by the ratio panels: 'percentunit' format, axis
// capped at 1.
local percentAxes = { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) };

// Stacked panel plotting the ratio of two 1m rates (failed / total), with one
// series per value of `label`.
//
// title:  panel title.
// failed: PromQL selector for the numerator counter.
// total:  PromQL selector for the denominator counter.
// label:  label to group by; also used as the legend template.
local ratioPanel(title, failed, total, label) =
  g.panel(title) +
  g.queryPanel(
    'sum(rate(' + failed + '[1m])) by (' + label + ') / sum(rate(' + total + '[1m])) by (' + label + ')',
    '{{' + label + '}}'
  ) +
  percentAxes +
  g.stack;

// Unstacked panel plotting a per-instance quantile of a histogram metric.
// Quantile series are deliberately not stacked: drawing one instance's
// percentile on top of another's produces a sum that is not a latency.
//
// title:    panel title.
// quantile: quantile as a string, e.g. '0.95'.
// bucket:   name of the histogram `_bucket` metric.
local quantilePanel(title, quantile, bucket) =
  g.panel(title) +
  g.queryPanel(
    'histogram_quantile(' + quantile + ', sum(rate(' + bucket + '[1m])) by (le, instance))',
    '{{instance}}'
  );

// Mixin fragment contributing the 'jaeger.json' dashboard to grafanaDashboards.
{
  grafanaDashboards+: {
    'jaeger.json':
      g.dashboard('Jaeger')
      // Client-side reporter metrics.
      .addRow(
        g.row('Services')
        .addPanel(
          g.panel('span creation rate') +
          g.qpsPanelErrTotal('jaeger_reporter_spans{result=~"dropped|err"}', 'jaeger_reporter_spans') +
          g.stack
        )
        .addPanel(
          ratioPanel(
            '% spans dropped',
            'jaeger_reporter_spans{result=~"dropped|err"}',
            'jaeger_reporter_spans',
            'namespace'
          )
        )
      )
      // Agent batch submission metrics.
      .addRow(
        g.row('Agent')
        .addPanel(
          g.panel('batch ingest rate') +
          g.qpsPanelErrTotal('jaeger_agent_reporter_batches_failures_total', 'jaeger_agent_reporter_batches_submitted_total') +
          g.stack
        )
        .addPanel(
          ratioPanel(
            '% batches dropped',
            'jaeger_agent_reporter_batches_failures_total',
            'jaeger_agent_reporter_batches_submitted_total',
            'cluster'
          )
        )
      )
      // Collector span intake metrics.
      .addRow(
        g.row('Collector')
        .addPanel(
          g.panel('span ingest rate') +
          g.qpsPanelErrTotal('jaeger_collector_spans_dropped_total', 'jaeger_collector_spans_received_total') +
          g.stack
        )
        .addPanel(
          ratioPanel(
            '% spans dropped',
            'jaeger_collector_spans_dropped_total',
            'jaeger_collector_spans_received_total',
            'instance'
          )
        )
      )
      // Collector internal queue metrics.
      .addRow(
        g.row('Collector Queue')
        .addPanel(
          g.panel('span queue length') +
          g.queryPanel('jaeger_collector_queue_length', '{{instance}}') +
          g.stack
        )
        .addPanel(
          quantilePanel('span queue time - 95 percentile', '0.95', 'jaeger_collector_in_queue_latency_bucket')
        )
      )
      // Query service metrics.
      .addRow(
        g.row('Query')
        .addPanel(
          g.panel('qps') +
          g.qpsPanelErrTotal('jaeger_query_requests_total{result="err"}', 'jaeger_query_requests_total') +
          g.stack
        )
        .addPanel(
          // Unstacked, matching the p95 queue-time panel above.
          quantilePanel('latency - 99 percentile', '0.99', 'jaeger_query_latency_bucket')
        )
      ),
  },
}