Skip to content
This repository was archived by the owner on Feb 11, 2022. It is now read-only.

Commit a161714

Browse files
committed
monitoring replication lag
1 parent 929d41d commit a161714

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

rockspecs/prometheus-1.1.0.rockspec

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
-- LuaRocks rockspec for the Tarantool Prometheus metrics library.
package = 'prometheus'
-- NOTE(review): LuaRocks normally expects "<version>-<revision>" (for example
-- '1.1.0-1') both here and in the rockspec filename. Left unchanged because
-- the file would have to be renamed together with it -- TODO confirm.
version = '1.1.0'
source = {
    -- Fetch over HTTPS: GitHub no longer serves the unauthenticated git://
    -- protocol, so the previous git:// URL cannot be cloned any more.
    url = 'git+https://github.com/tarantool/prometheus.git',
    tag = '1.1.0',
}
description = {
    summary = 'Prometheus library to collect metrics from Tarantool',
    homepage = 'https://github.com/tarantool/prometheus.git',
    license = 'BSD',
}
dependencies = {
    'lua >= 5.1';
}
build = {
    type = 'builtin',

    -- Lua module name -> source file shipped in the rock.
    modules = {
        ['prometheus.tarantool-metrics'] = 'tarantool-metrics.lua',
        ['prometheus'] = 'prometheus.lua'
    }
}

tarantool-metrics.lua

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ local tuples_total = prometheus.gauge(
3030
'Total number of tuples in a space',
3131
{'space_name'})
3232

33+
-- Gauge for replication lag; it is declared with a single 'uuid' label.
local replication_lag = prometheus.gauge(
    'tarantool_replication_lag',
    'The time difference between the instance and the master',
    {'uuid'}
)

-- Unlabelled gauge that answers "is replication healthy?".
local replication_state_normal = prometheus.gauge(
    'tarantool_is_replication_healthy',
    'Is replication healthy?'
)
40+
3341

3442
local function measure_tarantool_memory_usage()
3543
local slabs = box.slab.info()
@@ -67,7 +75,24 @@ local function measure_tarantool_space_stats()
6775
tuples_total:set(box.space[space_name]:len(), {space_name})
6876
end
6977
end
78+
end
7079

80+
--- Publish replication metrics taken from box.info.replication.
--
-- For every replica entry that has an upstream, the upstream's lag is
-- exported through `replication_lag` under the replica's uuid. The largest
-- upstream idle time seen is then compared with box.cfg.replication_timeout
-- and the outcome is exported through `replication_state_normal` as
-- 1 (idle within the timeout) or 0 (idle above the timeout). When no upstream
-- reported a non-zero idle time, the health gauge is left untouched.
local function measure_tarantool_replication_lag()
    local max_idle = 0

    -- pairs, not ipairs: ipairs stops at the first missing integer key, so a
    -- gap in the replica ids would silently hide every replica after it.
    for _, replica in pairs(box.info.replication) do
        local upstream = replica.upstream
        if upstream ~= nil then
            -- NOTE(review): lag/idle are nil-guarded on the assumption that
            -- they can be absent for some upstream states -- confirm against
            -- the Tarantool documentation. An unguarded comparison with nil
            -- would raise and abort the whole metrics collection.
            if upstream.lag ~= nil then
                replication_lag:set(upstream.lag, { replica.uuid })
            end
            if upstream.idle ~= nil and upstream.idle > max_idle then
                max_idle = upstream.idle
            end
        end
    end

    if max_idle ~= 0 then
        local replication_timeout = box.cfg.replication_timeout
        -- A gauge sample is a number, so publish 1/0 rather than a boolean.
        local healthy = max_idle <= replication_timeout
        replication_state_normal:set(healthy and 1 or 0)
    end
end
7297

7398
local function measure_tarantool_metrics()
@@ -76,6 +101,7 @@ local function measure_tarantool_metrics()
76101
measure_tarantool_request_stats()
77102
measure_tarantool_uptime()
78103
measure_tarantool_space_stats()
104+
measure_tarantool_replication_lag()
79105
end
80106
end
81107

0 commit comments

Comments
 (0)