@@ -30,6 +30,14 @@ local tuples_total = prometheus.gauge(
30
30
' Total number of tuples in a space' ,
31
31
{' space_name' })
32
32
33
-- Gauge holding the replication lag of each upstream, labelled by the
-- replica's uuid. Written by measure_tarantool_replication_lag().
local replication_lag = prometheus.gauge(
    'tarantool_replication_lag',
    'The time difference between the instance and the master',
    {'uuid'})
-- Unlabelled gauge describing overall replication health.
-- Written by measure_tarantool_replication_lag().
local replication_state_normal = prometheus.gauge(
    'tarantool_is_replication_healthy',
    'Is replication healthy?')
40
+
33
41
34
42
local function measure_tarantool_memory_usage ()
35
43
local slabs = box .slab .info ()
@@ -67,7 +75,24 @@ local function measure_tarantool_space_stats()
67
75
tuples_total :set (box .space [space_name ]:len (), {space_name })
68
76
end
69
77
end
78
+ end
70
79
80
--- Publish per-upstream replication lag and an overall health flag.
--
-- For every entry of `box.info.replication` that has an `upstream` section,
-- the upstream's `lag` is written to the `replication_lag` gauge, labelled
-- with that entry's `uuid`. The largest `upstream.idle` value seen is then
-- compared with `box.cfg.replication_timeout`: `replication_state_normal`
-- is set to 1 when the idle time does not exceed the timeout, 0 otherwise.
--
-- If no upstream reports a non-zero idle time, the health gauge is left
-- untouched.
local function measure_tarantool_replication_lag()
    local max_idle = 0

    -- pairs rather than ipairs: ipairs stops at the first missing index, so
    -- a gap in the replica ids would hide every entry after it.
    -- NOTE(review): assumes box.info.replication holds only replica entries
    -- keyed by id -- confirm against the Tarantool version in use.
    for _, replica in pairs(box.info.replication) do
        local upstream = replica.upstream
        if upstream ~= nil then
            -- NOTE(review): lag/idle appear to be absent while the upstream
            -- is not connected; skip them instead of comparing nil with a
            -- number (which raises) -- confirm.
            if upstream.lag ~= nil then
                replication_lag:set(upstream.lag, { replica.uuid })
            end

            local idle = upstream.idle
            if idle ~= nil and idle > max_idle then
                max_idle = idle
            end
        end
    end

    if max_idle ~= 0 then
        local healthy = max_idle <= box.cfg.replication_timeout
        -- A gauge sample must be numeric, so export the flag as 1/0
        -- instead of passing the boolean through.
        replication_state_normal:set(healthy and 1 or 0)
    end
end
72
97
73
98
local function measure_tarantool_metrics ()
@@ -76,6 +101,7 @@ local function measure_tarantool_metrics()
76
101
measure_tarantool_request_stats ()
77
102
measure_tarantool_uptime ()
78
103
measure_tarantool_space_stats ()
104
+ measure_tarantool_replication_lag ()
79
105
end
80
106
end
81
107
0 commit comments