Skip to content

Commit 5106818

Browse files
Refactor GaugeBucketCollector metrics to be homeserver-scoped (#18715)
Refactor `GaugeBucketCollector` metrics to be homeserver-scoped

Part of #18592

### Testing strategy

1. Add the `metrics` listener in your `homeserver.yaml`
    ```yaml
    listeners:
      # This is just showing how to configure metrics either way
      #
      # `http` `metrics` resource
      - port: 9322
        type: http
        bind_addresses: ['127.0.0.1']
        resources:
          - names: [metrics]
            compress: false
      # `metrics` listener
      - port: 9323
        type: metrics
        bind_addresses: ['127.0.0.1']
    ```
1. Start the homeserver: `poetry run synapse_homeserver --config-path homeserver.yaml`
1. Fetch `http://localhost:9322/_synapse/metrics` and/or `http://localhost:9323/metrics`
1. Adjust the number of [`msecs` in the `looping_call` so that `_read_forward_extremities`](https://github.com/element-hq/synapse/blob/a82b8a966a7dbe218cb788548683a83ec404e468/synapse/storage/databases/main/metrics.py#L79) runs immediately instead of after an hour.
1. Observe that the response includes the `synapse_forward_extremities` and `synapse_excess_extremity_events` metrics with the `server_name` label
1 parent f13a136 commit 5106818

File tree

4 files changed

+97
-29
lines changed

4 files changed

+97
-29
lines changed

changelog.d/18715.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Refactor `GaugeBucketCollector` metrics to be homeserver-scoped.

synapse/metrics/__init__.py

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
Iterable,
3434
Mapping,
3535
Optional,
36+
Sequence,
3637
Set,
3738
Tuple,
3839
Type,
@@ -343,6 +344,51 @@ def _register_with_collector(self) -> None:
343344
all_gauges[self.name] = self
344345

345346

347+
class GaugeHistogramMetricFamilyWithLabels(GaugeHistogramMetricFamily):
348+
"""
349+
Custom version of `GaugeHistogramMetricFamily` from `prometheus_client` that allows
350+
specifying labels and label values.
351+
352+
A single gauge histogram and its samples.
353+
354+
For use by custom collectors.
355+
"""
356+
357+
def __init__(
358+
self,
359+
*,
360+
name: str,
361+
documentation: str,
362+
gsum_value: float,
363+
buckets: Optional[Sequence[Tuple[str, float]]] = None,
364+
labelnames: StrSequence = (),
365+
labelvalues: StrSequence = (),
366+
unit: str = "",
367+
):
368+
# Sanity check the number of label values matches the number of label names.
369+
if len(labelvalues) != len(labelnames):
370+
raise ValueError(
371+
"The number of label values must match the number of label names"
372+
)
373+
374+
# Call the super to validate and set the labelnames. We use this stable API
375+
# instead of setting the internal `_labelnames` field directly.
376+
super().__init__(
377+
name=name,
378+
documentation=documentation,
379+
labels=labelnames,
380+
# Since `GaugeHistogramMetricFamily` doesn't support supplying `labels` and
381+
# `buckets` at the same time (artificial limitation), we will just set these
382+
# as `None` and set up the buckets ourselves just below.
383+
buckets=None,
384+
gsum_value=None,
385+
)
386+
387+
# Create a gauge for each bucket.
388+
if buckets is not None:
389+
self.add_metric(labels=labelvalues, buckets=buckets, gsum_value=gsum_value)
390+
391+
346392
class GaugeBucketCollector(Collector):
347393
"""Like a Histogram, but the buckets are Gauges which are updated atomically.
348394
@@ -355,14 +401,17 @@ class GaugeBucketCollector(Collector):
355401
__slots__ = (
356402
"_name",
357403
"_documentation",
404+
"_labelnames",
358405
"_bucket_bounds",
359406
"_metric",
360407
)
361408

362409
def __init__(
363410
self,
411+
*,
364412
name: str,
365413
documentation: str,
414+
labelnames: Optional[StrSequence],
366415
buckets: Iterable[float],
367416
registry: CollectorRegistry = REGISTRY,
368417
):
@@ -376,6 +425,7 @@ def __init__(
376425
"""
377426
self._name = name
378427
self._documentation = documentation
428+
self._labelnames = labelnames if labelnames else ()
379429

380430
# the tops of the buckets
381431
self._bucket_bounds = [float(b) for b in buckets]
@@ -387,7 +437,7 @@ def __init__(
387437

388438
# We initially set this to None. We won't report metrics until
389439
# this has been initialised after a successful data update
390-
self._metric: Optional[GaugeHistogramMetricFamily] = None
440+
self._metric: Optional[GaugeHistogramMetricFamilyWithLabels] = None
391441

392442
registry.register(self)
393443

@@ -396,15 +446,26 @@ def collect(self) -> Iterable[Metric]:
396446
if self._metric is not None:
397447
yield self._metric
398448

399-
def update_data(self, values: Iterable[float]) -> None:
449+
def update_data(self, values: Iterable[float], labels: StrSequence = ()) -> None:
400450
"""Update the data to be reported by the metric
401451
402452
The existing data is cleared, and each measurement in the input is assigned
403453
to the relevant bucket.
454+
455+
Args:
456+
values
457+
labels
404458
"""
405-
self._metric = self._values_to_metric(values)
459+
self._metric = self._values_to_metric(values, labels)
406460

407-
def _values_to_metric(self, values: Iterable[float]) -> GaugeHistogramMetricFamily:
461+
def _values_to_metric(
462+
self, values: Iterable[float], labels: StrSequence = ()
463+
) -> GaugeHistogramMetricFamilyWithLabels:
464+
"""
465+
Args:
466+
values
467+
labels
468+
"""
408469
total = 0.0
409470
bucket_values = [0 for _ in self._bucket_bounds]
410471

@@ -422,9 +483,11 @@ def _values_to_metric(self, values: Iterable[float]) -> GaugeHistogramMetricFami
422483
# that bucket or below.
423484
accumulated_values = itertools.accumulate(bucket_values)
424485

425-
return GaugeHistogramMetricFamily(
426-
self._name,
427-
self._documentation,
486+
return GaugeHistogramMetricFamilyWithLabels(
487+
name=self._name,
488+
documentation=self._documentation,
489+
labelnames=self._labelnames,
490+
labelvalues=labels,
428491
buckets=list(
429492
zip((str(b) for b in self._bucket_bounds), accumulated_values)
430493
),

synapse/storage/databases/main/metrics.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import time
2424
from typing import TYPE_CHECKING, Dict, List, Tuple, cast
2525

26-
from synapse.metrics import GaugeBucketCollector
26+
from synapse.metrics import SERVER_NAME_LABEL, GaugeBucketCollector
2727
from synapse.metrics.background_process_metrics import wrap_as_background_process
2828
from synapse.storage._base import SQLBaseStore
2929
from synapse.storage.database import (
@@ -42,9 +42,10 @@
4242

4343
# Collect metrics on the number of forward extremities that exist.
4444
_extremities_collecter = GaugeBucketCollector(
45-
"synapse_forward_extremities",
46-
"Number of rooms on the server with the given number of forward extremities"
45+
name="synapse_forward_extremities",
46+
documentation="Number of rooms on the server with the given number of forward extremities"
4747
" or fewer",
48+
labelnames=[SERVER_NAME_LABEL],
4849
buckets=[1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500],
4950
)
5051

@@ -54,9 +55,10 @@
5455
# we could remove from state resolution by reducing the graph to a single
5556
# forward extremity.
5657
_excess_state_events_collecter = GaugeBucketCollector(
57-
"synapse_excess_extremity_events",
58-
"Number of rooms on the server with the given number of excess extremity "
58+
name="synapse_excess_extremity_events",
59+
documentation="Number of rooms on the server with the given number of excess extremity "
5960
"events, or fewer",
61+
labelnames=[SERVER_NAME_LABEL],
6062
buckets=[0] + [1 << n for n in range(12)],
6163
)
6264

@@ -100,10 +102,12 @@ def fetch(txn: LoggingTransaction) -> List[Tuple[int, int]]:
100102

101103
res = await self.db_pool.runInteraction("read_forward_extremities", fetch)
102104

103-
_extremities_collecter.update_data(x[0] for x in res)
105+
_extremities_collecter.update_data(
106+
values=(x[0] for x in res), labels=(self.server_name,)
107+
)
104108

105109
_excess_state_events_collecter.update_data(
106-
(x[0] - 1) * x[1] for x in res if x[1]
110+
values=((x[0] - 1) * x[1] for x in res if x[1]), labels=(self.server_name,)
107111
)
108112

109113
async def count_daily_e2ee_messages(self) -> int:

tests/storage/test_event_metrics.py renamed to tests/storage/databases/main/test_metrics.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -65,24 +65,24 @@ def test_exposed_to_prometheus(self) -> None:
6565
)
6666

6767
expected = [
68-
b'synapse_forward_extremities_bucket{le="1.0"} 0.0',
69-
b'synapse_forward_extremities_bucket{le="2.0"} 2.0',
70-
b'synapse_forward_extremities_bucket{le="3.0"} 2.0',
71-
b'synapse_forward_extremities_bucket{le="5.0"} 2.0',
72-
b'synapse_forward_extremities_bucket{le="7.0"} 3.0',
73-
b'synapse_forward_extremities_bucket{le="10.0"} 3.0',
74-
b'synapse_forward_extremities_bucket{le="15.0"} 3.0',
75-
b'synapse_forward_extremities_bucket{le="20.0"} 3.0',
76-
b'synapse_forward_extremities_bucket{le="50.0"} 3.0',
77-
b'synapse_forward_extremities_bucket{le="100.0"} 3.0',
78-
b'synapse_forward_extremities_bucket{le="200.0"} 3.0',
79-
b'synapse_forward_extremities_bucket{le="500.0"} 3.0',
68+
b'synapse_forward_extremities_bucket{le="1.0",server_name="test"} 0.0',
69+
b'synapse_forward_extremities_bucket{le="2.0",server_name="test"} 2.0',
70+
b'synapse_forward_extremities_bucket{le="3.0",server_name="test"} 2.0',
71+
b'synapse_forward_extremities_bucket{le="5.0",server_name="test"} 2.0',
72+
b'synapse_forward_extremities_bucket{le="7.0",server_name="test"} 3.0',
73+
b'synapse_forward_extremities_bucket{le="10.0",server_name="test"} 3.0',
74+
b'synapse_forward_extremities_bucket{le="15.0",server_name="test"} 3.0',
75+
b'synapse_forward_extremities_bucket{le="20.0",server_name="test"} 3.0',
76+
b'synapse_forward_extremities_bucket{le="50.0",server_name="test"} 3.0',
77+
b'synapse_forward_extremities_bucket{le="100.0",server_name="test"} 3.0',
78+
b'synapse_forward_extremities_bucket{le="200.0",server_name="test"} 3.0',
79+
b'synapse_forward_extremities_bucket{le="500.0",server_name="test"} 3.0',
8080
# per https://docs.google.com/document/d/1KwV0mAXwwbvvifBvDKH_LU1YjyXE_wxCkHNoCGq1GX0/edit#heading=h.wghdjzzh72j9,
8181
# "inf" is valid: "this includes variants such as inf"
82-
b'synapse_forward_extremities_bucket{le="inf"} 3.0',
82+
b'synapse_forward_extremities_bucket{le="inf",server_name="test"} 3.0',
8383
b"# TYPE synapse_forward_extremities_gcount gauge",
84-
b"synapse_forward_extremities_gcount 3.0",
84+
b'synapse_forward_extremities_gcount{server_name="test"} 3.0',
8585
b"# TYPE synapse_forward_extremities_gsum gauge",
86-
b"synapse_forward_extremities_gsum 10.0",
86+
b'synapse_forward_extremities_gsum{server_name="test"} 10.0',
8787
]
8888
self.assertEqual(items, expected)

0 commit comments

Comments
 (0)