Skip to content

Commit cda9228

Browse files
Clean up MetricsResource and Prometheus hacks (#18687)
Clean up `MetricsResource`, Prometheus hacks (`_set_prometheus_client_use_created_metrics`), and better document why we care about having a separate `metrics` listener type. These clean-up changes have been split out from #18584 since that PR was closed.
1 parent f0f9a82 commit cda9228

File tree

8 files changed

+115
-128
lines changed

8 files changed

+115
-128
lines changed

changelog.d/18687.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Clean up `MetricsResource` and Prometheus hacks.

poetry.lock

Lines changed: 24 additions & 24 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,9 @@ pymacaroons = ">=0.13.0"
195195
msgpack = ">=0.5.2"
196196
phonenumbers = ">=8.2.0"
197197
# we use GaugeHistogramMetric, which was added in prom-client 0.4.0.
198-
prometheus-client = ">=0.4.0"
198+
# `prometheus_client.metrics` was added in 0.5.0, so we require that too.
199+
# We chose 0.6.0 as that is the current version in Debian Buster (oldstable).
200+
prometheus-client = ">=0.6.0"
199201
# we use `order`, which arrived in attrs 19.2.0.
200202
# Note: 21.1.0 broke `/sync`, see https://github.com/matrix-org/synapse/issues/9936
201203
attrs = ">=19.2.0,!=21.1.0"

synapse/app/_base.py

Lines changed: 10 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -286,39 +286,26 @@ async def wrapper() -> None:
286286
def listen_metrics(bind_addresses: StrCollection, port: int) -> None:
287287
"""
288288
Start Prometheus metrics server.
289+
290+
This method runs the metrics server on a different port, in a different thread to
291+
Synapse. This can make it more resilient to heavy load in Synapse causing metric
292+
requests to be slow or timeout.
293+
294+
Even though `start_http_server_prometheus(...)` uses `threading.Thread` behind the
295+
scenes (where all threads share the GIL and only one thread can execute Python
296+
bytecode at a time), this still works because the metrics thread can preempt the
297+
Twisted reactor thread between bytecode boundaries and the metrics thread gets
298+
scheduled with roughly equal priority to the Twisted reactor thread.
289299
"""
290300
from prometheus_client import start_http_server as start_http_server_prometheus
291301

292302
from synapse.metrics import RegistryProxy
293303

294304
for host in bind_addresses:
295305
logger.info("Starting metrics listener on %s:%d", host, port)
296-
_set_prometheus_client_use_created_metrics(False)
297306
start_http_server_prometheus(port, addr=host, registry=RegistryProxy)
298307

299308

300-
def _set_prometheus_client_use_created_metrics(new_value: bool) -> None:
301-
"""
302-
Sets whether prometheus_client should expose `_created`-suffixed metrics for
303-
all gauges, histograms and summaries.
304-
There is no programmatic way to disable this without poking at internals;
305-
the proper way is to use an environment variable which prometheus_client
306-
loads at import time.
307-
308-
The motivation for disabling these `_created` metrics is that they're
309-
a waste of space as they're not useful but they take up space in Prometheus.
310-
"""
311-
312-
import prometheus_client.metrics
313-
314-
if hasattr(prometheus_client.metrics, "_use_created"):
315-
prometheus_client.metrics._use_created = new_value
316-
else:
317-
logger.error(
318-
"Can't disable `_created` metrics in prometheus_client (brittle hack broken?)"
319-
)
320-
321-
322309
def listen_manhole(
323310
bind_addresses: StrCollection,
324311
port: int,

synapse/metrics/__init__.py

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import os
2626
import platform
2727
import threading
28+
from importlib import metadata
2829
from typing import (
2930
Callable,
3031
Dict,
@@ -41,19 +42,28 @@
4142
)
4243

4344
import attr
44-
from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram, Metric
45+
from pkg_resources import parse_version
46+
from prometheus_client import (
47+
CollectorRegistry,
48+
Counter,
49+
Gauge,
50+
Histogram,
51+
Metric,
52+
generate_latest,
53+
)
4554
from prometheus_client.core import (
4655
REGISTRY,
4756
GaugeHistogramMetricFamily,
4857
GaugeMetricFamily,
4958
)
5059

5160
from twisted.python.threadpool import ThreadPool
61+
from twisted.web.resource import Resource
62+
from twisted.web.server import Request
5263

5364
# This module is imported for its side effects; flake8 needn't warn that it's unused.
5465
import synapse.metrics._reactor_metrics # noqa: F401
5566
from synapse.metrics._gc import MIN_TIME_BETWEEN_GCS, install_gc_manager
56-
from synapse.metrics._twisted_exposition import MetricsResource, generate_latest
5767
from synapse.metrics._types import Collector
5868
from synapse.types import StrSequence
5969
from synapse.util import SYNAPSE_VERSION
@@ -81,6 +91,53 @@
8191
single process." (source: https://prometheus.io/docs/concepts/jobs_instances/)
8292
"""
8393

94+
CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8"
95+
"""
96+
Content type of the latest text format for Prometheus metrics.
97+
98+
Pulled directly from the prometheus_client library.
99+
"""
100+
101+
102+
def _set_prometheus_client_use_created_metrics(new_value: bool) -> None:
103+
"""
104+
Sets whether prometheus_client should expose `_created`-suffixed metrics for
105+
all gauges, histograms and summaries.
106+
107+
There is no programmatic way in the old versions of `prometheus_client` to disable
108+
this without poking at internals; the proper way in the old `prometheus_client`
109+
versions (> `0.14.0` < `0.18.0`) is to use an environment variable which
110+
prometheus_client loads at import time. For versions > `0.18.0`, we can use the
111+
dedicated `disable_created_metrics()`/`enable_created_metrics()`.
112+
113+
The motivation for disabling these `_created` metrics is that they're a waste of
114+
space as they're not useful but they take up space in Prometheus. It's not the end
115+
of the world if this doesn't work.
116+
"""
117+
import prometheus_client.metrics
118+
119+
if hasattr(prometheus_client.metrics, "_use_created"):
120+
prometheus_client.metrics._use_created = new_value
121+
# Just log an error for old versions that don't support disabling the unecessary
122+
# metrics. It's not the end of the world if this doesn't work as it just means extra
123+
# wasted space taken up in Prometheus but things keep working.
124+
elif parse_version(metadata.version("prometheus_client")) < parse_version("0.14.0"):
125+
logger.error(
126+
"Can't disable `_created` metrics in prometheus_client (unsupported `prometheus_client` version, too old)"
127+
)
128+
# If the attribute doesn't exist on a newer version, this is a sign that the brittle
129+
# hack is broken. We should consider updating the minimum version of
130+
# `prometheus_client` to a version (> `0.18.0`) where we can use dedicated
131+
# `disable_created_metrics()`/`enable_created_metrics()` functions.
132+
else:
133+
raise Exception(
134+
"Can't disable `_created` metrics in prometheus_client (brittle hack broken?)"
135+
)
136+
137+
138+
# Set this globally so it applies wherever we generate/collect metrics
139+
_set_prometheus_client_use_created_metrics(False)
140+
84141

85142
class _RegistryProxy:
86143
@staticmethod
@@ -508,6 +565,23 @@ def register_threadpool(name: str, threadpool: ThreadPool) -> None:
508565
)
509566

510567

568+
class MetricsResource(Resource):
569+
"""
570+
Twisted ``Resource`` that serves prometheus metrics.
571+
"""
572+
573+
isLeaf = True
574+
575+
def __init__(self, registry: CollectorRegistry = REGISTRY):
576+
self.registry = registry
577+
578+
def render_GET(self, request: Request) -> bytes:
579+
request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii"))
580+
response = generate_latest(self.registry)
581+
request.setHeader(b"Content-Length", str(len(response)))
582+
return response
583+
584+
511585
__all__ = [
512586
"Collector",
513587
"MetricsResource",

0 commit comments

Comments
 (0)