diff --git a/changelog.d/13717.misc b/changelog.d/13717.misc new file mode 100644 index 000000000000..07ace50b12a0 --- /dev/null +++ b/changelog.d/13717.misc @@ -0,0 +1 @@ +Add experimental configuration option to allow disabling legacy Prometheus metric names. \ No newline at end of file diff --git a/synapse/metrics/_legacy_exposition.py b/synapse/metrics/_legacy_exposition.py index 133f1603ddea..563d8cc2c60d 100644 --- a/synapse/metrics/_legacy_exposition.py +++ b/synapse/metrics/_legacy_exposition.py @@ -34,8 +34,6 @@ from twisted.web.resource import Resource from twisted.web.server import Request -from synapse.util import caches - CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" @@ -107,11 +105,6 @@ def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> byt by prometheus-client. """ - # Trigger the cache metrics to be rescraped, which updates the common - # metrics but do not produce metrics themselves - for collector in caches.collectors_by_name.values(): - collector.collect() - output = [] for metric in registry.collect(): diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index d4a2b77c297f..35c0be08b01d 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -20,9 +20,11 @@ from typing import Any, Callable, Dict, List, Optional, Sized, TypeVar import attr +from prometheus_client import REGISTRY from prometheus_client.core import Gauge from synapse.config.cache import add_resizable_cache +from synapse.util.metrics import DynamicCollectorRegistry logger = logging.getLogger(__name__) @@ -30,27 +32,62 @@ # Whether to track estimated memory usage of the LruCaches. TRACK_MEMORY_USAGE = False +# We track cache metrics in a special registry that lets us update the metrics +# just before they are returned from the scrape endpoint. +CACHE_METRIC_REGISTRY = DynamicCollectorRegistry() caches_by_name: Dict[str, Sized] = {} -collectors_by_name: Dict[str, "CacheMetric"] = {} -cache_size = Gauge("synapse_util_caches_cache_size", "", ["name"]) -cache_hits = Gauge("synapse_util_caches_cache_hits", "", ["name"]) -cache_evicted = Gauge("synapse_util_caches_cache_evicted_size", "", ["name", "reason"]) -cache_total = Gauge("synapse_util_caches_cache", "", ["name"]) -cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"]) +cache_size = Gauge( + "synapse_util_caches_cache_size", "", ["name"], registry=CACHE_METRIC_REGISTRY +) +cache_hits = Gauge( + "synapse_util_caches_cache_hits", "", ["name"], registry=CACHE_METRIC_REGISTRY +) +cache_evicted = Gauge( + "synapse_util_caches_cache_evicted_size", + "", + ["name", "reason"], + registry=CACHE_METRIC_REGISTRY, +) +cache_total = Gauge( + "synapse_util_caches_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY +) +cache_max_size = Gauge( + "synapse_util_caches_cache_max_size", "", ["name"], registry=CACHE_METRIC_REGISTRY +) cache_memory_usage = Gauge( "synapse_util_caches_cache_size_bytes", "Estimated memory usage of the caches", ["name"], + registry=CACHE_METRIC_REGISTRY, ) -response_cache_size = Gauge("synapse_util_caches_response_cache_size", "", ["name"]) -response_cache_hits = Gauge("synapse_util_caches_response_cache_hits", "", ["name"]) +response_cache_size = Gauge( + "synapse_util_caches_response_cache_size", + "", + ["name"], + registry=CACHE_METRIC_REGISTRY, +) +response_cache_hits = Gauge( + "synapse_util_caches_response_cache_hits", + "", + ["name"], + registry=CACHE_METRIC_REGISTRY, +) response_cache_evicted = Gauge( - "synapse_util_caches_response_cache_evicted_size", "", ["name", "reason"] + "synapse_util_caches_response_cache_evicted_size", + "", + ["name", "reason"], + registry=CACHE_METRIC_REGISTRY, ) -response_cache_total = Gauge("synapse_util_caches_response_cache", "", ["name"]) +response_cache_total = Gauge( + "synapse_util_caches_response_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY +) + + +# Register our custom cache metrics registry with the global registry +REGISTRY.register(CACHE_METRIC_REGISTRY) class EvictionReason(Enum): @@ -168,9 +205,8 @@ def register_cache( add_resizable_cache(cache_name, resize_callback) metric = CacheMetric(cache, cache_type, cache_name, collect_callback) - metric_name = "cache_%s_%s" % (cache_type, cache_name) caches_by_name[cache_name] = cache - collectors_by_name[metric_name] = metric + CACHE_METRIC_REGISTRY.register_hook(metric.collect) return metric diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index bc3b4938ea15..9687120ebfdb 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -15,9 +15,9 @@ import logging from functools import wraps from types import TracebackType -from typing import Awaitable, Callable, Optional, Type, TypeVar +from typing import Awaitable, Callable, Generator, List, Optional, Type, TypeVar -from prometheus_client import Counter +from prometheus_client import CollectorRegistry, Counter, Metric from typing_extensions import Concatenate, ParamSpec, Protocol from synapse.logging.context import ( @@ -208,3 +208,33 @@ def _update_in_flight(self, metrics: _InFlightMetric) -> None: metrics.real_time_sum += duration # TODO: Add other in flight metrics. + + +class DynamicCollectorRegistry(CollectorRegistry): + """ + Custom Prometheus Collector registry that calls a hook first, allowing you + to update metrics on-demand. + + Don't forget to register this registry with the main registry! + """ + + def __init__(self) -> None: + super().__init__() + self._pre_update_hooks: List[Callable[[], None]] = [] + + def collect(self) -> Generator[Metric, None, None]: + """ + Collects metrics, calling pre-update hooks first. + """ + + for pre_update_hook in self._pre_update_hooks: + pre_update_hook() + + yield from super().collect() + + def register_hook(self, hook: Callable[[], None]) -> None: + """ + Registers a hook that is called before metric collection. + """ + + self._pre_update_hooks.append(hook)