Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix cache metrics not being updated when not using the legacy exposition module. #13717

Merged
merged 4 commits into from
Sep 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13717.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add experimental configuration option to allow disabling legacy Prometheus metric names.
7 changes: 0 additions & 7 deletions synapse/metrics/_legacy_exposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@
from twisted.web.resource import Resource
from twisted.web.server import Request

from synapse.util import caches

CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8"


Expand Down Expand Up @@ -102,11 +100,6 @@ def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> byt
by prometheus-client.
"""

# Trigger the cache metrics to be rescraped. The cache collectors update the
# common metrics but do not produce any metrics themselves.
for collector in caches.collectors_by_name.values():
collector.collect()

output = []

for metric in registry.collect():
Expand Down
60 changes: 48 additions & 12 deletions synapse/util/caches/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,37 +20,74 @@
from typing import Any, Callable, Dict, List, Optional, Sized, TypeVar

import attr
from prometheus_client import REGISTRY
from prometheus_client.core import Gauge

from synapse.config.cache import add_resizable_cache
from synapse.util.metrics import DynamicCollectorRegistry

logger = logging.getLogger(__name__)


# Whether to track estimated memory usage of the LruCaches.
TRACK_MEMORY_USAGE = False

# We track cache metrics in a special registry that lets us update the metrics
# just before they are returned from the scrape endpoint.
CACHE_METRIC_REGISTRY = DynamicCollectorRegistry()

caches_by_name: Dict[str, Sized] = {}
collectors_by_name: Dict[str, "CacheMetric"] = {}

cache_size = Gauge("synapse_util_caches_cache_size", "", ["name"])
cache_hits = Gauge("synapse_util_caches_cache_hits", "", ["name"])
cache_evicted = Gauge("synapse_util_caches_cache_evicted_size", "", ["name", "reason"])
cache_total = Gauge("synapse_util_caches_cache", "", ["name"])
cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"])
cache_size = Gauge(
"synapse_util_caches_cache_size", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
cache_hits = Gauge(
"synapse_util_caches_cache_hits", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
cache_evicted = Gauge(
"synapse_util_caches_cache_evicted_size",
"",
["name", "reason"],
registry=CACHE_METRIC_REGISTRY,
)
cache_total = Gauge(
"synapse_util_caches_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
cache_max_size = Gauge(
"synapse_util_caches_cache_max_size", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
cache_memory_usage = Gauge(
"synapse_util_caches_cache_size_bytes",
"Estimated memory usage of the caches",
["name"],
registry=CACHE_METRIC_REGISTRY,
)

response_cache_size = Gauge("synapse_util_caches_response_cache_size", "", ["name"])
response_cache_hits = Gauge("synapse_util_caches_response_cache_hits", "", ["name"])
response_cache_size = Gauge(
"synapse_util_caches_response_cache_size",
"",
["name"],
registry=CACHE_METRIC_REGISTRY,
)
response_cache_hits = Gauge(
"synapse_util_caches_response_cache_hits",
"",
["name"],
registry=CACHE_METRIC_REGISTRY,
)
response_cache_evicted = Gauge(
"synapse_util_caches_response_cache_evicted_size", "", ["name", "reason"]
"synapse_util_caches_response_cache_evicted_size",
"",
["name", "reason"],
registry=CACHE_METRIC_REGISTRY,
)
response_cache_total = Gauge("synapse_util_caches_response_cache", "", ["name"])
response_cache_total = Gauge(
"synapse_util_caches_response_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY
)


# Register our custom cache metrics registry with the global registry
REGISTRY.register(CACHE_METRIC_REGISTRY)


class EvictionReason(Enum):
Expand Down Expand Up @@ -168,9 +205,8 @@ def register_cache(
add_resizable_cache(cache_name, resize_callback)

metric = CacheMetric(cache, cache_type, cache_name, collect_callback)
metric_name = "cache_%s_%s" % (cache_type, cache_name)
caches_by_name[cache_name] = cache
collectors_by_name[metric_name] = metric
CACHE_METRIC_REGISTRY.register_hook(metric.collect)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this replaces _legacy_exposition.py line -108?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep!

return metric


Expand Down
34 changes: 32 additions & 2 deletions synapse/util/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
import logging
from functools import wraps
from types import TracebackType
from typing import Awaitable, Callable, Optional, Type, TypeVar
from typing import Awaitable, Callable, Generator, List, Optional, Type, TypeVar

from prometheus_client import Counter
from prometheus_client import CollectorRegistry, Counter, Metric
from typing_extensions import Concatenate, ParamSpec, Protocol

from synapse.logging.context import (
Expand Down Expand Up @@ -208,3 +208,33 @@ def _update_in_flight(self, metrics: _InFlightMetric) -> None:
metrics.real_time_sum += duration

# TODO: Add other in flight metrics.


class DynamicCollectorRegistry(CollectorRegistry):
    """
    A Prometheus collector registry that runs a set of registered hooks
    immediately before collecting, so that metrics can be refreshed on-demand.

    Note: this registry must itself be registered with the main registry.
    """

    def __init__(self) -> None:
        super().__init__()
        # Callbacks to run, in registration order, ahead of each collection.
        self._pre_update_hooks: List[Callable[[], None]] = []

    def register_hook(self, hook: Callable[[], None]) -> None:
        """
        Adds a hook to be invoked ahead of every metric collection.
        """

        self._pre_update_hooks.append(hook)

    def collect(self) -> Generator[Metric, None, None]:
        """
        Runs every pre-update hook, then yields the metrics collected by the
        parent registry.

        This is a generator, so the hooks only run once the caller starts
        iterating over the result.
        """

        for run_hook in self._pre_update_hooks:
            run_hook()

        yield from super().collect()