Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit fcf951d

Browse files
authored
Track in memory events using weakrefs (#10533)
1 parent 1fe202a commit fcf951d

File tree

4 files changed

+60
-2
lines changed

4 files changed

+60
-2
lines changed

changelog.d/10533.misc

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve event caching mechanism to avoid having multiple copies of an event in memory at a time.

synapse/storage/databases/main/events_worker.py

+33-2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import logging
1616
import threading
17+
import weakref
1718
from enum import Enum, auto
1819
from typing import (
1920
TYPE_CHECKING,
@@ -23,6 +24,7 @@
2324
Dict,
2425
Iterable,
2526
List,
27+
MutableMapping,
2628
Optional,
2729
Set,
2830
Tuple,
@@ -248,6 +250,12 @@ def __init__(
248250
str, ObservableDeferred[Dict[str, EventCacheEntry]]
249251
] = {}
250252

253+
# We keep track of the events we have currently loaded in memory so that
254+
# we can reuse them even if they've been evicted from the cache. We only
255+
# track events that don't need redacting in here (as then we don't need
256+
# to track redaction status).
257+
self._event_ref: MutableMapping[str, EventBase] = weakref.WeakValueDictionary()
258+
251259
self._event_fetch_lock = threading.Condition()
252260
self._event_fetch_list: List[
253261
Tuple[Iterable[str], "defer.Deferred[Dict[str, _EventRow]]"]
@@ -723,6 +731,8 @@ async def get_missing_events_from_db() -> Dict[str, EventCacheEntry]:
723731

724732
def _invalidate_get_event_cache(self, event_id: str) -> None:
725733
self._get_event_cache.invalidate((event_id,))
734+
self._event_ref.pop(event_id, None)
735+
self._current_event_fetches.pop(event_id, None)
726736

727737
def _get_events_from_cache(
728738
self, events: Iterable[str], update_metrics: bool = True
@@ -738,13 +748,30 @@ def _get_events_from_cache(
738748
event_map = {}
739749

740750
for event_id in events:
751+
# First check if it's in the event cache
741752
ret = self._get_event_cache.get(
742753
(event_id,), None, update_metrics=update_metrics
743754
)
744-
if not ret:
755+
if ret:
756+
event_map[event_id] = ret
745757
continue
746758

747-
event_map[event_id] = ret
759+
# Otherwise check if we still have the event in memory.
760+
event = self._event_ref.get(event_id)
761+
if event:
762+
# Reconstruct an event cache entry
763+
764+
cache_entry = EventCacheEntry(
765+
event=event,
766+
# We don't cache weakrefs to redacted events, so we know
767+
# this is None.
768+
redacted_event=None,
769+
)
770+
event_map[event_id] = cache_entry
771+
772+
# We add the entry back into the cache as we want to keep
773+
# recently queried events in the cache.
774+
self._get_event_cache.set((event_id,), cache_entry)
748775

749776
return event_map
750777

@@ -1124,6 +1151,10 @@ async def _get_events_from_db(
11241151
self._get_event_cache.set((event_id,), cache_entry)
11251152
result_map[event_id] = cache_entry
11261153

1154+
if not redacted_event:
1155+
# We only cache references to unredacted events.
1156+
self._event_ref[event_id] = original_ev
1157+
11271158
return result_map
11281159

11291160
async def _enqueue_events(self, events: Collection[str]) -> Dict[str, _EventRow]:

tests/handlers/test_sync.py

+1
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ def test_unknown_room_version(self):
160160
# Blow away caches (supported room versions can only change due to a restart).
161161
self.store.get_rooms_for_user_with_stream_ordering.invalidate_all()
162162
self.store._get_event_cache.clear()
163+
self.store._event_ref.clear()
163164

164165
# The rooms should be excluded from the sync response.
165166
# Get a new request key.

tests/storage/databases/main/test_events_worker.py

+25
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,31 @@ def test_simple(self):
154154
# We should have fetched the event from the DB
155155
self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 1)
156156

157+
def test_event_ref(self):
158+
"""Test that we reuse events that are still in memory but have fallen
159+
out of the cache, rather than requesting them from the DB.
160+
"""
161+
162+
# Reset the event cache
163+
self.store._get_event_cache.clear()
164+
165+
with LoggingContext("test") as ctx:
166+
# We keep hold of the event even though we never use it.
167+
event = self.get_success(self.store.get_event(self.event_id)) # noqa: F841
168+
169+
# We should have fetched the event from the DB
170+
self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 1)
171+
172+
# Reset the event cache
173+
self.store._get_event_cache.clear()
174+
175+
with LoggingContext("test") as ctx:
176+
self.get_success(self.store.get_event(self.event_id))
177+
178+
# Since the event is still in memory we shouldn't have fetched it
179+
# from the DB
180+
self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 0)
181+
157182
def test_dedupe(self):
158183
"""Test that if we request the same event multiple times we only pull it
159184
out once.

0 commit comments

Comments
 (0)