Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 68ae0fd

Browse files
committed
Optimize backfill receiving to have less missing prev_event thrashing
Pulled from scratch changes in #13864
1 parent 6f0c3e6 commit 68ae0fd

File tree

2 files changed

+69
-2
lines changed

2 files changed

+69
-2
lines changed

synapse/handlers/federation_event.py

+64-1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
from synapse.storage.databases.main.events import PartialStateConflictError
7676
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
7777
from synapse.storage.state import StateFilter
78+
from synapse.storage.util.id_generators import AbstractStreamIdGenerator
7879
from synapse.types import (
7980
PersistedEventPosition,
8081
RoomStreamToken,
@@ -644,9 +645,71 @@ async def backfill(
644645
f"room {ev.room_id}, when we were backfilling in {room_id}"
645646
)
646647

648+
# We expect the events from the `/backfill` response to start from
649+
# `?v` and include events that preceded it (so the list will be
650+
# newest -> oldest, reverse-chronological). It's described in the
651+
# spec this way so we can rely on people doing it the right way for
652+
# the historical messages to show up correctly.
653+
reverse_chronological_events = events
654+
# `[::-1]` is just syntax to reverse the list and give us a copy
655+
chronological_events = reverse_chronological_events[::-1]
656+
657+
# We want to calculate the `stream_ordering` from newest -> oldest
658+
# (reverse-chronological) (so MSC2716 historical events end up
659+
# sorting in the correct order) and persist oldest -> newest
660+
# (chronological) to get the least missing `prev_event` fetch
661+
# thrashing.
662+
# ------------------------------------------------------------------
663+
664+
# Since we have been configured to write, we ought to have id generators,
665+
# rather than id trackers.
666+
assert (
667+
self._instance_name in self._config.worker.writers.events
668+
), "Can only write stream IDs on master"
669+
assert isinstance(self._store._backfill_id_gen, AbstractStreamIdGenerator)
670+
stream_ordering_manager = self._store._backfill_id_gen.get_next_mult(
671+
len(reverse_chronological_events)
672+
)
673+
async with stream_ordering_manager as stream_orderings:
674+
# Calculate the `stream_ordering` from newest -> oldest
675+
# (reverse-chronological) (so historical events end up sorting
676+
# in the correct order).
677+
#
678+
# Backfilled events start with `stream_ordering=-1` and
679+
# decrement. For events, that we backfill at the same `depth`
680+
# (like chains of historical messages) in order for them to have
681+
# the best chance of ending up in the correct order, assign
682+
# `stream_ordering` to the assumed reverse-chronological list of
683+
# events to backfill (where the newest events get
684+
# stream_ordering assigned first)
685+
#
686+
# depth : stream_ordering : event
687+
# ----- : --------------- : -----------------------
688+
# 1 : 1 : Event before 1
689+
# 2 : 2 : Event before 2
690+
# 3 : -5 : Historical message 1
691+
# 3 : -4 : Historical message 2
692+
# 3 : -3 : Historical message 3
693+
# 3 : -2 : Historical message 4
694+
# 3 : -1 : Historical message 5
695+
# 3 : 3 : Event after 1
696+
# 4 : 4 : Event after 2
697+
#
698+
for event, stream in zip(
699+
reverse_chronological_events, stream_orderings
700+
):
701+
event.internal_metadata.stream_ordering = stream
702+
647703
await self._process_pulled_events(
648704
dest,
649-
events,
705+
# Persist events from oldest -> newest (chronological) to get
706+
# the least missing `prev_event` fetch thrashing.
707+
# `_process_pulled_events` does some sorting of its own by
708+
# `depth` but if we let it sort the reverse-chronological list
709+
# of events, it naively orders events with the same depth in the
710+
# opposite order we want. If we pass it an already sorted by
711+
# depth list, then everything lines up.
712+
chronological_events,
650713
backfilled=True,
651714
)
652715

synapse/storage/databases/main/events.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,11 @@ async def _persist_events_and_state_updates(
209209

210210
async with stream_ordering_manager as stream_orderings:
211211
for (event, _), stream in zip(events_and_contexts, stream_orderings):
212-
event.internal_metadata.stream_ordering = stream
212+
# If someone has already decided the stream_ordering for the
213+
# event before, then just use that. This is done during backfill
214+
# to help ordering of MSC2716 historical messages.
215+
if event.internal_metadata.stream_ordering is None:
216+
event.internal_metadata.stream_ordering = stream
213217

214218
await self.db_pool.runInteraction(
215219
"persist_events",

0 commit comments

Comments
 (0)