Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Experimental Federation Speedup #9702

Merged
merged 27 commits into from
Apr 14, 2021
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
588631f
add experimental federation speedup
ShadowJonathan Mar 26, 2021
5ea99b1
news and minor fixes
ShadowJonathan Mar 26, 2021
79a5fd9
fix Collection errors
ShadowJonathan Mar 26, 2021
a30d979
swiftly pet isort
ShadowJonathan Mar 26, 2021
027a574
2 -> len
ShadowJonathan Mar 26, 2021
daa9712
🪄
ShadowJonathan Mar 31, 2021
831ceda
Merge remote-tracking branch 'origin/develop' into experimental-fed-s…
ShadowJonathan Mar 31, 2021
2506f63
word
ShadowJonathan Mar 31, 2021
73e2c78
news
ShadowJonathan Mar 31, 2021
44fcea4
event_to_dests naming
ShadowJonathan Mar 31, 2021
30e3338
split handle_room_events comprehensions to make it more readable
ShadowJonathan Mar 31, 2021
fbfcadb
actually fix async bug
ShadowJonathan Mar 31, 2021
fc7e650
naming again
ShadowJonathan Mar 31, 2021
16c39da
Apply suggestions from code review
ShadowJonathan Mar 31, 2021
621d637
apply suggestions from feedback
ShadowJonathan Mar 31, 2021
632be5c
more feedback suggestions
ShadowJonathan Mar 31, 2021
43f9e28
even more feedback suggestions
ShadowJonathan Mar 31, 2021
dc6730a
fix room_and_destination_to_ordering
ShadowJonathan Mar 31, 2021
ad796a3
apply feedback suggestions
ShadowJonathan Apr 2, 2021
e7e6c67
Merge remote-tracking branch 'origin/develop' into experimental-fed-s…
ShadowJonathan Apr 2, 2021
9165cb5
wording
ShadowJonathan Apr 2, 2021
bbf52d5
more wording
ShadowJonathan Apr 2, 2021
7a33b4c
Apply suggestions from code review
ShadowJonathan Apr 9, 2021
a89cd8e
Apply suggestions from code review (manually)
ShadowJonathan Apr 9, 2021
4551879
Merge branch 'develop' into experimental-fed-speedup
ShadowJonathan Apr 9, 2021
95ceb32
Apply suggestions from code review (manually) #2
ShadowJonathan Apr 9, 2021
5703cd0
Update 9702.misc
richvdh Apr 14, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/9702.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Speed up federation by using fewer database calls.
42 changes: 23 additions & 19 deletions contrib/experiments/test_messaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,14 +225,16 @@ def send_message(self, room_name, sender, body):
destinations = yield self.get_servers_for_context(room_name)

try:
yield self.replication_layer.send_pdu(
Pdu.create_new(
context=room_name,
pdu_type="sy.room.message",
content={"sender": sender, "body": body},
origin=self.server_name,
destinations=destinations,
)
yield self.replication_layer.send_pdus(
[
Pdu.create_new(
context=room_name,
pdu_type="sy.room.message",
content={"sender": sender, "body": body},
origin=self.server_name,
destinations=destinations,
)
]
)
except Exception as e:
logger.exception(e)
Expand All @@ -254,7 +256,7 @@ def join_room(self, room_name, sender, joinee):
origin=self.server_name,
destinations=destinations,
)
yield self.replication_layer.send_pdu(pdu)
yield self.replication_layer.send_pdus([pdu])
except Exception as e:
logger.exception(e)

Expand All @@ -266,16 +268,18 @@ def invite_to_room(self, room_name, sender, invitee):
destinations = yield self.get_servers_for_context(room_name)

try:
yield self.replication_layer.send_pdu(
Pdu.create_new(
context=room_name,
is_state=True,
pdu_type="sy.room.member",
state_key=invitee,
content={"membership": "invite"},
origin=self.server_name,
destinations=destinations,
)
yield self.replication_layer.send_pdus(
[
Pdu.create_new(
context=room_name,
is_state=True,
pdu_type="sy.room.member",
state_key=invitee,
content={"membership": "invite"},
origin=self.server_name,
destinations=destinations,
)
]
)
except Exception as e:
logger.exception(e)
Expand Down
137 changes: 81 additions & 56 deletions synapse/federation/sender/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,22 @@

from prometheus_client import Counter

from twisted.internet import defer

import synapse.metrics
from synapse.api.presence import UserPresenceState
from synapse.events import EventBase
from synapse.federation.sender.per_destination_queue import PerDestinationQueue
from synapse.federation.sender.transaction_manager import TransactionManager
from synapse.federation.units import Edu
from synapse.handlers.presence import get_interested_remotes
from synapse.logging.context import (
make_deferred_yieldable,
preserve_fn,
run_in_background,
)
from synapse.logging.context import preserve_fn
from synapse.metrics import (
LaterGauge,
event_processing_loop_counter,
event_processing_loop_room_count,
events_processed_counter,
)
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.types import JsonDict, ReadReceipt, RoomStreamToken
from synapse.types import Collection, JsonDict, ReadReceipt, RoomStreamToken
from synapse.util.metrics import Measure, measure_func

if TYPE_CHECKING:
Expand Down Expand Up @@ -275,15 +269,27 @@ async def _process_event_queue_loop(self) -> None:
if not events and next_token >= self._last_poked_id:
break

async def handle_event(event: EventBase) -> None:
async def get_destinations_for_event(
event: EventBase,
) -> Collection[str]:
"""Computes the destinations to which this event must be sent.

This returns an empty collection when there are no destinations to send to,
ShadowJonathan marked this conversation as resolved.
Show resolved Hide resolved
or if this event is not from this homeserver and it is not sending
it on behalf of another server.

Will also filter out destinations which this sender is not responsible for,
if multiple federation senders exist.
"""

# Only send events for this server.
send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of()
is_mine = self.is_mine_id(event.sender)
if not is_mine and send_on_behalf_of is None:
return
return ()

if not event.internal_metadata.should_proactively_send():
return
return ()

destinations = None # type: Optional[Set[str]]
if not event.prev_event_ids():
Expand Down Expand Up @@ -318,7 +324,7 @@ async def handle_event(event: EventBase) -> None:
"Failed to calculate hosts in room for event: %s",
event.event_id,
)
return
return ()

destinations = {
d
Expand All @@ -328,42 +334,45 @@ async def handle_event(event: EventBase) -> None:
)
}

destinations.discard(self.server_name)

if send_on_behalf_of is not None:
# If we are sending the event on behalf of another server
# then it already has the event and there is no reason to
# send the event to it.
destinations.discard(send_on_behalf_of)

logger.debug("Sending %s to %r", event, destinations)

if destinations:
await self._send_pdu(event, destinations)

now = self.clock.time_msec()
ts = await self.store.get_received_ts(event.event_id)

synapse.metrics.event_processing_lag_by_event.labels(
"federation_sender"
).observe((now - ts) / 1000)

async def handle_room_events(events: Iterable[EventBase]) -> None:
with Measure(self.clock, "handle_room_events"):
for event in events:
await handle_event(event)

events_by_room = {} # type: Dict[str, List[EventBase]]
for event in events:
events_by_room.setdefault(event.room_id, []).append(event)

await make_deferred_yieldable(
defer.gatherResults(
[
run_in_background(handle_room_events, evs)
for evs in events_by_room.values()
],
consumeErrors=True,
)
)
return destinations
return ()

async def get_federatable_events_and_destinations(
events: Iterable[EventBase],
) -> List[Tuple[EventBase, Collection[str]]]:
with Measure(self.clock, "fetch_destinations_for_events"):
ShadowJonathan marked this conversation as resolved.
Show resolved Hide resolved
# Get destinations for events, skipping events for which get_destinations_for_event returns no destinations
ShadowJonathan marked this conversation as resolved.
Show resolved Hide resolved
return [
(event, dests)
for (event, dests) in [
(event, await get_destinations_for_event(event))
for event in events
]
if dests
]

events_and_dests = await get_federatable_events_and_destinations(
events
) # type: List[Tuple[EventBase, Collection[str]]]
ShadowJonathan marked this conversation as resolved.
Show resolved Hide resolved

# Send corresponding events to each destination queue
await self._distribute_events(events_and_dests)

await self.store.update_federation_out_pos("events", next_token)

Expand All @@ -381,7 +390,7 @@ async def handle_room_events(events: Iterable[EventBase]) -> None:
events_processed_counter.inc(len(events))

event_processing_loop_room_count.labels("federation_sender").inc(
len(events_by_room)
len({event.room_id for event in events})
)

event_processing_loop_counter.labels("federation_sender").inc()
Expand All @@ -393,34 +402,50 @@ async def handle_room_events(events: Iterable[EventBase]) -> None:
finally:
self._is_processing = False

async def _send_pdu(self, pdu: EventBase, destinations: Iterable[str]) -> None:
# We loop through all destinations to see whether we already have
# a transaction in progress. If we do, stick it in the pending_pdus
# table and we'll get back to it later.
async def _distribute_events(
self,
events_and_dests: List[Tuple[EventBase, Collection[str]]],
ShadowJonathan marked this conversation as resolved.
Show resolved Hide resolved
) -> None:
"""Distribute events from the transmission loop.
ShadowJonathan marked this conversation as resolved.
Show resolved Hide resolved

Args:
events_and_dests: A list of tuples, which are (event, destinations).
ShadowJonathan marked this conversation as resolved.
Show resolved Hide resolved
"""
# Tuples of room_id + destination to their max-seen stream_ordering
room_with_dest_stream_ordering = {} # type: Dict[Tuple[str, str], int]

destinations = set(destinations)
destinations.discard(self.server_name)
logger.debug("Sending to: %s", str(destinations))
# List of events to send to each destination
events_by_dest = {} # type: Dict[str, List[EventBase]]

if not destinations:
return
# For each event-destinations pair...
for event, destinations in events_and_dests:

sent_pdus_destination_dist_total.inc(len(destinations))
sent_pdus_destination_dist_count.inc()
ShadowJonathan marked this conversation as resolved.
Show resolved Hide resolved
# (we got this from the database, it's filled)
assert event.internal_metadata.stream_ordering

assert pdu.internal_metadata.stream_ordering
sent_pdus_destination_dist_total.inc(len(destinations))
sent_pdus_destination_dist_count.inc()

# track the fact that we have a PDU for these destinations,
# to allow us to perform catch-up later on if the remote is unreachable
# for a while.
await self.store.store_destination_rooms_entries(
destinations,
pdu.room_id,
pdu.internal_metadata.stream_ordering,
# ...iterate over those destinations..
for destination in destinations:
# ...update their stream-ordering...
room_with_dest_stream_ordering[(event.room_id, destination)] = max(
event.internal_metadata.stream_ordering,
room_with_dest_stream_ordering.get((event.room_id, destination), 0),
)

# ...and add the event to each destination queue.
events_by_dest.setdefault(destination, []).append(event)

# Bulk-store destination_rooms stream_ids
await self.store.bulk_store_destination_rooms_entries(
room_with_dest_stream_ordering
)

for destination in destinations:
self._get_per_destination_queue(destination).send_pdu(pdu)
for destination, pdus in events_by_dest.items():
logger.debug("Sending %d pdus to %s", len(pdus), destination)

self._get_per_destination_queue(destination).send_pdus(pdus)

async def send_read_receipt(self, receipt: ReadReceipt) -> None:
"""Send a RR to any other servers in the room
Expand Down
17 changes: 10 additions & 7 deletions synapse/federation/sender/per_destination_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from synapse.logging.opentracing import SynapseTags, set_tag
from synapse.metrics import sent_transactions_counter
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.types import ReadReceipt
from synapse.types import Collection, ReadReceipt
from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter

if TYPE_CHECKING:
Expand Down Expand Up @@ -155,19 +155,22 @@ def pending_edu_count(self) -> int:
+ len(self._pending_edus_keyed)
)

def send_pdu(self, pdu: EventBase) -> None:
"""Add a PDU to the queue, and start the transmission loop if necessary
def send_pdus(self, pdus: Collection[EventBase]) -> None:
ShadowJonathan marked this conversation as resolved.
Show resolved Hide resolved
"""Add PDUs to the queue, and start the transmission loop if necessary

Args:
pdu: pdu to send
pdus: pdus to send
"""
if not self._catching_up or self._last_successful_stream_ordering is None:
# only enqueue the PDU if we are not catching up (False) or do not
# yet know if we have anything to catch up (None)
self._pending_pdus.append(pdu)
self._pending_pdus.extend(pdus)
else:
assert pdu.internal_metadata.stream_ordering
self._catchup_last_skipped = pdu.internal_metadata.stream_ordering
self._catchup_last_skipped = max(
pdu.internal_metadata.stream_ordering
for pdu in pdus
if pdu.internal_metadata.stream_ordering is not None
)

self.attempt_new_transaction()

Expand Down
28 changes: 12 additions & 16 deletions synapse/storage/databases/main/transactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import logging
from collections import namedtuple
from typing import Iterable, List, Optional, Tuple
from typing import Dict, List, Optional, Tuple

from canonicaljson import encode_canonical_json

Expand Down Expand Up @@ -296,37 +296,33 @@ def _set_destination_retry_timings_emulated(
},
)

async def store_destination_rooms_entries(
self,
destinations: Iterable[str],
room_id: str,
stream_ordering: int,
) -> None:
async def bulk_store_destination_rooms_entries(
self, room_and_destination_to_ordering: Dict[Tuple[str, str], int]
):
"""
Updates or creates `destination_rooms` entries in batch for a single event.
Updates or creates `destination_rooms` entries for a number of events.

Args:
destinations: list of destinations
room_id: the room_id of the event
stream_ordering: the stream_ordering of the event
room_and_destination_to_ordering: A mapping of (room, destination) -> stream_id
"""

await self.db_pool.simple_upsert_many(
table="destinations",
key_names=("destination",),
key_values=[(d,) for d in destinations],
key_values={(d,) for _, d in room_and_destination_to_ordering.keys()},
value_names=[],
value_values=[],
desc="store_destination_rooms_entries_dests",
)

rows = [(destination, room_id) for destination in destinations]
await self.db_pool.simple_upsert_many(
table="destination_rooms",
key_names=("destination", "room_id"),
key_values=rows,
key_names=("room_id", "destination"),
key_values=list(room_and_destination_to_ordering.keys()),
value_names=["stream_ordering"],
value_values=[(stream_ordering,)] * len(rows),
value_values=[
(stream_id,) for stream_id in room_and_destination_to_ordering.values()
],
desc="store_destination_rooms_entries_rooms",
)

Expand Down