Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Optimize how we calculate likely_domains during backfill #13575

Merged
merged 15 commits into from
Aug 30, 2022
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13575.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Optimize how Synapse calculates domains to fetch from during backfill.
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
53 changes: 10 additions & 43 deletions synapse/handlers/federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
from synapse.storage.databases.main.events import PartialStateConflictError
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
from synapse.storage.state import StateFilter
from synapse.types import JsonDict, StateMap, get_domain_from_id
from synapse.types import JsonDict, get_domain_from_id
from synapse.util.async_helpers import Linearizer
from synapse.util.retryutils import NotRetryingDestination
from synapse.visibility import filter_events_for_server
Expand Down Expand Up @@ -99,37 +99,6 @@
)


def get_domains_from_state(state: StateMap[EventBase]) -> List[Tuple[str, int]]:
Copy link
Contributor Author

@MadLittleMods MadLittleMods Aug 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eliminating this option to get domains for a room in favor of get_current_hosts_in_room.

In a room like #matrixhq, getting all 80k state events in the room, then whittling it down in the app is not faster than a quick targeted database query or maybe even luckily hitting the cache.

"""Get joined domains from state

Args:
state: State map from type/state key to event.

Returns:
Returns a list of servers with the lowest depth of their joins.
Sorted by lowest depth first.
"""
joined_users = [
(state_key, int(event.depth))
for (e_type, state_key), event in state.items()
if e_type == EventTypes.Member and event.membership == Membership.JOIN
]

joined_domains: Dict[str, int] = {}
for u, d in joined_users:
try:
dom = get_domain_from_id(u)
old_d = joined_domains.get(dom)
if old_d:
joined_domains[dom] = min(d, old_d)
else:
joined_domains[dom] = d
except Exception:
pass

return sorted(joined_domains.items(), key=lambda d: d[1])


class _BackfillPointType(Enum):
# a regular backwards extremity (ie, an event which we don't yet have, but which
# is referred to by other events in the DAG)
Expand Down Expand Up @@ -427,21 +396,19 @@ async def _maybe_backfill_inner(
)

# Now we need to decide which hosts to hit first.

# First we try hosts that are already in the room
# First we try hosts that are already in the room.
# TODO: HEURISTIC ALERT.
likely_domains = (
await self._storage_controllers.state.get_current_hosts_in_room(room_id)
)

curr_state = await self._storage_controllers.state.get_current_state(room_id)

curr_domains = get_domains_from_state(curr_state)

likely_domains = [
domain for domain, depth in curr_domains if domain != self.server_name
]

async def try_backfill(domains: List[str]) -> bool:
async def try_backfill(domains: Collection[str]) -> bool:
# TODO: Should we try multiple of these at a time?
for dom in domains:
# We don't want to ask our own server for information we don't have
if dom == self.server_name:
continue

try:
await self._federation_event_handler.backfill(
dom, room_id, limit=100, extremities=extremities_to_request
Expand Down
14 changes: 6 additions & 8 deletions synapse/handlers/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@
from synapse.events import EventBase
from synapse.events.utils import copy_and_fixup_power_levels_contents
from synapse.federation.federation_client import InvalidResponseError
from synapse.handlers.federation import get_domains_from_state
from synapse.handlers.relations import BundledAggregations
from synapse.module_api import NOT_SPAM
from synapse.rest.admin._base import assert_user_is_admin
Expand Down Expand Up @@ -1459,17 +1458,16 @@ async def get_event_for_timestamp(
timestamp,
)

# Find other homeservers from the given state in the room
curr_state = await self._storage_controllers.state.get_current_state(
room_id
likely_domains = (
await self._storage_controllers.state.get_current_hosts_in_room(room_id)
)
curr_domains = get_domains_from_state(curr_state)
likely_domains = [
domain for domain, depth in curr_domains if domain != self.server_name
]

# Loop through each homeserver candidate until we get a succesful response
for domain in likely_domains:
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
# We don't want to ask our own server for information we don't have
if domain == self.server_name:
continue

try:
remote_response = await self.federation_client.timestamp_to_event(
domain, room_id, timestamp, direction
Expand Down
36 changes: 30 additions & 6 deletions synapse/storage/databases/main/roommember.py
Original file line number Diff line number Diff line change
Expand Up @@ -1019,7 +1019,17 @@ async def _check_host_room_membership(

@cached(iterable=True, max_entries=10000)
async def get_current_hosts_in_room(self, room_id: str) -> Set[str]:
"""Get current hosts in room based on current state."""
"""
Get current hosts in room based on current state.

The heuristic of sorting by servers who have been in the room the
longest is good because they're most likely to have anything we ask
about.

Returns:
Returns a list of servers sorted by longest in the room first. (aka.
sorted by join with the lowest depth first).
"""

# First we check if we already have `get_users_in_room` in the cache, as
# we can just calculate result from that
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -1039,13 +1049,27 @@ async def get_current_hosts_in_room(self, room_id: str) -> Set[str]:
# joined users in `current_state_events` via regex.

def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]:
# Returns a list of servers currently joined in the room sorted by
# longest in the room first (aka. with the lowest depth). The
# heuristic of sorting by servers who have been in the room the
# longest is good because they're most likely to have anything we
# ask about.
sql = """
SELECT DISTINCT substring(state_key FROM '@[^:]*:(.*)$')
FROM current_state_events
SELECT
/* Match the domain part of the MXID */
substring(c.state_key FROM '@[^:]*:(.*)$') as server_domain
FROM current_state_events c
/* Get the depth of the event from the events table */
INNER JOIN events AS e USING (event_id)
WHERE
type = 'm.room.member'
AND membership = 'join'
AND room_id = ?
/* Find any join state events in the room */
c.type = 'm.room.member'
AND c.membership = 'join'
AND c.room_id = ?
/* Group all state events from the same domain into their own buckets (groups) */
GROUP BY server_domain
/* Sorted by lowest depth first */
ORDER BY min(e.depth) ASC;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updating the query to order by depth to match what get_domains_from_state did before (drop-in replacement).

The heuristic of sorting by servers who have been there the longest is good because they're most likely to have anything we ask about.

"""
txn.execute(sql, (room_id,))
return {d for d, in txn}
Expand Down