Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix performance regression in get_users_in_room #13972

Merged
merged 6 commits into from
Sep 30, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13972.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix performance regression in `get_users_in_room` database query. Introduced in v1.67.0.
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved
4 changes: 3 additions & 1 deletion synapse/handlers/federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,9 @@ async def _maybe_backfill_inner(
# First we try hosts that are already in the room.
# TODO: HEURISTIC ALERT.
likely_domains = (
await self._storage_controllers.state.get_current_hosts_in_room(room_id)
await self._storage_controllers.state.get_current_hosts_in_room_ordered(
room_id
)
)

async def try_backfill(domains: Collection[str]) -> bool:
Expand Down
4 changes: 3 additions & 1 deletion synapse/handlers/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -1540,7 +1540,9 @@ async def get_event_for_timestamp(
)

likely_domains = (
await self._storage_controllers.state.get_current_hosts_in_room(room_id)
await self._storage_controllers.state.get_current_hosts_in_room_ordered(
room_id
)
)

# Loop through each homeserver candidate until we get a succesful response
Expand Down
16 changes: 14 additions & 2 deletions synapse/storage/controllers/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
Mapping,
Optional,
Sequence,
Set,
Tuple,
)

Expand Down Expand Up @@ -529,7 +530,18 @@ async def get_current_state_event(
)
return state_map.get(key)

async def get_current_hosts_in_room(self, room_id: str) -> List[str]:
async def get_current_hosts_in_room(self, room_id: str) -> Set[str]:
"""Get current hosts in room based on current state.

Blocks until we have full state for the given room. This only happens for rooms
with partial state.
"""

await self._partial_state_room_tracker.await_full_state(room_id)

return await self.stores.main.get_current_hosts_in_room(room_id)

async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]:
"""Get current hosts in room based on current state.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should update this docstring describing the order


Blocks until we have full state for the given room. This only happens for rooms
Expand All @@ -542,7 +554,7 @@ async def get_current_hosts_in_room(self, room_id: str) -> List[str]:

await self._partial_state_room_tracker.await_full_state(room_id)

return await self.stores.main.get_current_hosts_in_room(room_id)
return await self.stores.main.get_current_hosts_in_room_ordered(room_id)

async def get_current_hosts_in_room_or_partial_state_approximation(
squahtx marked this conversation as resolved.
Show resolved Hide resolved
self, room_id: str
Expand Down
89 changes: 50 additions & 39 deletions synapse/storage/databases/main/roommember.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,14 +146,7 @@ def _transact(txn: LoggingTransaction) -> int:

@cached(max_entries=100000, iterable=True)
async def get_users_in_room(self, room_id: str) -> List[str]:
"""
Returns a list of users in the room sorted by longest in the room first
(aka. with the lowest depth). This is done to match the sort in
`get_current_hosts_in_room()` and so we can re-use the cache but it's
not horrible to have here either.

Uses `m.room.member`s in the room state at the current forward extremities to
determine which users are in the room.
squahtx marked this conversation as resolved.
Show resolved Hide resolved
"""Returns a list of users in the room.

Will return inaccurate results for rooms with partial state, since the state for
the forward extremities of those rooms will exclude most members. We may also
Expand All @@ -165,19 +158,10 @@ async def get_users_in_room(self, room_id: str) -> List[str]:
)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've made this not go through get_users_in_room_txn as using simple_select_onecol directly is actually slightly more performant (as we don't enter a transaction)


def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[str]:
"""
Returns a list of users in the room sorted by longest in the room first
(aka. with the lowest depth). This is done to match the sort in
`get_current_hosts_in_room()` and so we can re-use the cache but it's
not horrible to have here either.
"""
"""Returns a list of users in the room."""
sql = """
SELECT c.state_key FROM current_state_events as c
/* Get the depth of the event from the events table */
INNER JOIN events AS e USING (event_id)
WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ?
/* Sorted by lowest depth first */
ORDER BY e.depth ASC;
SELECT state_key FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ? AND membership = ?
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved
"""

txn.execute(sql, (room_id, Membership.JOIN))
Expand Down Expand Up @@ -931,14 +915,53 @@ async def _check_host_room_membership(
return True

@cached(iterable=True, max_entries=10000)
async def get_current_hosts_in_room(self, room_id: str) -> List[str]:
async def get_current_hosts_in_room(self, room_id: str) -> Set[str]:
"""Get current hosts in room based on current state."""

# First we check if we already have `get_users_in_room` in the cache, as
# we can just calculate result from that
users = self.get_users_in_room.cache.get_immediate(
(room_id,), None, update_metrics=False
)
if users is not None:
return {get_domain_from_id(u) for u in users}

if isinstance(self.database_engine, Sqlite3Engine):
# If we're using SQLite then let's just always use
# `get_users_in_room` rather than funky SQL.
users = await self.get_users_in_room(room_id)
return {get_domain_from_id(u) for u in users}

# For PostgreSQL we can use a regex to pull out the domains from the
# joined users in `current_state_events` via regex.

def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]:
sql = """
SELECT DISTINCT substring(state_key FROM '@[^:]*:(.*)$')
FROM current_state_events
WHERE
type = 'm.room.member'
AND membership = 'join'
AND room_id = ?
"""
txn.execute(sql, (room_id,))
return {d for d, in txn}

return await self.db_pool.runInteraction(
"get_current_hosts_in_room", get_current_hosts_in_room_txn
)

@cached(iterable=True, max_entries=10000)
async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]:
"""
Get current hosts in room based on current state.

The heuristic of sorting by servers who have been in the room the
longest is good because they're most likely to have anything we ask
about.

For SQLite the returned list is not ordered (for performance reasons).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a surprise given the function name!

Are we really doing this for performance reasons? Isn't it because the regex function thingy doesn't exist on SQLite?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Err, yes


Uses `m.room.member`s in the room state at the current forward extremities to
determine which hosts are in the room.

Expand All @@ -952,35 +975,23 @@ async def get_current_hosts_in_room(self, room_id: str) -> List[str]:
sorted by join with the lowest depth first).
"""

# First we check if we already have `get_users_in_room` in the cache, as
# we can just calculate result from that
users = self.get_users_in_room.cache.get_immediate(
(room_id,), None, update_metrics=False
)
if users is None and isinstance(self.database_engine, Sqlite3Engine):
if isinstance(self.database_engine, Sqlite3Engine):
# If we're using SQLite then let's just always use
# `get_users_in_room` rather than funky SQL.
users = await self.get_users_in_room(room_id)

if users is not None:
# Because `users` is sorted from lowest -> highest depth, the list
# of domains will also be sorted that way.
domains: List[str] = []
# We use a `Set` just for fast lookups
domain_set: Set[str] = set()
domains: Set[str] = set()
for u in users:
if ":" not in u:
continue
domain = get_domain_from_id(u)
if domain not in domain_set:
domain_set.add(domain)
domains.append(domain)
return domains
domains.add(domain)
return list(domains)
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved

# For PostgreSQL we can use a regex to pull out the domains from the
# joined users in `current_state_events` via regex.

def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> List[str]:
def get_current_hosts_in_room_ordered_txn(txn: LoggingTransaction) -> List[str]:
# Returns a list of servers currently joined in the room sorted by
# longest in the room first (aka. with the lowest depth). The
# heuristic of sorting by servers who have been in the room the
Expand Down Expand Up @@ -1008,7 +1019,7 @@ def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> List[str]:
return [d for d, in txn if d is not None]

return await self.db_pool.runInteraction(
"get_current_hosts_in_room", get_current_hosts_in_room_txn
"get_current_hosts_in_room_ordered", get_current_hosts_in_room_ordered_txn
)

async def get_joined_hosts(
Expand Down