This repository has been archived by the owner on Apr 26, 2024. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Fix performance regression in get_users_in_room
#13972
Merged
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
db73726
Fix performance regression in `get_users_in_room`
erikjohnston 1f400a5
Newsfile
erikjohnston 0dca91d
Fix query
erikjohnston 586ceae
Update changelog.d/13972.bugfix
erikjohnston 99d4bdc
Simplify approximation
erikjohnston 01fd9bc
Review comments
erikjohnston File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Fix a performance regression in the `get_users_in_room` database query. Introduced in v1.67.0. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -146,42 +146,37 @@ def _transact(txn: LoggingTransaction) -> int: | |
|
||
@cached(max_entries=100000, iterable=True) | ||
async def get_users_in_room(self, room_id: str) -> List[str]: | ||
""" | ||
Returns a list of users in the room sorted by longest in the room first | ||
(aka. with the lowest depth). This is done to match the sort in | ||
`get_current_hosts_in_room()` and so we can re-use the cache but it's | ||
not horrible to have here either. | ||
|
||
Uses `m.room.member`s in the room state at the current forward extremities to | ||
determine which users are in the room. | ||
squahtx marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"""Returns a list of users in the room. | ||
|
||
Will return inaccurate results for rooms with partial state, since the state for | ||
the forward extremities of those rooms will exclude most members. We may also | ||
calculate room state incorrectly for such rooms and believe that a member is or | ||
is not in the room when the opposite is true. | ||
""" | ||
return await self.db_pool.runInteraction( | ||
"get_users_in_room", self.get_users_in_room_txn, room_id | ||
return await self.db_pool.simple_select_onecol( | ||
table="current_state_events", | ||
keyvalues={ | ||
"type": EventTypes.Member, | ||
"room_id": room_id, | ||
"membership": Membership.JOIN, | ||
}, | ||
retcol="state_key", | ||
desc="get_users_in_room", | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've made this not go through |
||
|
||
def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[str]: | ||
""" | ||
Returns a list of users in the room sorted by longest in the room first | ||
(aka. with the lowest depth). This is done to match the sort in | ||
`get_current_hosts_in_room()` and so we can re-use the cache but it's | ||
not horrible to have here either. | ||
""" | ||
sql = """ | ||
SELECT c.state_key FROM current_state_events as c | ||
/* Get the depth of the event from the events table */ | ||
INNER JOIN events AS e USING (event_id) | ||
WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ? | ||
/* Sorted by lowest depth first */ | ||
ORDER BY e.depth ASC; | ||
""" | ||
"""Returns a list of users in the room.""" | ||
|
||
txn.execute(sql, (room_id, Membership.JOIN)) | ||
return [r[0] for r in txn] | ||
return self.db_pool.simple_select_onecol_txn( | ||
txn, | ||
table="current_state_events", | ||
keyvalues={ | ||
"type": EventTypes.Member, | ||
"room_id": room_id, | ||
"membership": Membership.JOIN, | ||
}, | ||
retcol="state_key", | ||
) | ||
|
||
@cached() | ||
def get_user_in_room_with_profile( | ||
|
@@ -931,56 +926,78 @@ async def _check_host_room_membership( | |
return True | ||
|
||
@cached(iterable=True, max_entries=10000) | ||
async def get_current_hosts_in_room(self, room_id: str) -> List[str]: | ||
async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: | ||
"""Get current hosts in room based on current state.""" | ||
|
||
# First we check if we already have `get_users_in_room` in the cache, as | ||
# we can just calculate result from that | ||
users = self.get_users_in_room.cache.get_immediate( | ||
(room_id,), None, update_metrics=False | ||
) | ||
if users is not None: | ||
return {get_domain_from_id(u) for u in users} | ||
|
||
if isinstance(self.database_engine, Sqlite3Engine): | ||
# If we're using SQLite then let's just always use | ||
# `get_users_in_room` rather than funky SQL. | ||
users = await self.get_users_in_room(room_id) | ||
return {get_domain_from_id(u) for u in users} | ||
|
||
# For PostgreSQL we can use a regex to pull out the domains from the | ||
# joined users in `current_state_events` via regex. | ||
|
||
def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]: | ||
sql = """ | ||
SELECT DISTINCT substring(state_key FROM '@[^:]*:(.*)$') | ||
FROM current_state_events | ||
WHERE | ||
type = 'm.room.member' | ||
AND membership = 'join' | ||
AND room_id = ? | ||
""" | ||
txn.execute(sql, (room_id,)) | ||
return {d for d, in txn} | ||
|
||
return await self.db_pool.runInteraction( | ||
"get_current_hosts_in_room", get_current_hosts_in_room_txn | ||
) | ||
|
||
@cached(iterable=True, max_entries=10000) | ||
async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]: | ||
""" | ||
Get current hosts in room based on current state. | ||
|
||
The heuristic of sorting by servers who have been in the room the | ||
longest is good because they're most likely to have anything we ask | ||
about. | ||
|
||
Uses `m.room.member`s in the room state at the current forward extremities to | ||
determine which hosts are in the room. | ||
For SQLite the returned list is not ordered, as SQLite doesn't support | ||
the appropriate SQL. | ||
|
||
Will return inaccurate results for rooms with partial state, since the state for | ||
the forward extremities of those rooms will exclude most members. We may also | ||
calculate room state incorrectly for such rooms and believe that a host is or | ||
is not in the room when the opposite is true. | ||
Uses `m.room.member`s in the room state at the current forward | ||
extremities to determine which hosts are in the room. | ||
|
||
Will return inaccurate results for rooms with partial state, since the | ||
state for the forward extremities of those rooms will exclude most | ||
members. We may also calculate room state incorrectly for such rooms and | ||
believe that a host is or is not in the room when the opposite is true. | ||
|
||
Returns: | ||
Returns a list of servers sorted by longest in the room first. (aka. | ||
sorted by join with the lowest depth first). | ||
""" | ||
|
||
# First we check if we already have `get_users_in_room` in the cache, as | ||
# we can just calculate result from that | ||
users = self.get_users_in_room.cache.get_immediate( | ||
(room_id,), None, update_metrics=False | ||
) | ||
if users is None and isinstance(self.database_engine, Sqlite3Engine): | ||
if isinstance(self.database_engine, Sqlite3Engine): | ||
# If we're using SQLite then let's just always use | ||
# `get_users_in_room` rather than funky SQL. | ||
users = await self.get_users_in_room(room_id) | ||
|
||
if users is not None: | ||
# Because `users` is sorted from lowest -> highest depth, the list | ||
# of domains will also be sorted that way. | ||
domains: List[str] = [] | ||
# We use a `Set` just for fast lookups | ||
domain_set: Set[str] = set() | ||
for u in users: | ||
if ":" not in u: | ||
continue | ||
domain = get_domain_from_id(u) | ||
if domain not in domain_set: | ||
domain_set.add(domain) | ||
domains.append(domain) | ||
return domains | ||
domains = await self.get_current_hosts_in_room(room_id) | ||
return list(domains) | ||
|
||
# For PostgreSQL we can use a regex to pull out the domains from the | ||
# joined users in `current_state_events` via regex. | ||
|
||
def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> List[str]: | ||
def get_current_hosts_in_room_ordered_txn(txn: LoggingTransaction) -> List[str]: | ||
# Returns a list of servers currently joined in the room sorted by | ||
# longest in the room first (aka. with the lowest depth). The | ||
# heuristic of sorting by servers who have been in the room the | ||
|
@@ -1008,7 +1025,7 @@ def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> List[str]: | |
return [d for d, in txn if d is not None] | ||
|
||
return await self.db_pool.runInteraction( | ||
"get_current_hosts_in_room", get_current_hosts_in_room_txn | ||
"get_current_hosts_in_room_ordered", get_current_hosts_in_room_ordered_txn | ||
) | ||
|
||
async def get_joined_hosts( | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should update this docstring describing the order