Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 4f00432

Browse files
David Robertsonrichvdh
andauthored
Fix potential leak of per-room profiles when the user dir is rebuilt. (#10981)
There are two steps to rebuilding the user directory: 1. a scan over rooms, followed by 2. a scan over local users. The former reads avatars and display names from the `room_memberships` table and therefore contains potentially private avatars and display names. The latter reads from the the `profiles` table which only contains public data; moreover it will overwrite any private profiles that the rooms scan may have written to the user directory. This means that the rebuild could leak private user while the rebuild was in progress, only to later cover up the leaks once the rebuild had completed. This change skips over local users when writing user_directory rows when scanning rooms. Doing so means that it'll take longer for a rebuild to make local users searchable, which is unfortunate. I think a future PR can improve this by swapping the order of the two steps above. (And indeed there's more to do here, e.g. copying from `profiles` without going via Python.) Small tidy-ups while I'm here: * Remove duplicated code from test_initial. This was meant to be pulled into `purge_and_rebuild_user_dir`. * Move `is_public` before updating sharing tables. No functional change; it's still before the first read of `is_public`. * Don't bother creating a set from dict keys. Slightly nicer and makes the code simpler. Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
1 parent 392863f commit 4f00432

File tree

3 files changed

+99
-29
lines changed

3 files changed

+99
-29
lines changed

changelog.d/10981.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix a bug that could leak local users' per-room nicknames and avatars when the user directory is rebuilt.

synapse/storage/databases/main/user_directory.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -228,10 +228,6 @@ def _get_next_batch(
228228
is_in_room = await self.is_host_joined(room_id, self.server_name)
229229

230230
if is_in_room:
231-
is_public = await self.is_room_world_readable_or_publicly_joinable(
232-
room_id
233-
)
234-
235231
users_with_profile = await self.get_users_in_room_with_profiles(room_id)
236232
# Throw away users excluded from the directory.
237233
users_with_profile = {
@@ -241,22 +237,33 @@ def _get_next_batch(
241237
or await self.should_include_local_user_in_dir(user_id)
242238
}
243239

244-
# Update each user in the user directory.
240+
# Upsert a user_directory record for each remote user we see.
245241
for user_id, profile in users_with_profile.items():
242+
# Local users are processed separately in
243+
# `_populate_user_directory_users`; there we can read from
244+
# the `profiles` table to ensure we don't leak their per-room
245+
# profiles. It also means we write local users to this table
246+
# exactly once, rather than once for every room they're in.
247+
if self.hs.is_mine_id(user_id):
248+
continue
249+
# TODO `users_with_profile` above reads from the `user_directory`
250+
# table, meaning that `profile` is bespoke to this room.
251+
# and this leaks remote users' per-room profiles to the user directory.
246252
await self.update_profile_in_user_dir(
247253
user_id, profile.display_name, profile.avatar_url
248254
)
249255

250-
to_insert = set()
251-
256+
# Now update the room sharing tables to include this room.
257+
is_public = await self.is_room_world_readable_or_publicly_joinable(
258+
room_id
259+
)
252260
if is_public:
253-
for user_id in users_with_profile:
254-
to_insert.add(user_id)
255-
256-
if to_insert:
257-
await self.add_users_in_public_rooms(room_id, to_insert)
258-
to_insert.clear()
261+
if users_with_profile:
262+
await self.add_users_in_public_rooms(
263+
room_id, users_with_profile.keys()
264+
)
259265
else:
266+
to_insert = set()
260267
for user_id in users_with_profile:
261268
# We want the set of pairs (L, M) where L and M are
262269
# in `users_with_profile` and L is local.

tests/storage/test_user_directory.py

Lines changed: 78 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,19 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
from typing import Dict, List, Set, Tuple
14+
from typing import Any, Dict, List, Set, Tuple
15+
from unittest import mock
1516
from unittest.mock import Mock, patch
1617

1718
from twisted.test.proto_helpers import MemoryReactor
1819

19-
from synapse.api.constants import UserTypes
20+
from synapse.api.constants import EventTypes, Membership, UserTypes
2021
from synapse.appservice import ApplicationService
2122
from synapse.rest import admin
2223
from synapse.rest.client import login, register, room
2324
from synapse.server import HomeServer
2425
from synapse.storage import DataStore
26+
from synapse.storage.roommember import ProfileInfo
2527
from synapse.util import Clock
2628

2729
from tests.test_utils.event_injection import inject_member_event
@@ -52,6 +54,11 @@ def _compress_shared(
5254
return r
5355

5456
async def get_users_in_public_rooms(self) -> List[Tuple[str, str]]:
57+
"""Fetch the entire `users_in_public_rooms` table.
58+
59+
Returns a list of tuples (user_id, room_id) where room_id is public and
60+
contains the user with the given id.
61+
"""
5562
r = await self.store.db_pool.simple_select_list(
5663
"users_in_public_rooms", None, ("user_id", "room_id")
5764
)
@@ -62,20 +69,50 @@ async def get_users_in_public_rooms(self) -> List[Tuple[str, str]]:
6269
return retval
6370

6471
async def get_users_who_share_private_rooms(self) -> List[Dict[str, str]]:
72+
"""Fetch the entire `users_who_share_private_rooms` table.
73+
74+
Returns a dict containing "user_id", "other_user_id" and "room_id" keys.
75+
The dicts can be flattened to Tuples with the `_compress_shared` method.
76+
(This seems a little awkward---maybe we could clean this up.)
77+
"""
78+
6579
return await self.store.db_pool.simple_select_list(
6680
"users_who_share_private_rooms",
6781
None,
6882
["user_id", "other_user_id", "room_id"],
6983
)
7084

7185
async def get_users_in_user_directory(self) -> Set[str]:
86+
"""Fetch the set of users in the `user_directory` table.
87+
88+
This is useful when checking we've correctly excluded users from the directory.
89+
"""
7290
result = await self.store.db_pool.simple_select_list(
7391
"user_directory",
7492
None,
7593
["user_id"],
7694
)
7795
return {row["user_id"] for row in result}
7896

97+
async def get_profiles_in_user_directory(self) -> Dict[str, ProfileInfo]:
98+
"""Fetch users and their profiles from the `user_directory` table.
99+
100+
This is useful when we want to inspect display names and avatars.
101+
It's almost the entire contents of the `user_directory` table: the only
102+
thing missing is an unused room_id column.
103+
"""
104+
rows = await self.store.db_pool.simple_select_list(
105+
"user_directory",
106+
None,
107+
("user_id", "display_name", "avatar_url"),
108+
)
109+
return {
110+
row["user_id"]: ProfileInfo(
111+
display_name=row["display_name"], avatar_url=row["avatar_url"]
112+
)
113+
for row in rows
114+
}
115+
79116

80117
class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
81118
"""Ensure that rebuilding the directory writes the correct data to the DB.
@@ -201,20 +238,6 @@ def test_initial(self) -> None:
201238
self.helper.invite(private_room, src=u1, targ=u3, tok=u1_token)
202239
self.helper.join(private_room, user=u3, tok=u3_token)
203240

204-
self.get_success(self.store.update_user_directory_stream_pos(None))
205-
self.get_success(self.store.delete_all_from_user_dir())
206-
207-
shares_private = self.get_success(
208-
self.user_dir_helper.get_users_who_share_private_rooms()
209-
)
210-
public_users = self.get_success(
211-
self.user_dir_helper.get_users_in_public_rooms()
212-
)
213-
214-
# Nothing updated yet
215-
self.assertEqual(shares_private, [])
216-
self.assertEqual(public_users, [])
217-
218241
# Do the initial population of the user directory via the background update
219242
self._purge_and_rebuild_user_dir()
220243

@@ -346,6 +369,45 @@ def test_population_excludes_appservice_user(self) -> None:
346369
# Check the AS user is not in the directory.
347370
self._check_room_sharing_tables(user, public, private)
348371

372+
def test_population_conceals_private_nickname(self) -> None:
373+
# Make a private room, and set a nickname within
374+
user = self.register_user("aaaa", "pass")
375+
user_token = self.login(user, "pass")
376+
private_room = self.helper.create_room_as(user, is_public=False, tok=user_token)
377+
self.helper.send_state(
378+
private_room,
379+
EventTypes.Member,
380+
state_key=user,
381+
body={"membership": Membership.JOIN, "displayname": "BBBB"},
382+
tok=user_token,
383+
)
384+
385+
# Rebuild the user directory. Make the rescan of the `users` table a no-op
386+
# so we only see the effect of scanning the `room_memberships` table.
387+
async def mocked_process_users(*args: Any, **kwargs: Any) -> int:
388+
await self.store.db_pool.updates._end_background_update(
389+
"populate_user_directory_process_users"
390+
)
391+
return 1
392+
393+
with mock.patch.dict(
394+
self.store.db_pool.updates._background_update_handlers,
395+
populate_user_directory_process_users=mocked_process_users,
396+
):
397+
self._purge_and_rebuild_user_dir()
398+
399+
# Local users are ignored by the scan over rooms
400+
users = self.get_success(self.user_dir_helper.get_profiles_in_user_directory())
401+
self.assertEqual(users, {})
402+
403+
# Do a full rebuild including the scan over the `users` table. The local
404+
# user should appear with their profile name.
405+
self._purge_and_rebuild_user_dir()
406+
users = self.get_success(self.user_dir_helper.get_profiles_in_user_directory())
407+
self.assertEqual(
408+
users, {user: ProfileInfo(display_name="aaaa", avatar_url=None)}
409+
)
410+
349411

350412
class UserDirectoryStoreTestCase(HomeserverTestCase):
351413
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:

0 commit comments

Comments
 (0)