Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix potential leak of per-room profiles when the user dir is rebuilt. #10981

Merged
merged 9 commits into from
Oct 5, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion synapse/storage/databases/main/user_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ def _get_next_batch(
room_id
)

# TODO this leaks per-room profiles!
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this seems to be in conflict with the newsfile 🤔

I think I don't really understand what the TODO means. How does it leak them?

Perhaps it's ok if you're going to be updating this soon, but otherwise a few more words in this comment might be helpful.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll expand the comment. For completeness here:

get_users_in_room_with_profiles reads from the room_memberships table. That contains a display_name and avatar_url specific to this room (it doesn't read from the profiles table). With my change, we no longer upsert user_directory and user_directory_search rows for local users in this function, but we do continue to do so for remote users.

I meant the TODO as a marker to track where the data we don't want to leak comes from.

users_with_profile = await self.get_users_in_room_with_profiles(room_id)
# Throw away users excluded from the directory.
users_with_profile = {
Expand All @@ -241,8 +242,11 @@ def _get_next_batch(
or await self.should_include_local_user_in_dir(user_id)
}

# Update each user in the user directory.
# Upsert a user_directory record for each remote user we see.
# (Local users are processed in `_populate_user_directory_users`.)
for user_id, profile in users_with_profile.items():
if self.hs.is_mine_id(user_id):
continue
Comment on lines +247 to +248
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it might be worth adding a comment here to explain why it's important to skip local users here.

await self.update_profile_in_user_dir(
user_id, profile.display_name, profile.avatar_url
)
Expand Down
58 changes: 56 additions & 2 deletions tests/storage/test_user_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, List, Set, Tuple
from typing import Any, Dict, List, Set, Tuple
from unittest import mock
from unittest.mock import Mock, patch

from twisted.test.proto_helpers import MemoryReactor

from synapse.api.constants import UserTypes
from synapse.api.constants import EventTypes, Membership, UserTypes
from synapse.appservice import ApplicationService
from synapse.rest import admin
from synapse.rest.client import login, register, room
from synapse.server import HomeServer
from synapse.storage import DataStore
from synapse.storage.roommember import ProfileInfo
from synapse.util import Clock

from tests.test_utils.event_injection import inject_member_event
Expand Down Expand Up @@ -76,6 +78,19 @@ async def get_users_in_user_directory(self) -> Set[str]:
)
return {row["user_id"] for row in result}

async def get_profiles_in_user_directory(self) -> Dict[str, ProfileInfo]:
DMRobertson marked this conversation as resolved.
Show resolved Hide resolved
rows = await self.store.db_pool.simple_select_list(
"user_directory",
None,
("user_id", "display_name", "avatar_url"),
)
return {
row["user_id"]: ProfileInfo(
display_name=row["display_name"], avatar_url=row["avatar_url"]
)
for row in rows
}


class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
"""Ensure that rebuilding the directory writes the correct data to the DB.
Expand Down Expand Up @@ -346,6 +361,45 @@ def test_population_excludes_appservice_user(self) -> None:
# Check the AS user is not in the directory.
self._check_room_sharing_tables(user, public, private)

def test_population_conceals_private_nickname(self) -> None:
# Make a private room, and set a nickname within
user = self.register_user("aaaa", "pass")
user_token = self.login(user, "pass")
private_room = self.helper.create_room_as(user, is_public=False, tok=user_token)
self.helper.send_state(
private_room,
EventTypes.Member,
state_key=user,
body={"membership": Membership.JOIN, "displayname": "BBBB"},
tok=user_token,
)

# Rebuild the user directory. Make the rescan of the `users` table a no-op
# so we only see the effect of scanning the `room_memberships` table.
async def mocked_process_users(*args: Any, **kwargs: Any) -> int:
await self.store.db_pool.updates._end_background_update(
"populate_user_directory_process_users"
)
return 1

with mock.patch.dict(
self.store.db_pool.updates._background_update_handlers,
populate_user_directory_process_users=mocked_process_users,
):
self._purge_and_rebuild_user_dir()

# Local users are ignored by the scan over rooms
users = self.get_success(self.user_dir_helper.get_profiles_in_user_directory())
self.assertEqual(users, {})

# Do a full rebuild including the scan over the `users` table. The local
# user should appear with their profile name.
self._purge_and_rebuild_user_dir()
users = self.get_success(self.user_dir_helper.get_profiles_in_user_directory())
self.assertEqual(
users, {user: ProfileInfo(display_name="aaaa", avatar_url=None)}
)


class UserDirectoryStoreTestCase(HomeserverTestCase):
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
Expand Down