From 5d92a1428ceb4077801afc1785a5472e89fd9df3 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 3 Aug 2020 13:54:24 -0700 Subject: [PATCH] Prevent join->join membership transitions changing member count (#7977) `StatsHandler` handles updates to the `current_state_delta_stream`, and updates room stats such as the amount of state events, joined users, etc. However, it counts every new join membership as a new user entering a room (and that user being in another room), whereas it's possible for a user's membership status to go from join -> join, for instance when they change their per-room profile information. This PR adds a check for join->join membership transitions, and bails out early, as none of the further checks are necessary at that point. Due to this bug, membership stats in many rooms have ended up being wildly larger than their true values. I am not sure if we also want to include a migration step which recalculates these statistics (possibly using the `_populate_stats_process_rooms` bg update). Bug introduced in the initial implementation https://github.com/matrix-org/synapse/pull/4338. --- changelog.d/7977.bugfix | 1 + synapse/handlers/stats.py | 2 +- .../main/schema/delta/58/12room_stats.sql | 32 +++++++++++++ synapse/storage/data_stores/main/stats.py | 34 ++++++++++++-- tests/handlers/test_stats.py | 46 ++++++++++++++++--- tests/rest/client/v1/utils.py | 24 +++++++++- 6 files changed, 126 insertions(+), 13 deletions(-) create mode 100644 changelog.d/7977.bugfix create mode 100644 synapse/storage/data_stores/main/schema/delta/58/12room_stats.sql diff --git a/changelog.d/7977.bugfix b/changelog.d/7977.bugfix new file mode 100644 index 000000000000..c587f1305567 --- /dev/null +++ b/changelog.d/7977.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse v1.7.2 which caused inaccurate membership counts in the room directory. diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 149f861239da..249ffe2a55c8 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -232,7 +232,7 @@ async def _handle_deltas(self, deltas): if membership == prev_membership: pass # noop - if membership == Membership.JOIN: + elif membership == Membership.JOIN: room_stats_delta["joined_members"] += 1 elif membership == Membership.INVITE: room_stats_delta["invited_members"] += 1 diff --git a/synapse/storage/data_stores/main/schema/delta/58/12room_stats.sql b/synapse/storage/data_stores/main/schema/delta/58/12room_stats.sql new file mode 100644 index 000000000000..cade5dcca806 --- /dev/null +++ b/synapse/storage/data_stores/main/schema/delta/58/12room_stats.sql @@ -0,0 +1,32 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Recalculate the stats for all rooms after the fix to joined_members erroneously +-- incrementing on per-room profile changes. + +-- Note that the populate_stats_process_rooms background update is already set to +-- run if you're upgrading from Synapse <1.0.0. + +-- Additionally, if you've upgraded to v1.18.0 (which doesn't include this fix), +-- this bg job runs, and then update to v1.19.0, you'd end up with only half of +-- your rooms having room stats recalculated after this fix was in place. + +-- So we've switched the old `populate_stats_process_rooms` background job to a +-- no-op, and then kick off a bg job with a new name, but with the same +-- functionality as the old one. This effectively restarts the background job +-- from the beginning, without running it twice in a row, supporting both +-- upgrade usecases. +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_stats_process_rooms_2', '{}'); diff --git a/synapse/storage/data_stores/main/stats.py b/synapse/storage/data_stores/main/stats.py index 922400a7c3be..40db8f594eaa 100644 --- a/synapse/storage/data_stores/main/stats.py +++ b/synapse/storage/data_stores/main/stats.py @@ -72,6 +72,9 @@ def __init__(self, database: Database, db_conn, hs): self.db.updates.register_background_update_handler( "populate_stats_process_rooms", self._populate_stats_process_rooms ) + self.db.updates.register_background_update_handler( + "populate_stats_process_rooms_2", self._populate_stats_process_rooms_2 + ) self.db.updates.register_background_update_handler( "populate_stats_process_users", self._populate_stats_process_users ) @@ -140,11 +143,30 @@ def _get_next_batch(txn): return len(users_to_work_on) async def _populate_stats_process_rooms(self, progress, batch_size): + """ + This was a background update which regenerated statistics for rooms. + + It has been replaced by StatsStore._populate_stats_process_rooms_2. This background + job has been scheduled to run as part of Synapse v1.0.0, and again now. To ensure + someone upgrading from None: + """ + Send a membership state event into a room. + + Args: + room: The ID of the room to send to + src: The mxid of the event sender + targ: The mxid of the event's target. The state key + membership: The type of membership event + extra_data: Extra information to include in the content of the event + tok: The user access token to use + expect_code: The expected HTTP response code + """ temp_id = self.auth_user_id self.auth_user_id = src @@ -97,6 +118,7 @@ def change_membership(self, room, src, targ, membership, tok=None, expect_code=2 path = path + "?access_token=%s" % tok data = {"membership": membership} + data.update(extra_data) request, channel = make_request( self.hs.get_reactor(), "PUT", path, json.dumps(data).encode("utf8")