-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Stop getting missing prev_events
after we already know their signature is invalid
#13816
Changes from 30 commits
924ae2b
d240aeb
cfb4e88
9b390a3
3d8507d
88a75cf
d29ac0b
14e39ee
7898371
43f1d1a
e32b901
83feb1b
7d102e8
81410b6
958fd3b
e24db41
f853e78
43fb6b8
03f23b7
99d3e79
f11f5b5
6878faa
f3b443d
d135d41
4effca9
e3cc054
74f9e03
0b900e1
3cb2826
5f313df
02e6bdd
7f4fdd2
89ffbcb
354f682
e0b0447
e72c4e5
4e08039
b060652
b1a0c1b
bccd802
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Stop fetching missing `prev_events` after we already know their signature is invalid. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1501,6 +1501,12 @@ async def record_event_failed_pull_attempt( | |
event_id: The event that failed to be fetched or processed | ||
cause: The error message or reason that we failed to pull the event | ||
""" | ||
logger.debug( | ||
"record_event_failed_pull_attempt room_id=%s, event_id=%s, cause=%s", | ||
room_id, | ||
event_id, | ||
cause, | ||
) | ||
await self.db_pool.runInteraction( | ||
"record_event_failed_pull_attempt", | ||
self._record_event_failed_pull_attempt_upsert_txn, | ||
|
@@ -1530,6 +1536,78 @@ def _record_event_failed_pull_attempt_upsert_txn( | |
|
||
txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec(), cause)) | ||
|
||
@trace | ||
async def filter_event_ids_with_pull_attempt_backoff( | ||
MadLittleMods marked this conversation as resolved.
Show resolved
Hide resolved
MadLittleMods marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self, | ||
room_id: str, | ||
event_ids: Collection[str], | ||
) -> List[str]: | ||
""" | ||
Filter down the events to ones that we've failed to pull before recently. Uses | ||
exponential backoff. | ||
|
||
Args: | ||
room_id: The room that the events belong to | ||
event_ids: A list of events to filter down | ||
|
||
Returns: | ||
List of event_ids that should not be attempted to be pulled | ||
""" | ||
|
||
def _filter_event_ids_with_pull_attempt_backoff_txn( | ||
txn: LoggingTransaction, | ||
) -> List[str]: | ||
where_event_ids_match_clause, values = make_in_list_sql_clause( | ||
txn.database_engine, "event_id", event_ids | ||
) | ||
|
||
if isinstance(self.database_engine, PostgresEngine): | ||
least_function = "least" | ||
elif isinstance(self.database_engine, Sqlite3Engine): | ||
least_function = "min" | ||
else: | ||
raise RuntimeError("Unknown database engine") | ||
|
||
sql = f""" | ||
SELECT event_id FROM event_failed_pull_attempts | ||
WHERE | ||
room_id = ? | ||
AND {where_event_ids_match_clause} | ||
/** | ||
* Exponential back-off (up to the upper bound) so we don't try to | ||
* pull the same event over and over. ex. 2hr, 4hr, 8hr, 16hr, etc. | ||
* | ||
* We use `1 << n` as a power of 2 equivalent for compatibility | ||
* with older SQLites. The left shift equivalent only works with | ||
* powers of 2 because left shift is a binary operation (base-2). | ||
* Otherwise, we would use `power(2, n)` or the power operator, `2^n`. | ||
*/ | ||
AND ? /* current_time */ < last_attempt_ts + ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It occurs to me that these inline SQL comments get sent over the wire to postgres, which is probably not totally ideal. Might be worth moving the big ones out? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, TBH I'm wondering whether for this it would be clearer to just fetch There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think this makes a lot of sense here since we have to get a response out for each
In terms of other places to worry about this: we already strip out the newlines which is why I use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I was thinking about that too, though we'd want to make sure that we do it safely (I'm not sure if just stripping Note that we don't actually send the one-line SQL to the database, we use it for logging. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Created #14192 to continue this comments in SQL conversation ⏩
huh, I swore this was a problem before. Good to know! |
||
(1 << {least_function}(num_attempts, ? /* max doubling steps */)) | ||
* ? /* step */ | ||
) | ||
""" | ||
|
||
txn.execute( | ||
sql, | ||
( | ||
room_id, | ||
*values, | ||
self._clock.time_msec(), | ||
BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS, | ||
BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS, | ||
), | ||
) | ||
|
||
event_ids_to_ignore_result = cast(List[Tuple[str]], txn.fetchall()) | ||
|
||
return [event_id for event_id, in event_ids_to_ignore_result] | ||
|
||
return await self.db_pool.runInteraction( | ||
"filter_event_ids_with_pull_attempt_backoff_txn", | ||
_filter_event_ids_with_pull_attempt_backoff_txn, | ||
) | ||
|
||
async def get_missing_events( | ||
self, | ||
room_id: str, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure it makes sense for this to inherit from
SynapseError
. The risk is that this manages to bubble all the way to the client API, returning a 403, which seems entirely wrong.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense 👍. The docstrings for
SynapseError
andFederationError
could use some improving (would do in separate PR so we don't hang up on that).We probably want to also update
FederationDeniedError
in a separate PR as well?synapse/synapse/api/errors.py
Line 251 in e6e876b
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Created #14190 and #14191 to track these follow-up updates to explain those error classes better ⏩