 from unpaddedbase64 import decode_base64
 
 from synapse import event_auth
-from synapse.api.constants import EventContentFields, EventTypes, Membership
+from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership
 from synapse.api.errors import (
     AuthError,
     CodeMessageException,
@@ -211,7 +211,7 @@ async def _maybe_backfill_inner(
         current_depth: int,
         limit: int,
         *,
-        processing_start_time: int,
+        processing_start_time: Optional[int],
     ) -> bool:
         """
         Checks whether the `current_depth` is at or approaching any backfill
@@ -223,20 +223,36 @@ async def _maybe_backfill_inner(
             room_id: The room to backfill in.
             current_depth: The depth to check at for any upcoming backfill points.
             limit: The max number of events to request from the remote federated server.
-            processing_start_time: The time when `maybe_backfill` started
-                processing. Only used for timing.
+            processing_start_time: The time when `maybe_backfill` started processing.
+                Only used for timing. If `None`, no timing observation will be made.
         """
         backwards_extremities = [
             _BackfillPoint(event_id, depth, _BackfillPointType.BACKWARDS_EXTREMITY)
-            for event_id, depth in await self.store.get_backfill_points_in_room(room_id)
+            for event_id, depth in await self.store.get_backfill_points_in_room(
+                room_id=room_id,
+                current_depth=current_depth,
+                # We only need to end up with 5 extremities combined with the
+                # insertion event extremities to make the `/backfill` request,
+                # but we fetch an order of magnitude more to make sure there
+                # are enough left even after we filter them by whether they
+                # are visible in the history. This isn't fool-proof, as all
+                # backfill points within our limit could be filtered out, but
+                # it seems like a good amount to try with at least.
+                limit=50,
+            )
         ]
 
         insertion_events_to_be_backfilled: List[_BackfillPoint] = []
         if self.hs.config.experimental.msc2716_enabled:
             insertion_events_to_be_backfilled = [
                 _BackfillPoint(event_id, depth, _BackfillPointType.INSERTION_PONT)
                 for event_id, depth in await self.store.get_insertion_event_backward_extremities_in_room(
-                    room_id
+                    room_id=room_id,
+                    current_depth=current_depth,
+                    # We only need to end up with 5 extremities combined with
+                    # the backfill points to make the `/backfill` request ...
+                    # (see the other comment above for more context).
+                    limit=50,
                 )
             ]
         logger.debug(
@@ -245,10 +261,6 @@ async def _maybe_backfill_inner(
             insertion_events_to_be_backfilled,
         )
 
-        if not backwards_extremities and not insertion_events_to_be_backfilled:
-            logger.debug("Not backfilling as no extremeties found.")
-            return False
-
         # we now have a list of potential places to backpaginate from. We prefer to
         # start with the most recent (ie, max depth), so let's sort the list.
         sorted_backfill_points: List[_BackfillPoint] = sorted(
@@ -269,6 +281,33 @@ async def _maybe_backfill_inner(
             sorted_backfill_points,
         )
 
+        # If we have no backfill points lower than the `current_depth`, then
+        # we can either a) bail or b) still attempt to backfill. We opt to try
+        # backfilling anyway just in case we do get relevant events.
+        if not sorted_backfill_points and current_depth != MAX_DEPTH:
+            logger.debug(
+                "_maybe_backfill_inner: all backfill points are *after* current depth. Trying again with later backfill points."
+            )
+            return await self._maybe_backfill_inner(
+                room_id=room_id,
+                # We use `MAX_DEPTH` so that we find all backfill points next
+                # time (all events are below `MAX_DEPTH`).
+                current_depth=MAX_DEPTH,
+                limit=limit,
+                # We don't want to start another timing observation from this
+                # nested recursive call. The top-most call can record the
+                # overall time; otherwise the smaller one would throw off the
+                # results.
+                processing_start_time=None,
+            )
+
+        # Even after recursing with `MAX_DEPTH`, we didn't find any
+        # backward extremities to backfill from.
+        if not sorted_backfill_points:
+            logger.debug(
+                "_maybe_backfill_inner: Not backfilling as no backward extremities found."
+            )
+            return False
+
         # If we're approaching an extremity we trigger a backfill, otherwise we
         # no-op.
         #
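The recursion added above follows a small, reusable pattern: if nothing qualifies at the current depth, retry exactly once with `MAX_DEPTH` as a sentinel, and pass `processing_start_time=None` so the nested call cannot record a second, smaller timing sample. A minimal self-contained sketch of that pattern, using hypothetical names (`get_points`, `KNOWN_POINTS`) and toy depths rather than Synapse's actual API:

```python
import asyncio
import time
from typing import List, Optional

MAX_DEPTH = 2**63 - 1  # stand-in for synapse.api.constants.MAX_DEPTH
KNOWN_POINTS = [120, 150, 300]  # toy backfill points, one depth each


async def get_points(current_depth: int, limit: int = 50) -> List[int]:
    # Only points at or below the caller's depth qualify, deepest first.
    return sorted((d for d in KNOWN_POINTS if d <= current_depth), reverse=True)[:limit]


async def maybe_backfill_inner(
    current_depth: int, *, processing_start_time: Optional[float]
) -> bool:
    points = await get_points(current_depth)
    if not points and current_depth != MAX_DEPTH:
        # Nothing at or below us: retry once with the sentinel depth so every
        # known point qualifies. `None` suppresses the nested timing sample.
        return await maybe_backfill_inner(MAX_DEPTH, processing_start_time=None)
    if not points:
        return False  # nothing to backfill from, even at MAX_DEPTH
    if processing_start_time is not None:
        print(f"processing took {time.monotonic() - processing_start_time:.6f}s")
    return True


# No point is <= 100, so the retry path fires and finds [300, 150, 120].
print(asyncio.run(maybe_backfill_inner(100, processing_start_time=time.monotonic())))
```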
@@ -278,47 +317,16 @@ async def _maybe_backfill_inner(
         # chose more than one times the limit in case of failure, but choosing a
         # much larger factor will result in triggering a backfill request much
         # earlier than necessary.
-        #
-        # XXX: shouldn't we do this *after* the filter by depth below? Again, we don't
-        # care about events that have happened after our current position.
-        #
-        max_depth = sorted_backfill_points[0].depth
-        if current_depth - 2 * limit > max_depth:
+        max_depth_of_backfill_points = sorted_backfill_points[0].depth
+        if current_depth - 2 * limit > max_depth_of_backfill_points:
             logger.debug(
                 "Not backfilling as we don't need to. %d < %d - 2 * %d",
-                max_depth,
+                max_depth_of_backfill_points,
                 current_depth,
                 limit,
             )
             return False
 
-        # We ignore extremities that have a greater depth than our current depth
-        # as:
-        # 1. we don't really care about getting events that have happened
-        #    after our current position; and
-        # 2. we have likely previously tried and failed to backfill from that
-        #    extremity, so to avoid getting "stuck" requesting the same
-        #    backfill repeatedly we drop those extremities.
-        #
-        # However, we need to check that the filtered extremities are non-empty.
-        # If they are empty then either we can a) bail or b) still attempt to
-        # backfill. We opt to try backfilling anyway just in case we do get
-        # relevant events.
-        #
-        filtered_sorted_backfill_points = [
-            t for t in sorted_backfill_points if t.depth <= current_depth
-        ]
-        if filtered_sorted_backfill_points:
-            logger.debug(
-                "_maybe_backfill_inner: backfill points before current depth: %s",
-                filtered_sorted_backfill_points,
-            )
-            sorted_backfill_points = filtered_sorted_backfill_points
-        else:
-            logger.debug(
-                "_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway."
-            )
-
         # For performance's sake, we only want to paginate from a particular extremity
         # if we can actually see the events we'll get. Otherwise, we'd just spend a lot
         # of resources to get redacted events. We check each extremity in turn and
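The `current_depth - 2 * limit > max_depth_of_backfill_points` check above is easier to see with numbers. A toy illustration (values made up, not from the source): we keep one `limit` of headroom for the user to paginate through, plus a second in case a request fails.

```python
current_depth = 500  # where the client is currently paginating
limit = 100          # events fetched per /backfill request
max_depth_of_backfill_points = 250  # deepest (most recent) backfill point

# 500 - 2 * 100 = 300 > 250, so more than two requests' worth of
# locally-known history still sits below us; skip backfilling for now.
if current_depth - 2 * limit > max_depth_of_backfill_points:
    print("Not backfilling: no backfill point is within 2 * limit of us")
else:
    print("Backfilling: approaching the oldest events we know about")
```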
@@ -452,10 +460,15 @@ async def try_backfill(domains: Collection[str]) -> bool:
 
             return False
 
-        processing_end_time = self.clock.time_msec()
-        backfill_processing_before_timer.observe(
-            (processing_end_time - processing_start_time) / 1000
-        )
+        # If we have the `processing_start_time`, then we can make an
+        # observation. We wouldn't have the `processing_start_time` in the case
+        # where `_maybe_backfill_inner` is recursively called to find any
+        # backfill points regardless of `current_depth`.
+        if processing_start_time is not None:
+            processing_end_time = self.clock.time_msec()
+            backfill_processing_before_timer.observe(
+                (processing_end_time - processing_start_time) / 1000
+            )
 
         success = await try_backfill(likely_domains)
         if success:
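The guarded metric observation in the last hunk pairs with the `Optional` start time: only the outermost call records a sample. Below is a sketch of the same guard in isolation, assuming `backfill_processing_before_timer` is a `prometheus_client` `Histogram`; the metric name and helper function are illustrative assumptions, not Synapse's actual definitions.

```python
from typing import Optional

from prometheus_client import Histogram

# Assumed shape of the metric; Synapse's real name/buckets may differ.
backfill_processing_before_timer = Histogram(
    "backfill_processing_before_time_seconds",
    "Time spent processing before making the /backfill request",
)


def maybe_observe(processing_start_time: Optional[int], now_msec: int) -> None:
    """Hypothetical helper: record a sample only for the top-level call.

    The recursive retry passes `processing_start_time=None`, so its shorter
    duration never pollutes the histogram.
    """
    if processing_start_time is not None:
        backfill_processing_before_timer.observe(
            (now_msec - processing_start_time) / 1000  # msec -> seconds
        )
```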