Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 33548f3

Browse files
authored
Improve tracing for to device messages (#9686)
1 parent bb0fe02 commit 33548f3

File tree

7 files changed

+102
-19
lines changed

7 files changed

+102
-19
lines changed

changelog.d/9686.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve Jaeger tracing for `to_device` messages.

synapse/federation/sender/per_destination_queue.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from synapse.events import EventBase
3030
from synapse.federation.units import Edu
3131
from synapse.handlers.presence import format_user_presence_state
32+
from synapse.logging.opentracing import SynapseTags, set_tag
3233
from synapse.metrics import sent_transactions_counter
3334
from synapse.metrics.background_process_metrics import run_as_background_process
3435
from synapse.types import ReadReceipt
@@ -557,6 +558,13 @@ async def _get_to_device_message_edus(self, limit: int) -> Tuple[List[Edu], int]
557558
contents, stream_id = await self._store.get_new_device_msgs_for_remote(
558559
self._destination, last_device_stream_id, to_device_stream_id, limit
559560
)
561+
for content in contents:
562+
message_id = content.get("message_id")
563+
if not message_id:
564+
continue
565+
566+
set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id)
567+
560568
edus = [
561569
Edu(
562570
origin=self._server_name,

synapse/handlers/devicemessage.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
from synapse.api.ratelimiting import Ratelimiter
2222
from synapse.logging.context import run_in_background
2323
from synapse.logging.opentracing import (
24+
SynapseTags,
2425
get_active_span_text_map,
2526
log_kv,
2627
set_tag,
27-
start_active_span,
2828
)
2929
from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet
3030
from synapse.types import JsonDict, Requester, UserID, get_domain_from_id
@@ -183,7 +183,10 @@ async def send_device_message(
183183
) -> None:
184184
sender_user_id = requester.user.to_string()
185185

186-
set_tag("number_of_messages", len(messages))
186+
message_id = random_string(16)
187+
set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id)
188+
189+
log_kv({"number_of_to_device_messages": len(messages)})
187190
set_tag("sender", sender_user_id)
188191
local_messages = {}
189192
remote_messages = {} # type: Dict[str, Dict[str, Dict[str, JsonDict]]]
@@ -205,32 +208,35 @@ async def send_device_message(
205208
"content": message_content,
206209
"type": message_type,
207210
"sender": sender_user_id,
211+
"message_id": message_id,
208212
}
209213
for device_id, message_content in by_device.items()
210214
}
211215
if messages_by_device:
212216
local_messages[user_id] = messages_by_device
217+
log_kv(
218+
{
219+
"user_id": user_id,
220+
"device_id": list(messages_by_device),
221+
}
222+
)
213223
else:
214224
destination = get_domain_from_id(user_id)
215225
remote_messages.setdefault(destination, {})[user_id] = by_device
216226

217-
message_id = random_string(16)
218-
219227
context = get_active_span_text_map()
220228

221229
remote_edu_contents = {}
222230
for destination, messages in remote_messages.items():
223-
with start_active_span("to_device_for_user"):
224-
set_tag("destination", destination)
225-
remote_edu_contents[destination] = {
226-
"messages": messages,
227-
"sender": sender_user_id,
228-
"type": message_type,
229-
"message_id": message_id,
230-
"org.matrix.opentracing_context": json_encoder.encode(context),
231-
}
231+
log_kv({"destination": destination})
232+
remote_edu_contents[destination] = {
233+
"messages": messages,
234+
"sender": sender_user_id,
235+
"type": message_type,
236+
"message_id": message_id,
237+
"org.matrix.opentracing_context": json_encoder.encode(context),
238+
}
232239

233-
log_kv({"local_messages": local_messages})
234240
stream_id = await self.store.add_messages_to_device_inbox(
235241
local_messages, remote_edu_contents
236242
)
@@ -239,7 +245,6 @@ async def send_device_message(
239245
"to_device_key", stream_id, users=local_messages.keys()
240246
)
241247

242-
log_kv({"remote_messages": remote_messages})
243248
if self.federation_sender:
244249
for destination in remote_messages.keys():
245250
# Enqueue a new federation transaction to send the new

synapse/handlers/sync.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from synapse.api.filtering import FilterCollection
2525
from synapse.events import EventBase
2626
from synapse.logging.context import current_context
27+
from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, start_active_span
2728
from synapse.push.clientformat import format_push_rules_for_user
2829
from synapse.storage.roommember import MemberSummary
2930
from synapse.storage.state import StateFilter
@@ -340,7 +341,14 @@ async def current_sync_for_user(
340341
full_state: bool = False,
341342
) -> SyncResult:
342343
"""Get the sync for client needed to match what the server has now."""
343-
return await self.generate_sync_result(sync_config, since_token, full_state)
344+
with start_active_span("current_sync_for_user"):
345+
log_kv({"since_token": since_token})
346+
sync_result = await self.generate_sync_result(
347+
sync_config, since_token, full_state
348+
)
349+
350+
set_tag(SynapseTags.SYNC_RESULT, bool(sync_result))
351+
return sync_result
344352

345353
async def push_rules_for_user(self, user: UserID) -> JsonDict:
346354
user_id = user.to_string()
@@ -964,6 +972,7 @@ async def generate_sync_result(
964972
# to query up to a given point.
965973
# Always use the `now_token` in `SyncResultBuilder`
966974
now_token = self.event_sources.get_current_token()
975+
log_kv({"now_token": now_token})
967976

968977
logger.debug(
969978
"Calculating sync response for %r between %s and %s",
@@ -1225,6 +1234,13 @@ async def _generate_sync_entry_for_to_device(
12251234
user_id, device_id, since_stream_id, now_token.to_device_key
12261235
)
12271236

1237+
for message in messages:
1238+
# We pop here as we shouldn't be sending the message ID down
1239+
# `/sync`
1240+
message_id = message.pop("message_id", None)
1241+
if message_id:
1242+
set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id)
1243+
12281244
logger.debug(
12291245
"Returning %d to-device messages between %d and %d (current token: %d)",
12301246
len(messages),

synapse/handlers/typing.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919

2020
from synapse.api.errors import AuthError, ShadowBanError, SynapseError
2121
from synapse.appservice import ApplicationService
22-
from synapse.metrics.background_process_metrics import run_as_background_process
22+
from synapse.metrics.background_process_metrics import (
23+
run_as_background_process,
24+
wrap_as_background_process,
25+
)
2326
from synapse.replication.tcp.streams import TypingStream
2427
from synapse.types import JsonDict, Requester, UserID, get_domain_from_id
2528
from synapse.util.caches.stream_change_cache import StreamChangeCache
@@ -86,6 +89,7 @@ def _reset(self) -> None:
8689
self._member_last_federation_poke = {}
8790
self.wheel_timer = WheelTimer(bucket_size=5000)
8891

92+
@wrap_as_background_process("typing._handle_timeouts")
8993
def _handle_timeouts(self) -> None:
9094
logger.debug("Checking for typing timeouts")
9195

synapse/logging/opentracing.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,14 @@ def report_span(self, span):
259259
logger = logging.getLogger(__name__)
260260

261261

262+
class SynapseTags:
263+
# The message ID of any to_device message processed
264+
TO_DEVICE_MESSAGE_ID = "to_device.message_id"
265+
266+
# Whether the sync response has new data to be returned to the client.
267+
SYNC_RESULT = "sync.new_data"
268+
269+
262270
# Block everything by default
263271
# A regex which matches the server_names to expose traces for.
264272
# None means 'block everything'.

synapse/notifier.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from synapse.events import EventBase
4040
from synapse.handlers.presence import format_user_presence_state
4141
from synapse.logging.context import PreserveLoggingContext
42+
from synapse.logging.opentracing import log_kv, start_active_span
4243
from synapse.logging.utils import log_function
4344
from synapse.metrics import LaterGauge
4445
from synapse.streams.config import PaginationConfig
@@ -136,6 +137,15 @@ def notify(
136137
self.last_notified_ms = time_now_ms
137138
noify_deferred = self.notify_deferred
138139

140+
log_kv(
141+
{
142+
"notify": self.user_id,
143+
"stream": stream_key,
144+
"stream_id": stream_id,
145+
"listeners": self.count_listeners(),
146+
}
147+
)
148+
139149
users_woken_by_stream_counter.labels(stream_key).inc()
140150

141151
with PreserveLoggingContext():
@@ -404,6 +414,13 @@ def on_new_event(
404414
with Measure(self.clock, "on_new_event"):
405415
user_streams = set()
406416

417+
log_kv(
418+
{
419+
"waking_up_explicit_users": len(users),
420+
"waking_up_explicit_rooms": len(rooms),
421+
}
422+
)
423+
407424
for user in users:
408425
user_stream = self.user_to_user_stream.get(str(user))
409426
if user_stream is not None:
@@ -476,21 +493,45 @@ async def wait_for_events(
476493
(end_time - now) / 1000.0,
477494
self.hs.get_reactor(),
478495
)
479-
with PreserveLoggingContext():
480-
await listener.deferred
496+
497+
with start_active_span("wait_for_events.deferred"):
498+
log_kv(
499+
{
500+
"wait_for_events": "sleep",
501+
"token": prev_token,
502+
}
503+
)
504+
505+
with PreserveLoggingContext():
506+
await listener.deferred
507+
508+
log_kv(
509+
{
510+
"wait_for_events": "woken",
511+
"token": user_stream.current_token,
512+
}
513+
)
481514

482515
current_token = user_stream.current_token
483516

484517
result = await callback(prev_token, current_token)
518+
log_kv(
519+
{
520+
"wait_for_events": "result",
521+
"result": bool(result),
522+
}
523+
)
485524
if result:
486525
break
487526

488527
# Update the prev_token to the current_token since nothing
489528
# has happened between the old prev_token and the current_token
490529
prev_token = current_token
491530
except defer.TimeoutError:
531+
log_kv({"wait_for_events": "timeout"})
492532
break
493533
except defer.CancelledError:
534+
log_kv({"wait_for_events": "cancelled"})
494535
break
495536

496537
if result is None:

0 commit comments

Comments
 (0)