Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit c595f33

Browse files
richvdhH-Shay
authored andcommitted
Improve logging and opentracing for to-device message handling (#14598)
A batch of changes intended to make it easier to trace to-device messages through the system. The intention here is that a client can set a property org.matrix.msgid in any to-device message it sends. That ID is then included in any tracing or logging related to the message. (Suggestions as to where this field should be documented welcome. I'm not enthusiastic about speccing it - it's very much an optional extra to help with debugging.) I've also generally improved the data we send to opentracing for these messages.
1 parent 41a5ec3 commit c595f33

File tree

10 files changed

+136
-46
lines changed

10 files changed

+136
-46
lines changed

changelog.d/14598.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve opentracing and logging for to-device message handling.

synapse/api/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,9 @@ class EventContentFields:
230230
# The authorising user for joining a restricted room.
231231
AUTHORISING_USER: Final = "join_authorised_via_users_server"
232232

233+
# an unspecced field added to to-device messages to identify them uniquely-ish
234+
TO_DEVICE_MSGID: Final = "org.matrix.msgid"
235+
233236

234237
class RoomTypes:
235238
"""Understood values of the room_type field of m.room.create events."""

synapse/federation/sender/per_destination_queue.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,7 @@ async def _get_to_device_message_edus(self, limit: int) -> Tuple[List[Edu], int]
641641
if not message_id:
642642
continue
643643

644-
set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id)
644+
set_tag(SynapseTags.TO_DEVICE_EDU_ID, message_id)
645645

646646
edus = [
647647
Edu(

synapse/handlers/appservice.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -578,9 +578,6 @@ async def _get_to_device_messages(
578578
device_id,
579579
), messages in recipient_device_to_messages.items():
580580
for message_json in messages:
581-
# Remove 'message_id' from the to-device message, as it's an internal ID
582-
message_json.pop("message_id", None)
583-
584581
message_payload.append(
585582
{
586583
"to_user_id": user_id,

synapse/handlers/devicemessage.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import logging
1616
from typing import TYPE_CHECKING, Any, Dict
1717

18-
from synapse.api.constants import EduTypes, ToDeviceEventTypes
18+
from synapse.api.constants import EduTypes, EventContentFields, ToDeviceEventTypes
1919
from synapse.api.errors import SynapseError
2020
from synapse.api.ratelimiting import Ratelimiter
2121
from synapse.logging.context import run_in_background
@@ -216,14 +216,24 @@ async def send_device_message(
216216
"""
217217
sender_user_id = requester.user.to_string()
218218

219-
message_id = random_string(16)
220-
set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id)
221-
222-
log_kv({"number_of_to_device_messages": len(messages)})
223-
set_tag("sender", sender_user_id)
219+
set_tag(SynapseTags.TO_DEVICE_TYPE, message_type)
220+
set_tag(SynapseTags.TO_DEVICE_SENDER, sender_user_id)
224221
local_messages = {}
225222
remote_messages: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
226223
for user_id, by_device in messages.items():
224+
# add an opentracing log entry for each message
225+
for device_id, message_content in by_device.items():
226+
log_kv(
227+
{
228+
"event": "send_to_device_message",
229+
"user_id": user_id,
230+
"device_id": device_id,
231+
EventContentFields.TO_DEVICE_MSGID: message_content.get(
232+
EventContentFields.TO_DEVICE_MSGID
233+
),
234+
}
235+
)
236+
227237
# Ratelimit local cross-user key requests by the sending device.
228238
if (
229239
message_type == ToDeviceEventTypes.RoomKeyRequest
@@ -233,6 +243,7 @@ async def send_device_message(
233243
requester, (sender_user_id, requester.device_id)
234244
)
235245
if not allowed:
246+
log_kv({"message": f"dropping key requests to {user_id}"})
236247
logger.info(
237248
"Dropping room_key_request from %s to %s due to rate limit",
238249
sender_user_id,
@@ -247,18 +258,11 @@ async def send_device_message(
247258
"content": message_content,
248259
"type": message_type,
249260
"sender": sender_user_id,
250-
"message_id": message_id,
251261
}
252262
for device_id, message_content in by_device.items()
253263
}
254264
if messages_by_device:
255265
local_messages[user_id] = messages_by_device
256-
log_kv(
257-
{
258-
"user_id": user_id,
259-
"device_id": list(messages_by_device),
260-
}
261-
)
262266
else:
263267
destination = get_domain_from_id(user_id)
264268
remote_messages.setdefault(destination, {})[user_id] = by_device
@@ -267,7 +271,11 @@ async def send_device_message(
267271

268272
remote_edu_contents = {}
269273
for destination, messages in remote_messages.items():
270-
log_kv({"destination": destination})
274+
# The EDU contains a "message_id" property which is used for
275+
# idempotence. Make up a random one.
276+
message_id = random_string(16)
277+
log_kv({"destination": destination, "message_id": message_id})
278+
271279
remote_edu_contents[destination] = {
272280
"messages": messages,
273281
"sender": sender_user_id,

synapse/handlers/sync.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,20 @@
3131
import attr
3232
from prometheus_client import Counter
3333

34-
from synapse.api.constants import EventTypes, Membership
34+
from synapse.api.constants import EventContentFields, EventTypes, Membership
3535
from synapse.api.filtering import FilterCollection
3636
from synapse.api.presence import UserPresenceState
3737
from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
3838
from synapse.events import EventBase
3939
from synapse.handlers.relations import BundledAggregations
4040
from synapse.logging.context import current_context
41-
from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, start_active_span
41+
from synapse.logging.opentracing import (
42+
SynapseTags,
43+
log_kv,
44+
set_tag,
45+
start_active_span,
46+
trace,
47+
)
4248
from synapse.push.clientformat import format_push_rules_for_user
4349
from synapse.storage.databases.main.event_push_actions import RoomNotifCounts
4450
from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
@@ -1586,6 +1592,7 @@ async def _generate_sync_entry_for_device_list(
15861592
else:
15871593
return DeviceListUpdates()
15881594

1595+
@trace
15891596
async def _generate_sync_entry_for_to_device(
15901597
self, sync_result_builder: "SyncResultBuilder"
15911598
) -> None:
@@ -1605,11 +1612,16 @@ async def _generate_sync_entry_for_to_device(
16051612
)
16061613

16071614
for message in messages:
1608-
# We pop here as we shouldn't be sending the message ID down
1609-
# `/sync`
1610-
message_id = message.pop("message_id", None)
1611-
if message_id:
1612-
set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id)
1615+
log_kv(
1616+
{
1617+
"event": "to_device_message",
1618+
"sender": message["sender"],
1619+
"type": message["type"],
1620+
EventContentFields.TO_DEVICE_MSGID: message["content"].get(
1621+
EventContentFields.TO_DEVICE_MSGID
1622+
),
1623+
}
1624+
)
16131625

16141626
logger.debug(
16151627
"Returning %d to-device messages between %d and %d (current token: %d)",

synapse/logging/opentracing.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,8 +292,15 @@ def report_span(self, span: "opentracing.Span") -> None:
292292

293293

294294
class SynapseTags:
295-
# The message ID of any to_device message processed
296-
TO_DEVICE_MESSAGE_ID = "to_device.message_id"
295+
# The message ID of any to_device EDU processed
296+
TO_DEVICE_EDU_ID = "to_device.edu_id"
297+
298+
# Details about to-device messages
299+
TO_DEVICE_TYPE = "to_device.type"
300+
TO_DEVICE_SENDER = "to_device.sender"
301+
TO_DEVICE_RECIPIENT = "to_device.recipient"
302+
TO_DEVICE_RECIPIENT_DEVICE = "to_device.recipient_device"
303+
TO_DEVICE_MSGID = "to_device.msgid" # client-generated ID
297304

298305
# Whether the sync response has new data to be returned to the client.
299306
SYNC_RESULT = "sync.new_data"

synapse/rest/client/sendtodevice.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ def __init__(self, hs: "HomeServer"):
4646
def on_PUT(
4747
self, request: SynapseRequest, message_type: str, txn_id: str
4848
) -> Awaitable[Tuple[int, JsonDict]]:
49-
set_tag("message_type", message_type)
5049
set_tag("txn_id", txn_id)
5150
return self.txns.fetch_or_execute_request(
5251
request, self._put, request, message_type, txn_id

synapse/storage/databases/main/deviceinbox.py

Lines changed: 75 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,15 @@
2626
cast,
2727
)
2828

29+
from synapse.api.constants import EventContentFields
2930
from synapse.logging import issue9533_logger
30-
from synapse.logging.opentracing import log_kv, set_tag, trace
31+
from synapse.logging.opentracing import (
32+
SynapseTags,
33+
log_kv,
34+
set_tag,
35+
start_active_span,
36+
trace,
37+
)
3138
from synapse.replication.tcp.streams import ToDeviceStream
3239
from synapse.storage._base import SQLBaseStore, db_to_json
3340
from synapse.storage.database import (
@@ -397,6 +404,17 @@ def get_device_messages_txn(
397404
(recipient_user_id, recipient_device_id), []
398405
).append(message_dict)
399406

407+
# start a new span for each message, so that we can tag each separately
408+
with start_active_span("get_to_device_message"):
409+
set_tag(SynapseTags.TO_DEVICE_TYPE, message_dict["type"])
410+
set_tag(SynapseTags.TO_DEVICE_SENDER, message_dict["sender"])
411+
set_tag(SynapseTags.TO_DEVICE_RECIPIENT, recipient_user_id)
412+
set_tag(SynapseTags.TO_DEVICE_RECIPIENT_DEVICE, recipient_device_id)
413+
set_tag(
414+
SynapseTags.TO_DEVICE_MSGID,
415+
message_dict["content"].get(EventContentFields.TO_DEVICE_MSGID),
416+
)
417+
400418
if limit is not None and rowcount == limit:
401419
# We ended up bumping up against the message limit. There may be more messages
402420
# to retrieve. Return what we have, as well as the last stream position that
@@ -678,12 +696,35 @@ def add_messages_txn(
678696
],
679697
)
680698

681-
if remote_messages_by_destination:
682-
issue9533_logger.debug(
683-
"Queued outgoing to-device messages with stream_id %i for %s",
684-
stream_id,
685-
list(remote_messages_by_destination.keys()),
686-
)
699+
for destination, edu in remote_messages_by_destination.items():
700+
if issue9533_logger.isEnabledFor(logging.DEBUG):
701+
issue9533_logger.debug(
702+
"Queued outgoing to-device messages with "
703+
"stream_id %i, EDU message_id %s, type %s for %s: %s",
704+
stream_id,
705+
edu["message_id"],
706+
edu["type"],
707+
destination,
708+
[
709+
f"{user_id}/{device_id} (msgid "
710+
f"{msg.get(EventContentFields.TO_DEVICE_MSGID)})"
711+
for (user_id, messages_by_device) in edu["messages"].items()
712+
for (device_id, msg) in messages_by_device.items()
713+
],
714+
)
715+
716+
for (user_id, messages_by_device) in edu["messages"].items():
717+
for (device_id, msg) in messages_by_device.items():
718+
with start_active_span("store_outgoing_to_device_message"):
719+
set_tag(SynapseTags.TO_DEVICE_EDU_ID, edu["sender"])
720+
set_tag(SynapseTags.TO_DEVICE_EDU_ID, edu["message_id"])
721+
set_tag(SynapseTags.TO_DEVICE_TYPE, edu["type"])
722+
set_tag(SynapseTags.TO_DEVICE_RECIPIENT, user_id)
723+
set_tag(SynapseTags.TO_DEVICE_RECIPIENT_DEVICE, device_id)
724+
set_tag(
725+
SynapseTags.TO_DEVICE_MSGID,
726+
msg.get(EventContentFields.TO_DEVICE_MSGID),
727+
)
687728

688729
async with self._device_inbox_id_gen.get_next() as stream_id:
689730
now_ms = self._clock.time_msec()
@@ -801,7 +842,19 @@ def _add_messages_to_local_device_inbox_txn(
801842
# Only insert into the local inbox if the device exists on
802843
# this server
803844
device_id = row["device_id"]
804-
message_json = json_encoder.encode(messages_by_device[device_id])
845+
846+
with start_active_span("serialise_to_device_message"):
847+
msg = messages_by_device[device_id]
848+
set_tag(SynapseTags.TO_DEVICE_TYPE, msg["type"])
849+
set_tag(SynapseTags.TO_DEVICE_SENDER, msg["sender"])
850+
set_tag(SynapseTags.TO_DEVICE_RECIPIENT, user_id)
851+
set_tag(SynapseTags.TO_DEVICE_RECIPIENT_DEVICE, device_id)
852+
set_tag(
853+
SynapseTags.TO_DEVICE_MSGID,
854+
msg["content"].get(EventContentFields.TO_DEVICE_MSGID),
855+
)
856+
message_json = json_encoder.encode(msg)
857+
805858
messages_json_for_user[device_id] = message_json
806859

807860
if messages_json_for_user:
@@ -821,15 +874,20 @@ def _add_messages_to_local_device_inbox_txn(
821874
],
822875
)
823876

824-
issue9533_logger.debug(
825-
"Stored to-device messages with stream_id %i for %s",
826-
stream_id,
827-
[
828-
(user_id, device_id)
829-
for (user_id, messages_by_device) in local_by_user_then_device.items()
830-
for device_id in messages_by_device.keys()
831-
],
832-
)
877+
if issue9533_logger.isEnabledFor(logging.DEBUG):
878+
issue9533_logger.debug(
879+
"Stored to-device messages with stream_id %i: %s",
880+
stream_id,
881+
[
882+
f"{user_id}/{device_id} (msgid "
883+
f"{msg['content'].get(EventContentFields.TO_DEVICE_MSGID)})"
884+
for (
885+
user_id,
886+
messages_by_device,
887+
) in messages_by_user_then_device.items()
888+
for (device_id, msg) in messages_by_device.items()
889+
],
890+
)
833891

834892

835893
class DeviceInboxBackgroundUpdateStore(SQLBaseStore):

tests/handlers/test_appservice.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -765,7 +765,12 @@ def test_application_services_receive_bursts_of_to_device(self):
765765
fake_device_ids = [f"device_{num}" for num in range(number_of_messages - 1)]
766766
messages = {
767767
self.exclusive_as_user: {
768-
device_id: to_device_message_content for device_id in fake_device_ids
768+
device_id: {
769+
"type": "test_to_device_message",
770+
"sender": "@some:sender",
771+
"content": to_device_message_content,
772+
}
773+
for device_id in fake_device_ids
769774
}
770775
}
771776

0 commit comments

Comments
 (0)