Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 7c135b9

Browse files
committed
Easier to follow local vs remote span tracing
The `incoming-federation-request` vs `process-federation_request` was first introduced in #11870 - Span for remote trace: `incoming-federation-request` - `child_of` reference: `origin_span_context` - `follows_from` reference: `servlet_span` - Span for local trace: `process-federation-request` - `child_of` reference: `servlet_span` (by the nature of it being active) - `follows_from` reference: `incoming-federation-request`
1 parent 786dd9b commit 7c135b9

File tree

2 files changed

+99
-45
lines changed

2 files changed

+99
-45
lines changed

synapse/federation/transport/server/_base.py

+45-37
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
from synapse.logging.context import run_in_background
2828
from synapse.logging.tracing import (
2929
get_active_span,
30+
Link,
31+
create_non_recording_span,
3032
set_attribute,
3133
span_context_from_request,
3234
start_active_span,
@@ -313,55 +315,61 @@ async def new_func(
313315

314316
# if the origin is authenticated and whitelisted, use its span context
315317
# as the parent.
316-
context = None
318+
origin_span_context = None
317319
if origin and whitelisted_homeserver(origin):
318-
context = span_context_from_request(request)
319-
320-
if context:
321-
servlet_span = get_active_span()
322-
# a scope which uses the origin's context as a parent
323-
processing_start_time = time.time()
324-
scope = start_active_span_follows_from(
320+
origin_span_context = span_context_from_request(request)
321+
322+
if origin_span_context:
323+
local_servlet_span = get_active_span()
324+
# Create a span which uses the `origin_span_context` as a parent
325+
# so we can see how the incoming payload was processed while
326+
# we're looking at the outgoing trace. Since the parent is set
327+
# to a remote span (from the origin), it won't show up in the
328+
# local trace which is why we create another span below for the
329+
# local trace. A span can only have one parent so we have to
330+
# create two separate ones.
331+
remote_parent_span = start_active_span(
325332
"incoming-federation-request",
326-
child_of=context,
327-
contexts=(servlet_span,),
328-
start_time=processing_start_time,
333+
context=origin_span_context,
334+
# Cross-link back to the local trace so we can jump
335+
# to the incoming side from the remote origin trace.
336+
links=[Link(local_servlet_span.get_span_context())],
337+
)
338+
339+
# Create a local span to appear in the local trace
340+
local_parent_span = start_active_span(
341+
"process-federation-request",
342+
# Cross-link back to the remote outgoing trace so we jump over
343+
# there.
344+
links=[Link(remote_parent_span.get_span_context())],
329345
)
330346

331347
else:
332-
# just use our context as a parent
333-
scope = start_active_span(
334-
"incoming-federation-request",
348+
# Otherwise just use our local context as a parent
349+
local_parent_span = start_active_span(
350+
"process-federation-request",
335351
)
336352

337-
try:
338-
with scope:
339-
if origin and self.RATELIMIT:
340-
with ratelimiter.ratelimit(origin) as d:
341-
await d
342-
if request._disconnected:
343-
logger.warning(
344-
"client disconnected before we started processing "
345-
"request"
346-
)
347-
return None
348-
response = await func(
349-
origin, content, request.args, *args, **kwargs
353+
# Don't need to record anything for the remote
354+
remote_parent_span = create_non_recording_span()
355+
356+
with remote_parent_span, local_parent_span:
357+
if origin and self.RATELIMIT:
358+
with ratelimiter.ratelimit(origin) as d:
359+
await d
360+
if request._disconnected:
361+
logger.warning(
362+
"client disconnected before we started processing "
363+
"request"
350364
)
351-
else:
365+
return None
352366
response = await func(
353367
origin, content, request.args, *args, **kwargs
354368
)
355-
finally:
356-
# if we used the origin's context as the parent, add a new span using
357-
# the servlet span as a parent, so that we have a link
358-
if context:
359-
scope2 = start_active_span_follows_from(
360-
"process-federation_request",
361-
contexts=(scope.span,),
362-
start_time=processing_start_time,
369+
else:
370+
response = await func(
371+
origin, content, request.args, *args, **kwargs
363372
)
364-
scope2.close()
365373

366374
return response
367375

synapse/logging/tracing.py

+54-8
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ def set_fates(clotho, lachesis, atropos, father="Zues", mother="Themis"):
162162
than one caller? Will all of those calling functions have be in a context
163163
with an active span?
164164
"""
165+
from abc import ABC
165166
import contextlib
166167
import enum
167168
import inspect
@@ -204,7 +205,7 @@ def set_fates(clotho, lachesis, atropos, father="Zues", mother="Themis"):
204205

205206
# Helper class
206207

207-
# Always returns the fixed value given for any accessed property
208+
# This will always returns the fixed value given for any accessed property
208209
class _DummyLookup(object):
209210
def __init__(self, value):
210211
self.value = value
@@ -213,6 +214,21 @@ def __getattribute__(self, name):
213214
return object.__getattribute__(self, "value")
214215

215216

217+
class DummyLink(ABC):
218+
def __init__(self):
219+
self.not_implemented_message = (
220+
"opentelemetry wasn't imported so this is just a dummy link placeholder"
221+
)
222+
223+
@property
224+
def context(self):
225+
raise NotImplementedError(self.not_implemented_message)
226+
227+
@property
228+
def context(self):
229+
raise NotImplementedError(self.not_implemented_message)
230+
231+
216232
# These dependencies are optional so they can fail to import
217233
# and we
218234
try:
@@ -229,12 +245,13 @@ def __getattribute__(self, name):
229245
SpanKind = opentelemetry.trace.SpanKind
230246
SpanAttributes = opentelemetry.semconv.trace.SpanAttributes
231247
StatusCode = opentelemetry.trace.StatusCode
232-
248+
Link = opentelemetry.trace.Link
233249
except ImportError:
234250
opentelemetry = None # type: ignore[assignment]
235251
SpanKind = _DummyLookup(0)
236252
SpanAttributes = _DummyLookup("fake-attribute")
237253
StatusCode = _DummyLookup(0)
254+
Link = DummyLink
238255

239256

240257
logger = logging.getLogger(__name__)
@@ -435,6 +452,15 @@ def whitelisted_homeserver(destination: str) -> bool:
435452
# Start spans and scopes
436453

437454

455+
def create_non_recording_span():
456+
if opentelemetry is None:
457+
return contextlib.nullcontext() # type: ignore[unreachable]
458+
459+
return opentelemetry.trace.NonRecordingSpan(
460+
opentelemetry.trace.INVALID_SPAN_CONTEXT
461+
)
462+
463+
438464
def start_active_span(
439465
name: str,
440466
*,
@@ -446,11 +472,15 @@ def start_active_span(
446472
record_exception: bool = True,
447473
set_status_on_exception: bool = True,
448474
end_on_exit: bool = True,
475+
# For testing only
476+
tracer: Optional["opentelemetry.sdk.trace.TracerProvider"] = None,
449477
) -> Iterator["opentelemetry.trace.span.Span"]:
450478
if opentelemetry is None:
451479
return contextlib.nullcontext() # type: ignore[unreachable]
452480

453-
tracer = opentelemetry.trace.get_tracer(__name__)
481+
if tracer is None:
482+
tracer = opentelemetry.trace.get_tracer(__name__)
483+
454484
return tracer.start_as_current_span(
455485
name=name,
456486
context=context,
@@ -464,6 +494,8 @@ def start_active_span(
464494
)
465495

466496

497+
# TODO: I don't think we even need this function over the normal `start_active_span`.
498+
# The only difference is the `inherit_force_tracing` stuff.
467499
def start_active_span_follows_from(
468500
operation_name: str,
469501
contexts: Collection,
@@ -491,8 +523,21 @@ def start_active_span_follows_from(
491523
forced, the new span will also have tracing forced.
492524
tracer: override the opentracing tracer. By default the global tracer is used.
493525
"""
494-
# TODO
495-
pass
526+
if opentelemetry is None:
527+
return contextlib.nullcontext() # type: ignore[unreachable]
528+
529+
links = [opentelemetry.trace.Link(context) for context in contexts]
530+
span = start_active_span(
531+
name=operation_name,
532+
links=links,
533+
)
534+
535+
if inherit_force_tracing and any(
536+
is_context_forced_tracing(ctx) for ctx in contexts
537+
):
538+
force_tracing(span)
539+
540+
return span
496541

497542

498543
def start_active_span_from_edu(
@@ -529,7 +574,7 @@ def set_attribute(key: str, value: Union[str, bool, int, float]) -> None:
529574

530575
@ensure_active_span("set the status")
531576
def set_status(
532-
status: "opentelemetry.trace.StatusCode", exc: Optional(Exception)
577+
status: "opentelemetry.trace.StatusCode", exc: Optional[Exception]
533578
) -> None:
534579
"""Sets a tag on the active span"""
535580
active_span = get_active_span()
@@ -825,15 +870,16 @@ def trace_servlet(
825870
}
826871

827872
request_name = request.request_metrics.name
828-
context = span_context_from_request(request) if extract_context else None
873+
span_context = span_context_from_request(request) if extract_context else None
829874

830875
# we configure the scope not to finish the span immediately on exit, and instead
831876
# pass the span into the SynapseRequest, which will finish it once we've finished
832877
# sending the response to the client.
878+
833879
with start_active_span(
834880
request_name,
835881
kind=SpanKind.SERVER,
836-
context=context,
882+
context=span_context,
837883
end_on_exit=False,
838884
) as span:
839885
request.set_tracing_span(span)

0 commit comments

Comments
 (0)