Skip to content

[BUG]: Metrics and Traces are not Correlated #13366

Open
@shriyanskapoor

Description

@shriyanskapoor

Tracer Version(s)

3.5.1

Python Version(s)

3.12

Pip Version(s)

25.0.1

Bug Report

Setup:

  • use opentelemetry-api for manual instrumentation
  • use the ddtrace SDK for auto-instrumentation and also as the SDK layer for opentelemetry-api, sending traces via UDS (Unix domain socket)
  • use ddtrace's runtime metrics
  • use opentelemetry-api + opentelemetry-sdk to create metrics and send them to the Datadog agent via gRPC

Everything works fine, except that the custom metrics sent to Datadog aren't connected to any trace or log whatsoever. I even tried manually adding the context as well as attributes, hoping something would work, but no luck:

from opentelemetry.context import Context, get_current
from opentelemetry.metrics import Counter, Histogram
from opentelemetry.trace import get_current_span

with tracer.start_as_current_span(...
    queue_poll_counter: Counter = meter.create_counter(
        name="compliance.sqs.queue.polls",
        description="Number of times the SQS queue was polled",
        unit="polls",
    )

    span = get_current_span()
    trace_id = format(span.get_span_context().trace_id, "032x")
    span_id = format(span.get_span_context().span_id, "016x")
    
    queue_poll_counter.add(
        1,
        attributes={
            "queue_url": self.configs.queue_url,
            "trace_id": trace_id,
            "span_id": span_id,
            "dd.trace_id": trace_id,
            "dd.span_id": span_id,
            "dd.env": os.environ.get("DD_ENV"),
            "dd.version": os.environ.get("DD_VERSION"),
            "dd.service": os.environ.get("DD_SERVICE"),
        },
        context=span.get_span_context(), # or even get_current() from opentelemetry.context
    )

Can anyone tell me how to connect these metrics (which otherwise appear just fine) to the existing or active trace/span? Below is my entire observability (o11y) setup.

Reproduction Code

configure_env.py

import os

# Deployment environment, resolved once at import time: an explicit ENV wins,
# otherwise the text after the last "-" in NAMESPACE is used, otherwise "test".
current_environment = os.environ.get("ENV") or (
    os.environ.get("NAMESPACE", "").rpartition("-")[2] or "test"
)


def configure_env(debug: bool = False, enable_runtime_metrics: bool = True):
    """
    Export the DD_* environment variables for the opentelemetry API and the datadog sdk.

    Call this before setup_autoinstrumentation() and configure_tracing().

    Args:
        debug: written (lowercased) to DD_TRACE_DEBUG
        enable_runtime_metrics: written (lowercased) to DD_RUNTIME_METRICS_ENABLED
    """

    def _as_flag(value: bool) -> str:
        # Flags are exported as lowercase text, e.g. True -> "true".
        return str(value).lower()

    # Service identity, derived from APP_NAME / VERSION and the resolved environment.
    service_identity = {
        "DD_SERVICE": os.getenv("APP_NAME", "default-service-name"),
        "DD_VERSION": os.getenv("VERSION", "1"),
        "DD_ENV": current_environment,
    }

    tracer_settings = {
        # Allow usage of the opentelemetry API on top of the dd sdk;
        # configure_tracing() must be called after this.
        # https://ddtrace.readthedocs.io/en/stable/api.html#opentelemetry-api
        "DD_TRACE_OTEL_ENABLED": "true",
        "DD_TRACE_AGENT_URL": "unix:///var/run/datadog/apm.socket",
        # Enable auto-instrumentation of metrics (the dd sdk collects runtime metrics);
        # the alternative is calling RuntimeMetrics.enable() manually.
        "DD_RUNTIME_METRICS_ENABLED": _as_flag(enable_runtime_metrics),
        "DD_TRACE_DEBUG": _as_flag(debug),
        "DD_APPSEC_ENABLED": "true",
        "DD_BOTOCORE_DISTRIBUTED_TRACING": "true",
    }

    os.environ.update({**service_identity, **tracer_settings})

configure_autoinstrumentation.py

import os

from aaa_observability.configure_env import configure_env


def configure_autoinstrumentation(
    enable_runtime_metrics: bool = True,
    auto_logs_injection: bool = True,
    debug: bool = False,
):
    """
    Turn on Datadog's automatic instrumentation.

    **Call this method at the top of your entry point, before importing anything else.**

    - Lets Datadog APM trace the application automatically, without manual instrumentation
    - Also instruments the application to collect some runtime metrics and send them
      to the dd agent (enable_runtime_metrics)
    - Also injects dd context into logs with the proper format (auto_logs_injection)
    - For manual instrumentation of both tracing and metrics, use the opentelemetry API instead
    """

    # configure_env() runs first, so the DD_* variables are exported before
    # ddtrace.auto is imported below.
    configure_env(debug=debug, enable_runtime_metrics=enable_runtime_metrics)

    import ddtrace.auto  # noqa: F401

    if auto_logs_injection:
        from aaa_observability.configure_logging import configure_logging

        configure_logging()

    import logging  # noqa: F401

    # Environment variables echoed in the completion log line, in this order.
    reported_names = (
        "DD_SERVICE",
        "DD_VERSION",
        "DD_ENV",
        "DD_TRACE_OTEL_ENABLED",
        "DD_TRACE_AGENT_URL",
        "DD_RUNTIME_METRICS_ENABLED",
        "DD_TRACE_DEBUG",
        "DD_APPSEC_ENABLED",
        "DD_BOTOCORE_DISTRIBUTED_TRACING",
        "NAMESPACE",
    )
    env_vars = {name: os.getenv(name) for name in reported_names}

    logging.getLogger(__name__).log(
        logging.INFO,
        f"setup_autoinstrumentation complete. Environment variables: {env_vars}",
    )

configure_tracing.py

from opentelemetry.trace import set_tracer_provider


def configure_tracing():
    """
    Make the opentelemetry API work with the datadog sdk by registering
    ddtrace's TracerProvider as the opentelemetry tracer provider.
    """

    # Imported inside the function, so ddtrace.opentelemetry is only loaded
    # when this function is actually called.
    from ddtrace.opentelemetry import TracerProvider as DatadogTracerProvider

    # https://ddtrace.readthedocs.io/en/v2.21.6/api.html?highlight=set_tracer_provider#configuration
    datadog_provider = DatadogTracerProvider()
    set_tracer_provider(datadog_provider)

configure_metrics.py

import os
from typing import cast

from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.metrics import set_meter_provider
from opentelemetry.sdk.metrics import (
    Counter,
    Histogram,
    MeterProvider,
    ObservableCounter,
    ObservableGauge,
    ObservableUpDownCounter,
    UpDownCounter,
)
from opentelemetry.sdk.metrics.export import (
    AggregationTemporality,
    ConsoleMetricExporter,
    PeriodicExportingMetricReader,
)
from opentelemetry.sdk.resources import Resource
from opentelemetry.semconv.resource import ResourceAttributes

from aaa_observability.configure_env import current_environment
from aaa_observability.metric_exporters.noop_metric_exporter import (
    NoOpMetricExporter,
)
from aaa_observability.metric_exporters.types import MetricExporterType

# Aggregation temporality per instrument type; get_metric_exporter() passes this
# mapping to the OTLP exporter as `preferred_temporality`. Despite the name, only
# Counter, Histogram and ObservableCounter map to DELTA; the rest stay CUMULATIVE.
DELTA_TEMPORALITY: dict[type, AggregationTemporality] = dict(
    [
        (cast(type, Counter), AggregationTemporality.DELTA),
        (cast(type, UpDownCounter), AggregationTemporality.CUMULATIVE),
        (cast(type, Histogram), AggregationTemporality.DELTA),
        (cast(type, ObservableCounter), AggregationTemporality.DELTA),
        (cast(type, ObservableUpDownCounter), AggregationTemporality.CUMULATIVE),
        (cast(type, ObservableGauge), AggregationTemporality.CUMULATIVE),
    ]
)


def get_metric_exporter(exporter_type: MetricExporterType, endpoint: str | None = None):
    """
    Build the metric exporter that matches ``exporter_type``.

    Args:
        exporter_type: Which exporter to build (NOOP, CONSOLE or OTLP)
        endpoint: Optional endpoint URL, only used by the OTLP exporter; when
            empty or None, "http://datadog-agent.datadog:4317" is used

    Returns:
        A new instance of the selected metric exporter

    Raises:
        ValueError: if ``exporter_type`` is none of the supported types
    """
    if exporter_type == MetricExporterType.NOOP:
        return NoOpMetricExporter()

    if exporter_type == MetricExporterType.CONSOLE:
        return ConsoleMetricExporter()

    if exporter_type == MetricExporterType.OTLP:
        return OTLPMetricExporter(
            endpoint=endpoint if endpoint else "http://datadog-agent.datadog:4317",
            preferred_temporality=DELTA_TEMPORALITY,
        )

    raise ValueError(f"Unsupported metric exporter type: {exporter_type}")


def configure_metrics(
    export_interval_millis: float | None = 5000,
    exporter_type: MetricExporterType = MetricExporterType.OTLP,
    endpoint: str | None = None,
):
    """
    Configure opentelemetry metrics so that manually instrumented metrics are
    exported periodically (to the datadog agent when the OTLP exporter is used).
    read_more: https://docs.datadoghq.com/metrics/open_telemetry/otlp_metric_types/?tab=sum

    Args:
        export_interval_millis: How often to export metrics, in milliseconds
        exporter_type: Type of exporter to use (NOOP, CONSOLE, OTLP)
        endpoint: Optional endpoint URL for the OTLP exporter
    """

    exporter = get_metric_exporter(exporter_type, endpoint)

    # Resource attributes attached to the meter provider: service name/version
    # come from APP_NAME / VERSION, the namespace falls back to the environment.
    resource_attributes = {
        ResourceAttributes.SERVICE_NAME: os.getenv("APP_NAME", "default-service-name"),
        ResourceAttributes.SERVICE_VERSION: os.getenv("VERSION", "1"),
        ResourceAttributes.DEPLOYMENT_ENVIRONMENT: current_environment,
        ResourceAttributes.SERVICE_NAMESPACE: os.getenv("NAMESPACE", current_environment),
    }
    resource = Resource.create(attributes=resource_attributes)

    periodic_reader = PeriodicExportingMetricReader(
        exporter=exporter,
        export_interval_millis=export_interval_millis,
    )

    # Register the provider with the opentelemetry API.
    set_meter_provider(
        MeterProvider(metric_readers=[periodic_reader], resource=resource)
    )

And I set everything up like so:

from aaa_observability.metric_exporters.types import MetricExporterType


def setup(
    debug: bool = False,
    logs_auto_logs_injection: bool = True,
    metrics_enable_runtime_metrics: bool = True,
    metrics_export_interval_millis: float | None = 5_000,
    metrics_exporter_type: MetricExporterType = MetricExporterType.OTLP,
    metrics_exporter_endpoint: str | None = None,
):
    """
    Set up observability for the application.

    Run this function at the top of your entry point, before importing any
    3rd party library. It runs, in order: configure_autoinstrumentation(),
    configure_tracing() and configure_metrics().

    Ensure the following environment variables are set:
    - APP_NAME
    - VERSION
    - NAMESPACE
    - ENV

    Args:
        debug: forwarded to configure_autoinstrumentation(debug=...)
        logs_auto_logs_injection: forwarded to
            configure_autoinstrumentation(auto_logs_injection=...)
        metrics_enable_runtime_metrics: forwarded to
            configure_autoinstrumentation(enable_runtime_metrics=...)
        metrics_export_interval_millis: forwarded to
            configure_metrics(export_interval_millis=...)
        metrics_exporter_type: forwarded to configure_metrics(exporter_type=...)
        metrics_exporter_endpoint: forwarded to configure_metrics(endpoint=...)
    """
    # The imports below are deliberately local and interleaved with the calls,
    # so isort is switched off for this section to keep their order intact.
    # isort: off
    from aaa_observability.configure_autoinstrumentation import (
        configure_autoinstrumentation,
    )

    # Autoinstrumentation goes first: its docstring asks for it to be called
    # before anything else is imported.
    configure_autoinstrumentation(
        debug=debug,
        enable_runtime_metrics=metrics_enable_runtime_metrics,
        auto_logs_injection=logs_auto_logs_injection,
    )

    from aaa_observability.configure_tracing import configure_tracing  # noqa: E402

    # Register the tracer provider for the opentelemetry API.
    configure_tracing()

    from aaa_observability.configure_metrics import configure_metrics  # noqa: E402

    # Finally register the meter provider with the chosen exporter and interval.
    configure_metrics(
        export_interval_millis=metrics_export_interval_millis,
        exporter_type=metrics_exporter_type,
        endpoint=metrics_exporter_endpoint,
    )
    # isort: on

Error Logs

No response

Libraries in Use

dependencies = [
    "ddtrace>=3.5.1,<4.0",
    "opentelemetry-api>=1.30.0,<2.0",
    "opentelemetry-exporter-otlp>=1.32.1,<2.0",
    "opentelemetry-sdk>=1.30.0,<2.0",
    "pydantic>=2.9.2,<3.0",
    "python-json-logger>=2.0.7,<4.0.0",
]

Operating System

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions