fix(llmobs): ensure langchain azure openai spans are not marked as duplicate llm spans (#14939)

Yun-Kim · Yun-Kim · commit 821a80fe8f9b · 2025-10-22T12:28:34.000-04:00
[MLOB-4230] This PR does 3 things: 1. (non-user facing) Updates our docker-compose and services.yml files to upgrade to the latest testagent version, as well as adding an env var `VCR_PROVIDER_MAP` value for the testagent configs. 2. (user-facing) Fixes the langchain integration such that azure openai calls are not marked as duplicate LLM spans (if the openai integration is enabled), and instead marks them as generic workflow spans. 3. (non-user facing) Adds langchain tests for calling Azure OpenAI. These require the testagent upgrade and the `VCR_PROVIDER_MAP` env var to allow the testagent vcr proxy to call the azure openai endpoint. We have logic in our langchain integration to mark specific LLM calls as generic workflow spans (instead of the default llm span) if we detect the corresponding integration (for the given provider, e.g. `openai`/`anthropic`) is also enabled and will result in a downstream LLM span. Our product experience breaks if multiple spans redundantly represent the same LLM call, and we were previously missing support for azure openai.     [MLOB-4230]: https://datadoghq.atlassian.net/browse/MLOB-4230?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ (cherry picked from commit 9f7d187)
diff --git a/.gitlab/services.yml b/.gitlab/services.yml
@@ -12,7 +12,7 @@
       DD_REMOTE_CONFIGURATION_REFRESH_INTERVAL: 5s
       DD_DOGSTATSD_NON_LOCAL_TRAFFIC: true
   testagent:
-    name: registry.ddbuild.io/images/mirror/dd-apm-test-agent/ddapm-test-agent:v1.34.0
+    name: registry.ddbuild.io/images/mirror/dd-apm-test-agent/ddapm-test-agent:v1.36.0
     alias: testagent
     variables:
       LOG_LEVEL: ERROR
@@ -26,6 +26,7 @@
       DD_DISABLE_ERROR_RESPONSES: true
       ENABLED_CHECKS: trace_content_length,trace_stall,meta_tracer_version_header,trace_count_header,trace_peer_service,trace_dd_service
       SNAPSHOT_IGNORED_ATTRS: span_id,trace_id,parent_id,duration,start,metrics.system.pid,metrics.system.process_id,metrics.process_id,meta.runtime-id,meta._dd.p.tid,meta.pathway.hash,metrics._dd.tracer_kr,meta._dd.parent_id,meta.kafka.cluster_id
+      VCR_PROVIDER_MAP: azure_openai=https://llmobs-test-resource.openai.azure.com/
   mongo:
     name: registry.ddbuild.io/images/mirror/mongo:6.0.5
     alias: mongo
diff --git a/ddtrace/llmobs/_integrations/langchain.py b/ddtrace/llmobs/_integrations/langchain.py
@@ -62,6 +62,7 @@
 ANTHROPIC_PROVIDER_NAME = "anthropic"
 BEDROCK_PROVIDER_NAME = "amazon_bedrock"
 OPENAI_PROVIDER_NAME = "openai"
+AZURE_OAI_PROVIDER_NAME = "azure"
 VERTEXAI_PROVIDER_NAME = "vertexai"
 GEMINI_PROVIDER_NAME = "google_palm"
 
@@ -189,7 +190,7 @@ def _llmobs_set_tags(
             # only the llm interface for Gemini will get instrumented
             elif model_provider.startswith(GEMINI_PROVIDER_NAME) and operation == "llm":
                 llmobs_integration = "google_generativeai"
-            elif model_provider.startswith(OPENAI_PROVIDER_NAME):
+            elif any(provider in model_provider for provider in (OPENAI_PROVIDER_NAME, AZURE_OAI_PROVIDER_NAME)):
                 llmobs_integration = "openai"
             elif operation == "chat" and model_provider.startswith(ANTHROPIC_PROVIDER_NAME):
                 llmobs_integration = "anthropic"
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -121,7 +121,7 @@ services:
         volumes:
           - ddagent:/tmp/ddagent:rw
     testagent:
-        image: ghcr.io/datadog/dd-apm-test-agent/ddapm-test-agent:v1.34.0
+        image: ghcr.io/datadog/dd-apm-test-agent/ddapm-test-agent:v1.36.0
         ports:
             - "127.0.0.1:9126:8126"
         volumes:
@@ -131,6 +131,7 @@ services:
             - LOG_LEVEL=WARNING
             - SNAPSHOT_DIR=/snapshots
             - VCR_CASSETTES_DIRECTORY=/cassettes
+            - VCR_PROVIDER_MAP=azure_openai=https://llmobs-test-resource.openai.azure.com/
             - SNAPSHOT_CI=0
             - DD_POOL_TRACE_CHECK_FAILURES=true
             - DD_DISABLE_ERROR_RESPONSES=true
diff --git a/releasenotes/notes/fix-llmobs-langchain-azure-openai-e0ea489aafba7ffd.yaml b/releasenotes/notes/fix-llmobs-langchain-azure-openai-e0ea489aafba7ffd.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    LLM Observability: Resolves an issue where the ``langchain`` integration would mark Azure OpenAI calls as llm spans even when the ``openai`` integration was enabled, resulting in duplicate llm spans for the same call.
+    The ``langchain`` integration now traces Azure OpenAI calls as workflow spans if there is an equivalent llm span from the ``openai`` integration.
diff --git a/tests/contrib/langchain/test_langchain_llmobs.py b/tests/contrib/langchain/test_langchain_llmobs.py
@@ -847,6 +847,11 @@ class TestTraceStructureWithLLMIntegrations(SubprocessTestCase):
         DD_API_KEY="<not-a-real-key>",
     )
 
+    azure_openai_env_config = dict(
+        OPENAI_API_VERSION="2024-12-01-preview",
+        AZURE_OPENAI_API_KEY=os.getenv("AZURE_OPENAI_API_KEY", "testing"),
+    )
+
     anthropic_env_config = dict(
         ANTHROPIC_API_KEY=os.getenv("ANTHROPIC_API_KEY", "testing"),
         DD_API_KEY="<not-a-real-key>",
@@ -891,6 +896,11 @@ def _call_openai_llm(OpenAI):
         llm = OpenAI(base_url="http://localhost:9126/vcr/openai")
         llm.invoke("Can you explain what Descartes meant by 'I think, therefore I am'?")
 
+    @staticmethod
+    def _call_azure_openai_chat(AzureChatOpenAI):
+        llm = AzureChatOpenAI(azure_endpoint="http://localhost:9126/vcr/azure_openai", deployment_name="gpt-4.1-mini")
+        llm.invoke("Can you explain what Descartes meant by 'I think, therefore I am'?")
+
     @staticmethod
     def _call_openai_embedding(OpenAIEmbeddings):
         embedding = OpenAIEmbeddings(base_url="http://localhost:9126/vcr/openai")
@@ -924,6 +934,15 @@ def test_llmobs_with_openai_enabled(self):
         self._call_openai_llm(OpenAI)
         self._assert_trace_structure_from_writer_call_args(["workflow", "llm"])
 
+    @run_in_subprocess(env_overrides=azure_openai_env_config)
+    def test_llmobs_with_openai_enabled_azure(self):
+        from langchain_openai import AzureChatOpenAI
+
+        patch(langchain=True, openai=True)
+        LLMObs.enable(ml_app="<ml-app-name>", integrations_enabled=False)
+        self._call_azure_openai_chat(AzureChatOpenAI)
+        self._assert_trace_structure_from_writer_call_args(["workflow", "llm"])
+
     @run_in_subprocess(env_overrides=openai_env_config)
     def test_llmobs_with_openai_enabled_non_ascii_value(self):
         """Regression test to ensure that non-ascii text values for workflow spans are not encoded."""
@@ -966,6 +985,16 @@ def test_llmobs_with_openai_disabled(self):
         self._call_openai_llm(OpenAI)
         self._assert_trace_structure_from_writer_call_args(["llm"])
 
+    @run_in_subprocess(env_overrides=azure_openai_env_config)
+    def test_llmobs_with_openai_disabled_azure(self):
+        from langchain_openai import AzureChatOpenAI
+
+        patch(langchain=True)
+
+        LLMObs.enable(ml_app="<ml-app-name>", integrations_enabled=False)
+        self._call_azure_openai_chat(AzureChatOpenAI)
+        self._assert_trace_structure_from_writer_call_args(["llm"])
+
     @run_in_subprocess(env_overrides=anthropic_env_config)
     def test_llmobs_with_anthropic_enabled(self):
         from langchain_anthropic import ChatAnthropic
diff --git a/tests/llmobs/llmobs_cassettes/azure_openai/azure_openai_openai_deployments_gpt-4.1-mini_chat_completions_api-version_2024-12-01-preview_post_5836d4fe.yaml b/tests/llmobs/llmobs_cassettes/azure_openai/azure_openai_openai_deployments_gpt-4.1-mini_chat_completions_api-version_2024-12-01-preview_post_5836d4fe.yaml
@@ -0,0 +1,109 @@
+interactions:
+- request:
+    body: '{"messages":[{"content":"Can you explain what Descartes meant by ''I think,
+      therefore I am''?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":false,"temperature":0.7}'
+    headers:
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept
+      : - application/json
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - gzip, deflate
+      ? !!python/object/apply:multidict._multidict.istr
+      - Connection
+      : - keep-alive
+      Content-Length:
+      - '172'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      ? !!python/object/apply:multidict._multidict.istr
+      - User-Agent
+      : - AzureOpenAI/Python 1.109.1
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Arch
+      : - arm64
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Async
+      : - 'false'
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Lang
+      : - python
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-OS
+      : - MacOS
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Package-Version
+      : - 1.109.1
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Runtime
+      : - CPython
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Runtime-Version
+      : - 3.11.13
+      ? !!python/object/apply:multidict._multidict.istr
+      - x-stainless-retry-count
+      : - '0'
+    method: POST
+    uri: https://llmobs-test-resource.openai.azure.com/openai/deployments/gpt-4.1-mini/chat/completions?api-version=2024-12-01-preview
+  response:
+    body:
+      string: "{\"choices\":[{\"content_filter_results\":{\"hate\":{\"filtered\":false,\"severity\":\"safe\"},\"protected_material_code\":{\"filtered\":false,\"detected\":false},\"protected_material_text\":{\"filtered\":false,\"detected\":false},\"self_harm\":{\"filtered\":false,\"severity\":\"safe\"},\"sexual\":{\"filtered\":false,\"severity\":\"safe\"},\"violence\":{\"filtered\":false,\"severity\":\"safe\"}},\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"annotations\":[],\"content\":\"Certainly!
+        The phrase **\\\"I think, therefore I am\\\"** (originally in Latin: *Cogito,
+        ergo sum*) was coined by the French philosopher Ren\xE9 Descartes. It appears
+        in his work *Discourse on the Method* (1637) and later in *Meditations on
+        First Philosophy* (1641).\\n\\n### What Descartes Meant:\\n\\n1. **Foundation
+        of Certainty:**  \\n   Descartes was searching for an undeniable foundation
+        for knowledge. He wanted to find something that could not be doubted, as many
+        beliefs could be mistaken.\\n\\n2. **Method of Doubt:**  \\n   He began by
+        doubting everything \u2014 the evidence of the senses, the existence of the
+        physical world, even mathematical truths \u2014 to see if anything remained
+        absolutely certain.\\n\\n3. **The Indubitable Truth:**  \\n   While doubting,
+        Descartes realized that the very act of doubting implied a thinking subject.
+        If he is doubting or thinking, then he must exist in some form to be doing
+        that thinking.\\n\\n4. **\\\"I think, therefore I am\\\":**  \\n   Therefore,
+        the one thing he could not doubt was that he exists as a thinking being. The
+        act of thinking itself proved his own existence. This statement became the
+        first principle in his philosophy.\\n\\n### In summary:\\n\\nDescartes meant
+        that the fact that you are consciously thinking is proof of your own existence.
+        Even if everything else is uncertain or illusory, the very experience of thought
+        confirms that there is a \\\"self\\\" doing the thinking. It\u2019s a foundational
+        claim about knowledge and existence.\\n\\nIf you want, I can also explain
+        how this idea influenced philosophy or its criticisms!\",\"refusal\":null,\"role\":\"assistant\"}}],\"created\":1760724000,\"id\":\"chatcmpl-CRj28rdRKqhnTBkcKCmXlz0vReldy\",\"model\":\"gpt-4.1-mini-2025-04-14\",\"object\":\"chat.completion\",\"prompt_filter_results\":[{\"prompt_index\":0,\"content_filter_results\":{\"hate\":{\"filtered\":false,\"severity\":\"safe\"},\"jailbreak\":{\"filtered\":false,\"detected\":false},\"self_harm\":{\"filtered\":false,\"severity\":\"safe\"},\"sexual\":{\"filtered\":false,\"severity\":\"safe\"},\"violence\":{\"filtered\":false,\"severity\":\"safe\"}}}],\"system_fingerprint\":\"fp_3dcd5944f5\",\"usage\":{\"completion_tokens\":342,\"completion_tokens_details\":{\"accepted_prediction_tokens\":0,\"audio_tokens\":0,\"reasoning_tokens\":0,\"rejected_prediction_tokens\":0},\"prompt_tokens\":24,\"prompt_tokens_details\":{\"audio_tokens\":0,\"cached_tokens\":0},\"total_tokens\":366}}\n"
+    headers:
+      Content-Length:
+      - '2780'
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 17 Oct 2025 18:00:13 GMT
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      apim-request-id:
+      - 381a6525-4f15-481a-8884-dd8ec1b0d6fc
+      azureml-model-session:
+      - d213-20251016082839
+      x-accel-buffering:
+      - 'no'
+      x-content-type-options:
+      - nosniff
+      x-ms-deployment-name:
+      - gpt-4.1-mini
+      x-ms-rai-invoked:
+      - 'true'
+      x-ms-region:
+      - East US 2
+      x-ratelimit-limit-requests:
+      - '250'
+      x-ratelimit-limit-tokens:
+      - '250000'
+      x-ratelimit-remaining-requests:
+      - '248'
+      x-ratelimit-remaining-tokens:
+      - '249979'
+      x-request-id:
+      - 8e52694a-22a0-42fa-9e2d-54539ebd2113
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/llmobs_cassettes/azure_openai/azure_openai_openai_deployments_gpt-4.1-mini_chat_completions_api-version_2024-12-01-preview_post_696c0585.yaml b/tests/llmobs/llmobs_cassettes/azure_openai/azure_openai_openai_deployments_gpt-4.1-mini_chat_completions_api-version_2024-12-01-preview_post_696c0585.yaml
@@ -0,0 +1,112 @@
+interactions:
+- request:
+    body: '{"messages":[{"content":"Can you explain what Descartes meant by ''I think,
+      therefore I am''?","role":"user"}],"model":null,"stream":false}'
+    headers:
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept
+      : - application/json
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - gzip, deflate, zstd
+      ? !!python/object/apply:multidict._multidict.istr
+      - Connection
+      : - keep-alive
+      Content-Length:
+      - '137'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      ? !!python/object/apply:multidict._multidict.istr
+      - User-Agent
+      : - langchain-partner-python-azure-openai
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Arch
+      : - arm64
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Async
+      : - 'false'
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Lang
+      : - python
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-OS
+      : - MacOS
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Package-Version
+      : - 1.109.1
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Raw-Response
+      : - 'true'
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Runtime
+      : - CPython
+      ? !!python/object/apply:multidict._multidict.istr
+      - X-Stainless-Runtime-Version
+      : - 3.11.13
+      ? !!python/object/apply:multidict._multidict.istr
+      - x-stainless-retry-count
+      : - '0'
+    method: POST
+    uri: https://llmobs-test-resource.openai.azure.com/openai/deployments/gpt-4.1-mini/chat/completions?api-version=2024-12-01-preview
+  response:
+    body:
+      string: "{\"choices\":[{\"content_filter_results\":{\"hate\":{\"filtered\":false,\"severity\":\"safe\"},\"protected_material_code\":{\"filtered\":false,\"detected\":false},\"protected_material_text\":{\"filtered\":false,\"detected\":false},\"self_harm\":{\"filtered\":false,\"severity\":\"safe\"},\"sexual\":{\"filtered\":false,\"severity\":\"safe\"},\"violence\":{\"filtered\":false,\"severity\":\"safe\"}},\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"annotations\":[],\"content\":\"Certainly!
+        The phrase **\\\"I think, therefore I am\\\"** (originally in Latin: *Cogito,
+        ergo sum*) was coined by the French philosopher Ren\xE9 Descartes. It is a
+        fundamental element of Western philosophy and appears in his work *Meditations
+        on First Philosophy* (1641).\\n\\n**What Descartes meant:**\\n\\n1. **Starting
+        point of certainty:** Descartes was seeking an indubitable foundation for
+        knowledge. He embarked on a method of radical doubt, questioning everything
+        that could possibly be doubted\u2014his senses, the physical world, even mathematical
+        truths.\\n\\n2. **The act of thinking proves existence:** In the process of
+        doubting, he realized that the very act of doubting or thinking implies a
+        thinker. If he is doubting, then he must be thinking. If he is thinking, then
+        he must exist. Thus, the fact that he thinks is proof that he exists.\\n\\n3.
+        **Existence is confirmed through self-awareness:** This statement establishes
+        the self as a thinking thing (*res cogitans*). Descartes is not saying \\\"I
+        am a body,\\\" or \\\"I exist in the physical world,\\\" but rather affirming
+        the existence of the self as a conscious being\u2014one that thinks, doubts,
+        understands, wills, imagines, and senses.\\n\\n4. **Foundation for knowledge:**
+        From this fundamental truth, Descartes hoped to build further knowledge about
+        the world, God, and existence, by basing it on something certain and clear:
+        the existence of the self as a thinking entity.\\n\\nIn summary, **\\\"I think,
+        therefore I am\\\" means that the very act of thinking is proof enough of
+        one's existence and is the first principle of philosophy that cannot be doubted.**\",\"refusal\":null,\"role\":\"assistant\"}}],\"created\":1760724017,\"id\":\"chatcmpl-CRj2PkKuSFXMkYWzr4xxvmv5TtlQ9\",\"model\":\"gpt-4.1-mini-2025-04-14\",\"object\":\"chat.completion\",\"prompt_filter_results\":[{\"prompt_index\":0,\"content_filter_results\":{\"hate\":{\"filtered\":false,\"severity\":\"safe\"},\"jailbreak\":{\"filtered\":false,\"detected\":false},\"self_harm\":{\"filtered\":false,\"severity\":\"safe\"},\"sexual\":{\"filtered\":false,\"severity\":\"safe\"},\"violence\":{\"filtered\":false,\"severity\":\"safe\"}}}],\"system_fingerprint\":\"fp_3dcd5944f5\",\"usage\":{\"completion_tokens\":354,\"completion_tokens_details\":{\"accepted_prediction_tokens\":0,\"audio_tokens\":0,\"reasoning_tokens\":0,\"rejected_prediction_tokens\":0},\"prompt_tokens\":24,\"prompt_tokens_details\":{\"audio_tokens\":0,\"cached_tokens\":0},\"total_tokens\":378}}\n"
+    headers:
+      Content-Length:
+      - '2822'
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 17 Oct 2025 18:00:28 GMT
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      apim-request-id:
+      - ea05cb68-e4e4-4876-b0de-cca3a2bf4c63
+      azureml-model-session:
+      - d213-20251016082839
+      x-accel-buffering:
+      - 'no'
+      x-content-type-options:
+      - nosniff
+      x-ms-deployment-name:
+      - gpt-4.1-mini
+      x-ms-rai-invoked:
+      - 'true'
+      x-ms-region:
+      - East US 2
+      x-ratelimit-limit-requests:
+      - '250'
+      x-ratelimit-limit-tokens:
+      - '250000'
+      x-ratelimit-remaining-requests:
+      - '247'
+      x-ratelimit-remaining-tokens:
+      - '249962'
+      x-request-id:
+      - 33717a5b-b7e0-48d3-9347-0d8df2e5a48d
+    status:
+      code: 200
+      message: OK
+version: 1

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +fixes:
 +  - |
 +    LLM Observability: Resolves an issue where the ``langchain`` integration would mark Azure OpenAI calls as llm spans even when the ``openai`` integration was enabled, resulting in duplicate llm spans for the same call.
 +    The ``langchain`` integration now traces Azure OpenAI calls as workflow spans if there is an equivalent llm span from the ``openai`` integration.