Commit 3a465d6

feat(openai): enable stream_usage when using default base URL and client (#33296)

1 parent: 0b51de4
12 files changed: +104 −17 lines

libs/langchain/tests/unit_tests/chat_models/test_base.py
1 addition, 1 deletion

@@ -168,7 +168,7 @@ def test_configurable() -> None:
         "store": None,
         "extra_body": None,
         "include_response_headers": False,
-        "stream_usage": False,
+        "stream_usage": True,
         "use_previous_response_id": False,
         "use_responses_api": None,
     },

libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py
1 addition, 1 deletion

@@ -168,7 +168,7 @@ def test_configurable() -> None:
         "store": None,
         "extra_body": None,
         "include_response_headers": False,
-        "stream_usage": False,
+        "stream_usage": True,
         "use_previous_response_id": False,
         "use_responses_api": None,
     },

libs/partners/openai/langchain_openai/chat_models/azure.py
19 additions, 0 deletions

@@ -615,6 +615,25 @@ def validate_environment(self) -> Self:
             or os.getenv("OPENAI_ORG_ID")
             or os.getenv("OPENAI_ORGANIZATION")
         )
+
+        # Enable stream_usage by default if using default base URL and client
+        if all(
+            getattr(self, key, None) is None
+            for key in (
+                "stream_usage",
+                "openai_proxy",
+                "openai_api_base",
+                "base_url",
+                "client",
+                "root_client",
+                "async_client",
+                "root_async_client",
+                "http_client",
+                "http_async_client",
+            )
+        ):
+            self.stream_usage = True
+
         # For backwards compatibility. Before openai v1, no distinction was made
         # between azure_endpoint and base_url (openai_api_base).
         openai_api_base = self.openai_api_base
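
Note that azure_endpoint is not among the attributes gated on above, so a standard Azure setup still gets the new default. A minimal sketch of the effect, assuming AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY are set in the environment (the deployment name and API version below are hypothetical):

from langchain_openai import AzureChatOpenAI

# No custom base URL, proxy, or client: the validator enables usage streaming.
llm = AzureChatOpenAI(azure_deployment="my-deployment", api_version="2024-06-01")
assert llm.stream_usage is True

# An explicit value (or any custom client/base URL) suppresses the default.
llm_off = AzureChatOpenAI(
    azure_deployment="my-deployment",
    api_version="2024-06-01",
    stream_usage=False,
)
assert llm_off.stream_usage is False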

libs/partners/openai/langchain_openai/chat_models/base.py
40 additions, 4 deletions

@@ -98,7 +98,13 @@
     is_basemodel_subclass,
 )
 from langchain_core.utils.utils import _build_model_kwargs, from_env, secret_from_env
-from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    SecretStr,
+    model_validator,
+)
 from pydantic.v1 import BaseModel as BaseModelV1
 from typing_extensions import Self

@@ -464,11 +470,19 @@ class BaseChatOpenAI(BaseChatModel):
     )
     """Timeout for requests to OpenAI completion API. Can be float, ``httpx.Timeout`` or
     None."""
-    stream_usage: bool = False
-    """Whether to include usage metadata in streaming output. If True, an additional
+    stream_usage: Optional[bool] = None
+    """Whether to include usage metadata in streaming output. If enabled, an additional
     message chunk will be generated during the stream including usage metadata.

+    This parameter is enabled unless ``openai_api_base`` is set or the model is
+    initialized with a custom client, as many chat completions APIs do not support
+    streaming token usage.
+
     .. versionadded:: 0.3.9
+
+    .. versionchanged:: 0.3.35
+
+        Enabled for default base URL and client.
     """
     max_retries: Optional[int] = None
     """Maximum number of retries to make when generating."""

@@ -746,6 +760,28 @@ def validate_environment(self) -> Self:
             or os.getenv("OPENAI_ORGANIZATION")
         )
         self.openai_api_base = self.openai_api_base or os.getenv("OPENAI_API_BASE")
+
+        # Enable stream_usage by default if using default base URL and client
+        if (
+            all(
+                getattr(self, key, None) is None
+                for key in (
+                    "stream_usage",
+                    "openai_proxy",
+                    "openai_api_base",
+                    "base_url",
+                    "client",
+                    "root_client",
+                    "async_client",
+                    "root_async_client",
+                    "http_client",
+                    "http_async_client",
+                )
+            )
+            and "OPENAI_BASE_URL" not in os.environ
+        ):
+            self.stream_usage = True
+
         client_params: dict = {
             "api_key": (
                 self.openai_api_key.get_secret_value() if self.openai_api_key else None

@@ -1050,7 +1086,7 @@ def _should_stream_usage(
         for source in stream_usage_sources:
             if isinstance(source, bool):
                 return source
-        return self.stream_usage
+        return self.stream_usage or False

     def _stream(
         self,
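
Taken together, these changes flip usage streaming on for out-of-the-box clients while preserving every existing override. A minimal sketch of the resulting behavior, assuming only OPENAI_API_KEY is set and no custom base URL, proxy, or client is configured (the local base URL below is hypothetical):

from langchain_openai import ChatOpenAI

# Default-configured client: validate_environment sets stream_usage to True.
llm = ChatOpenAI(model="gpt-4o-mini")
assert llm.stream_usage is True

# Streaming now yields a final chunk carrying token counts by default.
full = None
for chunk in llm.stream("hello"):
    full = chunk if full is None else full + chunk
print(full.usage_metadata)  # e.g. {'input_tokens': ..., 'output_tokens': ..., ...}

# Per-call overrides still take precedence via _should_stream_usage:
for chunk in llm.stream("hello", stream_usage=False):
    pass  # no usage chunk is emitted

# A custom base URL leaves the attribute at None, and _should_stream_usage
# then falls back to False.
proxy_llm = ChatOpenAI(model="gpt-4o-mini", base_url="http://localhost:8000/v1")
assert proxy_llm.stream_usage is None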

libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py
0 additions, 2 deletions

@@ -23,7 +23,6 @@ def chat_model_params(self) -> dict:
             "deployment_name": os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
             "openai_api_version": OPENAI_API_VERSION,
             "azure_endpoint": OPENAI_API_BASE,
-            "stream_usage": True,
         }

     @property
@@ -83,7 +82,6 @@ def chat_model_params(self) -> dict:
             "deployment_name": os.environ["AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME"],
             "openai_api_version": OPENAI_API_VERSION,
             "azure_endpoint": OPENAI_API_BASE,
-            "stream_usage": True,
         }

     @property

libs/partners/openai/tests/integration_tests/chat_models/test_base.py
11 additions, 5 deletions

@@ -233,7 +233,7 @@ def test_openai_invoke() -> None:

 def test_stream() -> None:
     """Test streaming tokens from OpenAI."""
-    llm = ChatOpenAI()
+    llm = ChatOpenAI(model="gpt-4.1-mini")

     full: Optional[BaseMessageChunk] = None
     for chunk in llm.stream("I'm Pickle Rick"):

@@ -247,7 +247,7 @@ def test_stream() -> None:
     aggregate: Optional[BaseMessageChunk] = None
     chunks_with_token_counts = 0
     chunks_with_response_metadata = 0
-    for chunk in llm.stream("Hello", stream_usage=True):
+    for chunk in llm.stream("Hello"):
         assert isinstance(chunk.content, str)
         aggregate = chunk if aggregate is None else aggregate + chunk
         assert isinstance(chunk, AIMessageChunk)

@@ -310,13 +310,14 @@ async def _test_stream(stream: AsyncIterator, expect_usage: bool) -> None:
         assert chunks_with_token_counts == 0
         assert full.usage_metadata is None

-    llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT)  # type: ignore[call-arg]
-    await _test_stream(llm.astream("Hello"), expect_usage=False)
+    llm = ChatOpenAI(model="gpt-4.1-mini", temperature=0, max_tokens=MAX_TOKEN_COUNT)  # type: ignore[call-arg]
+    await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False)
     await _test_stream(
         llm.astream("Hello", stream_options={"include_usage": True}), expect_usage=True
     )
     await _test_stream(llm.astream("Hello", stream_usage=True), expect_usage=True)
     llm = ChatOpenAI(
+        model="gpt-4.1-mini",
         temperature=0,
         max_tokens=MAX_TOKEN_COUNT,  # type: ignore[call-arg]
         model_kwargs={"stream_options": {"include_usage": True}},

@@ -326,7 +327,12 @@ async def _test_stream(stream: AsyncIterator, expect_usage: bool) -> None:
         llm.astream("Hello", stream_options={"include_usage": False}),
         expect_usage=False,
     )
-    llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT, stream_usage=True)  # type: ignore[call-arg]
+    llm = ChatOpenAI(
+        model="gpt-4.1-mini",
+        temperature=0,
+        max_tokens=MAX_TOKEN_COUNT,  # type: ignore[call-arg]
+        stream_usage=True,
+    )
     await _test_stream(llm.astream("Hello"), expect_usage=True)
     await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False)

libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
1 addition, 1 deletion

@@ -22,7 +22,7 @@ def chat_model_class(self) -> type[BaseChatModel]:

     @property
     def chat_model_params(self) -> dict:
-        return {"model": "gpt-4o-mini", "stream_usage": True}
+        return {"model": "gpt-4o-mini"}

     @property
     def supports_image_inputs(self) -> bool:

libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr
1 addition, 0 deletions

@@ -28,6 +28,7 @@
     'request_timeout': 60.0,
     'stop': list([
     ]),
+    'stream_usage': True,
     'temperature': 0.0,
     'validate_base_url': True,
   }),

libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr
1 addition, 0 deletions

@@ -22,6 +22,7 @@
     'request_timeout': 60.0,
     'stop': list([
     ]),
+    'stream_usage': True,
     'temperature': 0.0,
   }),
  'lc': 1,

libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_responses_standard.ambr
1 addition, 0 deletions

@@ -22,6 +22,7 @@
     'request_timeout': 60.0,
     'stop': list([
     ]),
+    'stream_usage': True,
     'temperature': 0.0,
     'use_responses_api': True,
   }),
