Commit 3a465d6

feat(openai): enable stream_usage when using default base URL and client (#33296)

1 parent: 0b51de4
12 files changed: +104 −17 lines

libs/langchain/tests/unit_tests/chat_models/test_base.py
1 addition, 1 deletion

@@ -168,7 +168,7 @@ def test_configurable() -> None:
         "store": None,
         "extra_body": None,
         "include_response_headers": False,
-        "stream_usage": False,
+        "stream_usage": True,
         "use_previous_response_id": False,
         "use_responses_api": None,
     },

libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py
1 addition, 1 deletion

@@ -168,7 +168,7 @@ def test_configurable() -> None:
         "store": None,
         "extra_body": None,
         "include_response_headers": False,
-        "stream_usage": False,
+        "stream_usage": True,
         "use_previous_response_id": False,
         "use_responses_api": None,
     },

libs/partners/openai/langchain_openai/chat_models/azure.py
19 additions, 0 deletions

@@ -615,6 +615,25 @@ def validate_environment(self) -> Self:
             or os.getenv("OPENAI_ORG_ID")
             or os.getenv("OPENAI_ORGANIZATION")
         )
+
+        # Enable stream_usage by default if using default base URL and client
+        if all(
+            getattr(self, key, None) is None
+            for key in (
+                "stream_usage",
+                "openai_proxy",
+                "openai_api_base",
+                "base_url",
+                "client",
+                "root_client",
+                "async_client",
+                "root_async_client",
+                "http_client",
+                "http_async_client",
+            )
+        ):
+            self.stream_usage = True
+
         # For backwards compatibility. Before openai v1, no distinction was made
         # between azure_endpoint and base_url (openai_api_base).
         openai_api_base = self.openai_api_base
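
Note that azure_endpoint is not among the attributes gated on above, so a standard Azure setup still gets the new default. A minimal sketch of the effect, assuming AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY are set in the environment (the deployment name and API version below are hypothetical):

from langchain_openai import AzureChatOpenAI

# No custom base URL, proxy, or client: the validator enables usage streaming.
llm = AzureChatOpenAI(azure_deployment="my-deployment", api_version="2024-06-01")
assert llm.stream_usage is True

# An explicit value (or any custom client/base URL) suppresses the default.
llm_off = AzureChatOpenAI(
    azure_deployment="my-deployment",
    api_version="2024-06-01",
    stream_usage=False,
)
assert llm_off.stream_usage is False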

libs/partners/openai/langchain_openai/chat_models/base.py
40 additions, 4 deletions

@@ -98,7 +98,13 @@
     is_basemodel_subclass,
 )
 from langchain_core.utils.utils import _build_model_kwargs, from_env, secret_from_env
-from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    SecretStr,
+    model_validator,
+)
 from pydantic.v1 import BaseModel as BaseModelV1
 from typing_extensions import Self

@@ -464,11 +470,19 @@ class BaseChatOpenAI(BaseChatModel):
     )
     """Timeout for requests to OpenAI completion API. Can be float, ``httpx.Timeout`` or
     None."""
-    stream_usage: bool = False
-    """Whether to include usage metadata in streaming output. If True, an additional
+    stream_usage: Optional[bool] = None
+    """Whether to include usage metadata in streaming output. If enabled, an additional
     message chunk will be generated during the stream including usage metadata.

+    This parameter is enabled unless ``openai_api_base`` is set or the model is
+    initialized with a custom client, as many chat completions APIs do not support
+    streaming token usage.
+
     .. versionadded:: 0.3.9
+
+    .. versionchanged:: 0.3.35
+
+        Enabled for default base URL and client.
     """
     max_retries: Optional[int] = None
     """Maximum number of retries to make when generating."""

@@ -746,6 +760,28 @@ def validate_environment(self) -> Self:
             or os.getenv("OPENAI_ORGANIZATION")
         )
         self.openai_api_base = self.openai_api_base or os.getenv("OPENAI_API_BASE")
+
+        # Enable stream_usage by default if using default base URL and client
+        if (
+            all(
+                getattr(self, key, None) is None
+                for key in (
+                    "stream_usage",
+                    "openai_proxy",
+                    "openai_api_base",
+                    "base_url",
+                    "client",
+                    "root_client",
+                    "async_client",
+                    "root_async_client",
+                    "http_client",
+                    "http_async_client",
+                )
+            )
+            and "OPENAI_BASE_URL" not in os.environ
+        ):
+            self.stream_usage = True
+
         client_params: dict = {
             "api_key": (
                 self.openai_api_key.get_secret_value() if self.openai_api_key else None

@@ -1050,7 +1086,7 @@ def _should_stream_usage(
         for source in stream_usage_sources:
             if isinstance(source, bool):
                 return source
-        return self.stream_usage
+        return self.stream_usage or False

     def _stream(
         self,
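
Taken together, these changes flip usage streaming on for out-of-the-box clients while preserving every existing override. A minimal sketch of the resulting behavior, assuming only OPENAI_API_KEY is set and no custom base URL, proxy, or client is configured (the local base URL below is hypothetical):

from langchain_openai import ChatOpenAI

# Default-configured client: validate_environment sets stream_usage to True.
llm = ChatOpenAI(model="gpt-4o-mini")
assert llm.stream_usage is True

# Streaming now yields a final chunk carrying token counts by default.
full = None
for chunk in llm.stream("hello"):
    full = chunk if full is None else full + chunk
print(full.usage_metadata)  # e.g. {'input_tokens': ..., 'output_tokens': ..., ...}

# Per-call overrides still take precedence via _should_stream_usage:
for chunk in llm.stream("hello", stream_usage=False):
    pass  # no usage chunk is emitted

# A custom base URL leaves the attribute at None, and _should_stream_usage
# then falls back to False.
proxy_llm = ChatOpenAI(model="gpt-4o-mini", base_url="http://localhost:8000/v1")
assert proxy_llm.stream_usage is None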

libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py
0 additions, 2 deletions

@@ -23,7 +23,6 @@ def chat_model_params(self) -> dict:
             "deployment_name": os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
             "openai_api_version": OPENAI_API_VERSION,
             "azure_endpoint": OPENAI_API_BASE,
-            "stream_usage": True,
         }

     @property
@@ -83,7 +82,6 @@ def chat_model_params(self) -> dict:
             "deployment_name": os.environ["AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME"],
             "openai_api_version": OPENAI_API_VERSION,
             "azure_endpoint": OPENAI_API_BASE,
-            "stream_usage": True,
         }

     @property

libs/partners/openai/tests/integration_tests/chat_models/test_base.py
11 additions, 5 deletions

@@ -233,7 +233,7 @@ def test_openai_invoke() -> None:

 def test_stream() -> None:
     """Test streaming tokens from OpenAI."""
-    llm = ChatOpenAI()
+    llm = ChatOpenAI(model="gpt-4.1-mini")

     full: Optional[BaseMessageChunk] = None
     for chunk in llm.stream("I'm Pickle Rick"):

@@ -247,7 +247,7 @@ def test_stream() -> None:
     aggregate: Optional[BaseMessageChunk] = None
     chunks_with_token_counts = 0
     chunks_with_response_metadata = 0
-    for chunk in llm.stream("Hello", stream_usage=True):
+    for chunk in llm.stream("Hello"):
         assert isinstance(chunk.content, str)
         aggregate = chunk if aggregate is None else aggregate + chunk
         assert isinstance(chunk, AIMessageChunk)

@@ -310,13 +310,14 @@ async def _test_stream(stream: AsyncIterator, expect_usage: bool) -> None:
         assert chunks_with_token_counts == 0
         assert full.usage_metadata is None

-    llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT)  # type: ignore[call-arg]
-    await _test_stream(llm.astream("Hello"), expect_usage=False)
+    llm = ChatOpenAI(model="gpt-4.1-mini", temperature=0, max_tokens=MAX_TOKEN_COUNT)  # type: ignore[call-arg]
+    await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False)
     await _test_stream(
         llm.astream("Hello", stream_options={"include_usage": True}), expect_usage=True
     )
     await _test_stream(llm.astream("Hello", stream_usage=True), expect_usage=True)
     llm = ChatOpenAI(
+        model="gpt-4.1-mini",
         temperature=0,
         max_tokens=MAX_TOKEN_COUNT,  # type: ignore[call-arg]
         model_kwargs={"stream_options": {"include_usage": True}},

@@ -326,7 +327,12 @@ async def _test_stream(stream: AsyncIterator, expect_usage: bool) -> None:
         llm.astream("Hello", stream_options={"include_usage": False}),
         expect_usage=False,
     )
-    llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT, stream_usage=True)  # type: ignore[call-arg]
+    llm = ChatOpenAI(
+        model="gpt-4.1-mini",
+        temperature=0,
+        max_tokens=MAX_TOKEN_COUNT,  # type: ignore[call-arg]
+        stream_usage=True,
+    )
     await _test_stream(llm.astream("Hello"), expect_usage=True)
     await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False)

libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
1 addition, 1 deletion

@@ -22,7 +22,7 @@ def chat_model_class(self) -> type[BaseChatModel]:

     @property
     def chat_model_params(self) -> dict:
-        return {"model": "gpt-4o-mini", "stream_usage": True}
+        return {"model": "gpt-4o-mini"}

     @property
     def supports_image_inputs(self) -> bool:

libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr
1 addition, 0 deletions

@@ -28,6 +28,7 @@
     'request_timeout': 60.0,
     'stop': list([
     ]),
+    'stream_usage': True,
     'temperature': 0.0,
     'validate_base_url': True,
   }),

libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr
1 addition, 0 deletions

@@ -22,6 +22,7 @@
     'request_timeout': 60.0,
     'stop': list([
     ]),
+    'stream_usage': True,
     'temperature': 0.0,
   }),
  'lc': 1,

libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_responses_standard.ambr
1 addition, 0 deletions

@@ -22,6 +22,7 @@
     'request_timeout': 60.0,
     'stop': list([
     ]),
+    'stream_usage': True,
     'temperature': 0.0,
     'use_responses_api': True,
   }),
