
Commit 4813a9a

fix(core): include llm_output in streaming LLMResult
Fixes #34057

Previously, streaming mode did not include the `llm_output` field in the `LLMResult` object passed to `on_llm_end` callbacks. This broke integrations like Langfuse that rely on this field to extract metadata such as the model name. This commit ensures that `llm_output` is always present in streaming mode by passing an empty dict `{}` in all streaming methods (`stream` and `astream`) for both `BaseLLM` and `BaseChatModel`.

Changes:
- Updated `BaseLLM.stream()` to include `llm_output={}` in `LLMResult`
- Updated `BaseLLM.astream()` to include `llm_output={}` in `LLMResult`
- Updated `BaseChatModel.stream()` to include `llm_output={}` in `LLMResult`
- Updated `BaseChatModel.astream()` to include `llm_output={}` in `LLMResult`
- Added a test to verify `llm_output` is present in streaming callbacks
1 parent ee3373a commit 4813a9a
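For context, the downstream pattern this unblocks: callback consumers such as Langfuse read `response.llm_output` inside `on_llm_end` to pull out metadata. The sketch below is not part of the commit — the handler name, the `model_name` key, and the use of `GenericFakeChatModel` are illustrative — but it shows why the field matters: after this fix, `llm_output` is at least `{}` in streaming mode, so dict-style lookups no longer have to guard against `None`.

```python
from itertools import cycle
from typing import Any

from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.language_models import GenericFakeChatModel
from langchain_core.messages import AIMessage
from langchain_core.outputs import LLMResult


class MetadataCaptureHandler(BaseCallbackHandler):
    """Illustrative handler: reads llm_output the way tracing integrations do."""

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        # Before this fix, response.llm_output was None when streaming;
        # now it is at least {}, so .get() lookups are safe.
        llm_output = response.llm_output or {}
        print("model name:", llm_output.get("model_name", "<not reported>"))


model = GenericFakeChatModel(messages=cycle([AIMessage(content="hello world")]))
for _ in model.stream("hi", config={"callbacks": [MetadataCaptureHandler()]}):
    pass  # consuming the stream triggers on_llm_end
```

Note that the commit only guarantees the field is present; provider-specific keys such as `model_name` are still up to each integration or chat model to populate.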

3 files changed: +48 −6 lines

libs/core/langchain_core/language_models/chat_models.py

Lines changed: 2 additions & 2 deletions
@@ -583,7 +583,7 @@ def stream(
             run_manager.on_llm_error(err, response=LLMResult(generations=[]))
             raise err
 
-        run_manager.on_llm_end(LLMResult(generations=[[generation]]))
+        run_manager.on_llm_end(LLMResult(generations=[[generation]], llm_output={}))
 
     @override
     async def astream(
@@ -712,7 +712,7 @@ async def astream(
             raise err
 
         await run_manager.on_llm_end(
-            LLMResult(generations=[[generation]]),
+            LLMResult(generations=[[generation]], llm_output={}),
         )
 
     # --- Custom methods ---

libs/core/langchain_core/language_models/llms.py

Lines changed: 4 additions & 2 deletions
@@ -564,7 +564,7 @@ def stream(
             run_manager.on_llm_error(err, response=LLMResult(generations=[]))
             raise err
 
-        run_manager.on_llm_end(LLMResult(generations=[[generation]]))
+        run_manager.on_llm_end(LLMResult(generations=[[generation]], llm_output={}))
 
     @override
     async def astream(
@@ -635,7 +635,9 @@ async def astream(
             await run_manager.on_llm_error(err, response=LLMResult(generations=[]))
             raise err
 
-        await run_manager.on_llm_end(LLMResult(generations=[[generation]]))
+        await run_manager.on_llm_end(
+            LLMResult(generations=[[generation]], llm_output={})
+        )
 
     # --- Custom methods ---

libs/core/tests/unit_tests/fake/test_fake_chat_model.py

Lines changed: 42 additions & 2 deletions
@@ -7,15 +7,15 @@
 
 from typing_extensions import override
 
-from langchain_core.callbacks.base import AsyncCallbackHandler
+from langchain_core.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler
 from langchain_core.language_models import (
     FakeListChatModel,
     FakeMessagesListChatModel,
     GenericFakeChatModel,
     ParrotFakeChatModel,
 )
 from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage, HumanMessage
-from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
+from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult
 from tests.unit_tests.stubs import (
     _any_id_ai_message,
     _any_id_ai_message_chunk,
@@ -253,3 +253,43 @@ def test_fake_messages_list_chat_model_sleep_delay() -> None:
     elapsed = time.time() - start
 
     assert elapsed >= sleep_time
+
+
+def test_stream_llm_result_contains_llm_output() -> None:
+    """Test that streaming mode includes llm_output in LLMResult."""
+
+    class LLMResultCaptureHandler(BaseCallbackHandler):
+        """Callback handler that captures LLMResult from on_llm_end."""
+
+        def __init__(self) -> None:
+            self.llm_results: list[LLMResult] = []
+
+        @override
+        def on_llm_end(
+            self,
+            response: LLMResult,
+            *,
+            run_id: UUID,
+            parent_run_id: UUID | None = None,
+            **kwargs: Any,
+        ) -> None:
+            """Capture the LLMResult."""
+            self.llm_results.append(response)
+
+    model = GenericFakeChatModel(messages=cycle([AIMessage(content="hello world")]))
+    handler = LLMResultCaptureHandler()
+
+    # Consume the stream to trigger on_llm_end
+    chunks = list(model.stream("test", config={"callbacks": [handler]}))
+
+    # Verify we got chunks
+    assert len(chunks) > 0
+
+    # Verify on_llm_end was called
+    assert len(handler.llm_results) == 1
+
+    # Verify llm_output field exists in the LLMResult
+    llm_result = handler.llm_results[0]
+    assert hasattr(llm_result, "llm_output")
+    assert llm_result.llm_output is not None
+    assert isinstance(llm_result.llm_output, dict)
