
Commit 4813a9a

fix(core): include llm_output in streaming LLMResult
Fixes #34057

Previously, streaming mode did not include the `llm_output` field in the `LLMResult` object passed to `on_llm_end` callbacks. This broke integrations like Langfuse that rely on this field to extract metadata such as the model name. This commit ensures that `llm_output` is always present in streaming mode by passing an empty dict `{}` in all streaming methods (`stream` and `astream`) for both `BaseLLM` and `BaseChatModel`.

Changes:
- Updated `BaseLLM.stream()` to include `llm_output={}` in `LLMResult`
- Updated `BaseLLM.astream()` to include `llm_output={}` in `LLMResult`
- Updated `BaseChatModel.stream()` to include `llm_output={}` in `LLMResult`
- Updated `BaseChatModel.astream()` to include `llm_output={}` in `LLMResult`
- Added a test to verify `llm_output` is present in streaming callbacks
1 parent ee3373a commit 4813a9a
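For context, the downstream pattern this unblocks: callback consumers such as Langfuse read `response.llm_output` inside `on_llm_end` to pull out metadata. The sketch below is not part of the commit — the handler name, the `model_name` key, and the use of `GenericFakeChatModel` are illustrative — but it shows why the field matters: after this fix, `llm_output` is at least `{}` in streaming mode, so dict-style lookups no longer have to guard against `None`.

```python
from itertools import cycle
from typing import Any

from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.language_models import GenericFakeChatModel
from langchain_core.messages import AIMessage
from langchain_core.outputs import LLMResult


class MetadataCaptureHandler(BaseCallbackHandler):
    """Illustrative handler: reads llm_output the way tracing integrations do."""

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        # Before this fix, response.llm_output was None when streaming;
        # now it is at least {}, so .get() lookups are safe.
        llm_output = response.llm_output or {}
        print("model name:", llm_output.get("model_name", "<not reported>"))


model = GenericFakeChatModel(messages=cycle([AIMessage(content="hello world")]))
for _ in model.stream("hi", config={"callbacks": [MetadataCaptureHandler()]}):
    pass  # consuming the stream triggers on_llm_end
```

Note that the commit only guarantees the field is present; provider-specific keys such as `model_name` are still up to each integration or chat model to populate.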

3 files changed: +48 −6 lines

libs/core/langchain_core/language_models/chat_models.py

Lines changed: 2 additions & 2 deletions
@@ -583,7 +583,7 @@ def stream(
             run_manager.on_llm_error(err, response=LLMResult(generations=[]))
             raise err
 
-        run_manager.on_llm_end(LLMResult(generations=[[generation]]))
+        run_manager.on_llm_end(LLMResult(generations=[[generation]], llm_output={}))
 
     @override
     async def astream(
@@ -712,7 +712,7 @@ async def astream(
             raise err
 
         await run_manager.on_llm_end(
-            LLMResult(generations=[[generation]]),
+            LLMResult(generations=[[generation]], llm_output={}),
         )
 
     # --- Custom methods ---

libs/core/langchain_core/language_models/llms.py

Lines changed: 4 additions & 2 deletions
@@ -564,7 +564,7 @@ def stream(
             run_manager.on_llm_error(err, response=LLMResult(generations=[]))
             raise err
 
-        run_manager.on_llm_end(LLMResult(generations=[[generation]]))
+        run_manager.on_llm_end(LLMResult(generations=[[generation]], llm_output={}))
 
     @override
     async def astream(
@@ -635,7 +635,9 @@ async def astream(
             await run_manager.on_llm_error(err, response=LLMResult(generations=[]))
             raise err
 
-        await run_manager.on_llm_end(LLMResult(generations=[[generation]]))
+        await run_manager.on_llm_end(
+            LLMResult(generations=[[generation]], llm_output={})
+        )
 
     # --- Custom methods ---

libs/core/tests/unit_tests/fake/test_fake_chat_model.py

Lines changed: 42 additions & 2 deletions
@@ -7,15 +7,15 @@
 
 from typing_extensions import override
 
-from langchain_core.callbacks.base import AsyncCallbackHandler
+from langchain_core.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler
 from langchain_core.language_models import (
     FakeListChatModel,
     FakeMessagesListChatModel,
     GenericFakeChatModel,
     ParrotFakeChatModel,
 )
 from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage, HumanMessage
-from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
+from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult
 from tests.unit_tests.stubs import (
     _any_id_ai_message,
     _any_id_ai_message_chunk,
@@ -253,3 +253,43 @@ def test_fake_messages_list_chat_model_sleep_delay() -> None:
     elapsed = time.time() - start
 
     assert elapsed >= sleep_time
+
+
+def test_stream_llm_result_contains_llm_output() -> None:
+    """Test that streaming mode includes llm_output in LLMResult."""
+
+    class LLMResultCaptureHandler(BaseCallbackHandler):
+        """Callback handler that captures LLMResult from on_llm_end."""
+
+        def __init__(self) -> None:
+            self.llm_results: list[LLMResult] = []
+
+        @override
+        def on_llm_end(
+            self,
+            response: LLMResult,
+            *,
+            run_id: UUID,
+            parent_run_id: UUID | None = None,
+            **kwargs: Any,
+        ) -> None:
+            """Capture the LLMResult."""
+            self.llm_results.append(response)
+
+    model = GenericFakeChatModel(messages=cycle([AIMessage(content="hello world")]))
+    handler = LLMResultCaptureHandler()
+
+    # Consume the stream to trigger on_llm_end
+    chunks = list(model.stream("test", config={"callbacks": [handler]}))
+
+    # Verify we got chunks
+    assert len(chunks) > 0
+
+    # Verify on_llm_end was called
+    assert len(handler.llm_results) == 1
+
+    # Verify llm_output field exists in the LLMResult
+    llm_result = handler.llm_results[0]
+    assert hasattr(llm_result, "llm_output")
+    assert llm_result.llm_output is not None
+    assert isinstance(llm_result.llm_output, dict)
