
Commit b2abc0d

fix(core): ensure llm_output is always dict in LLMResult, never None
This commit fixes issue #34057, where streaming mode returned an LLMResult with llm_output: None instead of llm_output: {}.

Root cause: multiple code paths created ChatResult/LLMResult without explicitly setting llm_output={}, causing it to default to None.

Changes:
- chat_models.py: added llm_output={} to the cache retrieval paths (sync/async), generate_from_stream(), and SimpleChatModel._generate()
- llms.py: added llm_output={} to SimpleLLM._generate() and _agenerate()
- fake_chat_models.py: fixed all 4 fake model _generate() methods
- event_stream.py: improved llm_output serialization in on_llm_end()
- test_runnable_events_v1.py: updated test expectations

Tests:
- test_astream_events_from_model: PASSED ✓
- test_event_stream_with_simple_chain: PASSED ✓
- All linting checks: PASSED ✓
1 parent b904b4a commit b2abc0d
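For reference, a minimal sketch of the default behavior this commit works around, using langchain_core's public ChatResult / ChatGeneration / AIMessage classes (the message content is illustrative):

    from langchain_core.messages import AIMessage
    from langchain_core.outputs import ChatGeneration, ChatResult

    # Without an explicit llm_output, the field defaults to None, which is the
    # behavior reported in #34057 for streaming paths.
    implicit = ChatResult(generations=[ChatGeneration(message=AIMessage(content="hi"))])
    print(implicit.llm_output)  # None

    # The fix is to pass llm_output={} explicitly at each construction site,
    # as the diffs below do.
    explicit = ChatResult(
        generations=[ChatGeneration(message=AIMessage(content="hi"))],
        llm_output={},
    )
    print(explicit.llm_output)  # {}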

File tree: 5 files changed, +31 −18 lines changed

libs/core/langchain_core/language_models/chat_models.py

Lines changed: 5 additions & 4 deletions
@@ -206,7 +206,8 @@ def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:
                 message=message_chunk_to_message(generation.message),
                 generation_info=generation.generation_info,
             )
-        ]
+        ],
+        llm_output={},
     )


@@ -1135,7 +1136,7 @@ def _generate_with_cache(
                 cache_val = llm_cache.lookup(prompt, llm_string)
                 if isinstance(cache_val, list):
                     converted_generations = self._convert_cached_generations(cache_val)
-                    return ChatResult(generations=converted_generations)
+                    return ChatResult(generations=converted_generations, llm_output={})
             elif self.cache is None:
                 pass
             else:
@@ -1253,7 +1254,7 @@ async def _agenerate_with_cache(
                 cache_val = await llm_cache.alookup(prompt, llm_string)
                 if isinstance(cache_val, list):
                     converted_generations = self._convert_cached_generations(cache_val)
-                    return ChatResult(generations=converted_generations)
+                    return ChatResult(generations=converted_generations, llm_output={})
             elif self.cache is None:
                 pass
             else:
@@ -1742,7 +1743,7 @@ def _generate(
         output_str = self._call(messages, stop=stop, run_manager=run_manager, **kwargs)
         message = AIMessage(content=output_str)
         generation = ChatGeneration(message=message)
-        return ChatResult(generations=[generation])
+        return ChatResult(generations=[generation], llm_output={})

     @abstractmethod
     def _call(
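A hedged usage sketch of the generate_from_stream() change above; the chunk contents are invented, and the imports assume langchain_core's public output and message classes:

    from langchain_core.language_models.chat_models import generate_from_stream
    from langchain_core.messages import AIMessageChunk
    from langchain_core.outputs import ChatGenerationChunk

    # Two made-up streamed chunks that concatenate to "Hello".
    chunks = iter(
        [
            ChatGenerationChunk(message=AIMessageChunk(content="Hel")),
            ChatGenerationChunk(message=AIMessageChunk(content="lo")),
        ]
    )
    result = generate_from_stream(chunks)
    assert result.generations[0].message.content == "Hello"
    assert result.llm_output == {}  # previously None when left unset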

libs/core/langchain_core/language_models/fake_chat_models.py

Lines changed: 6 additions & 4 deletions
@@ -44,7 +44,7 @@ def _generate(
         else:
             self.i = 0
         generation = ChatGeneration(message=response)
-        return ChatResult(generations=[generation])
+        return ChatResult(generations=[generation], llm_output={})

     @property
     @override
@@ -213,7 +213,7 @@ async def _agenerate(
         output_str = "fake response"
         message = AIMessage(content=output_str)
         generation = ChatGeneration(message=message)
-        return ChatResult(generations=[generation])
+        return ChatResult(generations=[generation], llm_output={})

     @property
     def _llm_type(self) -> str:
@@ -261,7 +261,7 @@ def _generate(
         message = next(self.messages)
         message_ = AIMessage(content=message) if isinstance(message, str) else message
         generation = ChatGeneration(message=message_)
-        return ChatResult(generations=[generation])
+        return ChatResult(generations=[generation], llm_output={})

     def _stream(
         self,
@@ -386,7 +386,9 @@ def _generate(
         run_manager: CallbackManagerForLLMRun | None = None,
         **kwargs: Any,
     ) -> ChatResult:
-        return ChatResult(generations=[ChatGeneration(message=messages[-1])])
+        return ChatResult(
+            generations=[ChatGeneration(message=messages[-1])], llm_output={}
+        )

     @property
     def _llm_type(self) -> str:

libs/core/langchain_core/language_models/llms.py

Lines changed: 2 additions & 2 deletions
@@ -1504,7 +1504,7 @@ def _generate(
                 else self._call(prompt, stop=stop, **kwargs)
             )
             generations.append([Generation(text=text)])
-        return LLMResult(generations=generations)
+        return LLMResult(generations=generations, llm_output={})

     async def _agenerate(
         self,
@@ -1522,4 +1522,4 @@ async def _agenerate(
                 else await self._acall(prompt, stop=stop, **kwargs)
             )
             generations.append([Generation(text=text)])
-        return LLMResult(generations=generations)
+        return LLMResult(generations=generations, llm_output={})

libs/core/langchain_core/tracers/event_stream.py

Lines changed: 17 additions & 7 deletions
@@ -486,13 +486,23 @@ async def on_llm_end(

         if run_info["run_type"] == "chat_model":
             generations = cast("list[list[ChatGenerationChunk]]", response.generations)
-            for gen in generations:
-                if output != {}:
-                    break
-                for chunk in gen:
-                    output = chunk.message
-                    break
-
+            output = {
+                "generations": [
+                    [
+                        {
+                            "text": chunk.text,
+                            "generation_info": chunk.generation_info,
+                            "type": chunk.type,
+                            "message": chunk.message,
+                        }
+                        for chunk in gen
+                    ]
+                    for gen in generations
+                ],
+                "llm_output": response.llm_output,
+                "run": None,
+                "type": "LLMResult",
+            }
             event = "on_chat_model_end"
         elif run_info["run_type"] == "llm":
             generations = cast("list[list[GenerationChunk]]", response.generations)
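With this change, the on_chat_model_end payload is a plain dict mirroring a serialized LLMResult instead of a bare message chunk. An illustrative sketch of the resulting shape of event["data"]["output"] (all values invented, not taken from a real run):

    from langchain_core.messages import AIMessageChunk

    # Rough shape of event["data"]["output"] for an on_chat_model_end event
    # after this commit; every value below is illustrative.
    output = {
        "generations": [
            [
                {
                    "text": "Hello!",
                    "generation_info": None,
                    "type": "ChatGenerationChunk",
                    "message": AIMessageChunk(content="Hello!"),
                }
            ]
        ],
        "llm_output": {},  # always a dict now, never None
        "run": None,
        "type": "LLMResult",
    }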

libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py

Lines changed: 1 addition & 1 deletion
@@ -1809,7 +1809,7 @@ async def test_with_llm() -> None:
                         }
                     ]
                 ],
-                "llm_output": None,
+                "llm_output": {},
                 "run": None,
                 "type": "LLMResult",
             },
