@@ -477,6 +477,49 @@ def _validate_and_filter_ollama_arguments(self, function_name: str, arguments: D
             logging.debug(f"[OLLAMA_FIX] Error validating arguments for {function_name}: {e}")
             return arguments
 
+    def _handle_ollama_sequential_logic(self, iteration_count: int, accumulated_tool_results: List[Any],
+                                        response_text: str, messages: List[Dict]) -> tuple:
+        """
+        Handle Ollama sequential tool execution logic to prevent premature tool summary generation.
+
+        This method implements a two-step process:
+        1. After reaching the threshold with tool results, add an explicit final-answer prompt.
+        2. Only generate a tool summary if the LLM still doesn't respond after the explicit prompt.
+
+        Args:
+            iteration_count: Current iteration count
+            accumulated_tool_results: List of tool results from all iterations
+            response_text: Current LLM response text
+            messages: Message history list to potentially modify
+
+        Returns:
+            tuple: (should_break, final_response_text, iteration_count)
+                - should_break: Whether to break the iteration loop
+                - final_response_text: Text to use as the final response (None if continuing)
+                - iteration_count: Updated iteration count
+        """
+        if not (self._is_ollama_provider() and iteration_count >= self.OLLAMA_SUMMARY_ITERATION_THRESHOLD):
+            return False, None, iteration_count
+
+        # For Ollama: if we have meaningful tool results but empty responses,
+        # give the LLM one final chance with an explicit prompt for a final answer
+        if accumulated_tool_results and iteration_count == self.OLLAMA_SUMMARY_ITERATION_THRESHOLD:
+            # Add an explicit prompt asking for the final answer
+            messages.append({
+                "role": "user",
+                "content": self.OLLAMA_FINAL_ANSWER_PROMPT
+            })
+            # Continue to the next iteration to get the final response
+            iteration_count += 1
+            return False, None, iteration_count
+        else:
+            # If there is still no response after the final-answer prompt, generate a summary
+            tool_summary = self._generate_ollama_tool_summary(accumulated_tool_results, response_text)
+            if tool_summary:
+                return True, tool_summary, iteration_count
+
+        return False, None, iteration_count
+
     def _needs_system_message_skip(self) -> bool:
         """Check if this model requires skipping system messages"""
         if not self.model:
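
For reviewers, a minimal standalone sketch of the two-step contract the new helper enforces. StubLLM, its threshold value, and the canned tool results below are illustrative stand-ins, not the library's API:

from typing import Any, Dict, List

class StubLLM:
    # Hypothetical stand-ins for the class attributes referenced in the diff.
    OLLAMA_SUMMARY_ITERATION_THRESHOLD = 3
    OLLAMA_FINAL_ANSWER_PROMPT = "Please provide your final answer based on the tool results above."

    def _is_ollama_provider(self) -> bool:
        return True  # always Ollama for this demo

    def _generate_ollama_tool_summary(self, results: List[Any], response_text: str) -> str:
        return "Tool results: " + "; ".join(str(r) for r in results)

    # Same branching as the patched helper above.
    def _handle_ollama_sequential_logic(self, iteration_count: int, accumulated_tool_results: List[Any],
                                        response_text: str, messages: List[Dict]) -> tuple:
        if not (self._is_ollama_provider() and iteration_count >= self.OLLAMA_SUMMARY_ITERATION_THRESHOLD):
            return False, None, iteration_count
        if accumulated_tool_results and iteration_count == self.OLLAMA_SUMMARY_ITERATION_THRESHOLD:
            messages.append({"role": "user", "content": self.OLLAMA_FINAL_ANSWER_PROMPT})
            return False, None, iteration_count + 1
        tool_summary = self._generate_ollama_tool_summary(accumulated_tool_results, response_text)
        if tool_summary:
            return True, tool_summary, iteration_count
        return False, None, iteration_count

llm = StubLLM()
messages: List[Dict] = []
results = ["search('weather') -> 'sunny'"]

# Step 1: at the threshold, the helper injects the final-answer prompt and signals "keep going".
print(llm._handle_ollama_sequential_logic(3, results, "", messages))  # (False, None, 4)

# Step 2: if the next response is still empty, it falls back to a tool summary and breaks.
print(llm._handle_ollama_sequential_logic(4, results, "", messages))  # (True, 'Tool results: ...', 4)

Returning the incremented iteration_count after appending the prompt is what guarantees the summary fallback fires on the very next pass instead of re-prompting indefinitely.
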
@@ -1132,11 +1175,15 @@ def get_response(
 
             # Special handling for Ollama to prevent infinite loops
             # Only generate a summary after multiple iterations to allow sequential execution
-            if iteration_count >= self.OLLAMA_SUMMARY_ITERATION_THRESHOLD:
-                tool_summary = self._generate_ollama_tool_summary(accumulated_tool_results, response_text)
-                if tool_summary:
-                    final_response_text = tool_summary
-                    break
+            should_break, tool_summary_text, iteration_count = self._handle_ollama_sequential_logic(
+                iteration_count, accumulated_tool_results, response_text, messages
+            )
+            if should_break:
+                final_response_text = tool_summary_text
+                break
+            elif tool_summary_text is None and iteration_count > self.OLLAMA_SUMMARY_ITERATION_THRESHOLD:
+                # Continue iterating after appending the final-answer prompt
+                continue
 
             # Safety check: prevent infinite loops for any provider
             if iteration_count >= 5:
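
The call-site branching in this hunk is easiest to see end to end. A hedged sketch, where run_one_iteration and the free function handle are simplified stand-ins for get_response's loop body and the helper above:

from typing import Any, Dict, List, Tuple

THRESHOLD = 3  # stand-in for self.OLLAMA_SUMMARY_ITERATION_THRESHOLD

def handle(iteration_count: int, results: List[Any], response_text: str,
           messages: List[Dict]) -> Tuple[bool, Any, int]:
    # Free-function stand-in for _handle_ollama_sequential_logic.
    if iteration_count < THRESHOLD:
        return False, None, iteration_count
    if results and iteration_count == THRESHOLD:
        messages.append({"role": "user", "content": "Please provide your final answer."})
        return False, None, iteration_count + 1
    return True, f"Summary of {len(results)} tool result(s)", iteration_count

def run_one_iteration(messages: List[Dict]) -> Tuple[str, List[Any]]:
    # Hypothetical: simulates Ollama returning empty text alongside tool output.
    return "", ["multiply(6, 7) -> 42"]

messages: List[Dict] = [{"role": "user", "content": "What is 6 * 7?"}]
iteration_count, final_response_text = 0, None

while iteration_count < 5:  # mirrors the safety cap in the surrounding code
    response_text, accumulated_tool_results = run_one_iteration(messages)
    should_break, tool_summary_text, iteration_count = handle(
        iteration_count, accumulated_tool_results, response_text, messages
    )
    if should_break:
        final_response_text = tool_summary_text  # the summary becomes the final answer
        break
    if tool_summary_text is None and iteration_count > THRESHOLD:
        continue  # the final-answer prompt was just appended; re-ask the model
    iteration_count += 1

print(final_response_text)  # Summary of 1 tool result(s)
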
@@ -1911,11 +1958,15 @@ async def get_response_async(
 
             # Special handling for Ollama to prevent infinite loops
             # Only generate a summary after multiple iterations to allow sequential execution
-            if iteration_count >= self.OLLAMA_SUMMARY_ITERATION_THRESHOLD:
-                tool_summary = self._generate_ollama_tool_summary(accumulated_tool_results, response_text)
-                if tool_summary:
-                    final_response_text = tool_summary
-                    break
+            should_break, tool_summary_text, iteration_count = self._handle_ollama_sequential_logic(
+                iteration_count, accumulated_tool_results, response_text, messages
+            )
+            if should_break:
+                final_response_text = tool_summary_text
+                break
+            elif tool_summary_text is None and iteration_count > self.OLLAMA_SUMMARY_ITERATION_THRESHOLD:
+                # Continue iterating after appending the final-answer prompt
+                continue
 
             # Safety check: prevent infinite loops for any provider
             if iteration_count >= 5:
@@ -2417,18 +2468,12 @@ def response(
             )
 
             if stream:
-                if verbose:
-                    with Live(display_generating("", start_time), console=console or self.console, refresh_per_second=4) as live:
-                        for chunk in litellm.completion(**completion_params):
-                            content = self._process_streaming_chunk(chunk)
-                            if content:
-                                response_text += content
-                                live.update(display_generating(response_text, start_time))
-                else:
+                with Live(display_generating("", start_time), console=console or self.console, refresh_per_second=4) as live:
                     for chunk in litellm.completion(**completion_params):
                         content = self._process_streaming_chunk(chunk)
                         if content:
                             response_text += content
+                            live.update(display_generating(response_text, start_time))
             else:
                 response = litellm.completion(**completion_params)
                 response_text = response.choices[0].message.content.strip() if response.choices[0].message.content else ""
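
Since response and the aresponse hunk that follows now share one streaming shape, a self-contained sketch of that unified path may help. The fake chunk generator replaces litellm.completion(stream=True) and a plain rich Text approximates display_generating; only rich is a real dependency here:

import time

from rich.console import Console
from rich.live import Live
from rich.text import Text

def fake_stream():
    # Stand-in for litellm.completion(stream=True): yields already-extracted text chunks.
    for piece in ["Streaming ", "now ", "renders ", "through ", "Live ", "unconditionally."]:
        time.sleep(0.1)
        yield piece

def display_generating(text: str, start_time: float) -> Text:
    # Rough approximation of the UI helper used in the diff.
    return Text(f"{text}\n[{time.time() - start_time:.1f}s elapsed]")

console = Console()
response_text, start_time = "", time.time()

# Mirrors the patched branch: one Live context for verbose and quiet runs alike,
# with each chunk accumulated exactly once before the display refreshes.
with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
    for content in fake_stream():
        if content:
            response_text += content
            live.update(display_generating(response_text, start_time))

print(response_text)

The design point carried by this hunk and its async twin below: the verbose gate is gone, so quiet runs get the same incremental rendering, and the transcript in response_text stays in lockstep with what Live displays.
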
@@ -2517,18 +2564,12 @@ async def aresponse(
             )
 
             if stream:
-                if verbose:
-                    with Live(display_generating("", start_time), console=console or self.console, refresh_per_second=4) as live:
-                        async for chunk in await litellm.acompletion(**completion_params):
-                            content = self._process_streaming_chunk(chunk)
-                            if content:
-                                response_text += content
-                                live.update(display_generating(response_text, start_time))
-                else:
+                with Live(display_generating("", start_time), console=console or self.console, refresh_per_second=4) as live:
                     async for chunk in await litellm.acompletion(**completion_params):
                         content = self._process_streaming_chunk(chunk)
                         if content:
                             response_text += content
+                            live.update(display_generating(response_text, start_time))
             else:
                 response = await litellm.acompletion(**completion_params)
                 response_text = response.choices[0].message.content.strip() if response.choices[0].message.content else ""