@@ -1953,166 +1953,40 @@ def _start_stream(self, prompt: str, **kwargs) -> Generator[str, None, None]:
             # Reset the final display flag for each new conversation
             self._final_display_shown = False
 
-            # Determine which streaming method to use based on LLM type
-            if self._using_custom_llm:
-                # Use custom LLM streaming
-                yield from self._custom_llm_stream(prompt, **kwargs)
-            else:
-                # Use OpenAI client streaming
-                yield from self._openai_stream(prompt, **kwargs)
+            # Temporarily disable verbose mode to prevent console output during streaming
+            original_verbose = self.verbose
+            self.verbose = False
+
+            # Use the existing chat logic but capture and yield chunks
+            # This approach reuses all existing logic without duplication
+            response = self.chat(prompt, **kwargs)
+
+            # Restore original verbose mode
+            self.verbose = original_verbose
+
+            if response:
+                # Simulate streaming by yielding the response in word chunks
+                # This provides a consistent streaming experience regardless of LLM type
+                words = str(response).split()
+                chunk_size = max(1, len(words) // 20)  # Split into ~20 chunks for smooth streaming
 
+                for i in range(0, len(words), chunk_size):
+                    chunk_words = words[i:i + chunk_size]
+                    chunk = ' '.join(chunk_words)
+
+                    # Add space after chunk unless it's the last one
+                    if i + chunk_size < len(words):
+                        chunk += ' '
+
+                    yield chunk
+
         except Exception as e:
             # Graceful fallback to non-streaming if streaming fails
             logging.warning(f"Streaming failed, falling back to regular response: {e}")
             response = self.chat(prompt, **kwargs)
             if response:
                 yield response
 
-    def _custom_llm_stream(self, prompt: str, **kwargs) -> Generator[str, None, None]:
-        """Handle streaming for custom LLM providers via LiteLLM."""
-        try:
-            # Handle knowledge search
-            if self.knowledge:
-                search_results = self.knowledge.search(prompt, agent_id=self.agent_id)
-                if search_results:
-                    if isinstance(search_results, dict) and 'results' in search_results:
-                        knowledge_content = "\n".join([result['memory'] for result in search_results['results']])
-                    else:
-                        knowledge_content = "\n".join(search_results)
-                    prompt = f"{prompt}\n\nKnowledge: {knowledge_content}"
-
-            # Handle tools
-            tools = kwargs.get('tools')
-            if tools is None or (isinstance(tools, list) and len(tools) == 0):
-                tool_param = self.tools
-            else:
-                tool_param = tools
-
-            # Convert MCP tool objects to OpenAI format if needed
-            if tool_param is not None:
-                from ..mcp.mcp import MCP
-                if isinstance(tool_param, MCP) and hasattr(tool_param, 'to_openai_tool'):
-                    logging.debug("Converting MCP tool to OpenAI format")
-                    openai_tool = tool_param.to_openai_tool()
-                    if openai_tool:
-                        if isinstance(openai_tool, list):
-                            tool_param = openai_tool
-                        else:
-                            tool_param = [openai_tool]
-                        logging.debug(f"Converted MCP tool: {tool_param}")
-
-            # Store chat history length for potential rollback
-            chat_history_length = len(self.chat_history)
-
-            # Normalize prompt content for consistent chat history storage
-            normalized_content = prompt
-            if isinstance(prompt, list):
-                normalized_content = next((item["text"] for item in prompt if item.get("type") == "text"), "")
-
-            # Prevent duplicate messages
-            if not (self.chat_history and
-                    self.chat_history[-1].get("role") == "user" and
-                    self.chat_history[-1].get("content") == normalized_content):
-                self.chat_history.append({"role": "user", "content": normalized_content})
-
-            # Get streaming response from LLM
-            # Since LLM.get_response doesn't expose streaming chunks directly,
-            # we need to get the full response and yield it in chunks
-            response_text = self.llm_instance.get_response(
-                prompt=prompt,
-                system_prompt=self._build_system_prompt(tool_param),
-                chat_history=self.chat_history,
-                temperature=kwargs.get('temperature', 0.2),
-                tools=tool_param,
-                output_json=kwargs.get('output_json'),
-                output_pydantic=kwargs.get('output_pydantic'),
-                verbose=self.verbose,
-                markdown=self.markdown,
-                self_reflect=self.self_reflect,
-                max_reflect=self.max_reflect,
-                min_reflect=self.min_reflect,
-                console=self.console,
-                agent_name=self.name,
-                agent_role=self.role,
-                agent_tools=[t.__name__ if hasattr(t, '__name__') else str(t) for t in (tool_param or [])],
-                task_name=kwargs.get('task_name'),
-                task_description=kwargs.get('task_description'),
-                task_id=kwargs.get('task_id'),
-                execute_tool_fn=self.execute_tool,
-                reasoning_steps=kwargs.get('reasoning_steps', self.reasoning_steps),
-                stream=True
-            )
-
-            # Add to chat history
-            self.chat_history.append({"role": "assistant", "content": response_text})
-
-            # For custom LLMs, we simulate streaming by yielding words in chunks
-            # This is a fallback since the LLM class doesn't expose individual chunks
-            words = response_text.split()
-            chunk_size = max(1, len(words) // 20)  # Split into roughly 20 chunks
-
-            for i in range(0, len(words), chunk_size):
-                chunk = ' '.join(words[i:i + chunk_size])
-                if i + chunk_size < len(words):
-                    chunk += ' '
-                yield chunk
-
-        except Exception as e:
-            logging.error(f"Custom LLM streaming error: {e}")
-            yield f"Error: {e}"
-
-    def _openai_stream(self, prompt: str, **kwargs) -> Generator[str, None, None]:
-        """Handle streaming for OpenAI client."""
-        try:
-            # Use the new _build_messages helper method
-            messages, original_prompt = self._build_messages(prompt, kwargs.get('temperature', 0.2),
-                                                             kwargs.get('output_json'), kwargs.get('output_pydantic'))
-
-            # Store chat history length for potential rollback
-            chat_history_length = len(self.chat_history)
-
-            # Normalize original_prompt for consistent chat history storage
-            normalized_content = original_prompt
-            if isinstance(original_prompt, list):
-                normalized_content = next((item["text"] for item in original_prompt if item.get("type") == "text"), "")
-
-            # Prevent duplicate messages
-            if not (self.chat_history and
-                    self.chat_history[-1].get("role") == "user" and
-                    self.chat_history[-1].get("content") == normalized_content):
-                self.chat_history.append({"role": "user", "content": normalized_content})
-
-            # Get streaming response from OpenAI client
-            response = self._chat_completion(messages,
-                                             temperature=kwargs.get('temperature', 0.2),
-                                             tools=kwargs.get('tools'),
-                                             reasoning_steps=kwargs.get('reasoning_steps', self.reasoning_steps),
-                                             stream=True,  # Enable streaming
-                                             task_name=kwargs.get('task_name'),
-                                             task_description=kwargs.get('task_description'),
-                                             task_id=kwargs.get('task_id'))
-
-            if hasattr(response, '__iter__'):  # Check if it's a streaming response
-                collected_content = ""
-                for chunk in response:
-                    if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
-                        chunk_content = chunk.choices[0].delta.content
-                        collected_content += chunk_content
-                        yield chunk_content
-
-                # Add completed response to chat history
-                if collected_content:
-                    self.chat_history.append({"role": "assistant", "content": collected_content})
-            else:
-                # Fallback for non-streaming response
-                response_text = response.choices[0].message.content.strip()
-                self.chat_history.append({"role": "assistant", "content": response_text})
-                yield response_text
-
-        except Exception as e:
-            logging.error(f"OpenAI streaming error: {e}")
-            yield f"Error: {e}"
-
     def execute(self, task, context=None):
         """Execute a task synchronously - backward compatibility method"""
         if hasattr(task, 'description'):
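
For context on the new behavior: _start_stream no longer consumes provider-level streaming chunks. It calls self.chat() once, then simulates streaming by re-emitting the full response in word-sized chunks (roughly 20 per response). Below is a minimal standalone sketch of that chunking, assuming nothing beyond the logic shown in the diff; the simulate_stream helper and its target_chunks parameter are illustrative and not part of the library.

    # Standalone sketch of the word-chunking used above (illustrative only; not library code).
    from typing import Generator

    def simulate_stream(response: str, target_chunks: int = 20) -> Generator[str, None, None]:
        """Yield the response text in roughly target_chunks word-based chunks."""
        words = str(response).split()
        chunk_size = max(1, len(words) // target_chunks)
        for i in range(0, len(words), chunk_size):
            chunk = ' '.join(words[i:i + chunk_size])
            if i + chunk_size < len(words):
                chunk += ' '  # trailing separator so consecutive chunks do not run together
            yield chunk

    # A caller would consume the chunks the same way it would consume _start_stream:
    for piece in simulate_stream("The quick brown fox jumps over the lazy dog"):
        print(piece, end="", flush=True)

One consequence of this approach: splitting on whitespace and re-joining with single spaces normalizes the response's whitespace, so newlines and repeated spaces in the model output are collapsed in the streamed chunks.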