@@ -1937,7 +1937,191 @@ def run(self):
 
     def start(self, prompt: str, **kwargs):
         """Start the agent with a prompt. This is a convenience method that wraps chat()."""
-        return self.chat(prompt, **kwargs)
+        # Check if streaming is enabled and user wants streaming chunks
+        if self.stream and kwargs.get('stream', True):
+            return self._start_stream(prompt, **kwargs)
+        else:
+            return self.chat(prompt, **kwargs)
+
+    def _start_stream(self, prompt: str, **kwargs):
+        """Generator method that yields streaming chunks from the agent."""
+        # Import here to avoid circular imports
+        from typing import Generator
+
+        # Reset the final display flag for each new conversation
+        self._final_display_shown = False
+
+        # Search for existing knowledge if any knowledge is provided
+        if self.knowledge:
+            search_results = self.knowledge.search(prompt, agent_id=self.agent_id)
+            if search_results:
+                # Check if search_results is a list of dictionaries or strings
+                if isinstance(search_results, dict) and 'results' in search_results:
+                    # Extract memory content from the results
+                    knowledge_content = "\n".join([result['memory'] for result in search_results['results']])
+                else:
+                    # If search_results is a list of strings, join them directly
+                    knowledge_content = "\n".join(search_results)
+
+                # Append found knowledge to the prompt
+                prompt = f"{prompt}\n\nKnowledge: {knowledge_content}"
+
+        # Get streaming response using the internal streaming method
+        for chunk in self._chat_stream(prompt, **kwargs):
+            yield chunk
+
+    def _chat_stream(self, prompt, temperature=0.2, tools=None, output_json=None, output_pydantic=None, reasoning_steps=False, **kwargs):
+        """Internal streaming method that yields chunks from the LLM response."""
+
+        # Use the same logic as chat() but yield chunks instead of returning the final response
+        if self._using_custom_llm:
+            # For custom LLM, yield chunks from the LLM instance
+            for chunk in self._custom_llm_stream(prompt, temperature, tools, output_json, output_pydantic, reasoning_steps, **kwargs):
+                yield chunk
+        else:
+            # For standard OpenAI client, yield chunks from the streaming response
+            for chunk in self._openai_stream(prompt, temperature, tools, output_json, output_pydantic, reasoning_steps, **kwargs):
+                yield chunk
+
+    def _custom_llm_stream(self, prompt, temperature=0.2, tools=None, output_json=None, output_pydantic=None, reasoning_steps=False, **kwargs):
+        """Handle streaming for custom LLM instances."""
+        try:
+            # Special handling for MCP tools when using provider/model format
+            if tools is None or (isinstance(tools, list) and len(tools) == 0):
+                tool_param = self.tools
+            else:
+                tool_param = tools
+
+            # Convert MCP tool objects to OpenAI format if needed
+            if tool_param is not None:
+                from ..mcp.mcp import MCP
+                if isinstance(tool_param, MCP) and hasattr(tool_param, 'to_openai_tool'):
+                    openai_tool = tool_param.to_openai_tool()
+                    if openai_tool:
+                        if isinstance(openai_tool, list):
+                            tool_param = openai_tool
+                        else:
+                            tool_param = [openai_tool]
+
+            # Store chat history length for potential rollback
+            chat_history_length = len(self.chat_history)
+
+            # Normalize prompt content for consistent chat history storage
+            normalized_content = prompt
+            if isinstance(prompt, list):
+                normalized_content = next((item["text"] for item in prompt if item.get("type") == "text"), "")
+
+            # Prevent duplicate messages
+            if not (self.chat_history and
+                    self.chat_history[-1].get("role") == "user" and
+                    self.chat_history[-1].get("content") == normalized_content):
+                self.chat_history.append({"role": "user", "content": normalized_content})
+
+            # Get streaming response from LLM instance
+            if hasattr(self.llm_instance, 'get_response_stream'):
+                # Use streaming method if available
+                stream_response = self.llm_instance.get_response_stream(
+                    prompt=prompt,
+                    system_prompt=self._build_system_prompt(tools),
+                    chat_history=self.chat_history,
+                    temperature=temperature,
+                    tools=tool_param,
+                    output_json=output_json,
+                    output_pydantic=output_pydantic,
+                    verbose=self.verbose,
+                    markdown=self.markdown,
+                    console=self.console,
+                    agent_name=self.name,
+                    agent_role=self.role,
+                    agent_tools=[t.__name__ if hasattr(t, '__name__') else str(t) for t in (tools if tools is not None else self.tools)],
+                    reasoning_steps=reasoning_steps,
+                    execute_tool_fn=self.execute_tool
+                )
+
+                accumulated_response = ""
+                for chunk in stream_response:
+                    accumulated_response += chunk
+                    yield chunk
+
+                # Add final response to chat history
+                self.chat_history.append({"role": "assistant", "content": accumulated_response})
+
+            else:
+                # Fallback to regular response if streaming is not available
+                response_text = self.llm_instance.get_response(
+                    prompt=prompt,
+                    system_prompt=self._build_system_prompt(tools),
+                    chat_history=self.chat_history,
+                    temperature=temperature,
+                    tools=tool_param,
+                    output_json=output_json,
+                    output_pydantic=output_pydantic,
+                    verbose=self.verbose,
+                    markdown=self.markdown,
+                    console=self.console,
+                    agent_name=self.name,
+                    agent_role=self.role,
+                    agent_tools=[t.__name__ if hasattr(t, '__name__') else str(t) for t in (tools if tools is not None else self.tools)],
+                    reasoning_steps=reasoning_steps,
+                    execute_tool_fn=self.execute_tool,
+                    stream=True
+                )
+
+                self.chat_history.append({"role": "assistant", "content": response_text})
+                # Yield the complete response as a single chunk
+                yield response_text
+
+        except Exception as e:
+            # Rollback chat history on error
+            self.chat_history = self.chat_history[:chat_history_length]
+            yield f"Error: {str(e)}"
+
+    def _openai_stream(self, prompt, temperature=0.2, tools=None, output_json=None, output_pydantic=None, reasoning_steps=False, **kwargs):
+        """Handle streaming for the standard OpenAI client."""
+        try:
+            # Use the new _build_messages helper method
+            messages, original_prompt = self._build_messages(prompt, temperature, output_json, output_pydantic)
+
+            # Store chat history length for potential rollback
+            chat_history_length = len(self.chat_history)
+
+            # Normalize original_prompt for consistent chat history storage
+            normalized_content = original_prompt
+            if isinstance(original_prompt, list):
+                normalized_content = next((item["text"] for item in original_prompt if item.get("type") == "text"), "")
+
+            # Prevent duplicate messages
+            if not (self.chat_history and
+                    self.chat_history[-1].get("role") == "user" and
+                    self.chat_history[-1].get("content") == normalized_content):
+                self.chat_history.append({"role": "user", "content": normalized_content})
+
+            # Get streaming response from OpenAI client
+            if self._openai_client is None:
+                raise ValueError("OpenAI client is not initialized. Please provide OPENAI_API_KEY or use a custom LLM provider.")
+
+            # Stream the response using OpenAI client
+            accumulated_response = ""
+            for chunk in self._openai_client.chat_completion_with_tools_stream(
+                messages=messages,
+                model=self.llm,
+                temperature=temperature,
+                tools=self._format_tools_for_completion(tools),
+                execute_tool_fn=self.execute_tool,
+                reasoning_steps=reasoning_steps,
+                verbose=self.verbose,
+                max_iterations=10
+            ):
+                accumulated_response += chunk
+                yield chunk
+
+            # Add the accumulated response to chat history
+            self.chat_history.append({"role": "assistant", "content": accumulated_response})
+
+        except Exception as e:
+            # Rollback chat history on error
+            self.chat_history = self.chat_history[:chat_history_length]
+            yield f"Error: {str(e)}"
 
     def execute(self, task, context=None):
         """Execute a task synchronously - backward compatibility method"""
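
For reference, a minimal consumption sketch of the streaming path this hunk introduces: `agent.start()` now returns a generator when the agent was constructed with `stream=True` (and `stream=False` is not passed at call time), and falls back to the blocking `chat()` call otherwise. The `agent` object, its construction, and the helper function below are assumptions for illustration, not part of the diff.

```python
# Sketch only: `agent` is assumed to be an instance of the surrounding Agent
# class, already constructed with stream=True; nothing below is in the diff.

def print_streamed(agent, prompt: str) -> str:
    """Print chunks as they arrive from agent.start() and return the full text."""
    collected = []
    for chunk in agent.start(prompt):        # generator path: self.stream is truthy
        print(chunk, end="", flush=True)     # render each chunk incrementally
        collected.append(chunk)
    print()
    return "".join(collected)

# Passing stream=False at call time skips _start_stream() and returns the
# regular chat() result instead of a generator, e.g.:
# full_response = agent.start("Summarize the latest report", stream=False)
```

Note that on failure both `_custom_llm_stream()` and `_openai_stream()` roll back the chat history and yield a single `Error: ...` string rather than raising, so callers that need to distinguish errors from normal output must inspect the chunks themselves.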