@@ -2037,23 +2037,97 @@ def _start_stream(self, prompt: str, **kwargs) -> Generator[str, None, None]:
                 raise
 
         else:
-            # For OpenAI-style models, fall back to the chat method for now
-            # TODO: Implement OpenAI streaming in future iterations
-            response = self.chat(prompt, **kwargs)
+            # For OpenAI-style models, implement proper streaming without display
+            # Handle knowledge search
+            actual_prompt = prompt
+            if self.knowledge:
+                search_results = self.knowledge.search(prompt, agent_id=self.agent_id)
+                if search_results:
+                    if isinstance(search_results, dict) and 'results' in search_results:
+                        knowledge_content = "\n".join([result['memory'] for result in search_results['results']])
+                    else:
+                        knowledge_content = "\n".join(search_results)
+                    actual_prompt = f"{prompt}\n\nKnowledge: {knowledge_content}"
+
+            # Handle tools properly
+            tools = kwargs.get('tools', self.tools)
+            if tools is None or (isinstance(tools, list) and len(tools) == 0):
+                tool_param = self.tools
+            else:
+                tool_param = tools
 
-            if response:
-                # Simulate streaming by yielding the response in word chunks
-                words = str(response).split()
-                chunk_size = max(1, len(words) // 20)
+            # Build messages using the helper method
+            messages, original_prompt = self._build_messages(actual_prompt, kwargs.get('temperature', 0.2),
+                                                             kwargs.get('output_json'), kwargs.get('output_pydantic'))
+
+            # Store chat history length for potential rollback
+            chat_history_length = len(self.chat_history)
+
+            # Normalize original_prompt for consistent chat history storage
+            normalized_content = original_prompt
+            if isinstance(original_prompt, list):
+                normalized_content = next((item["text"] for item in original_prompt if item.get("type") == "text"), "")
+
+            # Prevent duplicate messages in chat history
+            if not (self.chat_history and
+                    self.chat_history[-1].get("role") == "user" and
+                    self.chat_history[-1].get("content") == normalized_content):
+                self.chat_history.append({"role": "user", "content": normalized_content})
+
+            try:
+                # Check if OpenAI client is available
+                if self._openai_client is None:
+                    raise ValueError("OpenAI client is not initialized. Please provide OPENAI_API_KEY or use a custom LLM provider.")
 
-                for i in range(0, len(words), chunk_size):
-                    chunk_words = words[i:i + chunk_size]
-                    chunk = ' '.join(chunk_words)
-
-                    if i + chunk_size < len(words):
-                        chunk += ' '
+                # Format tools for OpenAI
+                formatted_tools = self._format_tools_for_completion(tool_param)
+
+                # Create streaming completion directly without display function
+                if formatted_tools:
+                    # With tools - need to handle tool calls
+                    completion = self._openai_client.client.chat.completions.create(
+                        model=self.llm,
+                        messages=messages,
+                        temperature=kwargs.get('temperature', 0.2),
+                        tools=formatted_tools,
+                        stream=True
+                    )
+                else:
+                    # Simple text completion
+                    completion = self._openai_client.client.chat.completions.create(
+                        model=self.llm,
+                        messages=messages,
+                        temperature=kwargs.get('temperature', 0.2),
+                        stream=True
+                    )
+
+                # Stream the response chunks without display
+                response_text = ""
+                for chunk in completion:
+                    if chunk.choices[0].delta.content is not None:
+                        chunk_content = chunk.choices[0].delta.content
+                        response_text += chunk_content
+                        yield chunk_content
+
+                # Add complete response to chat history
+                if response_text:
+                    self.chat_history.append({"role": "assistant", "content": response_text})
 
-                    yield chunk
+            except Exception as e:
+                # Rollback chat history on error
+                self.chat_history = self.chat_history[:chat_history_length]
+                logging.error(f"OpenAI streaming error: {e}")
+                # Fall back to simulated streaming
+                response = self.chat(prompt, **kwargs)
+                if response:
+                    words = str(response).split()
+                    chunk_size = max(1, len(words) // 20)
+                    for i in range(0, len(words), chunk_size):
+                        chunk_words = words[i:i + chunk_size]
+                        chunk = ' '.join(chunk_words)
+                        if i + chunk_size < len(words):
+                            chunk += ' '
+                        yield chunk
 
         # Restore original verbose mode
         self.verbose = original_verbose
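
For reference, a minimal sketch of how the new streaming path might be consumed (an assumption, not a confirmed public API: `agent` is an instance of the class owning `_start_stream`, and since `_start_stream` is private, real callers would go through whatever public streaming entry point wraps it):

    # Hypothetical consumption sketch -- driving the generator directly is
    # illustrative only; `temperature` is one of the kwargs the method reads.
    for chunk in agent._start_stream("Summarize the latest report", temperature=0.2):
        print(chunk, end="", flush=True)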