@@ -2037,23 +2037,147 @@ def _start_stream(self, prompt: str, **kwargs) -> Generator[str, None, None]:
                 raise

         else:
-            # For OpenAI-style models, fall back to the chat method for now
-            # TODO: Implement OpenAI streaming in future iterations
-            response = self.chat(prompt, **kwargs)
+            # For OpenAI-style models, implement proper streaming without display
+            # Handle knowledge search
+            actual_prompt = prompt
+            if self.knowledge:
+                search_results = self.knowledge.search(prompt, agent_id=self.agent_id)
+                if search_results:
+                    if isinstance(search_results, dict) and 'results' in search_results:
+                        knowledge_content = "\n".join([result['memory'] for result in search_results['results']])
+                    else:
+                        knowledge_content = "\n".join(search_results)
+                    actual_prompt = f"{prompt}\n\nKnowledge: {knowledge_content}"

-            if response:
-                # Simulate streaming by yielding the response in word chunks
-                words = str(response).split()
-                chunk_size = max(1, len(words) // 20)
+            # Handle tools properly
+            tools = kwargs.get('tools', self.tools)
+            if tools is None or (isinstance(tools, list) and len(tools) == 0):
+                tool_param = self.tools
+            else:
+                tool_param = tools
+
+            # Build messages using the helper method
+            messages, original_prompt = self._build_messages(actual_prompt, kwargs.get('temperature', 0.2),
+                                                             kwargs.get('output_json'), kwargs.get('output_pydantic'))
+
+            # Store chat history length for potential rollback
+            chat_history_length = len(self.chat_history)
+
+            # Normalize original_prompt for consistent chat history storage
+            normalized_content = original_prompt
+            if isinstance(original_prompt, list):
+                normalized_content = next((item["text"] for item in original_prompt if item.get("type") == "text"), "")
+
+            # Prevent duplicate messages in chat history
+            if not (self.chat_history and
+                    self.chat_history[-1].get("role") == "user" and
+                    self.chat_history[-1].get("content") == normalized_content):
+                self.chat_history.append({"role": "user", "content": normalized_content})
+
+            try:
+                # Check if OpenAI client is available
+                if self._openai_client is None:
+                    raise ValueError("OpenAI client is not initialized. Please provide OPENAI_API_KEY or use a custom LLM provider.")
+
+                # Format tools for OpenAI
+                formatted_tools = self._format_tools_for_completion(tool_param)
+
+                # Create streaming completion directly without display function
+                completion_args = {
+                    "model": self.llm,
+                    "messages": messages,
+                    "temperature": kwargs.get('temperature', 0.2),
+                    "stream": True
+                }
+                if formatted_tools:
+                    completion_args["tools"] = formatted_tools
+
+                completion = self._openai_client.sync_client.chat.completions.create(**completion_args)
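+                # With stream=True the client returns an iterator of
+                # ChatCompletionChunk objects rather than a single response.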
+
+                # Stream the response chunks without display
+                response_text = ""
+                tool_calls_data = []

-                for i in range(0, len(words), chunk_size):
-                    chunk_words = words[i:i + chunk_size]
-                    chunk = ' '.join(chunk_words)
+                for chunk in completion:
+                    delta = chunk.choices[0].delta
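+                    # Each chunk carries an incremental delta: roughly
+                    # ChoiceDelta(content=' world', tool_calls=None) for plain
+                    # text, or content=None with partial tool_calls entries.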

-                    if i + chunk_size < len(words):
-                        chunk += ' '
+                    # Handle text content
+                    if delta.content is not None:
+                        chunk_content = delta.content
+                        response_text += chunk_content
+                        yield chunk_content

-                    yield chunk
+                    # Handle tool calls (accumulate but don't yield as chunks)
+                    if hasattr(delta, 'tool_calls') and delta.tool_calls:
+                        for tool_call_delta in delta.tool_calls:
+                            # Extend tool_calls_data list to accommodate the tool call index
+                            while len(tool_calls_data) <= tool_call_delta.index:
+                                tool_calls_data.append({'id': '', 'function': {'name': '', 'arguments': ''}})
+
+                            # Accumulate tool call data
+                            if tool_call_delta.id:
+                                tool_calls_data[tool_call_delta.index]['id'] = tool_call_delta.id
+                            if tool_call_delta.function.name:
+                                tool_calls_data[tool_call_delta.index]['function']['name'] = tool_call_delta.function.name
+                            if tool_call_delta.function.arguments:
+                                tool_calls_data[tool_call_delta.index]['function']['arguments'] += tool_call_delta.function.arguments
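+                                # Arguments stream in as JSON fragments (e.g.
+                                # '{"que' then 'ry": "x"}'), so concatenating
+                                # the pieces rebuilds the full argument string.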
+
+                # Handle any tool calls that were accumulated
+                if tool_calls_data:
+                    # Add assistant message with tool calls to chat history
+                    assistant_message = {"role": "assistant", "content": response_text}
+                    assistant_message["tool_calls"] = [
+                        {
+                            "id": tc['id'],
+                            "type": "function",
+                            "function": tc['function']
+                        } for tc in tool_calls_data if tc['id']
+                    ]
+                    self.chat_history.append(assistant_message)
+
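+                    # OpenAI's chat format expects the assistant message that
+                    # carries tool_calls to precede the matching role="tool"
+                    # results appended below.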
+                    # Execute tool calls and add results to chat history
+                    for tool_call in tool_calls_data:
+                        if tool_call['id'] and tool_call['function']['name']:
+                            try:
+                                tool_result = self.execute_tool(
+                                    tool_call['function']['name'],
+                                    tool_call['function']['arguments']
+                                )
+                                # Add tool result to chat history
+                                self.chat_history.append({
+                                    "role": "tool",
+                                    "tool_call_id": tool_call['id'],
+                                    "content": str(tool_result)
+                                })
+                            except Exception as tool_error:
+                                logging.error(f"Tool execution error in streaming: {tool_error}")
+                                # Add error result to chat history
+                                self.chat_history.append({
+                                    "role": "tool",
+                                    "tool_call_id": tool_call['id'],
+                                    "content": f"Error: {str(tool_error)}"
+                                })
+                else:
+                    # Add complete response to chat history (text-only response)
+                    if response_text:
+                        self.chat_history.append({"role": "assistant", "content": response_text})
+
+            except Exception as e:
+                # Roll back chat history on error
+                self.chat_history = self.chat_history[:chat_history_length]
+                logging.error(f"OpenAI streaming error: {e}")
+                # Fall back to simulated streaming via the non-streaming chat method
+                response = self.chat(prompt, **kwargs)
+                if response:
+                    words = str(response).split()
+                    chunk_size = max(1, len(words) // 20)
+                    for i in range(0, len(words), chunk_size):
+                        chunk_words = words[i:i + chunk_size]
+                        chunk = ' '.join(chunk_words)
+                        if i + chunk_size < len(words):
+                            chunk += ' '
+                        yield chunk

         # Restore original verbose mode
         self.verbose = original_verbose
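
A minimal consumption sketch for the generator patched above (hypothetical: assumes an already-constructed agent instance; _start_stream is the private method shown in this diff, normally reached through the agent's public streaming API):

    for chunk in agent._start_stream("Summarize the design doc"):
        print(chunk, end="", flush=True)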