cancellation - openai (strands-agents#73)

pgrayy · web-flow · commit 1b5f9a61cdf7 · 2025-11-21T15:32:09.000-05:00
diff --git a/src/strands/experimental/bidi/models/novasonic.py b/src/strands/experimental/bidi/models/novasonic.py
@@ -147,7 +147,7 @@ async def start(
             RuntimeError: If user calls start again without first stopping.
         """
         if self._connection_id:
-            raise RuntimeError("call stop before starting again")
+            raise RuntimeError("model already started | call stop before starting again")
 
         logger.debug("nova connection starting")
 
@@ -233,7 +233,7 @@ async def receive(self) -> AsyncIterable[BidiOutputEvent]:  # type: ignore[overr
             RuntimeError: If start has not been called.
         """
         if not self._connection_id:
-            raise RuntimeError("must call start")
+            raise RuntimeError("model not started | call start before receiving")
 
         logger.debug("nova event stream starting")
         yield BidiConnectionStartEvent(connection_id=self._connection_id, model=self.model_id)
@@ -260,7 +260,7 @@ async def send(self, content: BidiInputEvent | ToolResultEvent) -> None:
             ValueError: If content type not supported (e.g., image content).
         """
         if not self._connection_id:
-            raise RuntimeError("must call start")
+            raise RuntimeError("model not started | call start before sending")
 
         if isinstance(content, BidiTextInputEvent):
             await self._send_text_content(content.text)
@@ -271,7 +271,7 @@ async def send(self, content: BidiInputEvent | ToolResultEvent) -> None:
             if tool_result:
                 await self._send_tool_result(tool_result)
         else:
-            raise ValueError(f"content_type={type(content)} | content not supported by nova sonic")
+            raise ValueError(f"content_type={type(content)} | content not supported")
 
     async def _start_audio_connection(self) -> None:
         """Internal: Start audio input connection (call once before sending audio chunks)."""
diff --git a/src/strands/experimental/bidi/models/openai.py b/src/strands/experimental/bidi/models/openai.py
@@ -16,13 +16,11 @@
 from ....types._events import ToolResultEvent, ToolUseStreamEvent
 from ....types.content import Messages
 from ....types.tools import ToolResult, ToolSpec, ToolUse
+from .._async import stop_all
 from ..types.events import (
     BidiAudioInputEvent,
     BidiAudioStreamEvent,
-    BidiConnectionCloseEvent,
     BidiConnectionStartEvent,
-    BidiErrorEvent,
-    BidiImageInputEvent,
     BidiInputEvent,
     BidiInterruptionEvent,
     BidiOutputEvent,
@@ -70,6 +68,8 @@ class BidiOpenAIRealtimeModel(BidiModel):
     function calling, and event conversion to Strands format.
     """
 
+    _websocket: ClientConnection
+
     def __init__(
         self,
         model: str = DEFAULT_MODEL,
@@ -104,9 +104,7 @@ def __init__(
                 )
 
         # Connection state (initialized in start())
-        self.websocket: ClientConnection
-        self.connection_id: str
-        self._active: bool = False
+        self._connection_id: str | None = None
 
         self._function_call_buffer: dict[str, Any] = {}
 
@@ -127,45 +125,35 @@ async def start(
             messages: Conversation history to initialize with.
             **kwargs: Additional configuration options.
         """
-        if self._active:
-            raise RuntimeError("Connection already active. Close the existing connection before creating a new one.")
+        if self._connection_id:
+            raise RuntimeError("model already started | call stop before starting again")
 
         logger.info("openai realtime connection starting")
 
-        try:
-            # Initialize connection state
-            self.connection_id = str(uuid.uuid4())
-            self._active = True
-            self._function_call_buffer = {}
-
-            # Establish WebSocket connection
-            url = f"{OPENAI_REALTIME_URL}?model={self.model}"
+        # Initialize connection state
+        self._connection_id = str(uuid.uuid4())
 
-            headers = [("Authorization", f"Bearer {self.api_key}")]
-            if self.organization:
-                headers.append(("OpenAI-Organization", self.organization))
-            if self.project:
-                headers.append(("OpenAI-Project", self.project))
+        self._function_call_buffer = {}
 
-            self.websocket = await websockets.connect(url, additional_headers=headers)
-            logger.info("connection_id=<%s> | websocket connected successfully", self.connection_id)
+        # Establish WebSocket connection
+        url = f"{OPENAI_REALTIME_URL}?model={self.model}"
 
-            # Configure session
-            session_config = self._build_session_config(system_prompt, tools)
-            await self._send_event({"type": "session.update", "session": session_config})
+        headers = [("Authorization", f"Bearer {self.api_key}")]
+        if self.organization:
+            headers.append(("OpenAI-Organization", self.organization))
+        if self.project:
+            headers.append(("OpenAI-Project", self.project))
 
-            # Add conversation history if provided
-            if messages:
-                await self._add_conversation_history(messages)
+        self._websocket = await websockets.connect(url, additional_headers=headers)
+        logger.info("connection_id=<%s> | websocket connected successfully", self._connection_id)
 
-        except Exception as e:
-            self._active = False
-            logger.error("error=<%s> | openai connection failed", e)
-            raise
+        # Configure session
+        session_config = self._build_session_config(system_prompt, tools)
+        await self._send_event({"type": "session.update", "session": session_config})
 
-    def _require_active(self) -> bool:
-        """Check if session is active."""
-        return self._active
+        # Add conversation history if provided
+        if messages:
+            await self._add_conversation_history(messages)
 
     def _create_text_event(self, text: str, role: str, is_final: bool = True) -> BidiTranscriptStreamEvent:
         """Create standardized transcript event.
@@ -275,27 +263,16 @@ async def _add_conversation_history(self, messages: Messages) -> None:
 
     async def receive(self) -> AsyncIterable[BidiOutputEvent]:  # type: ignore
         """Receive OpenAI events and convert to Strands TypedEvent format."""
-        # Emit connection start event
-        yield BidiConnectionStartEvent(connection_id=self.connection_id, model=self.model)
-
-        try:
-            while self._active:
-                async for message in self.websocket:
-                    if not self._active:
-                        break  # type: ignore
+        if not self._connection_id:
+            raise RuntimeError("model not started | call start before receiving")
 
-                    openai_event = json.loads(message)
+        yield BidiConnectionStartEvent(connection_id=self._connection_id, model=self.model)
 
-                    for event in self._convert_openai_event(openai_event) or []:
-                        yield event
+        async for message in self._websocket:
+            openai_event = json.loads(message)
 
-        except Exception as e:
-            logger.error("error=<%s> | error receiving openai realtime event", e)
-            yield BidiErrorEvent(error=e)
-        finally:
-            # Emit connection close event
-            yield BidiConnectionCloseEvent(connection_id=self.connection_id, reason="complete")
-            self._active = False
+            for event in self._convert_openai_event(openai_event) or []:
+                yield event
 
     def _convert_openai_event(self, openai_event: dict[str, Any]) -> list[BidiOutputEvent] | None:
         """Convert OpenAI events to Strands TypedEvent format."""
@@ -557,26 +534,24 @@ async def send(
 
         Args:
             content: Typed event (BidiTextInputEvent, BidiAudioInputEvent, BidiImageInputEvent, or ToolResultEvent).
+
+        Raises:
+            ValueError: If content type not supported (e.g., image content).
         """
-        if not self._require_active():
-            return
-
-        try:
-            # Note: TypedEvent inherits from dict, so isinstance checks for TypedEvent must come first
-            if isinstance(content, BidiTextInputEvent):
-                await self._send_text_content(content.text)
-            elif isinstance(content, BidiAudioInputEvent):
-                await self._send_audio_content(content)
-            elif isinstance(content, BidiImageInputEvent):
-                # BidiImageInputEvent - not supported by OpenAI Realtime yet
-                logger.warning("Image input not supported by OpenAI Realtime API")
-            elif isinstance(content, ToolResultEvent):
-                tool_result = content.get("tool_result")
-                if tool_result:
-                    await self._send_tool_result(tool_result)
-        except Exception as e:
-            logger.error("error=<%s> | error sending content to openai", e)
-            raise  # Propagate exception for debugging in experimental code
+        if not self._connection_id:
+            raise RuntimeError("model not started | call start before sending")
+
+        # Note: TypedEvent inherits from dict, so isinstance checks for TypedEvent must come first
+        if isinstance(content, BidiTextInputEvent):
+            await self._send_text_content(content.text)
+        elif isinstance(content, BidiAudioInputEvent):
+            await self._send_audio_content(content)
+        elif isinstance(content, ToolResultEvent):
+            tool_result = content.get("tool_result")
+            if tool_result:
+                await self._send_tool_result(tool_result)
+        else:
+            raise ValueError(f"content_type={type(content)} | content not supported")
 
     async def _send_audio_content(self, audio_input: BidiAudioInputEvent) -> None:
         """Internal: Send audio content to OpenAI for processing."""
@@ -599,7 +574,7 @@ async def _send_tool_result(self, tool_result: ToolResult) -> None:
 
         logger.debug("tool_use_id=<%s> | sending openai tool result", tool_use_id)
 
-        # Extract result content
+        # TODO: We need to extract all content and content types
         result_data: dict[Any, Any] | str = {}
         if "content" in tool_result:
             # Extract text from content blocks
@@ -616,25 +591,23 @@ async def _send_tool_result(self, tool_result: ToolResult) -> None:
 
     async def stop(self) -> None:
         """Close session and cleanup resources."""
-        if not self._active:
-            return
-
         logger.debug("openai realtime connection cleanup starting")
-        self._active = False
 
-        try:
-            await self.websocket.close()
-        except Exception as e:
-            logger.warning("error=<%s> | error closing openai realtime websocket", e)
+        async def stop_websocket() -> None:
+            if not hasattr(self, "_websocket"):
+                return
+
+            await self._websocket.close()
+
+        async def stop_connection() -> None:
+            self._connection_id = None
+
+        await stop_all(stop_websocket, stop_connection)
 
         logger.debug("openai realtime connection closed")
 
     async def _send_event(self, event: dict[str, Any]) -> None:
         """Send event to OpenAI via WebSocket."""
-        try:
-            message = json.dumps(event)
-            await self.websocket.send(message)
-            logger.debug("event_type=<%s> | openai event sent", event.get("type"))
-        except Exception as e:
-            logger.error("error=<%s> | error sending openai event", e)
-            raise
+        message = json.dumps(event)
+        await self._websocket.send(message)
+        logger.debug("event_type=<%s> | openai event sent", event.get("type"))
diff --git a/tests/strands/experimental/bidi/models/test_novasonic.py b/tests/strands/experimental/bidi/models/test_novasonic.py
@@ -166,7 +166,7 @@ async def test_send_edge_cases(nova_model):
         mime_type="image/jpeg",
     )
 
-    with pytest.raises(ValueError, match=r"content not supported by nova sonic"):
+    with pytest.raises(ValueError, match=r"content not supported"):
         await nova_model.send(image_event)
 
     await nova_model.stop()
diff --git a/tests/strands/experimental/bidi/models/test_openai.py b/tests/strands/experimental/bidi/models/test_openai.py
@@ -93,8 +93,6 @@ def test_model_initialization(api_key, model_name):
     model_default = BidiOpenAIRealtimeModel(api_key="test-key")
     assert model_default.model == "gpt-realtime"
     assert model_default.api_key == "test-key"
-    assert model_default._active is False
-    assert model_default.websocket is None
 
     # Test with custom model
     model_custom = BidiOpenAIRealtimeModel(model=model_name, api_key=api_key)
@@ -129,14 +127,12 @@ async def test_connection_lifecycle(mock_websockets_connect, model, system_promp
 
     # Test basic connection
     await model.start()
-    assert model._active is True
-    assert model.connection_id is not None
-    assert model.websocket == mock_ws
+    assert model._connection_id is not None
+    assert model._websocket == mock_ws
     mock_connect.assert_called_once()
 
     # Test close
     await model.stop()
-    assert model._active is False
     mock_ws.close.assert_called_once()
 
     # Test connection with system prompt
@@ -202,20 +198,20 @@ async def async_connect(*args, **kwargs):
     # Test double connection
     model2 = BidiOpenAIRealtimeModel(model=model_name, api_key=api_key)
     await model2.start()
-    with pytest.raises(RuntimeError, match="Connection already active"):
+    with pytest.raises(RuntimeError, match=r"call stop before starting again"):
         await model2.start()
     await model2.stop()
 
     # Test close when not connected
     model3 = BidiOpenAIRealtimeModel(model=model_name, api_key=api_key)
     await model3.stop()  # Should not raise
 
-    # Test close error handling (should not raise, just log)
+    # Test close error
     model4 = BidiOpenAIRealtimeModel(model=model_name, api_key=api_key)
     await model4.start()
     mock_ws.close.side_effect = Exception("Close failed")
-    await model4.stop()  # Should not raise
-    assert model4._active is False
+    with pytest.raises(ExceptionGroup):  # noqa: F821
+        await model4.stop()
 
 
 # Send Method Tests
@@ -279,7 +275,8 @@ async def test_send_edge_cases(mock_websockets_connect, model):
 
     # Test send when inactive
     text_input = BidiTextInputEvent(text="Hello", role="user")
-    await model.send(text_input)
+    with pytest.raises(RuntimeError, match=r"call start before sending"):
+        await model.send(text_input)
     mock_ws.send.assert_not_called()
 
     # Test image input (not supported, base64 encoded, no encoding parameter)
@@ -289,15 +286,8 @@ async def test_send_edge_cases(mock_websockets_connect, model):
         image=image_b64,
         mime_type="image/jpeg",
     )
-    with unittest.mock.patch("strands.experimental.bidi.models.openai.logger") as mock_logger:
+    with pytest.raises(ValueError, match=r"content not supported"):
         await model.send(image_input)
-        mock_logger.warning.assert_called_with("Image input not supported by OpenAI Realtime API")
-
-    # Test unknown content type
-    unknown_content = {"unknown_field": "value"}
-    with unittest.mock.patch("strands.experimental.bidi.models.openai.logger") as mock_logger:
-        await model.send(unknown_content)
-        assert mock_logger.warning.called
 
     await model.stop()
 
@@ -318,7 +308,7 @@ async def test_receive_lifecycle_events(mock_websockets_connect, model):
 
     # First event should be connection start (new TypedEvent format)
     assert first_event.get("type") == "bidi_connection_start"
-    assert first_event.get("connection_id") == model.connection_id
+    assert first_event.get("connection_id") == model._connection_id
     assert first_event.get("model") == model.model
 
     # Close to trigger session end
@@ -332,9 +322,6 @@ async def test_receive_lifecycle_events(mock_websockets_connect, model):
     except StopAsyncIteration:
         pass
 
-    # Last event should be connection close (new TypedEvent format)
-    assert events[-1].get("type") == "bidi_connection_close"
-
 
 @pytest.mark.asyncio
 async def test_event_conversion(mock_websockets_connect, model):
@@ -463,12 +450,6 @@ def test_tool_conversion(model, tool_spec):
 
 def test_helper_methods(model):
     """Test various helper methods."""
-    # Test _require_active
-    assert model._require_active() is False
-    model._active = True
-    assert model._require_active() is True
-    model._active = False
-
     # Test _create_text_event (now returns BidiTranscriptStreamEvent)
     text_event = model._create_text_event("Hello", "user")
     assert isinstance(text_event, BidiTranscriptStreamEvent)

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -166,7 +166,7 @@ async def test_send_edge_cases(nova_model):` |
| `166` | `166` | `mime_type="image/jpeg",` |
| `167` | `167` | `)` |
| `168` | `168` |  |
| `169` |  | `- with pytest.raises(ValueError, match=r"content not supported by nova sonic"):` |
|  | `169` | `+ with pytest.raises(ValueError, match=r"content not supported"):` |
| `170` | `170` | `await nova_model.send(image_event)` |
| `171` | `171` |  |
| `172` | `172` | `await nova_model.stop()` |