@@ -302,7 +302,12 @@ async def _send_interrupt(self, event: RealtimeModelSendInterrupt) -> None:
302302
303303 elapsed_time_ms = (datetime .now () - self ._audio_start_time ).total_seconds () * 1000
304304 if elapsed_time_ms > 0 and elapsed_time_ms < self ._audio_length_ms :
305- await self ._emit_event (RealtimeModelAudioInterruptedEvent ())
305+ await self ._emit_event (
306+ RealtimeModelAudioInterruptedEvent (
307+ item_id = self ._current_item_id ,
308+ content_index = self ._current_audio_content_index or 0 ,
309+ )
310+ )
306311 converted = _ConversionHelper .convert_interrupt (
307312 self ._current_item_id ,
308313 self ._current_audio_content_index or 0 ,
@@ -331,7 +336,12 @@ async def _handle_audio_delta(self, parsed: ResponseAudioDeltaEvent) -> None:
331336 # Calculate audio length in ms using 24KHz pcm16le
332337 self ._audio_length_ms += self ._calculate_audio_length_ms (audio_bytes )
333338 await self ._emit_event (
334- RealtimeModelAudioEvent (data = audio_bytes , response_id = parsed .response_id )
339+ RealtimeModelAudioEvent (
340+ data = audio_bytes ,
341+ response_id = parsed .response_id ,
342+ item_id = parsed .item_id ,
343+ content_index = parsed .content_index ,
344+ )
335345 )
336346
337347 def _calculate_audio_length_ms (self , audio_bytes : bytes ) -> float :
@@ -429,7 +439,12 @@ async def _handle_ws_event(self, event: dict[str, Any]):
429439 if parsed .type == "response.audio.delta" :
430440 await self ._handle_audio_delta (parsed )
431441 elif parsed .type == "response.audio.done" :
432- await self ._emit_event (RealtimeModelAudioDoneEvent ())
442+ await self ._emit_event (
443+ RealtimeModelAudioDoneEvent (
444+ item_id = parsed .item_id ,
445+ content_index = parsed .content_index ,
446+ )
447+ )
433448 elif parsed .type == "input_audio_buffer.speech_started" :
434449 await self ._send_interrupt (RealtimeModelSendInterrupt ())
435450 elif parsed .type == "response.created" :
0 commit comments