Skip to content

Realtime: update model to have a single send_event method #1111

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 19 additions & 17 deletions src/agents/realtime/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,29 +26,17 @@
"""The name of a realtime model."""


RealtimeAudioFormat: TypeAlias = Union[Literal["pcm16", "g711_ulaw", "g711_alaw"], str]


class RealtimeClientMessage(TypedDict):
"""A raw message to be sent to the model."""

type: str # explicitly required
other_data: NotRequired[dict[str, Any]]
"""Merged into the message body."""


class RealtimeUserInputText(TypedDict):
    """A text item tagged with the ``input_text`` type."""

    # Discriminator; the only permitted value is "input_text".
    type: Literal["input_text"]
    # The text carried by this item.
    text: str


class RealtimeUserInputMessage(TypedDict):
    """A structured user message made up of text items."""

    # Discriminator; the only permitted value is "message".
    type: Literal["message"]
    # Author role; the only permitted value is "user".
    role: Literal["user"]
    # The text items that make up the message.
    content: list[RealtimeUserInputText]


RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]


RealtimeAudioFormat: TypeAlias = Union[Literal["pcm16", "g711_ulaw", "g711_alaw"], str]


class RealtimeInputAudioTranscriptionConfig(TypedDict):
language: NotRequired[str]
model: NotRequired[Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"] | str]
Expand Down Expand Up @@ -124,3 +112,17 @@ class RealtimeRunConfig(TypedDict):
"""Whether tracing is disabled for this run."""

# TODO (rm) Add history audio storage config


class RealtimeUserInputText(TypedDict):
    """A text item tagged with the ``input_text`` type."""

    # Discriminator; the only permitted value is "input_text".
    type: Literal["input_text"]
    # The text carried by this item.
    text: str


class RealtimeUserInputMessage(TypedDict):
    """A structured user message made up of text items."""

    # Discriminator; the only permitted value is "message".
    type: Literal["message"]
    # Author role; the only permitted value is "user".
    role: Literal["user"]
    # The text items that make up the message.
    content: list[RealtimeUserInputText]


RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
39 changes: 4 additions & 35 deletions src/agents/realtime/model.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
from __future__ import annotations

import abc
from typing import Any, Callable
from typing import Callable

from typing_extensions import NotRequired, TypedDict

from ..util._types import MaybeAwaitable
from .config import (
RealtimeClientMessage,
RealtimeSessionModelSettings,
RealtimeUserInput,
)
from .model_events import RealtimeModelEvent, RealtimeModelToolCallEvent
from .model_events import RealtimeModelEvent
from .model_inputs import RealtimeModelSendEvent


class RealtimeModelListener(abc.ABC):
Expand Down Expand Up @@ -60,40 +59,10 @@ def remove_listener(self, listener: RealtimeModelListener) -> None:
pass

@abc.abstractmethod
async def send_event(self, event: RealtimeClientMessage) -> None:
async def send_event(self, event: RealtimeModelSendEvent) -> None:
"""Send an event to the model."""
pass

@abc.abstractmethod
async def send_message(
self, message: RealtimeUserInput, other_event_data: dict[str, Any] | None = None
) -> None:
"""Send a message to the model."""
pass

@abc.abstractmethod
async def send_audio(self, audio: bytes, *, commit: bool = False) -> None:
"""Send a raw audio chunk to the model.

Args:
audio: The audio data to send.
commit: Whether to commit the audio buffer to the model. If the model does not do turn
detection, this can be used to indicate the turn is completed.
"""
pass

@abc.abstractmethod
async def send_tool_output(
self, tool_call: RealtimeModelToolCallEvent, output: str, start_response: bool
) -> None:
"""Send tool output to the model."""
pass

@abc.abstractmethod
async def interrupt(self) -> None:
"""Interrupt the model. For example, could be triggered by a guardrail."""
pass

@abc.abstractmethod
async def close(self) -> None:
"""Close the session."""
Expand Down
4 changes: 1 addition & 3 deletions src/agents/realtime/model_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,7 @@ class RealtimeModelInputAudioTranscriptionCompletedEvent:
item_id: str
transcript: str

type: Literal["conversation.item.input_audio_transcription.completed"] = (
"conversation.item.input_audio_transcription.completed"
)
type: Literal["input_audio_transcription_completed"] = "input_audio_transcription_completed"


@dataclass
Expand Down
90 changes: 90 additions & 0 deletions src/agents/realtime/model_inputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Literal, Union

from typing_extensions import NotRequired, TypeAlias, TypedDict

from .model_events import RealtimeModelToolCallEvent


class RealtimeModelRawClientMessage(TypedDict):
"""A raw message to be sent to the model."""

type: str # explicitly required
other_data: NotRequired[dict[str, Any]]
"""Merged into the message body."""


class RealtimeModelInputTextContent(TypedDict):
    """Text content that is sent to the model."""

    # Discriminator; the only permitted value is "input_text".
    type: Literal["input_text"]
    # The text carried by this content item.
    text: str


class RealtimeModelUserInputMessage(TypedDict):
    """Structured user message that is sent to the model."""

    # Discriminator; the only permitted value is "message".
    type: Literal["message"]
    # Author role; the only permitted value is "user".
    role: Literal["user"]
    # The text content items that make up the message.
    content: list[RealtimeModelInputTextContent]


# User input is either a plain string or a structured
# RealtimeModelUserInputMessage dict.
RealtimeModelUserInput: TypeAlias = Union[str, RealtimeModelUserInputMessage]
"""A user input to be sent to the model."""


# Model messages


@dataclass
class RealtimeModelSendRawMessage:
    """Event that sends a raw message to the model."""

    message: RealtimeModelRawClientMessage
    """The raw message payload to send."""


@dataclass
class RealtimeModelSendUserInput:
    """Event that sends a user input to the model."""

    user_input: RealtimeModelUserInput
    """The user input (string or structured message) to send."""


@dataclass
class RealtimeModelSendAudio:
    """Event that sends audio to the model."""

    audio: bytes
    """The audio data to send."""

    commit: bool = False
    """Whether to commit the audio buffer to the model. Defaults to False."""


@dataclass
class RealtimeModelSendToolOutput:
    """Event that sends the output of a tool call to the model."""

    tool_call: RealtimeModelToolCallEvent
    """The tool call this output belongs to."""

    output: str
    """The output string to send."""

    start_response: bool
    """Whether a response should be started."""


@dataclass
class RealtimeModelSendInterrupt:
    """Event that sends an interrupt to the model; it carries no fields."""


# Union of every event dataclass defined in this module; used as the
# parameter type of the model's `send_event` method.
RealtimeModelSendEvent: TypeAlias = Union[
    RealtimeModelSendRawMessage,
    RealtimeModelSendUserInput,
    RealtimeModelSendAudio,
    RealtimeModelSendToolOutput,
    RealtimeModelSendInterrupt,
]
Loading