Skip to content

Commit 0e2aefc

Browse files
committed
fix review comments
1 parent f7f3e5f commit 0e2aefc

File tree

7 files changed

+181
-25
lines changed

7 files changed

+181
-25
lines changed

examples/realtime-twilio-sip/server.ts

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
OpenAIRealtimeSIP,
88
RealtimeItem,
99
RealtimeSession,
10+
type RealtimeSessionOptions,
1011
} from '@openai/agents/realtime';
1112
import { getStartingAgent, WELCOME_MESSAGE } from './agents';
1213

@@ -44,20 +45,27 @@ async function main() {
4445
const activeCallTasks = new Map<string, Promise<void>>();
4546
const startingAgent = getStartingAgent();
4647

47-
function getDefaultInstructions(): string {
48-
if (typeof startingAgent.instructions === 'string') {
49-
return startingAgent.instructions;
50-
}
51-
return 'You are a helpful triage agent for ABC customer service.';
52-
}
48+
// Reuse the same session options when accepting the call and when instantiating the session so
49+
// the SIP payload remains in sync with the live websocket session.
50+
const sessionOptions: Partial<RealtimeSessionOptions> = {
51+
model: 'gpt-realtime',
52+
config: {
53+
audio: {
54+
input: {
55+
turnDetection: { type: 'semantic_vad', interruptResponse: true },
56+
},
57+
},
58+
},
59+
};
5360

5461
async function acceptCall(callId: string): Promise<void> {
5562
try {
56-
await openai.realtime.calls.accept(callId, {
57-
type: 'realtime',
58-
model: 'gpt-realtime',
59-
instructions: getDefaultInstructions(),
60-
});
63+
// Build the initial session config using the agent data and session options
64+
const initialConfig = await OpenAIRealtimeSIP.buildInitialConfig(
65+
startingAgent,
66+
sessionOptions,
67+
);
68+
await openai.realtime.calls.accept(callId, initialConfig);
6169
console.info(`Accepted call ${callId}`);
6270
} catch (error) {
6371
if (error instanceof APIError && error.status === 404) {
@@ -97,17 +105,7 @@ async function main() {
97105
async function observeCall(callId: string): Promise<void> {
98106
const session = new RealtimeSession(startingAgent, {
99107
transport: new OpenAIRealtimeSIP(),
100-
model: 'gpt-realtime',
101-
config: {
102-
audio: {
103-
input: {
104-
turnDetection: {
105-
type: 'semantic_vad',
106-
interruptResponse: true,
107-
},
108-
},
109-
},
110-
},
108+
...sessionOptions,
111109
});
112110

113111
session.on('history_added', (item: RealtimeItem) => logHistoryItem(item));

packages/agents-realtime/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ export {
4949
OpenAIRealtimeEventTypes,
5050
DEFAULT_OPENAI_REALTIME_MODEL,
5151
DEFAULT_OPENAI_REALTIME_SESSION_CONFIG,
52+
RealtimeSessionPayload,
5253
} from './openaiRealtimeBase';
5354

5455
export { RealtimeOutputGuardrail } from './guardrail';

packages/agents-realtime/src/openaiRealtimeBase.ts

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ export type OpenAIRealtimeModels =
4848
| 'gpt-4o-mini-realtime-preview-2024-12-17'
4949
| 'gpt-realtime'
5050
| 'gpt-realtime-2025-08-28'
51+
| 'gpt-realtime-mini'
52+
| 'gpt-realtime-mini-2025-10-06'
5153
| (string & {}); // ensures autocomplete works
5254

5355
/**
@@ -105,6 +107,13 @@ export type OpenAIRealtimeEventTypes = {
105107
disconnected: [];
106108
} & RealtimeTransportEventTypes;
107109

110+
/**
111+
* Shape of the payload that the Realtime API expects for session.create/update operations.
112+
* The type is deliberately loose (only `type` is fixed) but stays assignable to the REST `CallAcceptParams` type so that callers can feed the payload
113+
* directly into the `openai.realtime.calls.accept` helper without casts.
114+
*/
115+
export type RealtimeSessionPayload = { type: 'realtime' } & Record<string, any>;
116+
108117
export abstract class OpenAIRealtimeBase
109118
extends EventEmitterDelegate<OpenAIRealtimeEventTypes>
110119
implements RealtimeTransportLayer
@@ -523,10 +532,12 @@ export abstract class OpenAIRealtimeBase
523532
);
524533
}
525534

526-
protected _getMergedSessionConfig(config: Partial<RealtimeSessionConfig>) {
535+
protected _getMergedSessionConfig(
536+
config: Partial<RealtimeSessionConfig>,
537+
): RealtimeSessionPayload {
527538
const newConfig = toNewSessionConfig(config);
528539

529-
const sessionData: Record<string, any> = {
540+
const sessionData: RealtimeSessionPayload = {
530541
type: 'realtime',
531542
instructions: newConfig.instructions,
532543
model: newConfig.model ?? this.#model,
@@ -588,6 +599,21 @@ export abstract class OpenAIRealtimeBase
588599
return sessionData;
589600
}
590601

602+
/**
603+
* Build the payload object expected by the Realtime API when creating or updating a session.
604+
*
605+
* The helper centralises the conversion from camelCase runtime config to the snake_case payload
606+
* required by the Realtime API so transports that need a one-off payload (for example SIP call
607+
* acceptance) can reuse the same logic without duplicating private state.
608+
*
609+
* @param config - The session config to merge with defaults.
610+
*/
611+
buildSessionPayload(
612+
config: Partial<RealtimeSessionConfig>,
613+
): RealtimeSessionPayload {
614+
return this._getMergedSessionConfig(config);
615+
}
616+
591617
private static buildTurnDetectionConfig(
592618
c: RealtimeTurnDetectionConfig | undefined,
593619
): RealtimeTurnDetectionConfigAsIs | undefined {
@@ -735,7 +761,7 @@ export abstract class OpenAIRealtimeBase
735761
* @param config - The session config to update.
736762
*/
737763
updateSessionConfig(config: Partial<RealtimeSessionConfig>): void {
738-
const sessionData = this._getMergedSessionConfig(config);
764+
const sessionData = this.buildSessionPayload(config);
739765

740766
this.sendEvent({
741767
type: 'session.update',

packages/agents-realtime/src/openaiRealtimeSip.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@ import {
44
OpenAIRealtimeWebSocket,
55
OpenAIRealtimeWebSocketOptions,
66
} from './openaiRealtimeWebsocket';
7+
import type { RealtimeSessionPayload } from './openaiRealtimeBase';
8+
import type { RealtimeSessionConfig } from './clientMessages';
9+
import {
10+
RealtimeSession,
11+
type RealtimeSessionOptions,
12+
type RealtimeContextData,
13+
} from './realtimeSession';
14+
import { RealtimeAgent } from './realtimeAgent';
715

816
/**
917
* Transport layer that connects to an existing SIP-initiated Realtime call via call ID.
@@ -13,6 +21,46 @@ export class OpenAIRealtimeSIP extends OpenAIRealtimeWebSocket {
1321
super(options);
1422
}
1523

24+
/**
25+
* Build the initial session payload for a SIP-attached session, matching the config that a RealtimeSession would send on connect.
26+
*
27+
* This enables SIP deployments to accept an incoming call with a payload that already reflects
28+
* the active agent's instructions, tools, prompt, and tracing metadata without duplicating the
29+
* session logic outside of the SDK. The returned object structurally matches the REST
30+
* `CallAcceptParams` interface, so it can be forwarded directly to
31+
* `openai.realtime.calls.accept(...)`.
32+
*
33+
* @param agent - The starting agent used to seed the session instructions, tools, and prompt.
34+
* @param options - Optional session options that mirror the ones passed to the RealtimeSession constructor.
35+
* @param overrides - Additional config overrides applied on top of the session options.
36+
*/
37+
static async buildInitialConfig<TBaseContext = unknown>(
38+
agent:
39+
| RealtimeAgent<TBaseContext>
40+
| RealtimeAgent<RealtimeContextData<TBaseContext>>,
41+
options: Partial<RealtimeSessionOptions<TBaseContext>> = {},
42+
overrides: Partial<RealtimeSessionConfig> = {},
43+
): Promise<RealtimeSessionPayload> {
44+
const sessionConfig = await RealtimeSession.computeInitialSessionConfig(
45+
agent,
46+
options,
47+
overrides,
48+
);
49+
const transport = new OpenAIRealtimeSIP();
50+
return transport.buildSessionPayload(sessionConfig);
51+
}
52+
53+
override sendAudio(
54+
_audio: ArrayBuffer,
55+
_options: { commit?: boolean } = {},
56+
): never {
57+
// SIP integrations stream audio to OpenAI directly through the telephony provider, so the
58+
// transport deliberately prevents userland code from sending duplicate buffers.
59+
throw new Error(
60+
'OpenAIRealtimeSIP does not support sending audio buffers; audio is handled by the SIP call.',
61+
);
62+
}
63+
1664
async connect(options: RealtimeTransportLayerConnectOptions): Promise<void> {
1765
if (!options.callId) {
1866
throw new UserError(

packages/agents-realtime/src/realtimeSession.ts

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,53 @@ export class RealtimeSession<
406406
return fullConfig;
407407
}
408408

409+
/**
410+
* Compute the initial session config that the current session will use when connecting.
411+
*
412+
* This mirrors the configuration payload we send during `connect`, including dynamic values
413+
* such as the upstream agent instructions, tool definitions, and prompt content generated at
414+
* runtime. Keeping this helper exposed allows transports or orchestration layers to precompute
415+
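* a CallAccept-compatible payload without opening a socket. Note that it first calls
* `#setCurrentAgent(this.initialAgent)`, which presumably resets the session's current agent to the initial agent.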
416+
*
417+
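* @param overrides - Additional config overrides spread on top of `options.config`; the spread is shallow, so a top-level key such as `audio` in `overrides` replaces the same key from the session options.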
418+
*/
419+
async getInitialSessionConfig(
420+
overrides: Partial<RealtimeSessionConfig> = {},
421+
): Promise<Partial<RealtimeSessionConfig>> {
422+
await this.#setCurrentAgent(this.initialAgent);
423+
return this.#getSessionConfig({
424+
...(this.options.config ?? {}),
425+
...(overrides ?? {}),
426+
});
427+
}
428+
429+
/**
430+
* Convenience helper to compute the initial session config without manually instantiating and connecting a session.
431+
*
432+
* This is primarily useful for integrations that must provide the session configuration to a
433+
* third party (for example the SIP `calls.accept` endpoint) before the actual realtime session
434+
* is attached. The helper instantiates a throwaway session so all agent-driven dynamic fields
435+
* resolve in exactly the same way as the live session path.
436+
*
437+
* @param agent - The starting agent for the session.
438+
* @param options - Session options used to seed the config calculation.
439+
* @param overrides - Additional config overrides applied on top of the provided options.
440+
*/
441+
static async computeInitialSessionConfig<TBaseContext = unknown>(
442+
agent:
443+
| RealtimeAgent<TBaseContext>
444+
| RealtimeAgent<RealtimeContextData<TBaseContext>>,
445+
options: Partial<RealtimeSessionOptions<TBaseContext>> = {},
446+
overrides: Partial<RealtimeSessionConfig> = {},
447+
): Promise<Partial<RealtimeSessionConfig>> {
448+
const session = new RealtimeSession(agent, options);
449+
try {
450+
return await session.getInitialSessionConfig(overrides);
451+
} finally {
452+
session.close();
453+
}
454+
}
455+
409456
async updateAgent(newAgent: RealtimeAgent<TBaseContext>) {
410457
this.#currentAgent.emit('agent_handoff', this.#context, newAgent);
411458
this.emit('agent_handoff', this.#context, this.#currentAgent, newAgent);

packages/agents-realtime/test/openaiRealtimeWebsocket.test.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
22
import { OpenAIRealtimeBase } from '../src/openaiRealtimeBase';
33
import { OpenAIRealtimeWebSocket } from '../src/openaiRealtimeWebsocket';
44
import { OpenAIRealtimeSIP } from '../src/openaiRealtimeSip';
5+
import { RealtimeAgent } from '../src/realtimeAgent';
56

67
let lastFakeSocket: any;
78
vi.mock('ws', () => {
@@ -406,4 +407,31 @@ describe('OpenAIRealtimeWebSocket', () => {
406407
);
407408
sip.close();
408409
});
410+
411+
it('OpenAIRealtimeSIP buildInitialConfig returns realtime payload seeded from agent', async () => {
412+
const agent = new RealtimeAgent({
413+
name: 'sip-agent',
414+
handoffs: [],
415+
instructions: 'Respond politely.',
416+
});
417+
const payload = await OpenAIRealtimeSIP.buildInitialConfig(
418+
agent,
419+
{
420+
model: 'gpt-realtime',
421+
config: { audio: { output: { speed: 1.5 } } },
422+
},
423+
{ audio: { output: { speed: 2 } } },
424+
);
425+
expect(payload.type).toBe('realtime');
426+
expect(payload.model).toBe('gpt-realtime');
427+
expect(payload.instructions).toBe('Respond politely.');
428+
expect(payload.audio?.output?.speed).toBe(2);
429+
});
430+
431+
it('OpenAIRealtimeSIP sendAudio throws', () => {
432+
const sip = new OpenAIRealtimeSIP();
433+
expect(() => sip.sendAudio(new ArrayBuffer(1))).toThrow(
434+
'OpenAIRealtimeSIP does not support sending audio buffers',
435+
);
436+
});
409437
});

packages/agents-realtime/test/realtimeSession.test.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,14 @@ describe('RealtimeSession', () => {
126126
expect(t.connectCalls[0]?.url).toBe('ws://example');
127127
});
128128

129+
it('forwards callId in connect options to transport', async () => {
130+
const t = new FakeTransport();
131+
const agent = new RealtimeAgent({ name: 'A', handoffs: [] });
132+
const s = new RealtimeSession(agent, { transport: t });
133+
await s.connect({ apiKey: 'test', callId: 'call_123' });
134+
expect(t.connectCalls[0]?.callId).toBe('call_123');
135+
});
136+
129137
it('includes default transcription config when connecting', async () => {
130138
const t = new FakeTransport();
131139
const agent = new RealtimeAgent({ name: 'A', handoffs: [] });

0 commit comments

Comments
 (0)