Skip to content

feat: The "session request" is no longer managed by the client. #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 28 additions & 11 deletions apps/browser-example/src/pages/WebRTCExample.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ import {
} from "../components/RealtimeSessionView"
import { RealtimeClient } from "@tsorta/browser/WebRTC"
import { PageProps } from "./props"
import { RealtimeConversationItem } from "@tsorta/browser/openai"
import {
RealtimeConversationItem,
RealtimeSessionCreateResponse,
} from "@tsorta/browser/openai"

export function WebRTCExample({
apiKey,
Expand Down Expand Up @@ -37,17 +40,25 @@ export function WebRTCExample({

const client = new RealtimeClient(
navigator,
// @ts-expect-error TS6133: 'sessionRequested' is declared but its value is never read.
({ sessionRequested }) => {
async () => {
// NOTE: For the sake of the example, we're using a "real" OpenAI API
// key rather than a Realtime API Session ephemeral key, as you
// should do in a production app. So this sessionRequested argument
// isn't useful in the example, but in a production app you can use
// it to request a session with these parameters.
return apiKey
// key in *the browser*. **DO NOT DO THIS**. You should make this request
// for the ephemeral key on a backend server where you can protect
// the key.

const r = await fetch("https://api.openai.com/v1/realtime/sessions", {
method: "POST",
headers: {
Authorization: `Bearer ${apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify(sessionRequest),
})
const data = (await r.json()) as RealtimeSessionCreateResponse

return data.client_secret.value
},
audioElementRef.current,
sessionRequest
audioElementRef.current
)
setClient(client)

Expand All @@ -59,7 +70,13 @@ export function WebRTCExample({
setConversation(event.conversation)
})

await client.start()
try {
await client.start()
} catch (e) {
// TODO: put an alert on the top to show error
console.error("Error starting session", e)
return
}

onSessionStatusChanged("recording")
},
Expand Down
94 changes: 26 additions & 68 deletions packages/browser/src/WebRTC/RealtimeClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ const RealtimeClientDefaultOptions: RealtimeClientOptions = {
baseUrl: "https://api.openai.com/v1/realtime",
}

interface EphemeralApiKeyOptions {
sessionRequested: RealtimeSessionCreateRequest
}
/**
* A TypeScript client for the OpenAI Realtime API using WebRTC in the browser.
*/
Expand All @@ -78,16 +75,12 @@ export class RealtimeClient {
/**
* Create a new client.
* @param getRealtimeEphemeralAPIKey This is a function that you should implement to return the Ephemeral OpenAI API key that is used to authenticate with the OpenAI Realtime API. It should be an ephemeral key as described at https://platform.openai.com/docs/guides/realtime-webrtc#creating-an-ephemeral-token. You will probably need to make a call to your server here to fetch the key.
* @param sessionRequested The session parameters you want from the Realtime API. If these are found to be different it will re-request them to try to match this session.
*/
constructor(
private readonly navigator: Navigator,
private readonly getRealtimeEphemeralAPIKey: (
options: EphemeralApiKeyOptions
) => Promise<string> | string,
private readonly getRealtimeEphemeralAPIKey: () => Promise<string> | string,
private readonly audioElement: HTMLAudioElement,
private readonly sessionRequested: RealtimeSessionCreateRequest,
options: Partial<RealtimeClientOptions> = RealtimeClientDefaultOptions
options: Partial<RealtimeClientOptions> = RealtimeClientDefaultOptions,
) {
const opt = { ...RealtimeClientDefaultOptions, ...options }
this.recordedAudioChunkDuration = opt.recordedAudioChunkDuration
Expand All @@ -101,7 +94,7 @@ export class RealtimeClient {
*/
public addEventListener<TEventName extends keyof RealtimeClientEventMap>(
event: TEventName,
listener: EventTargetListener<RealtimeClientEventMap[TEventName]>
listener: EventTargetListener<RealtimeClientEventMap[TEventName]>,
): void {
this.emitter.addEventListener(event, listener)
}
Expand Down Expand Up @@ -204,15 +197,15 @@ export class RealtimeClient {
this.audioChunks.push(...audioChunks)
this.emitter.dispatchTypedEvent(
"recordedAudioChanged",
new RecordedAudioChangedEvent(this.audioChunks)
new RecordedAudioChangedEvent(this.audioChunks),
)
}

private setRecordedAudio(audioChunks: Blob[]) {
this.audioChunks = audioChunks
this.emitter.dispatchTypedEvent(
"recordedAudioChanged",
new RecordedAudioChangedEvent(this.audioChunks)
new RecordedAudioChangedEvent(this.audioChunks),
)
}

Expand All @@ -225,9 +218,7 @@ export class RealtimeClient {

let apiKey: string
try {
apiKey = await this.getRealtimeEphemeralAPIKey({
sessionRequested: this.sessionRequested,
})
apiKey = await this.getRealtimeEphemeralAPIKey()
} catch (err) {
throw new Error("getRealtimeEphemeralAPIKey handler failed.", {
cause: err,
Expand Down Expand Up @@ -274,7 +265,7 @@ export class RealtimeClient {
// Listen for server-sent events on the data channel
this.dataChannel.addEventListener(
"message",
this.receiveServerMessage.bind(this)
this.receiveServerMessage.bind(this),
)
this.dataChannel.addEventListener("error", (e) => {
log.error("Data channel error from server: %o", e.error)
Expand Down Expand Up @@ -308,7 +299,7 @@ export class RealtimeClient {
this.session = undefined
this.emitter.dispatchTypedEvent(
"sessionUpdated",
new SessionUpdatedEvent(this.session)
new SessionUpdatedEvent(this.session),
)
}
}
Expand All @@ -323,7 +314,7 @@ export class RealtimeClient {

this.emitter.dispatchTypedEvent(
"serverEvent",
new RealtimeServerEventEvent(parsedEvent)
new RealtimeServerEventEvent(parsedEvent),
)
}

Expand Down Expand Up @@ -383,49 +374,15 @@ export class RealtimeClient {
client.session = sessionEvent.session
client.emitter.dispatchTypedEvent(
"sessionCreated",
new SessionCreatedEvent(sessionEvent.session)
new SessionCreatedEvent(sessionEvent.session),
)

if (!client.sessionRequested) {
throw new Error("No session request")
}

// NOTE: When we create a session with OpenAI, it ignores things like input_audio_transcription?.model !== "whisper-1", so we update it again if it doesn't match the session.
let updatedSession: RealtimeSessionCreateRequest = {
...client.sessionRequested,
}
let hasSessionMismatch = false

for (const key of Object.keys(client.sessionRequested) as Array<
keyof RealtimeSessionCreateRequest
>) {
const requestValue = client.sessionRequested[key]
const sessionValue = sessionEvent.session[key]

if (compareValuesIgnoreNullProperties(requestValue, sessionValue)) {
continue
}
log.debug(
`session mismatch on ${key}: %o !== %o`,
requestValue,
sessionValue
)
hasSessionMismatch = true
}
if (hasSessionMismatch) {
const updateSessionEvent: RealtimeClientEventSessionUpdate = {
type: "session.update",
session: updatedSession,
}
client.sendClientEvent(updateSessionEvent)
}
},
"session.updated": (client, event) => {
const sessionEvent = event as RealtimeServerEventSessionUpdated
client.session = sessionEvent.session
client.emitter.dispatchTypedEvent(
"sessionUpdated",
new SessionUpdatedEvent(sessionEvent.session)
new SessionUpdatedEvent(sessionEvent.session),
)
},
"conversation.item.created": (client, event) => {
Expand All @@ -434,7 +391,7 @@ export class RealtimeClient {
client.conversation.push(conversationEvent.item)
client.emitter.dispatchTypedEvent(
"conversationChanged",
new ConversationChangedEvent(client.conversation)
new ConversationChangedEvent(client.conversation),
)
},
"response.audio_transcript.delta": (client, event) => {
Expand All @@ -445,7 +402,7 @@ export class RealtimeClient {
client.conversation,
deltaEvent.item_id,
deltaEvent.content_index,
deltaEvent
deltaEvent,
)
if (!foundItem) {
// error was logged in findConversationItemContent
Expand All @@ -462,15 +419,15 @@ export class RealtimeClient {
} else {
if (foundContent.type !== "input_audio") {
log.error(
`${event.type} Unexpected content type ${foundContent.type} for audio transcript`
`${event.type} Unexpected content type ${foundContent.type} for audio transcript`,
)
return
}
foundContent.transcript += deltaEvent.delta
}
client.emitter.dispatchTypedEvent(
"conversationChanged",
new ConversationChangedEvent(client.conversation)
new ConversationChangedEvent(client.conversation),
)
},
"response.text.delta": (client, event) => {
Expand Down Expand Up @@ -500,15 +457,15 @@ export class RealtimeClient {
{ log },
client.conversation,
output.id!,
event
event,
)
if (!conversationItem) {
// TODO: findConversationItem already logged an error, we should probably pass in a value that tells it not to log
// no existing item is there, for some reason maybe we missed it in the stream somehow? We'll just add it:
client.conversation.push(output)
client.emitter.dispatchTypedEvent(
"conversationChanged",
new ConversationChangedEvent(client.conversation)
new ConversationChangedEvent(client.conversation),
)
continue
}
Expand All @@ -523,43 +480,44 @@ export class RealtimeClient {
// force update the conversation state:
client.emitter.dispatchTypedEvent(
"conversationChanged",
new ConversationChangedEvent(client.conversation)
new ConversationChangedEvent(client.conversation),
)
}
},
"response.audio_transcript.done": (client, event) => {
patchConversationItemWithCompletedTranscript(
{ log },
client.conversation,
event as RealtimeServerEventResponseAudioTranscriptDone
event as RealtimeServerEventResponseAudioTranscriptDone,
)
client.emitter.dispatchTypedEvent(
"conversationChanged",
new ConversationChangedEvent(client.conversation)
new ConversationChangedEvent(client.conversation),
)
},
"conversation.item.input_audio_transcription.completed": (
client,
event
event,
) => {
patchConversationItemWithCompletedTranscript(
{ log },
client.conversation,
event
event,
)
client.emitter.dispatchTypedEvent(
"conversationChanged",
new ConversationChangedEvent(client.conversation)
new ConversationChangedEvent(client.conversation),
)
},
}
}

type RealtimeServerEventHandler<
TRealtimeServerEventType extends RealtimeServerEvent["type"] = RealtimeServerEvent["type"]
TRealtimeServerEventType extends
RealtimeServerEvent["type"] = RealtimeServerEvent["type"],
> = (
client: RealtimeClient,
event: Extract<RealtimeServerEvent, { type: TRealtimeServerEventType }>
event: Extract<RealtimeServerEvent, { type: TRealtimeServerEventType }>,
) => void

type RealtimeServerEventNames = RealtimeServerEvent["type"]
Expand Down
3 changes: 3 additions & 0 deletions packages/browser/src/openai/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ export type RealtimeSession = components["schemas"]["RealtimeSession"]
export type RealtimeSessionCreateRequest =
components["schemas"]["RealtimeSessionCreateRequest"]

export type RealtimeSessionCreateResponse =
components["schemas"]["RealtimeSessionCreateResponse"]

export type RealTimeSessionModels = RealtimeSessionCreateRequest["model"]

/** Part of the @see RealtimeServerEventResponseDone event and others.
Expand Down