Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions Sources/Core/Models/ServerEvent.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ import MetaCodable
public var token: String
}

/// Usage statistics reported by the server for an input audio transcription.
public struct Usage: Equatable, Hashable, Codable, Sendable {
	/// The kind of usage being reported.
	/// NOTE(review): the exact server-sent values are not visible in this diff — confirm against the Realtime API reference.
	public let type: String
	/// The amount of usage, in seconds.
	public let seconds: Int
}

/// Returned when an error occurs.
/// - Parameter eventId: The unique ID of the server event.
/// - Parameter error: Details of the error.
Expand Down Expand Up @@ -90,7 +95,7 @@ import MetaCodable
contentIndex: Int,
transcript: String,
logprobs: [LogProb]?,
usage: Response.Usage
usage: Usage
)

/// Returned when the text value of an input audio transcription content part is updated.
Expand All @@ -107,6 +112,7 @@ import MetaCodable
itemId: String,
contentIndex: Int,
delta: String,
obfuscation: String,
logprobs: [LogProb]?
)

Expand Down Expand Up @@ -536,7 +542,7 @@ extension ServerEvent: Identifiable {
case let .conversationItemDone(id, _, _): id
case let .conversationItemRetrieved(id, _): id
case let .conversationItemInputAudioTranscriptionCompleted(id, _, _, _, _, _): id
case let .conversationItemInputAudioTranscriptionDelta(id, _, _, _, _): id
case let .conversationItemInputAudioTranscriptionDelta(id, _, _, _, _, _): id
case let .conversationItemInputAudioTranscriptionSegment(id, _, _, _, _, _, _, _): id
case let .conversationItemInputAudioTranscriptionFailed(id, _, _, _): id
case let .conversationItemTruncated(id, _, _, _): id
Expand Down
21 changes: 12 additions & 9 deletions Sources/Core/Models/Session.swift
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ import HelperCoders
}

/// Configuration for turn detection
public struct TurnDetection: Codable, Equatable, Hashable, Sendable {
@Codable public struct TurnDetection: Equatable, Hashable, Sendable {
/// The type of turn detection.
public enum VAD: String, Codable, Equatable, Hashable, Sendable {
case server = "server_vad"
Expand All @@ -108,6 +108,7 @@ import HelperCoders
}

/// Whether or not to automatically generate a response when a VAD stop event occurs.
@Default(ifMissing: false)
public var createResponse: Bool

/// Used only for `semantic` mode. The eagerness of the model to respond.
Expand Down Expand Up @@ -262,20 +263,21 @@ import HelperCoders
public var input: Input

/// Configuration for output audio.
public var output: Output
public var output: Output?

/// Creates a new `Audio` configuration.
///
/// - Parameter input: Configuration for input audio.
/// - Parameter output: Configuration for output audio.
public init(input: Input, output: Output) {
/// - Parameter output: Configuration for output audio. Pass `nil` for transcription sessions, which produce no audio output.
public init(input: Input, output: Output?) {
self.input = input
self.output = output
}
}

/// The type of session to create.
public let type: String = "realtime"
/// The type of session. Valid values are `"realtime"` or `"transcription"`.
public let type: String

/// Unique identifier for the session
public var id: String?
Expand All @@ -289,8 +291,8 @@ import HelperCoders
///
/// The model can be instructed on response content and format, (e.g. "be extremely succinct", "act friendly", "here are examples of good responses") and on audio behavior (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently").
///
/// The instructions are not guaranteed to be followed by the model, but they provide guidance to the model on the desired behavior.
public var instructions: String
/// The instructions are not guaranteed to be followed by the model, but they provide guidance on the desired behavior. Instructions are `nil` for transcription sessions.
public var instructions: String?

/// Maximum number of output tokens for a single assistant response, inclusive of tool calls.
///
Expand All @@ -301,7 +303,7 @@ import HelperCoders
public var modalities: [Modality]?

/// The Realtime model used for this session.
public var model: Model
public var model: Model?

/// Reference to a prompt template and its variables.
public var prompt: Prompt?
Expand All @@ -317,8 +319,9 @@ import HelperCoders
/// Tools available to the model.
public var tools: [Tool]?

public init(id: String? = nil, audio: Audio, instructions: String, maxResponseOutputTokens: MaxResponseOutputTokens? = nil, modalities: [Modality]? = nil, model: Model, prompt: Prompt? = nil, temperature: Double? = nil, toolChoice: Tool.Choice? = nil, tools: [Tool]? = nil) {
public init(id: String? = nil, type: String, audio: Audio, instructions: String, maxResponseOutputTokens: MaxResponseOutputTokens? = nil, modalities: [Modality]? = nil, model: Model, prompt: Prompt? = nil, temperature: Double? = nil, toolChoice: Tool.Choice? = nil, tools: [Tool]? = nil) {
self.id = id
self.type = type
self.tools = tools
self.model = model
self.audio = audio
Expand Down
28 changes: 25 additions & 3 deletions Sources/UI/Conversation.swift
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ public final class Conversation: @unchecked Sendable {
try await client.connect(using: request)
}

public func connect(ephemeralKey: String, model: Model = .gptRealtime) async throws {
public func connect(ephemeralKey: String) async throws {
do {
try await connect(using: .webRTCConnectionRequest(ephemeralKey: ephemeralKey, model: model))
try await connect(using: .webRTCConnectionRequest(ephemeralKey: ephemeralKey))
} catch let error as WebRTCConnector.WebRTCError {
guard case .invalidEphemeralKey = error else { throw error }
throw ConversationError.invalidEphemeralKey
Expand Down Expand Up @@ -176,10 +176,32 @@ private extension Conversation {
if let sessionUpdateCallback { try updateSession(withChanges: sessionUpdateCallback) }
case let .sessionUpdated(_, session):
self.session = session
case let .conversationItemCreated(_, item, _):
case let .conversationItemAdded(_, item, nil):
entries.append(item)
case let .conversationItemAdded(_, item, previousItemId?):
if let entryIndex = entries.firstIndex(where: { $0.id == previousItemId }) {
entries.insert(item, at: entryIndex + 1)
} else {
entries.append(item)
}
case let .conversationItemCreated(_, item, nil):
entries.append(item)
case let .conversationItemCreated(_, item, previousItemId?):
if let entryIndex = entries.firstIndex(where: { $0.id == previousItemId }) {
entries.insert(item, at: entryIndex + 1)
} else {
entries.append(item)
}
case let .conversationItemDeleted(_, itemId):
entries.removeAll { $0.id == itemId }
case let .conversationItemInputAudioTranscriptionDelta(_, itemId, contentIndex, delta, _, _):
	updateEvent(id: itemId) { message in
		guard case let .inputAudio(audio) = message.content[contentIndex] else { return }

		// Parenthesize the nil-coalescing: `??` binds looser than `+`, so the
		// unparenthesized `audio.transcript ?? "" + delta` evaluated as
		// `audio.transcript ?? ("" + delta)` and dropped every delta after the first.
		message.content[contentIndex] = .inputAudio(
			.init(audio: audio.audio, transcript: (audio.transcript ?? "") + delta)
		)
	}
case let .conversationItemInputAudioTranscriptionCompleted(_, itemId, contentIndex, transcript, _, _):
updateEvent(id: itemId) { message in
guard case let .inputAudio(audio) = message.content[contentIndex] else { return }
Expand Down
4 changes: 2 additions & 2 deletions Sources/WebRTC/Extensions/RealtimeAPI+WebRTC.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public extension RealtimeAPI {
}

/// Connect to the OpenAI WebRTC Realtime API with the given authentication token and model.
static func webRTC(ephemeralKey: String, model: Model = .gptRealtime) async throws -> RealtimeAPI {
return try await webRTC(connectingTo: .webRTCConnectionRequest(ephemeralKey: ephemeralKey, model: model))
static func webRTC(ephemeralKey: String) async throws -> RealtimeAPI {
return try await webRTC(connectingTo: .webRTCConnectionRequest(ephemeralKey: ephemeralKey))
}
}
6 changes: 2 additions & 4 deletions Sources/WebRTC/Extensions/URLRequest+WebRTC.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@ import FoundationNetworking
fileprivate let baseURL = URL(string: "https://api.openai.com/v1/realtime/calls")!

package extension URLRequest {
static func webRTCConnectionRequest(ephemeralKey: String, model: Model) -> URLRequest {
var request = URLRequest(url: baseURL.appending(queryItems: [
URLQueryItem(name: "model", value: model.rawValue),
]))
static func webRTCConnectionRequest(ephemeralKey: String) -> URLRequest {
var request = URLRequest(url: baseURL)

request.httpMethod = "POST"
request.setValue("Bearer \(ephemeralKey)", forHTTPHeaderField: "Authorization")
Expand Down
1 change: 1 addition & 0 deletions Sources/WebRTC/WebRTCConnector.swift
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ extension WebRTCConnector: LKRTCDataChannelDelegate {
do { try stream.yield(decoder.decode(ServerEvent.self, from: buffer.data)) }
catch {
print("Failed to decode server event: \(String(data: buffer.data, encoding: .utf8) ?? "<invalid utf8>")")
print("Error: \(error)")
stream.finish(throwing: error)
}
}
Expand Down