Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions Sources/Core/Models/ServerEvent.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ import MetaCodable
public var token: String
}

/// Usage statistics reported by the server for an input audio transcription.
public struct Usage: Equatable, Hashable, Codable, Sendable {
	/// The kind of usage being reported.
	/// NOTE(review): the exact server-sent values are not visible in this diff — confirm against the Realtime API reference.
	public let type: String
	/// The amount of usage, in seconds.
	public let seconds: Int
}

/// Returned when an error occurs.
/// - Parameter eventId: The unique ID of the server event.
/// - Parameter error: Details of the error.
Expand Down Expand Up @@ -90,7 +95,7 @@ import MetaCodable
contentIndex: Int,
transcript: String,
logprobs: [LogProb]?,
usage: Response.Usage
usage: Usage
)

/// Returned when the text value of an input audio transcription content part is updated.
Expand All @@ -107,6 +112,7 @@ import MetaCodable
itemId: String,
contentIndex: Int,
delta: String,
obfuscation: String,
logprobs: [LogProb]?
)

Expand Down Expand Up @@ -536,7 +542,7 @@ extension ServerEvent: Identifiable {
case let .conversationItemDone(id, _, _): id
case let .conversationItemRetrieved(id, _): id
case let .conversationItemInputAudioTranscriptionCompleted(id, _, _, _, _, _): id
case let .conversationItemInputAudioTranscriptionDelta(id, _, _, _, _): id
case let .conversationItemInputAudioTranscriptionDelta(id, _, _, _, _, _): id
case let .conversationItemInputAudioTranscriptionSegment(id, _, _, _, _, _, _, _): id
case let .conversationItemInputAudioTranscriptionFailed(id, _, _, _): id
case let .conversationItemTruncated(id, _, _, _): id
Expand Down
21 changes: 12 additions & 9 deletions Sources/Core/Models/Session.swift
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ import HelperCoders
}

/// Configuration for turn detection
public struct TurnDetection: Codable, Equatable, Hashable, Sendable {
@Codable public struct TurnDetection: Equatable, Hashable, Sendable {
/// The type of turn detection.
public enum VAD: String, Codable, Equatable, Hashable, Sendable {
case server = "server_vad"
Expand All @@ -108,6 +108,7 @@ import HelperCoders
}

/// Whether or not to automatically generate a response when a VAD stop event occurs.
@Default(ifMissing: false)
public var createResponse: Bool

/// Used only for `semantic` mode. The eagerness of the model to respond.
Expand Down Expand Up @@ -262,20 +263,21 @@ import HelperCoders
public var input: Input

/// Configuration for output audio.
public var output: Output
public var output: Output?

/// Creates a new `Audio` configuration.
///
/// - Parameter input: Configuration for input audio.
/// - Parameter output: Configuration for output audio.
public init(input: Input, output: Output) {
/// - Parameter output: Configuration for output audio. Pass `nil` for transcription sessions, which produce no audio output.
public init(input: Input, output: Output?) {
self.input = input
self.output = output
}
}

/// The type of session to create.
public let type: String = "realtime"
/// The type of session. Valid values are `"realtime"` or `"transcription"`.
public let type: String

/// Unique identifier for the session
public var id: String?
Expand All @@ -289,8 +291,8 @@ import HelperCoders
///
/// The model can be instructed on response content and format, (e.g. "be extremely succinct", "act friendly", "here are examples of good responses") and on audio behavior (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently").
///
/// The instructions are not guaranteed to be followed by the model, but they provide guidance to the model on the desired behavior.
public var instructions: String
/// The instructions are not guaranteed to be followed by the model, but they provide guidance on the desired behavior. Instructions are `nil` for transcription sessions.
public var instructions: String?

/// Maximum number of output tokens for a single assistant response, inclusive of tool calls.
///
Expand All @@ -301,7 +303,7 @@ import HelperCoders
public var modalities: [Modality]?

/// The Realtime model used for this session.
public var model: Model
public var model: Model?

/// Reference to a prompt template and its variables.
public var prompt: Prompt?
Expand All @@ -317,8 +319,9 @@ import HelperCoders
/// Tools available to the model.
public var tools: [Tool]?

public init(id: String? = nil, audio: Audio, instructions: String, maxResponseOutputTokens: MaxResponseOutputTokens? = nil, modalities: [Modality]? = nil, model: Model, prompt: Prompt? = nil, temperature: Double? = nil, toolChoice: Tool.Choice? = nil, tools: [Tool]? = nil) {
public init(id: String? = nil, type: String, audio: Audio, instructions: String, maxResponseOutputTokens: MaxResponseOutputTokens? = nil, modalities: [Modality]? = nil, model: Model, prompt: Prompt? = nil, temperature: Double? = nil, toolChoice: Tool.Choice? = nil, tools: [Tool]? = nil) {
self.id = id
self.type = type
self.tools = tools
self.model = model
self.audio = audio
Expand Down
28 changes: 25 additions & 3 deletions Sources/UI/Conversation.swift
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ public final class Conversation: @unchecked Sendable {
try await client.connect(using: request)
}

public func connect(ephemeralKey: String, model: Model = .gptRealtime) async throws {
public func connect(ephemeralKey: String) async throws {
do {
try await connect(using: .webRTCConnectionRequest(ephemeralKey: ephemeralKey, model: model))
try await connect(using: .webRTCConnectionRequest(ephemeralKey: ephemeralKey))
} catch let error as WebRTCConnector.WebRTCError {
guard case .invalidEphemeralKey = error else { throw error }
throw ConversationError.invalidEphemeralKey
Expand Down Expand Up @@ -176,10 +176,32 @@ private extension Conversation {
if let sessionUpdateCallback { try updateSession(withChanges: sessionUpdateCallback) }
case let .sessionUpdated(_, session):
self.session = session
case let .conversationItemCreated(_, item, _):
case let .conversationItemAdded(_, item, nil):
entries.append(item)
case let .conversationItemAdded(_, item, previousItemId?):
if let entryIndex = entries.firstIndex(where: { $0.id == previousItemId }) {
entries.insert(item, at: entryIndex + 1)
} else {
entries.append(item)
}
case let .conversationItemCreated(_, item, nil):
entries.append(item)
case let .conversationItemCreated(_, item, previousItemId?):
if let entryIndex = entries.firstIndex(where: { $0.id == previousItemId }) {
entries.insert(item, at: entryIndex + 1)
} else {
entries.append(item)
}
case let .conversationItemDeleted(_, itemId):
entries.removeAll { $0.id == itemId }
case let .conversationItemInputAudioTranscriptionDelta(_, itemId, contentIndex, delta, _, _):
	updateEvent(id: itemId) { message in
		guard case let .inputAudio(audio) = message.content[contentIndex] else { return }

		// Parenthesize the nil-coalescing: `??` binds looser than `+`, so the
		// unparenthesized `audio.transcript ?? "" + delta` evaluated as
		// `audio.transcript ?? ("" + delta)` and dropped every delta after the first.
		message.content[contentIndex] = .inputAudio(
			.init(audio: audio.audio, transcript: (audio.transcript ?? "") + delta)
		)
	}
case let .conversationItemInputAudioTranscriptionCompleted(_, itemId, contentIndex, transcript, _, _):
updateEvent(id: itemId) { message in
guard case let .inputAudio(audio) = message.content[contentIndex] else { return }
Expand Down
4 changes: 2 additions & 2 deletions Sources/WebRTC/Extensions/RealtimeAPI+WebRTC.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public extension RealtimeAPI {
}

/// Connect to the OpenAI WebRTC Realtime API with the given authentication token and model.
static func webRTC(ephemeralKey: String, model: Model = .gptRealtime) async throws -> RealtimeAPI {
return try await webRTC(connectingTo: .webRTCConnectionRequest(ephemeralKey: ephemeralKey, model: model))
static func webRTC(ephemeralKey: String) async throws -> RealtimeAPI {
return try await webRTC(connectingTo: .webRTCConnectionRequest(ephemeralKey: ephemeralKey))
}
}
6 changes: 2 additions & 4 deletions Sources/WebRTC/Extensions/URLRequest+WebRTC.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@ import FoundationNetworking
fileprivate let baseURL = URL(string: "https://api.openai.com/v1/realtime/calls")!

package extension URLRequest {
static func webRTCConnectionRequest(ephemeralKey: String, model: Model) -> URLRequest {
var request = URLRequest(url: baseURL.appending(queryItems: [
URLQueryItem(name: "model", value: model.rawValue),
]))
static func webRTCConnectionRequest(ephemeralKey: String) -> URLRequest {
var request = URLRequest(url: baseURL)

request.httpMethod = "POST"
request.setValue("Bearer \(ephemeralKey)", forHTTPHeaderField: "Authorization")
Expand Down
1 change: 1 addition & 0 deletions Sources/WebRTC/WebRTCConnector.swift
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ extension WebRTCConnector: LKRTCDataChannelDelegate {
do { try stream.yield(decoder.decode(ServerEvent.self, from: buffer.data)) }
catch {
print("Failed to decode server event: \(String(data: buffer.data, encoding: .utf8) ?? "<invalid utf8>")")
print("Error: \(error)")
stream.finish(throwing: error)
}
}
Expand Down