Skip to content

Commit cb00a08

Browse files
authored
Update TranscriptionResult API (#376)
* Make `TranscriptionResult` an open class and ensure thread safety * Add `AudioProcessing.startStreamingRecordingLive` to record with an `AsyncStream`
1 parent e2702d9 commit cb00a08

File tree

4 files changed

+128
-10
lines changed

4 files changed

+128
-10
lines changed

.github/workflows/unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,4 +104,4 @@ jobs:
104104
name: test-results-${{ matrix.run-config['name']}}-on-${{ inputs.macos-runner }}
105105
path: |
106106
~/Library/Developer/Xcode/DerivedData/**/Logs/Test/*.xcresult
107-
retention-days: 5
107+
retention-days: 5

Sources/WhisperKit/Core/Audio/AudioProcessor.swift

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,13 @@ public protocol AudioProcessing {
9696
/// Starts recording audio from the specified input device, resetting the previous state
9797
func startRecordingLive(inputDeviceID: DeviceID?, callback: (([Float]) -> Void)?) throws
9898

99+
/// Starts live audio recording and provides an async stream of audio samples.
100+
///
101+
/// - Parameter inputDeviceID: The device ID of the input audio device to use for recording.
102+
/// This parameter is only valid on macOS; iOS always uses the default input device.
103+
/// - Returns: A tuple containing the async stream of audio sample buffers and the stream's continuation.
104+
func startStreamingRecordingLive(inputDeviceID: DeviceID?) -> (AsyncThrowingStream<[Float], Error>, AsyncThrowingStream<[Float], Error>.Continuation)
105+
99106
/// Pause recording
100107
func pauseRecording()
101108

@@ -128,10 +135,14 @@ public extension AudioProcessing {
128135
try AudioProcessor.loadAudio(fromPath: audioFilePath)
129136
}.value
130137
}
131-
138+
132139
func startRecordingLive(inputDeviceID: DeviceID? = nil, callback: (([Float]) -> Void)?) throws {
133140
try startRecordingLive(inputDeviceID: inputDeviceID, callback: callback)
134141
}
142+
143+
func startStreamingRecordingLive(inputDeviceID: DeviceID? = nil) -> (AsyncThrowingStream<[Float], Error>, AsyncThrowingStream<[Float], Error>.Continuation) {
144+
return startStreamingRecordingLive(inputDeviceID: inputDeviceID)
145+
}
135146

136147
func resumeRecordingLive(inputDeviceID: DeviceID? = nil, callback: (([Float]) -> Void)?) throws {
137148
try resumeRecordingLive(inputDeviceID: inputDeviceID, callback: callback)
@@ -1023,6 +1034,28 @@ public extension AudioProcessor {
10231034
lastInputDevice = inputDeviceID
10241035
}
10251036

1037+
/// Starts live audio recording and returns an async stream of sample buffers together with its continuation.
1038+
/// Recording stops automatically when the stream terminates.
1039+
func startStreamingRecordingLive(inputDeviceID: DeviceID? = nil) -> (AsyncThrowingStream<[Float], Error>, AsyncThrowingStream<[Float], Error>.Continuation) {
1040+
let (stream, continuation) = AsyncThrowingStream<[Float], Error>.makeStream(bufferingPolicy: .unbounded)
1041+
1042+
continuation.onTermination = { [weak self] _ in
1043+
guard let self = self else { return }
1044+
self.audioBufferCallback = nil
1045+
self.stopRecording()
1046+
}
1047+
1048+
do {
1049+
try self.startRecordingLive(inputDeviceID: inputDeviceID) { @Sendable floats in
1050+
continuation.yield(floats)
1051+
}
1052+
} catch {
1053+
continuation.finish(throwing: error)
1054+
}
1055+
1056+
return (stream, continuation)
1057+
}
1058+
10261059
func resumeRecordingLive(inputDeviceID: DeviceID? = nil, callback: (([Float]) -> Void)? = nil) throws {
10271060
try? setupAudioSessionForDevice()
10281061

Sources/WhisperKit/Core/Models.swift

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -501,14 +501,17 @@ public struct DecodingResult {
501501
}
502502
}
503503

504-
// Structs
505-
506-
public struct TranscriptionResult: Codable, Sendable {
507-
public var text: String
508-
public var segments: [TranscriptionSegment]
509-
public var language: String
510-
public var timings: TranscriptionTimings
511-
public var seekTime: Float?
504+
/// Reference-type container for transcription output.
505+
/// The stored properties stay thread-safe because each one uses
506+
/// `TranscriptionPropertyLock`, so reads/writes hop through a private `os_unfair_lock`
507+
/// before the value is accessed, making this shared `@unchecked Sendable` class
508+
/// safe to hand across concurrent contexts.
509+
open class TranscriptionResult: Codable, @unchecked Sendable {
510+
@TranscriptionPropertyLock public var text: String
511+
@TranscriptionPropertyLock public var segments: [TranscriptionSegment]
512+
@TranscriptionPropertyLock public var language: String
513+
@TranscriptionPropertyLock public var timings: TranscriptionTimings
514+
@TranscriptionPropertyLock public var seekTime: Float?
512515

513516
public init(
514517
text: String,
@@ -600,12 +603,38 @@ public struct TranscriptionResult: Codable, Sendable {
600603
}
601604
}
602605

606+
/// Value-type equivalent of `TranscriptionResult` without property locking.
607+
public struct TranscriptionResultStruct: Codable, Sendable {
608+
public var text: String
609+
public var segments: [TranscriptionSegment]
610+
public var language: String
611+
public var timings: TranscriptionTimings
612+
public var seekTime: Float?
613+
614+
public init(
615+
text: String,
616+
segments: [TranscriptionSegment],
617+
language: String,
618+
timings: TranscriptionTimings,
619+
seekTime: Float? = nil
620+
) {
621+
self.text = text
622+
self.segments = segments
623+
self.language = language
624+
self.timings = timings
625+
self.seekTime = seekTime
626+
}
627+
}
628+
629+
603630
public extension TranscriptionResult {
604631
var allWords: [WordTiming] {
605632
return segments.compactMap { $0.words }.flatMap { $0 }
606633
}
607634
}
608635

636+
// Structs
637+
609638
public struct TranscriptionSegment: Hashable, Codable, Sendable {
610639
public var id: Int
611640
public var seek: Int

Sources/WhisperKit/Utilities/Concurrency.swift

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Copyright © 2024 Argmax, Inc. All rights reserved.
33

44
import Foundation
5+
import os.lock
56

67
/// An actor that provides thread-safe early stopping functionality using UUIDs as keys
78
public actor EarlyStopActor {
@@ -31,3 +32,58 @@ public actor EarlyStopActor {
3132
return shouldStop.removeValue(forKey: uuid)
3233
}
3334
}
35+
36+
/// Serializes access to a value with an `os_unfair_lock` so mutation stays
37+
/// thread-safe. The wrapper is used by `TranscriptionResult`, which is marked
38+
/// `@unchecked Sendable`; guarding each property with this lock helps keep the
39+
/// result instance safe when shared across concurrent contexts.
40+
@propertyWrapper
41+
public struct TranscriptionPropertyLock<Value: Codable & Sendable>: Sendable, Codable {
42+
private let lock: UnfairLock
43+
private var value: Value
44+
45+
public init(wrappedValue: Value) {
46+
self.lock = UnfairLock()
47+
self.value = wrappedValue
48+
}
49+
public init(from decoder: Swift.Decoder) throws {
50+
self.lock = UnfairLock()
51+
self.value = try Value(from: decoder)
52+
}
53+
54+
public func encode(to encoder: Encoder) throws {
55+
try lock.withLock {
56+
try value.encode(to: encoder)
57+
}
58+
59+
}
60+
61+
public var wrappedValue: Value {
62+
get {
63+
lock.withLock {
64+
return value
65+
}
66+
}
67+
set {
68+
lock.withLock {
69+
value = newValue
70+
}
71+
}
72+
}
73+
}
74+
75+
/// Thin wrapper around `os_unfair_lock` that exposes a Swift-friendly
76+
/// `withLock` helper. This lock is non-reentrant and optimized for low
77+
/// contention, matching the semantics of the underlying `os_unfair_lock`.
78+
@usableFromInline
79+
final class UnfairLock: @unchecked Sendable {
80+
@usableFromInline
81+
var lock = os_unfair_lock()
82+
83+
@inlinable
84+
func withLock<T>(_ body: () throws -> T) rethrows -> T {
85+
os_unfair_lock_lock(&lock)
86+
defer { os_unfair_lock_unlock(&lock) }
87+
return try body()
88+
}
89+
}

0 commit comments

Comments
 (0)