Skip to content

Commit 5626e1d

Browse files
authored
Update TranscriptionResult API
* Make `TranscriptionResult` an open class and ensure thread safety
* Add `AudioProcessing.startStreamingRecordingLive` to record with an `AsyncThrowingStream`
1 parent 4ef384e commit 5626e1d

File tree

6 files changed

+154
-34
lines changed

6 files changed

+154
-34
lines changed

.github/workflows/development-tests.yml

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ jobs:
1414
cancel-in-progress: true
1515
uses: ./.github/workflows/unit-tests.yml
1616
with:
17-
ios-version: "18.6"
18-
ios-device: "iPhone 16"
19-
watchos-version: "11.5"
20-
visionos-version: "2.5"
21-
macos-runner: "macos-15"
17+
ios-version: "26.1"
18+
ios-device: "iPhone 17"
19+
watchos-version: "26.1"
20+
visionos-version: "26.1"
21+
macos-runner: "macos-26"
22+
xcode-version: "latest-stable"
2223

2324
check-approvals:
2425
runs-on: ubuntu-latest
@@ -50,18 +51,18 @@ jobs:
5051
strategy:
5152
matrix:
5253
include:
53-
- os: macos-13-xlarge
54-
ios-version: "17.2"
55-
ios-device: "iPhone 14"
56-
watchos-version: "10.2"
57-
visionos-version: "1.0"
58-
xcode-version: "15.2"
5954
- os: macos-14
6055
ios-version: "17.2"
6156
ios-device: "iPhone 15"
6257
watchos-version: "10.2"
6358
visionos-version: "1.0"
64-
xcode-version: "15.2"
59+
xcode-version: "16.1"
60+
- os: macos-15
61+
ios-version: "18.5"
62+
ios-device: "iPhone 16"
63+
watchos-version: "11.5"
64+
visionos-version: "2.5"
65+
xcode-version: "16.4"
6566
uses: ./.github/workflows/unit-tests.yml
6667
with:
6768
macos-runner: ${{ matrix.os }}

.github/workflows/release-tests.yml

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,24 @@ jobs:
1111
strategy:
1212
matrix:
1313
include:
14-
- os: macos-13-xlarge
15-
ios-version: "17.2" # TODO: Download older simulators for macOS 13
16-
ios-device: "iPhone 14"
17-
watchos-version: "10.2"
18-
visionos-version: "1.0"
19-
xcode-version: "15.2"
2014
- os: macos-14
2115
ios-version: "17.5"
2216
ios-device: "iPhone 15"
2317
watchos-version: "10.2"
2418
visionos-version: "1.0"
25-
xcode-version: "15.4"
19+
xcode-version: "16.1"
2620
- os: macos-15
27-
ios-version: "18.6" # Latest available version
21+
ios-version: "18.5"
2822
ios-device: "iPhone 16"
2923
watchos-version: "11.5"
3024
visionos-version: "2.5"
25+
xcode-version: "16.4"
26+
- os: macos-26
27+
ios-version: "26.1"
28+
ios-device: "iPhone 17"
29+
watchos-version: "26.1"
30+
visionos-version: "26.1"
31+
macos-runner: "macos-26"
3132
xcode-version: "latest-stable"
3233
uses: ./.github/workflows/unit-tests.yml
3334
with:

.github/workflows/unit-tests.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,13 @@ jobs:
4343
}
4444
- {
4545
name: "watchOS",
46-
condition: "${{ inputs.macos-runner == 'macos-15' }}",
46+
condition: "${{ inputs.macos-runner == 'macos-26' }}",
4747
clean-destination: "generic/platform=watchOS",
48-
test-destination: "platform=watchOS Simulator,OS=${{ inputs.watchos-version }},name=Apple Watch Ultra 2 (49mm)",
48+
test-destination: "platform=watchOS Simulator,OS=${{ inputs.watchos-version }},name=Apple Watch Ultra 3 (49mm)",
4949
}
5050
- {
5151
name: "visionOS",
52-
condition: "${{ inputs.macos-runner == 'macos-15' }}",
52+
condition: "${{ inputs.macos-runner == 'macos-26' }}",
5353
clean-destination: "generic/platform=visionOS",
5454
test-destination: "platform=visionOS Simulator,OS=${{ inputs.visionos-version }},name=Apple Vision Pro",
5555
}
@@ -83,7 +83,7 @@ jobs:
8383
echo "Destinations for testing:"
8484
xcodebuild test-without-building -testPlan UnitTestsPlan -scheme whisperkit-Package -showdestinations
8585
- name: Boot Simulator and Wait
86-
if: ${{ matrix.run-config['condition'] == true }} && ${{ matrix.run-config['name'] != 'macOS' }} && ${{ inputs.macos-runner == 'macos-15' }}
86+
if: ${{ matrix.run-config['condition'] == true }} && ${{ matrix.run-config['name'] != 'macOS' }} && ${{ inputs.macos-runner == 'macos-26' }}
8787
# Slower runners require some time to fully boot the simulator
8888
# Parse the simulator name from the destination string, boot it, and wait
8989
run: |
@@ -104,4 +104,4 @@ jobs:
104104
name: test-results-${{ matrix.run-config['name']}}-on-${{ inputs.macos-runner }}
105105
path: |
106106
~/Library/Developer/Xcode/DerivedData/**/Logs/Test/*.xcresult
107-
retention-days: 5
107+
retention-days: 5

Sources/WhisperKit/Core/Audio/AudioProcessor.swift

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,13 @@ public protocol AudioProcessing {
9696
/// Starts recording audio from the specified input device, resetting the previous state
9797
func startRecordingLive(inputDeviceID: DeviceID?, callback: (([Float]) -> Void)?) throws
9898

99+
/// Starts live audio recording and provides an async stream of audio samples.
100+
///
101+
/// - Parameter inputDeviceID: The device ID of the input audio device to use for recording.
102+
/// This parameter is only valid on macOS; iOS always uses the default input device.
103+
/// - Returns: A tuple containing the async stream of audio sample buffers and the stream's continuation.
104+
func startStreamingRecordingLive(inputDeviceID: DeviceID?) -> (AsyncThrowingStream<[Float], Error>, AsyncThrowingStream<[Float], Error>.Continuation)
105+
99106
/// Pause recording
100107
func pauseRecording()
101108

@@ -128,10 +135,14 @@ public extension AudioProcessing {
128135
try AudioProcessor.loadAudio(fromPath: audioFilePath)
129136
}.value
130137
}
131-
138+
132139
func startRecordingLive(inputDeviceID: DeviceID? = nil, callback: (([Float]) -> Void)?) throws {
133140
try startRecordingLive(inputDeviceID: inputDeviceID, callback: callback)
134141
}
142+
143+
func startStreamingRecordingLive(inputDeviceID: DeviceID? = nil) -> (AsyncThrowingStream<[Float], Error>, AsyncThrowingStream<[Float], Error>.Continuation) {
144+
return startStreamingRecordingLive(inputDeviceID: inputDeviceID)
145+
}
135146

136147
func resumeRecordingLive(inputDeviceID: DeviceID? = nil, callback: (([Float]) -> Void)?) throws {
137148
try resumeRecordingLive(inputDeviceID: inputDeviceID, callback: callback)
@@ -1023,6 +1034,28 @@ public extension AudioProcessor {
10231034
lastInputDevice = inputDeviceID
10241035
}
10251036

1037+
/// Starts live audio recording and returns an async stream that yields sample buffers.
1038+
/// Recording stops automatically when the stream terminates.
1039+
func startStreamingRecordingLive(inputDeviceID: DeviceID? = nil) -> (AsyncThrowingStream<[Float], Error>, AsyncThrowingStream<[Float], Error>.Continuation) {
1040+
let (stream, continuation) = AsyncThrowingStream<[Float], Error>.makeStream(bufferingPolicy: .unbounded)
1041+
1042+
continuation.onTermination = { [weak self] _ in
1043+
guard let self = self else { return }
1044+
self.audioBufferCallback = nil
1045+
self.stopRecording()
1046+
}
1047+
1048+
do {
1049+
try self.startRecordingLive(inputDeviceID: inputDeviceID) { @Sendable floats in
1050+
continuation.yield(floats)
1051+
}
1052+
} catch {
1053+
continuation.finish(throwing: error)
1054+
}
1055+
1056+
return (stream, continuation)
1057+
}
1058+
10261059
func resumeRecordingLive(inputDeviceID: DeviceID? = nil, callback: (([Float]) -> Void)? = nil) throws {
10271060
try? setupAudioSessionForDevice()
10281061

Sources/WhisperKit/Core/Models.swift

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -501,14 +501,17 @@ public struct DecodingResult {
501501
}
502502
}
503503

504-
// Structs
505-
506-
public struct TranscriptionResult: Codable, Sendable {
507-
public var text: String
508-
public var segments: [TranscriptionSegment]
509-
public var language: String
510-
public var timings: TranscriptionTimings
511-
public var seekTime: Float?
504+
/// Reference-type container for transcription output.
505+
/// The stored properties stay thread-safe because each one uses
506+
/// `TranscriptionPropertyLock`, so reads/writes hop through a private `os_unfair_lock`
507+
/// before the value is accessed, making this shared `@unchecked Sendable` class
508+
/// safe to hand across concurrent contexts.
509+
open class TranscriptionResult: Codable, @unchecked Sendable {
510+
@TranscriptionPropertyLock public var text: String
511+
@TranscriptionPropertyLock public var segments: [TranscriptionSegment]
512+
@TranscriptionPropertyLock public var language: String
513+
@TranscriptionPropertyLock public var timings: TranscriptionTimings
514+
@TranscriptionPropertyLock public var seekTime: Float?
512515

513516
public init(
514517
text: String,
@@ -600,12 +603,38 @@ public struct TranscriptionResult: Codable, Sendable {
600603
}
601604
}
602605

606+
/// Value-type equivalent of `TranscriptionResult` without property locking.
607+
public struct TranscriptionResultStruct: Codable, Sendable {
608+
public var text: String
609+
public var segments: [TranscriptionSegment]
610+
public var language: String
611+
public var timings: TranscriptionTimings
612+
public var seekTime: Float?
613+
614+
public init(
615+
text: String,
616+
segments: [TranscriptionSegment],
617+
language: String,
618+
timings: TranscriptionTimings,
619+
seekTime: Float? = nil
620+
) {
621+
self.text = text
622+
self.segments = segments
623+
self.language = language
624+
self.timings = timings
625+
self.seekTime = seekTime
626+
}
627+
}
628+
629+
603630
public extension TranscriptionResult {
604631
var allWords: [WordTiming] {
605632
return segments.compactMap { $0.words }.flatMap { $0 }
606633
}
607634
}
608635

636+
// Structs
637+
609638
public struct TranscriptionSegment: Hashable, Codable, Sendable {
610639
public var id: Int
611640
public var seek: Int

Sources/WhisperKit/Utilities/Concurrency.swift

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Copyright © 2024 Argmax, Inc. All rights reserved.
33

44
import Foundation
5+
import os.lock
56

67
/// An actor that provides thread-safe early stopping functionality using UUIDs as keys
78
public actor EarlyStopActor {
@@ -31,3 +32,58 @@ public actor EarlyStopActor {
3132
return shouldStop.removeValue(forKey: uuid)
3233
}
3334
}
35+
36+
/// Serializes access to a value with an `os_unfair_lock` so mutation stays
37+
/// thread-safe. The wrapper is used by `TranscriptionResult`, which is marked
38+
/// `@unchecked Sendable`; guarding each property with this lock helps keep the
39+
/// result instance safe when shared across concurrent contexts.
40+
@propertyWrapper
41+
public struct TranscriptionPropertyLock<Value: Codable & Sendable>: Sendable, Codable {
42+
private let lock: UnfairLock
43+
private var value: Value
44+
45+
public init(wrappedValue: Value) {
46+
self.lock = UnfairLock()
47+
self.value = wrappedValue
48+
}
49+
public init(from decoder: Swift.Decoder) throws {
50+
self.lock = UnfairLock()
51+
self.value = try Value(from: decoder)
52+
}
53+
54+
public func encode(to encoder: Encoder) throws {
55+
try lock.withLock {
56+
try value.encode(to: encoder)
57+
}
58+
59+
}
60+
61+
public var wrappedValue: Value {
62+
get {
63+
lock.withLock {
64+
return value
65+
}
66+
}
67+
set {
68+
lock.withLock {
69+
value = newValue
70+
}
71+
}
72+
}
73+
}
74+
75+
/// Thin wrapper around `os_unfair_lock` that exposes a Swift-friendly
76+
/// `withLock` helper. This lock is non-reentrant and optimized for low
77+
/// contention, matching the semantics of the underlying `os_unfair_lock` from `os.lock`.
78+
@usableFromInline
79+
final class UnfairLock: @unchecked Sendable {
80+
@usableFromInline
81+
var lock = os_unfair_lock()
82+
83+
@inlinable
84+
func withLock<T>(_ body: () throws -> T) rethrows -> T {
85+
os_unfair_lock_lock(&lock)
86+
defer { os_unfair_lock_unlock(&lock) }
87+
return try body()
88+
}
89+
}

0 commit comments

Comments
 (0)