@@ -10,14 +10,154 @@ import AVFoundation
10
10
11
11
@objc
public class AppleSpeechImpl: NSObject {

    /// Synthesizer retained as an instance property so an in-flight
    /// `write(_:toBufferCallback:)` is not deallocated mid-synthesis.
    private let speechSynthesizer = AVSpeechSynthesizer()

    /// Lists every installed speech voice as a dictionary (see
    /// `AVSpeechSynthesisVoice.toDictionary()`), resolving with the array.
    ///
    /// - Parameters:
    ///   - resolve: Called once with an array of voice-info dictionaries.
    ///   - reject: Present for interface symmetry; this method never calls it.
    @objc
    public func getVoices(_ resolve: @escaping ([Any]) -> Void, reject: @escaping (String, String, Error?) -> Void) {
        Task {
            // Personal voices (iOS 17+) only appear in speechVoices() after the
            // user grants authorization, so request it and wait before listing.
            if #available(iOS 17.0, *) {
                await withCheckedContinuation { continuation in
                    AVSpeechSynthesizer.requestPersonalVoiceAuthorization { _ in
                        // Resume regardless of the outcome: denial simply means
                        // personal voices are absent from the list below.
                        continuation.resume()
                    }
                }
            }

            let allVoices = AVSpeechSynthesisVoice.speechVoices()
            let voiceInfos = allVoices.map { $0.toDictionary() }
            resolve(voiceInfos)
        }
    }

    /// Synthesizes `text` to PCM buffers and resolves with a single WAV file
    /// (header + concatenated payload built by `AppleSpeechImpl.wavData(from:)`).
    ///
    /// - Parameters:
    ///   - text: The text to speak.
    ///   - options: Optional keys: `"voice"` (voice identifier string) takes
    ///     precedence over `"language"` (BCP 47 code); with neither, the
    ///     system default voice is used.
    ///   - resolve: Called once with the complete WAV `Data`.
    ///   - reject: Called once with (code, message, error) if WAV assembly throws.
    @objc
    public func generateAudio(_ text: String, options: [String: Any], resolve: @escaping (Data) -> Void, reject: @escaping (String, String, Error?) -> Void) {
        let utterance = AVSpeechUtterance(string: text)

        // Voice precedence: explicit identifier > language code > system default.
        // NOTE(review): an unknown identifier/language makes the initializer
        // return nil, silently falling back to the default voice.
        utterance.voice = if let voiceId = options["voice"] as? String {
            AVSpeechSynthesisVoice(identifier: voiceId)
        } else if let language = options["language"] as? String {
            AVSpeechSynthesisVoice(language: language)
        } else {
            nil
        }

        var collectedBuffers: [AVAudioPCMBuffer] = []

        // Held as optionals so both callbacks can be dropped after first use,
        // guaranteeing the promise settles at most once even if the write
        // callback fires again.
        var resolveCallback: ((Data) -> Void)? = resolve
        var rejectCallback: ((String, String, Error?) -> Void)? = reject

        speechSynthesizer.write(utterance) { buffer in
            // NOTE(review): non-PCM buffers are skipped silently; if the
            // terminating buffer were ever non-PCM the promise would never
            // settle — TODO confirm against AVSpeechSynthesizer.write docs.
            guard let pcm = buffer as? AVAudioPCMBuffer else { return }

            // A zero-length buffer marks the end of synthesis: assemble the
            // WAV file from everything collected so far and settle the promise.
            if pcm.frameLength == 0 {
                guard let resolve = resolveCallback, let reject = rejectCallback else { return }

                do {
                    let data = try AppleSpeechImpl.wavData(from: collectedBuffers)
                    resolve(data)
                } catch {
                    reject("AppleSpeech", "Error generating WAV data", error)
                }

                // Release both callbacks so neither can fire a second time.
                resolveCallback = nil
                rejectCallback = nil
                return
            }

            collectedBuffers.append(pcm)
        }
    }
}
71
+
72
extension AppleSpeechImpl {
    /// Builds a single WAV file: a RIFF/fmt/data header derived from the first
    /// buffer's format, followed by the raw PCM payloads of all buffers
    /// concatenated in order. All buffers are assumed to share one format.
    ///
    /// - Parameter buffers: The PCM buffers collected from synthesis, in order.
    /// - Returns: A complete, playable WAV byte stream.
    /// - Throws: An `NSError` in domain `"WAV"` when no buffers were collected
    ///   (code -2), the sample format is unsupported (code -3), or the data is
    ///   non-interleaved multichannel (code -4).
    static func wavData(from buffers: [AVAudioPCMBuffer]) throws -> Data {
        guard let first = buffers.first else {
            throw NSError(domain: "WAV", code: -2,
                          userInfo: [NSLocalizedDescriptionKey: "No audio buffers collected"])
        }

        let format = first.format

        // Fix: the original treated every non-Float32 format as 16-bit integer
        // PCM, writing a corrupt header for e.g. Int32 or Float64 buffers.
        // Only the two formats we can describe are accepted.
        let isFloat32: Bool
        switch format.commonFormat {
        case .pcmFormatFloat32:
            isFloat32 = true
        case .pcmFormatInt16:
            isFloat32 = false
        default:
            throw NSError(domain: "WAV", code: -3,
                          userInfo: [NSLocalizedDescriptionKey: "Unsupported PCM sample format for WAV export"])
        }

        // Fix: the payload loop below reads only the first AudioBuffer
        // (`mBuffers`), which is the complete signal only for interleaved (or
        // mono) data. Non-interleaved multichannel input would silently drop
        // every channel but the first while the header still claimed all of
        // them, so reject it explicitly.
        guard format.isInterleaved || format.channelCount == 1 else {
            throw NSError(domain: "WAV", code: -4,
                          userInfo: [NSLocalizedDescriptionKey: "Non-interleaved multichannel audio is not supported"])
        }

        let channels = Int(format.channelCount)
        let sampleRate = Int(format.sampleRate)
        let bitsPerSample = isFloat32 ? 32 : 16
        let byteRate = sampleRate * channels * bitsPerSample / 8
        let blockAlign = channels * bitsPerSample / 8

        // Helpers: little-endian encoders for the 16- and 32-bit header fields.
        func le16(_ v: Int) -> [UInt8] { [UInt8(v & 0xff), UInt8((v >> 8) & 0xff)] }
        func le32(_ v: Int) -> [UInt8] {
            [UInt8(v & 0xff), UInt8((v >> 8) & 0xff),
             UInt8((v >> 16) & 0xff), UInt8((v >> 24) & 0xff)]
        }

        // Estimate capacity from the actual valid bytes in each buffer.
        let estimatedCapacity = buffers.reduce(0) { acc, buf in
            let audioBuffer = buf.audioBufferList.pointee.mBuffers
            return acc + Int(audioBuffer.mDataByteSize)
        }

        var payload = Data()
        payload.reserveCapacity(estimatedCapacity)

        // Concatenate payloads using mDataByteSize, which AVAudioPCMBuffer
        // keeps in sync with frameLength.
        for buf in buffers {
            let m = buf.audioBufferList.pointee.mBuffers
            let byteCount = Int(m.mDataByteSize)
            if let p = m.mData {
                payload.append(contentsOf: UnsafeRawBufferPointer(start: p, count: byteCount))
            }
        }

        let dataChunkSize = payload.count
        let fmtChunkSize = 16
        // RIFF chunk size = "WAVE" tag + (fmt header + body) + (data header + body).
        let riffChunkSize = 4 + (8 + fmtChunkSize) + (8 + dataChunkSize)

        var header = Data()
        header.append(contentsOf: Array("RIFF".utf8))
        header.append(contentsOf: le32(riffChunkSize))
        header.append(contentsOf: Array("WAVE".utf8))

        // fmt chunk
        header.append(contentsOf: Array("fmt ".utf8))
        header.append(contentsOf: le32(fmtChunkSize))
        header.append(contentsOf: le16(isFloat32 ? 3 : 1)) // 3 = IEEE float, 1 = PCM
        header.append(contentsOf: le16(channels))
        header.append(contentsOf: le32(sampleRate))
        header.append(contentsOf: le32(byteRate))
        header.append(contentsOf: le16(blockAlign))
        header.append(contentsOf: le16(bitsPerSample))

        // data chunk
        header.append(contentsOf: Array("data".utf8))
        header.append(contentsOf: le32(dataChunkSize))

        var out = Data(capacity: header.count + payload.count)
        out.append(header)
        out.append(payload)

        return out
    }
}
144
+
145
extension AVSpeechSynthesisVoice {
    /// Flattens the voice into a dictionary suitable for crossing the
    /// Obj-C / JSON bridge: identifier, name, language, numeric quality,
    /// and (on iOS 17+) personal/novelty voice trait flags.
    ///
    /// - Returns: Keys `identifier`, `name`, `language` (String),
    ///   `quality` (Int raw value), `isPersonalVoice`, `isNoveltyVoice`
    ///   (Bool; always `false` before iOS 17, where traits are unavailable).
    func toDictionary() -> [String: Any] {
        var data = [
            "identifier": self.identifier,
            "name": self.name,
            "language": self.language,
            // Fix: store the enum's raw Int value. Boxing the Swift enum
            // itself into [String: Any] yields an opaque __SwiftValue that
            // cannot be serialized when the dictionary is bridged.
            "quality": quality.rawValue,
            "isPersonalVoice": false,
            "isNoveltyVoice": false
        ] as [String: Any]

        if #available(iOS 17.0, *) {
            data["isPersonalVoice"] = self.voiceTraits.contains(.isPersonalVoice)
            data["isNoveltyVoice"] = self.voiceTraits.contains(.isNoveltyVoice)
        }

        return data
    }
}
0 commit comments