Skip to content

Commit 992ef7b

Browse files
authored
feat: transcribe (#89)
* init * feat: update, array buffer support * tweaks * tweaks * tweaks
1 parent 4dda3ba commit 992ef7b

File tree

14 files changed

+615
-83
lines changed

14 files changed

+615
-83
lines changed

apps/example-apple/src/schema-demos.ts

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,5 @@
11
import { createAppleProvider } from '@react-native-ai/apple'
2-
import NativeAppleEmbeddings from '@react-native-ai/apple/src/NativeAppleEmbeddings'
3-
import {
4-
cosineSimilarity,
5-
embed,
6-
embedMany,
7-
generateObject,
8-
generateText,
9-
streamText,
10-
tool,
11-
} from 'ai'
2+
import { experimental_transcribe, generateObject, streamText, tool } from 'ai'
123
import { z } from 'zod'
134

145
const getWeather = tool({
@@ -125,6 +116,18 @@ export async function basicArrayDemo() {
125116
return response.object
126117
}
127118

119+
/**
 * Downloads a short sample WAV recording and transcribes it with the Apple
 * transcription model.
 *
 * @returns The transcribed text of the sample recording.
 * @throws If the sample audio cannot be downloaded (non-2xx HTTP response or
 *   network failure), or if transcription itself fails.
 */
export async function basicTranscribeDemo() {
  const file = await fetch(
    'https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav'
  )
  // fetch() only rejects on network failure. Without this check an HTTP error
  // page (404/500 body) would be handed to the transcriber as if it were audio.
  if (!file.ok) {
    throw new Error(`Failed to download sample audio: HTTP ${file.status}`)
  }
  const audio = await file.arrayBuffer()
  const response = await experimental_transcribe({
    model: apple.transcriptionModel(),
    audio,
  })
  return response.text
}
130+
128131
export const schemaDemos = {
129132
basicString: { name: 'String', func: basicStringDemo },
130133
basicStringStreaming: {
@@ -136,6 +139,7 @@ export const schemaDemos = {
136139
basicBoolean: { name: 'Boolean', func: basicBooleanDemo },
137140
basicObject: { name: 'Object', func: basicObjectDemo },
138141
basicArray: { name: 'Array', func: basicArrayDemo },
142+
basicTranscribe: { name: 'Transcribe', func: basicTranscribeDemo },
139143
}
140144

141145
export type DemoKey = keyof typeof schemaDemos

apps/example-apple/src/screens/SampleRuns.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,4 +119,4 @@ const styles = StyleSheet.create({
119119
spinner: {
120120
marginRight: 8,
121121
},
122-
})
122+
})

packages/apple-llm/ios/embeddings/AppleEmbeddingsImpl.swift

Lines changed: 65 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -75,126 +75,128 @@ public class AppleEmbeddingsImpl: NSObject {
7575
case unsupportedLanguage(String)
7676
}
7777

78+
@available(iOS 16, *)
/// Maps a locale identifier (for example "en" or "fr-FR") to the matching `NLLanguage`.
///
/// The identifier is parsed with `Locale`, and only its language code is used
/// for the lookup.
///
/// - Parameter languageString: Locale identifier to convert.
/// - Returns: The `NLLanguage` registered for the identifier's language code.
/// - Throws: `LanguageConversionError.unsupportedLanguage` when no language code
///   can be extracted from the locale, or when the code has no entry in the table.
private func convertToNLLanguage(_ languageString: String) throws -> NLLanguage {
    let locale = Locale(identifier: languageString)
    guard let languageCode = locale.language.languageCode else {
        throw LanguageConversionError.unsupportedLanguage("Cannot extract language code from locale: \(locale)")
    }

    // Keyed by `Locale.LanguageCode` (not `String`) so lookups use the same
    // equality as matching the code against string literals in a `switch`.
    let languagesByCode: [Locale.LanguageCode: NLLanguage] = [
        "en": .english,
        "fr": .french,
        "es": .spanish,
        "de": .german,
        "it": .italian,
        "pt": .portuguese,
        "ru": .russian,
        "tr": .turkish,
        "zh": .simplifiedChinese,
        "ar": .arabic,
        "cs": .czech,
        "nl": .dutch,
        "fi": .finnish,
        "he": .hebrew,
        "hi": .hindi,
        "hu": .hungarian,
        "is": .icelandic,
        "id": .indonesian,
        "ja": .japanese,
        "ko": .korean,
        "ms": .malay,
        "no": .norwegian,
        "pl": .polish,
        "ro": .romanian,
        "sk": .slovak,
        "sv": .swedish,
        "th": .thai,
        "uk": .ukrainian,
        "am": .amharic,
        "hy": .armenian,
        "bn": .bengali,
        "bg": .bulgarian,
        "my": .burmese,
        "ca": .catalan,
        "chr": .cherokee,
        "hr": .croatian,
        "da": .danish,
        "ka": .georgian,
        "el": .greek,
        "gu": .gujarati,
        "kn": .kannada,
        "km": .khmer,
        "lo": .lao,
        "ml": .malayalam,
        "mr": .marathi,
        "mn": .mongolian,
        "or": .oriya,
        "fa": .persian,
        "pa": .punjabi,
        "si": .sinhalese,
        "ta": .tamil,
        "te": .telugu,
        "bo": .tibetan,
        "ur": .urdu,
        "vi": .vietnamese,
        "kk": .kazakh,
    ]

    guard let language = languagesByCode[languageCode] else {
        throw LanguageConversionError.unsupportedLanguage("\(languageCode)")
    }
    return language
}
200202

AppleSpeech.mm (new file)

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
//
2+
// AppleSpeech.mm
3+
// AppleLLM
4+
//
5+
// Created by Mike Grabowski on 01/08/2025.
6+
//
7+
8+
#if __has_include("AppleLLM/AppleLLM-Swift.h")
9+
#import "AppleLLM/AppleLLM-Swift.h"
10+
#else
11+
#import "AppleLLM-Swift.h"
12+
#endif
13+
14+
#import <React/RCTCallInvokerModule.h>
15+
#import <React/RCTCallInvoker.h>
16+
#import <ReactCommon/RCTTurboModule.h>
17+
18+
#import <jsi/jsi.h>
19+
20+
#import <NativeAppleLLM/NativeAppleLLM.h>
21+
22+
/// TurboModule that exposes `AppleSpeechImpl` to JavaScript.
///
/// Besides the spec methods (`isAvailable:` and `prepare:resolve:reject:`), the
/// module installs a global JS function, `__apple__llm__transcribe__`, that takes
/// the audio as an ArrayBuffer and returns a Promise.
@interface AppleSpeech : NativeAppleSpeechSpecBase <NativeAppleSpeechSpec, RCTCallInvokerModule>
@property (nonatomic, strong) AppleSpeechImpl *speech;
@end

using namespace facebook;
using namespace JS::NativeAppleLLM;

@implementation AppleSpeech

@synthesize callInvoker;

- (instancetype)init {
  self = [super init];
  if (self) {
    _speech = [AppleSpeechImpl new];
  }
  return self;
}

+ (NSString *)moduleName {
  return @"NativeAppleSpeech";
}

/// Installs `global.__apple__llm__transcribe__(audio, language)` on the JS runtime.
///
/// The installed function validates its arguments, copies the ArrayBuffer's bytes
/// into an `NSData`, and returns a Promise that is settled from the callbacks of
/// `-[AppleSpeechImpl transcribe:language:resolve:reject:]`.
///
/// @param jsInvoker Call invoker used to schedule work on the JS thread.
- (void)installTranscribeFunc:(std::shared_ptr<facebook::react::CallInvoker>)jsInvoker {
  AppleSpeechImpl *speechModule = _speech;

  // Schedules `f` through the call invoker. The callback receives the runtime from
  // the invoker itself, so completion handlers never have to hold on to a
  // `jsi::Runtime &` captured from an earlier call frame.
  auto runOnJS = [jsInvoker](std::function<void(jsi::Runtime &)> &&f) {
    jsInvoker->invokeAsync(std::move(f));
  };

  jsInvoker->invokeAsync([speechModule, runOnJS](jsi::Runtime &rt) {
    @try {
      auto global = rt.global();

      auto transcribeFunc = jsi::Function::createFromHostFunction(
        rt,
        jsi::PropNameID::forAscii(rt, "transcribe"),
        2,
        [speechModule, runOnJS](jsi::Runtime &rt, const jsi::Value &thisVal, const jsi::Value *args, size_t count) -> jsi::Value {
          // `args` only holds `count` values; indexing past that is out of bounds.
          if (count < 2) {
            throw jsi::JSError(rt, "transcribe expects two arguments: an ArrayBuffer and a language string");
          }

          if (!args[0].isObject()) {
            throw jsi::JSError(rt, "First argument must be an ArrayBuffer");
          }
          auto arrayBuffer = args[0].asObject(rt);
          if (!arrayBuffer.isArrayBuffer(rt)) {
            throw jsi::JSError(rt, "First argument must be an ArrayBuffer");
          }

          // Copy the bytes: the transcription runs asynchronously and must not
          // depend on the JS-owned buffer staying alive.
          auto buffer = arrayBuffer.getArrayBuffer(rt);
          NSData *audioData = [NSData dataWithBytes:buffer.data(rt) length:buffer.size(rt)];

          if (!args[1].isString()) {
            throw jsi::JSError(rt, "Second argument must be a language string");
          }
          auto languageStr = args[1].asString(rt).utf8(rt);
          NSString *language = [NSString stringWithUTF8String:languageStr.c_str()];
          if (language == nil) {
            throw jsi::JSError(rt, "Second argument must be a valid UTF-8 language string");
          }

          auto Promise = rt.global().getPropertyAsFunction(rt, "Promise");

          return Promise.callAsConstructor(rt, jsi::Function::createFromHostFunction(
            rt,
            jsi::PropNameID::forAscii(rt, "executor"),
            2,
            [speechModule, audioData, language, runOnJS](jsi::Runtime &rt, const jsi::Value &thisVal, const jsi::Value *args, size_t count) -> jsi::Value {
              // Shared pointers let the resolve/reject functions be copied into
              // the Objective-C blocks and the C++ callbacks below.
              auto resolve = std::make_shared<jsi::Function>(args[0].asObject(rt).asFunction(rt));
              auto reject = std::make_shared<jsi::Function>(args[1].asObject(rt).asFunction(rt));

              [speechModule transcribe:audioData
                              language:language
                               resolve:^(id result) {
                runOnJS([resolve, result](jsi::Runtime &jsRuntime) {
                  auto jsResult = react::TurboModuleConvertUtils::convertObjCObjectToJSIValue(jsRuntime, result);
                  resolve->call(jsRuntime, jsResult);
                });
              } reject:^(NSString *code, NSString *message, NSError *error) {
                // `message` may be nil; a null C string must never reach
                // jsi::String::createFromUtf8.
                NSString *reason = message ?: error.localizedDescription ?: @"Transcription failed";
                const char *reasonCString = reason.UTF8String;
                std::string reasonUtf8 = reasonCString ? reasonCString : "Transcription failed";

                runOnJS([reject, reasonUtf8](jsi::Runtime &jsRuntime) {
                  auto jsError = jsi::String::createFromUtf8(jsRuntime, reasonUtf8);
                  reject->call(jsRuntime, jsError);
                });
              }];

              return jsi::Value::undefined();
            }
          ));
        }
      );

      global.setProperty(rt, "__apple__llm__transcribe__", transcribeFunc);
    } @catch (NSException *exception) {
      throw jsi::JSError(rt, [[NSString stringWithFormat:@"Failed to install transcribe handler: %@", exception.reason] UTF8String]);
    }
  });
}

/// Creates the JSI TurboModule and, as a side effect, schedules installation of
/// the global transcribe function on the JS thread.
- (std::shared_ptr<react::TurboModule>)getTurboModule:(const react::ObjCTurboModule::InitParams &)params {
  [self installTranscribeFunc:params.jsInvoker];
  return std::make_shared<react::NativeAppleSpeechSpecJSI>(params);
}

/// Returns the boxed result of `-[AppleSpeechImpl isAvailable:]` for `language`.
- (nonnull NSNumber *)isAvailable:(nonnull NSString *)language {
  return @([_speech isAvailable:language]);
}

/// Forwards to `-[AppleSpeechImpl prepare:resolve:reject:]`; the promise blocks
/// are passed through unchanged.
- (void)prepare:(nonnull NSString *)language resolve:(nonnull RCTPromiseResolveBlock)resolve reject:(nonnull RCTPromiseRejectBlock)reject {
  [_speech prepare:language resolve:resolve reject:reject];
}

@end

0 commit comments

Comments
 (0)