Merge pull request #157 from mihirshahxenlabs/main
[Audio] createTranscription: Added `timestamp_granularities[]` request parameter and updated the response object
anasfik authored Feb 21, 2024
2 parents e4b3eb5 + 8416206 commit 878b602
Showing 6 changed files with 173 additions and 3 deletions.
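
A minimal sketch of the new parameter in use, assuming `OpenAI.apiKey` is already configured and a local `speech.mp3` exists (both hypothetical here):

```dart
import 'dart:io';

import 'package:dart_openai/dart_openai.dart';

Future<void> main() async {
  // Assumes OpenAI.apiKey has already been set elsewhere.
  final transcription = await OpenAI.instance.audio.createTranscription(
    file: File('speech.mp3'), // hypothetical local audio file
    model: 'whisper-1',
    // Timestamp granularities require the verbose_json response format.
    responseFormat: OpenAIAudioResponseFormat.verbose_json,
    timestamp_granularities: [OpenAIAudioTimestampGranularity.word],
  );

  // The new response fields added by this commit.
  print(transcription.text);
  print(transcription.duration); // populated for verbose_json responses
  for (final word in transcription.words ?? const <Word>[]) {
    print('${word.word}: ${word.start} -> ${word.end}');
  }
}
```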
1 change: 1 addition & 0 deletions lib/src/core/base/audio/interfaces.dart
@@ -20,6 +20,7 @@ abstract class CreateInterface {
OpenAIAudioResponseFormat? responseFormat,
double? temperature,
String? language,
List<OpenAIAudioTimestampGranularity>? timestamp_granularities,
});

Future<OpenAIAudioModel> createTranslation({
2 changes: 2 additions & 0 deletions lib/src/core/enum.dart
@@ -12,6 +12,8 @@ enum OpenAIImageQuality { hd }

enum OpenAIImageResponseFormat { url, b64Json }

enum OpenAIAudioTimestampGranularity { word, segment }

enum OpenAIAudioResponseFormat { json, text, srt, verbose_json, vtt }

enum OpenAIAudioSpeechResponseFormat { mp3, opus, aac, flac }
153 changes: 151 additions & 2 deletions lib/src/core/models/audio/audio.dart
@@ -8,19 +8,39 @@ final class OpenAIAudioModel {
/// The text response from the audio requests.
/// This is the only field guaranteed to be returned for every response format.
final String text;
/// Extra metadata returned for `verbose_json` responses.
final String? task;
final String? language;
final double? duration;

/// Word- and segment-level timestamps, populated when the matching
/// `timestamp_granularities` value is requested alongside `verbose_json`.
final List<Word>? words;
final List<Segment>? segments;

@override
int get hashCode => text.hashCode;

/// {@macro openai_audio_model}
const OpenAIAudioModel({
required this.text,
this.task,
this.language,
this.duration,
this.words,
this.segments,
});

/// This is used to convert a [Map<String, dynamic>] object to a [OpenAIAudioModel] object.
factory OpenAIAudioModel.fromMap(Map<String, dynamic> json) {
return OpenAIAudioModel(
text: json['text'],
task: json['task'],
language: json['language'],
duration: json['duration'],
words: json['words'] != null
? List<Word>.from(json['words'].map((x) => Word.fromMap(x)))
: null,
segments: json['segments'] != null
? List<Segment>.from(json['segments'].map((x) => Segment.fromMap(x)))
: null,
);
}

@@ -30,18 +50,147 @@ final class OpenAIAudioModel {
Map<String, dynamic> toMap() {
return {
'text': text,
if (task != null) 'task': task,
if (language != null) 'language': language,
if (duration != null) 'duration': duration,
if (words != null) 'words': words,
if (segments != null) 'segments': segments,
};
}

@override
String toString() {
- return 'OpenAIAudioModel(text: $text)';
return 'OpenAIAudioModel(text: $text, task: $task, language: $language, duration: $duration, words: $words, segments: $segments)';
}

@override
bool operator ==(Object other) {
if (identical(this, other)) return true;

return other is OpenAIAudioModel &&
other.text == text &&
other.task == task &&
other.language == language &&
other.duration == duration &&
other.words == words &&
other.segments == segments;
}
}

/// A single transcribed word with its start and end times, in seconds.
final class Word {
final String word;
final double start;
final double end;

const Word({
required this.word,
required this.start,
required this.end,
});

factory Word.fromMap(Map<String, dynamic> json) {
return Word(
word: json['word'],
start: json['start'],
end: json['end'],
);
}

Map<String, dynamic> toMap() {
return {
'word': word,
'start': start,
'end': end,
};
}

@override
String toString() => 'Word(word: $word, start: $start, end: $end)';

@override
bool operator ==(Object other) {
if (identical(this, other)) return true;

return other is Word &&
other.word == word &&
other.start == start &&
other.end == end;
}
}

/// A transcribed segment with timing, token, and confidence metadata.
final class Segment {
final int id;
final int seek;
final double start;
final double end;
final String text;
final List<int> tokens;
final double temperature;
final double avg_logprob;
final double compression_ratio;
final double no_speech_prob;

const Segment({
required this.id,
required this.seek,
required this.start,
required this.end,
required this.text,
required this.tokens,
required this.temperature,
required this.avg_logprob,
required this.compression_ratio,
required this.no_speech_prob,
});

factory Segment.fromMap(Map<String, dynamic> json) {
return Segment(
id: json['id'],
seek: json['seek'],
start: json['start'],
end: json['end'],
text: json['text'],
tokens: List<int>.from(json['tokens']),
temperature: json['temperature'],
avg_logprob: json['avg_logprob'],
compression_ratio: json['compression_ratio'],
no_speech_prob: json['no_speech_prob'],
);
}

Map<String, dynamic> toMap() {
return {
'id': id,
'seek': seek,
'start': start,
'end': end,
'text': text,
'tokens': tokens,
'temperature': temperature,
'avg_logprob': avg_logprob,
'compression_ratio': compression_ratio,
'no_speech_prob': no_speech_prob,
};
}

@override
String toString() =>
'Segment(id: $id, seek: $seek, start: $start, end: $end, text: $text, tokens: $tokens, temperature: $temperature, avg_logprob: $avg_logprob, compression_ratio: $compression_ratio, no_speech_prob: $no_speech_prob)';

@override
bool operator ==(Object other) {
if (identical(this, other)) return true;

- return other is OpenAIAudioModel && other.text == text;
return other is Segment &&
other.id == id &&
other.seek == seek &&
other.start == start &&
other.end == end &&
other.text == text &&
other.tokens == tokens &&
other.temperature == temperature &&
other.avg_logprob == avg_logprob &&
other.compression_ratio == compression_ratio &&
other.no_speech_prob == no_speech_prob;
}
}
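
For reference, a small sketch of how the new `fromMap` factory consumes a `verbose_json`-shaped map; the payload below is illustrative, not a captured API response, and assumes `OpenAIAudioModel` and `Word` are exported by the package:

```dart
import 'package:dart_openai/dart_openai.dart';

void main() {
  // Illustrative verbose_json-shaped payload (not real API output).
  final response = <String, dynamic>{
    'text': 'Hello world',
    'task': 'transcribe',
    'language': 'english',
    'duration': 1.25,
    'words': [
      {'word': 'Hello', 'start': 0.0, 'end': 0.5},
      {'word': 'world', 'start': 0.6, 'end': 1.1},
    ],
  };

  final model = OpenAIAudioModel.fromMap(response);
  print(model.language); // english
  print(model.words?.first.word); // Hello
}
```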
6 changes: 6 additions & 0 deletions lib/src/instance/audio/audio.dart
@@ -34,6 +34,8 @@ interface class OpenAIAudio implements OpenAIAudioBase {
///
/// [language] is the language of the input audio. Supplying the input language in **ISO-639-1** format will improve accuracy and latency.
///
/// [timestamp_granularities] is the list of timestamp granularities to populate for this transcription. [responseFormat] must be set to `verbose_json` to use timestamp granularities. Pass either `word` or `segment`; passing both at once does not work.
///
/// Example:
/// ```dart
/// final transcription = await openai.audio.createTranscription(
@@ -52,6 +54,7 @@ interface class OpenAIAudio implements OpenAIAudioBase {
OpenAIAudioResponseFormat? responseFormat,
double? temperature,
String? language,
List<OpenAIAudioTimestampGranularity>? timestamp_granularities,
}) async {
return await OpenAINetworkingClient.fileUpload(
file: file,
@@ -62,6 +65,9 @@ interface class OpenAIAudio implements OpenAIAudioBase {
if (responseFormat != null) "response_format": responseFormat.name,
if (temperature != null) "temperature": temperature.toString(),
if (language != null) "language": language,
if (timestamp_granularities != null)
"timestamp_granularities[]":
timestamp_granularities.map((e) => e.name).join(","),
},
onSuccess: (Map<String, dynamic> response) {
return OpenAIAudioModel.fromMap(response);
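
Note the serialization choice above: the enum names are comma-joined into a single multipart field keyed `timestamp_granularities[]`. A standalone sketch of that mapping (the enum is redeclared locally just to keep the snippet self-contained):

```dart
// Redeclared locally so the snippet runs on its own.
enum OpenAIAudioTimestampGranularity { word, segment }

void main() {
  // The library's doc comment advises passing a single granularity;
  // both are joined here purely to show the comma-join serialization.
  final granularities = [
    OpenAIAudioTimestampGranularity.word,
    OpenAIAudioTimestampGranularity.segment,
  ];

  final fields = <String, String>{
    'timestamp_granularities[]': granularities.map((e) => e.name).join(','),
  };

  print(fields); // {timestamp_granularities[]: word,segment}
}
```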
2 changes: 1 addition & 1 deletion pubspec.yaml
@@ -1,6 +1,6 @@
name: dart_openai
description: Dart SDK for the OpenAI APIs (GPT-3 & DALL-E); easily integrate the power of OpenAI's state-of-the-art AI models into your Dart applications.
- version: 5.0.0
version: 5.0.1
homepage: https://github.com/anasfik/openai
repository: https://github.com/anasfik/openai
documentation: https://github.com/anasfik/openai/blob/main/README.md
12 changes: 12 additions & 0 deletions test/openai_test.dart
@@ -388,9 +388,21 @@ void main() async {
model: "whisper-1",
responseFormat: OpenAIAudioResponseFormat.json,
);
expect(transcription, isA<OpenAIAudioModel>());
expect(transcription.text, isA<String>());
});

test("create transcription with timestamp granularity", () async {
final transcription = await OpenAI.instance.audio.createTranscription(
file: audioExampleFile,
model: "whisper-1",
responseFormat: OpenAIAudioResponseFormat.verbose_json,
timestamp_granularities: [OpenAIAudioTimestampGranularity.word],
);

expect(transcription, isA<OpenAIAudioModel>());
expect(transcription.text, isA<String>());
expect(transcription.words, isA<List>());
});
test("create translation", () async {
final audioExampleFile = await getFileFromUrl(
