 and recognition metadata.

 Example usage:
-    python beta_snippets.py enhanced-model resources/commercial_mono.wav
-    python beta_snippets.py metadata resources/commercial_mono.wav
-    python beta_snippets.py punctuation resources/commercial_mono.wav
-    python beta_snippets.py diarization resources/commercial_mono.wav
-    python beta_snippets.py multi-channel resources/commercial_mono.wav
-    python beta_snippets.py multi-language resources/multi.wav en-US es
-    python beta_snippets.py word-level-conf resources/commercial_mono.wav
+    python beta_snippets.py enhanced-model
+    python beta_snippets.py metadata
+    python beta_snippets.py punctuation
+    python beta_snippets.py diarization
+    python beta_snippets.py multi-channel
+    python beta_snippets.py multi-language
+    python beta_snippets.py word-level-conf
 """

 import argparse
 import io


-def transcribe_file_with_enhanced_model(speech_file):
+def transcribe_file_with_enhanced_model():
     """Transcribe the given audio file using an enhanced model."""
     # [START speech_transcribe_file_with_enhanced_model]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()

-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
+    speech_file = 'resources/commercial_mono.wav'

     with io.open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -64,14 +63,13 @@ def transcribe_file_with_enhanced_model(speech_file):
     # [END speech_transcribe_file_with_enhanced_model]


-def transcribe_file_with_metadata(speech_file):
+def transcribe_file_with_metadata():
     """Send a request that includes recognition metadata."""
     # [START speech_transcribe_file_with_metadata]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()

-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
+    speech_file = 'resources/commercial_mono.wav'

     with io.open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -110,14 +108,13 @@ def transcribe_file_with_metadata(speech_file):
     # [END speech_transcribe_file_with_metadata]


-def transcribe_file_with_auto_punctuation(speech_file):
+def transcribe_file_with_auto_punctuation():
     """Transcribe the given audio file with auto punctuation enabled."""
     # [START speech_transcribe_file_with_auto_punctuation]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()

-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
+    speech_file = 'resources/commercial_mono.wav'

     with io.open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -140,14 +137,13 @@ def transcribe_file_with_auto_punctuation(speech_file):
     # [END speech_transcribe_file_with_auto_punctuation]


-def transcribe_file_with_diarization(speech_file):
+def transcribe_file_with_diarization():
     """Transcribe the given audio file synchronously with diarization."""
     # [START speech_transcribe_diarization]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()

-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
+    speech_file = 'resources/commercial_mono.wav'

     with open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -156,33 +152,37 @@ def transcribe_file_with_diarization(speech_file):

     config = speech.types.RecognitionConfig(
         encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
-        sample_rate_hertz=16000,
+        sample_rate_hertz=8000,
         language_code='en-US',
         enable_speaker_diarization=True,
         diarization_speaker_count=2)

     print('Waiting for operation to complete...')
     response = client.recognize(config, audio)

-    for i, result in enumerate(response.results):
-        alternative = result.alternatives[0]
-        print('-' * 20)
-        print('First alternative of result {}: {}'
-              .format(i, alternative.transcript))
-        print('Speaker Tag for the first word: {}'
-              .format(alternative.words[0].speaker_tag))
+    # The transcript within each result is separate and sequential per result.
+    # However, the words list within an alternative includes all the words
+    # from all the results thus far. Thus, to get all the words with speaker
+    # tags, you only have to take the words list from the last result:
+    result = response.results[-1]
+
+    words_info = result.alternatives[0].words
+
+    # Printing out the output:
+    for word_info in words_info:
+        print("word: '{}', speaker_tag: {}".format(word_info.word,
                                                    word_info.speaker_tag))
     # [END speech_transcribe_diarization]


-def transcribe_file_with_multichannel(speech_file):
+def transcribe_file_with_multichannel():
     """Transcribe the given audio file synchronously with
     multi channel."""
     # [START speech_transcribe_multichannel]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()

-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
+    speech_file = 'resources/Google_Gnome.wav'

     with open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
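A note on the diarization hunk above: because the words list on the last result carries a speaker_tag for every word recognized so far, a caller can rebuild per-speaker turns from that single list. The helper below is only an illustrative sketch and is not part of this change; WordInfo is a hypothetical stand-in for the word objects the API returns, carrying just the two fields used here.

    from collections import namedtuple

    # Hypothetical stand-in for the objects in result.alternatives[0].words.
    WordInfo = namedtuple('WordInfo', ['word', 'speaker_tag'])


    def group_words_by_speaker(words_info):
        """Group consecutive words sharing a speaker_tag into (tag, text) turns."""
        turns = []
        for word_info in words_info:
            if turns and turns[-1][0] == word_info.speaker_tag:
                # Same speaker as the previous word: extend the current turn.
                turns[-1][1].append(word_info.word)
            else:
                # Speaker changed: start a new turn.
                turns.append((word_info.speaker_tag, [word_info.word]))
        return [(tag, ' '.join(words)) for tag, words in turns]


    # Example with made-up words and tags:
    words_info = [WordInfo('hi', 1), WordInfo('there', 1),
                  WordInfo('hello', 2), WordInfo('thanks', 1)]
    for tag, text in group_words_by_speaker(words_info):
        print('speaker {}: {}'.format(tag, text))
    # speaker 1: hi there
    # speaker 2: hello
    # speaker 1: thanks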
@@ -207,17 +207,16 @@ def transcribe_file_with_multichannel(speech_file):
     # [END speech_transcribe_multichannel]


-def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
+def transcribe_file_with_multilanguage():
     """Transcribe the given audio file synchronously with
     multi language."""
     # [START speech_transcribe_multilanguage]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()

-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
-    # first_lang = first language code, e,g, 'en-US'
-    # second_lang = first language code, e,g, 'es'
+    speech_file = 'resources/multi.wav'
+    first_lang = 'en-US'
+    second_lang = 'es'

     with open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -226,6 +225,7 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):

     config = speech.types.RecognitionConfig(
         encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=44100,
         audio_channel_count=2,
         language_code=first_lang,
         alternative_language_codes=[second_lang])
@@ -241,15 +241,14 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
     # [END speech_transcribe_multilanguage]


-def transcribe_file_with_word_level_confidence(speech_file):
+def transcribe_file_with_word_level_confidence():
    """Transcribe the given audio file synchronously with
     word level confidence."""
     # [START speech_transcribe_word_level_confidence]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()

-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
+    speech_file = 'resources/Google_Gnome.wav'

     with open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -279,28 +278,20 @@ def transcribe_file_with_word_level_confidence(speech_file):
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter)
     parser.add_argument('command')
-    parser.add_argument(
-        'path', help='File for audio file to be recognized')
-    parser.add_argument(
-        'first', help='First language in audio file to be recognized',
-        nargs='?')
-    parser.add_argument(
-        'second', help='Second language in audio file to be recognized',
-        nargs='?')

     args = parser.parse_args()

     if args.command == 'enhanced-model':
-        transcribe_file_with_enhanced_model(args.path)
+        transcribe_file_with_enhanced_model()
     elif args.command == 'metadata':
-        transcribe_file_with_metadata(args.path)
+        transcribe_file_with_metadata()
     elif args.command == 'punctuation':
-        transcribe_file_with_auto_punctuation(args.path)
+        transcribe_file_with_auto_punctuation()
     elif args.command == 'diarization':
-        transcribe_file_with_diarization(args.path)
+        transcribe_file_with_diarization()
     elif args.command == 'multi-channel':
-        transcribe_file_with_multichannel(args.path)
+        transcribe_file_with_multichannel()
     elif args.command == 'multi-language':
-        transcribe_file_with_multilanguage(args.path, args.first, args.second)
+        transcribe_file_with_multilanguage()
     elif args.command == 'word-level-conf':
-        transcribe_file_with_word_level_confidence(args.path)
+        transcribe_file_with_word_level_confidence()
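Since every subcommand now takes no arguments, the if/elif chain above could equivalently be written as a lookup table. This is only a sketch of the same dispatch under that assumption, reusing the parser, args, and functions defined in the file; it is not part of the commit.

    # Sketch only: equivalent dispatch for the argument-less subcommands above.
    commands = {
        'enhanced-model': transcribe_file_with_enhanced_model,
        'metadata': transcribe_file_with_metadata,
        'punctuation': transcribe_file_with_auto_punctuation,
        'diarization': transcribe_file_with_diarization,
        'multi-channel': transcribe_file_with_multichannel,
        'multi-language': transcribe_file_with_multilanguage,
        'word-level-conf': transcribe_file_with_word_level_confidence,
    }

    handler = commands.get(args.command)
    if handler is not None:
        handler()
    else:
        parser.error('unknown command: {}'.format(args.command))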