2222 python beta_snippets.py metadata resources/commercial_mono.wav
2323 python beta_snippets.py punctuation resources/commercial_mono.wav
2424 python beta_snippets.py diarization resources/commercial_mono.wav
25+ python beta_snippets.py multi-channel resources/commercial_mono.wav
2526"""
2627
2728import argparse
2829import io
2930
30- from google .cloud import speech_v1p1beta1 as speech
3131
32-
33- # [START speech_transcribe_file_with_enhanced_model]
34- def transcribe_file_with_enhanced_model (path ):
32+ def transcribe_file_with_enhanced_model (speech_file ):
3533 """Transcribe the given audio file using an enhanced model."""
34+ # [START speech_transcribe_file_with_enhanced_model]
35+ from google .cloud import speech_v1p1beta1 as speech
3636 client = speech .SpeechClient ()
3737
38- with io .open (path , 'rb' ) as audio_file :
38+ # TODO(developer): Uncomment and set to a path to your audio file.
39+ # speech_file = 'path/to/file.wav'
40+
41+ with io .open (speech_file , 'rb' ) as audio_file :
3942 content = audio_file .read ()
4043
4144 audio = speech .types .RecognitionAudio (content = content )
@@ -56,15 +59,19 @@ def transcribe_file_with_enhanced_model(path):
5659 print ('-' * 20 )
5760 print ('First alternative of result {}' .format (i ))
5861 print ('Transcript: {}' .format (alternative .transcript ))
59- # [END speech_transcribe_file_with_enhanced_model]
62+ # [END speech_transcribe_file_with_enhanced_model]
6063
6164
62- # [START speech_transcribe_file_with_metadata]
63- def transcribe_file_with_metadata (path ):
65+ def transcribe_file_with_metadata (speech_file ):
6466 """Send a request that includes recognition metadata."""
67+ # [START speech_transcribe_file_with_metadata]
68+ from google .cloud import speech_v1p1beta1 as speech
6569 client = speech .SpeechClient ()
6670
67- with io .open (path , 'rb' ) as audio_file :
71+ # TODO(developer): Uncomment and set to a path to your audio file.
72+ # speech_file = 'path/to/file.wav'
73+
74+ with io .open (speech_file , 'rb' ) as audio_file :
6875 content = audio_file .read ()
6976
7077 # Here we construct a recognition metadata object.
@@ -98,15 +105,19 @@ def transcribe_file_with_metadata(path):
98105 print ('-' * 20 )
99106 print ('First alternative of result {}' .format (i ))
100107 print ('Transcript: {}' .format (alternative .transcript ))
101- # [END speech_transcribe_file_with_metadata]
108+ # [END speech_transcribe_file_with_metadata]
102109
103110
104- # [START speech_transcribe_file_with_auto_punctuation]
105- def transcribe_file_with_auto_punctuation (path ):
111+ def transcribe_file_with_auto_punctuation (speech_file ):
106112 """Transcribe the given audio file with auto punctuation enabled."""
113+ # [START speech_transcribe_file_with_auto_punctuation]
114+ from google .cloud import speech_v1p1beta1 as speech
107115 client = speech .SpeechClient ()
108116
109- with io .open (path , 'rb' ) as audio_file :
117+ # TODO(developer): Uncomment and set to a path to your audio file.
118+ # speech_file = 'path/to/file.wav'
119+
120+ with io .open (speech_file , 'rb' ) as audio_file :
110121 content = audio_file .read ()
111122
112123 audio = speech .types .RecognitionAudio (content = content )
@@ -124,15 +135,19 @@ def transcribe_file_with_auto_punctuation(path):
124135 print ('-' * 20 )
125136 print ('First alternative of result {}' .format (i ))
126137 print ('Transcript: {}' .format (alternative .transcript ))
127- # [END speech_transcribe_file_with_auto_punctuation]
138+ # [END speech_transcribe_file_with_auto_punctuation]
128139
129140
130- # [START speech_transcribe_diarization]
131- def transcribe_file_with_diarization (path ):
141+ def transcribe_file_with_diarization (speech_file ):
132142 """Transcribe the given audio file synchronously with diarization."""
143+ # [START speech_transcribe_diarization]
144+ from google .cloud import speech_v1p1beta1 as speech
133145 client = speech .SpeechClient ()
134146
135- with open (path , 'rb' ) as audio_file :
147+ # TODO(developer): Uncomment and set to a path to your audio file.
148+ # speech_file = 'path/to/file.wav'
149+
150+ with open (speech_file , 'rb' ) as audio_file :
136151 content = audio_file .read ()
137152
138153 audio = speech .types .RecognitionAudio (content = content )
@@ -154,7 +169,40 @@ def transcribe_file_with_diarization(path):
154169 .format (i , alternative .transcript ))
155170 print ('Speaker Tag for the first word: {}'
156171 .format (alternative .words [0 ].speaker_tag ))
157- # [END speech_transcribe_diarization]
172+ # [END speech_transcribe_diarization]
173+
174+
def transcribe_file_with_multichannel(speech_file):
    """Synchronously transcribe a local audio file, recognizing each
    channel separately.

    Args:
        speech_file: Path to the audio file; its raw bytes are read and
            sent inline with the recognition request.

    For every result in the response, prints a separator, the result
    index, the transcript of the first alternative, and the channel tag.
    """
    # [START speech_transcribe_multichannel]
    from google.cloud import speech_v1p1beta1 as speech
    client = speech.SpeechClient()

    # TODO(developer): Uncomment and set to a path to your audio file.
    # speech_file = 'path/to/file.wav'

    with open(speech_file, 'rb') as audio_file:
        raw_bytes = audio_file.read()

    # The audio payload is built before the config, as in the request order.
    audio = speech.types.RecognitionAudio(content=raw_bytes)

    # Recognition settings are collected first, then expanded into the
    # config message in a single call.
    settings = dict(
        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
        audio_channel_count=1,
        enable_separate_recognition_per_channel=True,
    )
    config = speech.types.RecognitionConfig(**settings)

    response = client.recognize(config, audio)

    for index, result in enumerate(response.results):
        # Only the first alternative of each result is reported.
        top_alternative = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print(u'Transcript: {}'.format(top_alternative.transcript))
        print(u'Channel Tag: {}'.format(result.channel_tag))
    # [END speech_transcribe_multichannel]
158206
159207
160208if __name__ == '__main__' :
@@ -175,3 +223,5 @@ def transcribe_file_with_diarization(path):
175223 transcribe_file_with_auto_punctuation (args .path )
176224 elif args .command == 'diarization' :
177225 transcribe_file_with_diarization (args .path )
226+ elif args .command == 'multi-channel' :
227+ transcribe_file_with_multichannel (args .path )
0 commit comments