 
 """GAX/GAPIC module for managing Speech API requests."""
 
-
-from google.cloud.gapic.speech.v1beta1.speech_client import SpeechClient
-from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import RecognitionAudio
-from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import (
+from google.cloud.gapic.speech.v1.speech_client import SpeechClient
+from google.cloud.proto.speech.v1.cloud_speech_pb2 import RecognitionAudio
+from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
     RecognitionConfig)
-from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import (
+from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
     SpeechContext)
-from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import (
+from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
     StreamingRecognitionConfig)
-from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import (
+from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
     StreamingRecognizeRequest)
 from google.longrunning import operations_grpc
 
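
Not part of the diff: the hunk above only moves the imports from the v1beta1 modules to their v1 counterparts; the imported names themselves are unchanged. A minimal, hedged smoke test of the new paths, assuming the GAPIC and proto packages of this era are installed, is simply importing them:

# Sketch only: import the same names from the new v1 module paths used above.
from google.cloud.gapic.speech.v1.speech_client import SpeechClient
from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
    RecognitionAudio,
    RecognitionConfig,
    SpeechContext,
    StreamingRecognitionConfig,
    StreamingRecognizeRequest,
)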
@@ -62,23 +61,22 @@ def __init__(self, client=None):
             OPERATIONS_API_HOST,
         )
 
-    def async_recognize(self, sample, language_code=None,
-                        max_alternatives=None, profanity_filter=None,
-                        speech_context=None):
-        """Asychronous Recognize request to Google Speech API.
+    def long_running_recognize(self, sample, language_code,
+                               max_alternatives=None, profanity_filter=None,
+                               speech_contexts=()):
+        """Long-running Recognize request to Google Speech API.
 
-        .. _async_recognize: https://cloud.google.com/speech/reference/\
-        rest/v1beta1/speech/asyncrecognize
+        .. _long_running_recognize: https://cloud.google.com/speech/reference/\
+        rest/v1/speech/longrunningrecognize
 
-        See `async_recognize`_.
+        See `long_running_recognize`_.
 
         :type sample: :class:`~google.cloud.speech.sample.Sample`
         :param sample: Instance of ``Sample`` containing audio information.
 
         :type language_code: str
-        :param language_code: (Optional) The language of the supplied audio as
-                              BCP-47 language tag. Example: ``'en-GB'``.
-                              If omitted, defaults to ``'en-US'``.
+        :param language_code: The language of the supplied audio as
+                              BCP-47 language tag. Example: ``'en-US'``.
 
         :type max_alternatives: int
         :param max_alternatives: (Optional) Maximum number of recognition
@@ -94,8 +92,8 @@ def async_recognize(self, sample, language_code=None,
                                  asterisks, e.g. ``'f***'``. If False or
                                  omitted, profanities won't be filtered out.
 
-        :type speech_context: list
-        :param speech_context: A list of strings (max 50) containing words and
+        :type speech_contexts: list
+        :param speech_contexts: A list of strings (max 50) containing words and
                                phrases "hints" so that the speech recognition
                                is more likely to recognize them. This can be
                                used to improve the accuracy for specific words
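
As the renamed parameter above suggests, v1 treats speech hints as a repeated field: callers still pass a plain list of phrase strings, and the module wraps it in a single SpeechContext, as the construction later in this diff shows. A small illustrative sketch (the phrases are made up):

# Illustrative only: wrap caller-supplied phrase hints for the v1 proto.
from google.cloud.proto.speech.v1.cloud_speech_pb2 import SpeechContext

speech_contexts = ['Cloud Speech', 'GAPIC']                 # plain strings from the caller
proto_contexts = [SpeechContext(phrases=speech_contexts)]   # repeated speech_contexts field in v1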
@@ -106,21 +104,27 @@ def async_recognize(self, sample, language_code=None,
         :returns: Instance of ``Operation`` to poll for results.
         """
         config = RecognitionConfig(
-            encoding=sample.encoding, sample_rate=sample.sample_rate,
-            language_code=language_code, max_alternatives=max_alternatives,
+            encoding=sample.encoding,
+            language_code=language_code,
+            max_alternatives=max_alternatives,
             profanity_filter=profanity_filter,
-            speech_context=SpeechContext(phrases=speech_context))
+            sample_rate_hertz=sample.sample_rate_hertz,
+            speech_contexts=[SpeechContext(phrases=speech_contexts)],
+        )
 
         audio = RecognitionAudio(content=sample.content,
                                  uri=sample.source_uri)
         api = self._gapic_api
-        operation_future = api.async_recognize(config=config, audio=audio)
+        operation_future = api.long_running_recognize(
+            audio=audio,
+            config=config,
+        )
 
         return Operation.from_pb(operation_future.last_operation_data(), self)
 
-    def streaming_recognize(self, sample, language_code=None,
+    def streaming_recognize(self, sample, language_code,
                             max_alternatives=None, profanity_filter=None,
-                            speech_context=None, single_utterance=False,
+                            speech_contexts=(), single_utterance=False,
                             interim_results=False):
         """Streaming speech recognition.
 
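
For orientation, here is a hedged sketch of calling the GAPIC method renamed in the hunk above (async_recognize becomes long_running_recognize). The client instance, encoding, URI, and hint phrases are illustrative assumptions; the method and field names are the ones the '+' lines use.

# Sketch under assumptions: default credentials are configured and the URI exists.
from google.cloud.gapic.speech.v1.speech_client import SpeechClient
from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
    RecognitionAudio, RecognitionConfig, SpeechContext)

client = SpeechClient()

config = RecognitionConfig(
    encoding=RecognitionConfig.LINEAR16,    # enum value exposed on the message class
    language_code='en-US',                  # required in v1; no implicit default
    sample_rate_hertz=16000,                # renamed from sample_rate
    speech_contexts=[SpeechContext(phrases=['hypothetical', 'hints'])],
)
audio = RecognitionAudio(uri='gs://my-bucket/audio.raw')    # hypothetical URI

# Renamed from async_recognize; returns an operation future to poll.
operation_future = client.long_running_recognize(config=config, audio=audio)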
@@ -136,9 +140,8 @@ def streaming_recognize(self, sample, language_code=None,
         :param sample: Instance of ``Sample`` containing audio information.
 
         :type language_code: str
-        :param language_code: (Optional) The language of the supplied audio as
-                              BCP-47 language tag. Example: ``'en-GB'``.
-                              If omitted, defaults to ``'en-US'``.
+        :param language_code: The language of the supplied audio as
+                              BCP-47 language tag. Example: ``'en-US'``.
 
         :type max_alternatives: int
         :param max_alternatives: (Optional) Maximum number of recognition
@@ -154,8 +157,8 @@ def streaming_recognize(self, sample, language_code=None,
                                  asterisks, e.g. ``'f***'``. If False or
                                  omitted, profanities won't be filtered out.
 
-        :type speech_context: list
-        :param speech_context: A list of strings (max 50) containing words and
+        :type speech_contexts: list
+        :param speech_contexts: A list of strings (max 50) containing words and
                                phrases "hints" so that the speech recognition
                                is more likely to recognize them. This can be
                                used to improve the accuracy for specific words
@@ -190,7 +193,7 @@ def streaming_recognize(self, sample, language_code=None,
         :raises: :class:`ValueError` if sample.content is not a file-like
                  object. :class:`ValueError` if stream has closed.
 
-        :rtype: :class:`~google.cloud.grpc.speech.v1beta1\
+        :rtype: :class:`~google.cloud.grpc.speech.v1\
                 .cloud_speech_pb2.StreamingRecognizeResponse`
         :returns: ``StreamingRecognizeResponse`` instances.
         """
@@ -200,29 +203,28 @@ def streaming_recognize(self, sample, language_code=None,
         requests = _stream_requests(sample, language_code=language_code,
                                     max_alternatives=max_alternatives,
                                     profanity_filter=profanity_filter,
-                                    speech_context=speech_context,
+                                    speech_contexts=speech_contexts,
                                     single_utterance=single_utterance,
                                     interim_results=interim_results)
         api = self._gapic_api
         responses = api.streaming_recognize(requests)
         return responses
 
-    def sync_recognize(self, sample, language_code=None, max_alternatives=None,
-                       profanity_filter=None, speech_context=None):
+    def recognize(self, sample, language_code, max_alternatives=None,
+                  profanity_filter=None, speech_contexts=()):
         """Synchronous Speech Recognition.
 
-        .. _sync_recognize: https://cloud.google.com/speech/reference/\
-        rest/v1beta1/speech/syncrecognize
+        .. _recognize: https://cloud.google.com/speech/reference/\
+        rest/v1/speech/recognize
 
-        See `sync_recognize`_.
+        See `recognize`_.
 
         :type sample: :class:`~google.cloud.speech.sample.Sample`
         :param sample: Instance of ``Sample`` containing audio information.
 
         :type language_code: str
-        :param language_code: (Optional) The language of the supplied audio as
-                              BCP-47 language tag. Example: ``'en-GB'``.
-                              If omitted, defaults to ``'en-US'``.
+        :param language_code: The language of the supplied audio as
+                              BCP-47 language tag. Example: ``'en-US'``.
 
         :type max_alternatives: int
         :param max_alternatives: (Optional) Maximum number of recognition
@@ -238,8 +240,8 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
                                  asterisks, e.g. ``'f***'``. If False or
                                  omitted, profanities won't be filtered out.
 
-        :type speech_context: list
-        :param speech_context: A list of strings (max 50) containing words and
+        :type speech_contexts: list
+        :param speech_contexts: A list of strings (max 50) containing words and
                                phrases "hints" so that the speech recognition
                                is more likely to recognize them. This can be
                                used to improve the accuracy for specific words
@@ -252,14 +254,17 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
         :raises: ValueError if there are no results.
         """
         config = RecognitionConfig(
-            encoding=sample.encoding, sample_rate=sample.sample_rate,
-            language_code=language_code, max_alternatives=max_alternatives,
+            encoding=sample.encoding,
+            language_code=language_code,
+            max_alternatives=max_alternatives,
             profanity_filter=profanity_filter,
-            speech_context=SpeechContext(phrases=speech_context))
+            sample_rate_hertz=sample.sample_rate_hertz,
+            speech_contexts=[SpeechContext(phrases=speech_contexts)],
+        )
         audio = RecognitionAudio(content=sample.content,
                                  uri=sample.source_uri)
         api = self._gapic_api
-        api_response = api.sync_recognize(config=config, audio=audio)
+        api_response = api.recognize(config=config, audio=audio)
 
         # Sanity check: If we got no results back, raise an error.
         if len(api_response.results) == 0:
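
The same pattern applies to the synchronous path: sync_recognize becomes recognize, and the response still carries a repeated results field that the code above checks before converting. A hedged caller-level sketch, reusing the hypothetical client, config, and audio objects from the earlier sketch:

# Sketch: 'client', 'config', and 'audio' as in the earlier long_running_recognize sketch.
response = client.recognize(config=config, audio=audio)

# Mirror the sanity check above: an empty 'results' list is treated as an error.
if not response.results:
    raise ValueError('No results returned from the Speech API.')

for result in response.results:
    best = result.alternatives[0]    # first alternative is the top hypothesis
    print(best.transcript, best.confidence)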
@@ -269,18 +274,17 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
         return [Result.from_pb(result) for result in api_response.results]
 
 
-def _stream_requests(sample, language_code=None, max_alternatives=None,
-                     profanity_filter=None, speech_context=None,
+def _stream_requests(sample, language_code, max_alternatives=None,
+                     profanity_filter=None, speech_contexts=(),
                      single_utterance=None, interim_results=None):
     """Generate stream of requests from sample.
 
     :type sample: :class:`~google.cloud.speech.sample.Sample`
     :param sample: Instance of ``Sample`` containing audio information.
 
     :type language_code: str
-    :param language_code: (Optional) The language of the supplied audio as
-                          BCP-47 language tag. Example: ``'en-GB'``.
-                          If omitted, defaults to ``'en-US'``.
+    :param language_code: The language of the supplied audio as
+                          BCP-47 language tag. Example: ``'en-US'``.
 
     :type max_alternatives: int
     :param max_alternatives: (Optional) Maximum number of recognition
@@ -296,13 +300,14 @@ def _stream_requests(sample, language_code=None, max_alternatives=None,
                              asterisks, e.g. ``'f***'``. If False or
                              omitted, profanities won't be filtered out.
 
-    :type speech_context: list
-    :param speech_context: (Optional) A list of strings (max 50) containing
-                           words and phrases "hints" so that the speech
-                           recognition is more likely to recognize them.
-                           This can be used to improve the accuracy for
-                           specific words and phrases. This can also be used to
-                           add new words to the vocabulary of the recognizer.
+    :type speech_contexts: list
+    :param speech_contexts: (Optional) A list of strings (max 50) containing
+                            words and phrases "hints" so that the speech
+                            recognition is more likely to recognize them.
+                            This can be used to improve the accuracy for
+                            specific words and phrases. This can also be used
+                            to add new words to the vocabulary of the
+                            recognizer.
 
     :type single_utterance: bool
     :param single_utterance: (Optional) If false or omitted, the recognizer
@@ -333,7 +338,7 @@ def _stream_requests(sample, language_code=None, max_alternatives=None,
     config_request = _make_streaming_request(
         sample, language_code=language_code, max_alternatives=max_alternatives,
         profanity_filter=profanity_filter,
-        speech_context=SpeechContext(phrases=speech_context),
+        speech_contexts=[SpeechContext(phrases=speech_contexts)],
         single_utterance=single_utterance, interim_results=interim_results)
 
     # The config request MUST go first and not contain any audio data.
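
The comment this hunk preserves is the key streaming invariant: the first StreamingRecognizeRequest carries only the streaming configuration, and every later request carries only audio bytes. A self-contained, hedged sketch of that ordering with the v1 messages (the encoding, language, rate, and chunking are illustrative, not taken from the diff):

# Sketch of the request ordering described above; literal values are illustrative.
from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
    RecognitionConfig,
    StreamingRecognitionConfig,
    StreamingRecognizeRequest,
)

def request_stream(audio_chunks):
    """Yield a config-only request first, then audio-only requests."""
    config = RecognitionConfig(
        encoding=RecognitionConfig.LINEAR16,
        language_code='en-US',
        sample_rate_hertz=16000,
    )
    streaming_config = StreamingRecognitionConfig(config=config)

    # First request: configuration only, no audio data.
    yield StreamingRecognizeRequest(streaming_config=streaming_config)

    # Subsequent requests: audio bytes only.
    for chunk in audio_chunks:
        yield StreamingRecognizeRequest(audio_content=chunk)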
@@ -348,7 +353,7 @@ def _stream_requests(sample, language_code=None, max_alternatives=None,
 
 def _make_streaming_request(sample, language_code,
                             max_alternatives, profanity_filter,
-                            speech_context, single_utterance,
+                            speech_contexts, single_utterance,
                             interim_results):
     """Build streaming request.
 
@@ -374,8 +379,8 @@ def _make_streaming_request(sample, language_code,
                              asterisks, e.g. ``'f***'``. If False or
                              omitted, profanities won't be filtered out.
 
-    :type speech_context: list
-    :param speech_context: A list of strings (max 50) containing words and
+    :type speech_contexts: list
+    :param speech_contexts: A list of strings (max 50) containing words and
                            phrases "hints" so that the speech recognition
                            is more likely to recognize them. This can be
                            used to improve the accuracy for specific words
@@ -409,13 +414,17 @@ def _make_streaming_request(sample, language_code,
                              returned.
 
     :rtype:
-        :class:`~grpc.speech.v1beta1.cloud_speech_pb2.StreamingRecognizeRequest`
+        :class:`~grpc.speech.v1.cloud_speech_pb2.StreamingRecognizeRequest`
     :returns: Instance of ``StreamingRecognizeRequest``.
     """
     config = RecognitionConfig(
-        encoding=sample.encoding, sample_rate=sample.sample_rate,
-        language_code=language_code, max_alternatives=max_alternatives,
-        profanity_filter=profanity_filter, speech_context=speech_context)
+        encoding=sample.encoding,
+        language_code=language_code,
+        max_alternatives=max_alternatives,
+        profanity_filter=profanity_filter,
+        sample_rate_hertz=sample.sample_rate_hertz,
+        speech_contexts=speech_contexts,
+    )
 
     streaming_config = StreamingRecognitionConfig(
         config=config, single_utterance=single_utterance,
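
Taken together, the hunks amount to a small set of surface renames in the move from v1beta1 to v1. The mapping below is only a summary of what the +/- lines in this diff show, not an exhaustive migration guide:

# Non-normative summary of the renames visible in this diff.
V1BETA1_TO_V1 = {
    'async_recognize': 'long_running_recognize',
    'sync_recognize': 'recognize',
    'sample_rate': 'sample_rate_hertz',
    'speech_context': 'speech_contexts',   # single message -> repeated field (list)
}
# In addition, language_code is now a required argument with no 'en-US' default,
# and the reference URLs move from rest/v1beta1/speech/... to rest/v1/speech/....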