Skip to content

Commit bafed15

Browse files
authored
Rename and move _JSONSpeechAPI to HTTPSpeechAPI. (#2979)
* Rename and move _JSONSpeechAPI to HTTPSpeechAPI.
1 parent fca6333 commit bafed15

File tree

3 files changed

+214
-195
lines changed

3 files changed

+214
-195
lines changed
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
# Copyright 2017 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""HTTP module for managing Speech API requests."""
16+
17+
from base64 import b64encode
18+
19+
from google.cloud._helpers import _bytes_to_unicode
20+
from google.cloud._helpers import _to_bytes
21+
22+
from google.cloud.speech.result import Result
23+
from google.cloud.speech.operation import Operation
24+
25+
26+
class HTTPSpeechAPI(object):
    """Speech API for interacting with the HTTP version of the API.

    :type client: :class:`google.cloud.core.client.Client`
    :param client: Instance of a ``Client`` object.
    """
    def __init__(self, client):
        self._client = client
        # Keep a direct handle on the client's connection; every request
        # made by this class goes through ``self._connection.api_request``.
        self._connection = client._connection

    def async_recognize(self, sample, language_code=None,
                        max_alternatives=None, profanity_filter=None,
                        speech_context=None):
        """Asynchronous Recognize request to Google Speech API.

        .. _async_recognize: https://cloud.google.com/speech/reference/\
                             rest/v1beta1/speech/asyncrecognize

        See `async_recognize`_.

        :type sample: :class:`~google.cloud.speech.sample.Sample`
        :param sample: Instance of ``Sample`` containing audio information.

        :type language_code: str
        :param language_code: (Optional) The language of the supplied audio as
                              BCP-47 language tag. Example: ``'en-GB'``.
                              If omitted, defaults to ``'en-US'``.

        :type max_alternatives: int
        :param max_alternatives: (Optional) Maximum number of recognition
                                 hypotheses to be returned. The server may
                                 return fewer than maxAlternatives.
                                 Valid values are 0-30. A value of 0 or 1
                                 will return a maximum of 1. Defaults to 1.

        :type profanity_filter: bool
        :param profanity_filter: If True, the server will attempt to filter
                                 out profanities, replacing all but the
                                 initial character in each filtered word with
                                 asterisks, e.g. ``'f***'``. If False or
                                 omitted, profanities won't be filtered out.

        :type speech_context: list
        :param speech_context: A list of strings (max 50) containing words and
                               phrases "hints" so that the speech recognition
                               is more likely to recognize them. This can be
                               used to improve the accuracy for specific words
                               and phrases. This can also be used to add new
                               words to the vocabulary of the recognizer.

        :rtype: :class:`~google.cloud.speech.operation.Operation`
        :returns: Operation for asynchronous request to Google Speech API.
        """
        data = _build_request_data(sample, language_code, max_alternatives,
                                   profanity_filter, speech_context)
        api_response = self._connection.api_request(
            method='POST', path='speech:asyncrecognize', data=data)

        operation = Operation.from_dict(api_response, self._client)
        # Tag the operation with the kind of request that created it.
        operation.caller_metadata['request_type'] = 'AsyncRecognize'
        return operation

    def sync_recognize(self, sample, language_code=None, max_alternatives=None,
                       profanity_filter=None, speech_context=None):
        """Synchronous Speech Recognition.

        .. _sync_recognize: https://cloud.google.com/speech/reference/\
                            rest/v1beta1/speech/syncrecognize

        See `sync_recognize`_.

        :type sample: :class:`~google.cloud.speech.sample.Sample`
        :param sample: Instance of ``Sample`` containing audio information.

        :type language_code: str
        :param language_code: (Optional) The language of the supplied audio as
                              BCP-47 language tag. Example: ``'en-GB'``.
                              If omitted, defaults to ``'en-US'``.

        :type max_alternatives: int
        :param max_alternatives: (Optional) Maximum number of recognition
                                 hypotheses to be returned. The server may
                                 return fewer than maxAlternatives.
                                 Valid values are 0-30. A value of 0 or 1
                                 will return a maximum of 1. Defaults to 1.

        :type profanity_filter: bool
        :param profanity_filter: If True, the server will attempt to filter
                                 out profanities, replacing all but the
                                 initial character in each filtered word with
                                 asterisks, e.g. ``'f***'``. If False or
                                 omitted, profanities won't be filtered out.

        :type speech_context: list
        :param speech_context: A list of strings (max 50) containing words and
                               phrases "hints" so that the speech recognition
                               is more likely to recognize them. This can be
                               used to improve the accuracy for specific words
                               and phrases. This can also be used to add new
                               words to the vocabulary of the recognizer.

        :rtype: list
        :returns: A list with one entry per item in the response's
                  ``results``, each built with
                  :meth:`Result.from_api_repr
                  <google.cloud.speech.result.Result.from_api_repr>`.

        :raises: ValueError if the response contains no results (the
                 ``results`` key is missing or empty).
        """
        data = _build_request_data(sample, language_code, max_alternatives,
                                   profanity_filter, speech_context)
        api_response = self._connection.api_request(
            method='POST', path='speech:syncrecognize', data=data)

        # Use ``.get`` rather than indexing: a response without a
        # ``results`` key must surface as the documented ``ValueError``,
        # not as a ``KeyError``.
        results = api_response.get('results')
        if not results:
            raise ValueError('No results were returned from the API')
        return [Result.from_api_repr(result) for result in results]
148+
149+
150+
def _build_request_data(sample, language_code=None, max_alternatives=None,
151+
profanity_filter=None, speech_context=None):
152+
"""Builds the request data before making API request.
153+
154+
:type sample: :class:`~google.cloud.speech.sample.Sample`
155+
:param sample: Instance of ``Sample`` containing audio information.
156+
157+
:type language_code: str
158+
:param language_code: (Optional) The language of the supplied audio as
159+
BCP-47 language tag. Example: ``'en-GB'``.
160+
If omitted, defaults to ``'en-US'``.
161+
162+
:type max_alternatives: int
163+
:param max_alternatives: (Optional) Maximum number of recognition
164+
hypotheses to be returned. The server may
165+
return fewer than maxAlternatives.
166+
Valid values are 0-30. A value of 0 or 1
167+
will return a maximum of 1. Defaults to 1
168+
169+
:type profanity_filter: bool
170+
:param profanity_filter: If True, the server will attempt to filter
171+
out profanities, replacing all but the
172+
initial character in each filtered word with
173+
asterisks, e.g. ``'f***'``. If False or
174+
omitted, profanities won't be filtered out.
175+
176+
:type speech_context: list
177+
:param speech_context: A list of strings (max 50) containing words and
178+
phrases "hints" so that the speech recognition
179+
is more likely to recognize them. This can be
180+
used to improve the accuracy for specific words
181+
and phrases. This can also be used to add new
182+
words to the vocabulary of the recognizer.
183+
184+
:rtype: dict
185+
:returns: Dictionary with required data for Google Speech API.
186+
"""
187+
if sample.content is not None:
188+
audio = {'content':
189+
_bytes_to_unicode(b64encode(_to_bytes(sample.content)))}
190+
else:
191+
audio = {'uri': sample.source_uri}
192+
193+
config = {'encoding': sample.encoding,
194+
'sampleRate': sample.sample_rate}
195+
196+
if language_code is not None:
197+
config['languageCode'] = language_code
198+
if max_alternatives is not None:
199+
config['maxAlternatives'] = max_alternatives
200+
if profanity_filter is not None:
201+
config['profanityFilter'] = profanity_filter
202+
if speech_context is not None:
203+
config['speechContext'] = {'phrases': speech_context}
204+
205+
data = {
206+
'audio': audio,
207+
'config': config,
208+
}
209+
210+
return data

0 commit comments

Comments
 (0)