Skip to content

Commit a49643f

Browse files
committed
Make API surface more usable.
1 parent fd07cd7 commit a49643f

File tree

9 files changed

+159
-247
lines changed

9 files changed

+159
-247
lines changed

docs/speech-usage.rst

Lines changed: 32 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -157,24 +157,47 @@ data to possible text alternatives on the fly.
157157

158158
See: https://cloud.google.com/speech/limits#content
159159

160+
.. code-block:: python
161+
160162
>>> import io
161163
>>> from google.cloud import speech
162164
>>> from google.cloud.speech.encoding import Encoding
163165
>>> client = speech.Client()
164166
>>> with io.open('./hello.wav', 'rb') as stream:
165167
>>> sample = client.sample(stream=stream, encoding=Encoding.LINEAR16,
166168
... sample_rate=16000)
167-
>>> stream_container = client.stream_recognize(sample)
168-
>>> print(stream_container)
169-
<google.cloud.speech.streaming.container.StreamingResponseContainer object at 0x10538ee10>
170-
>>> print(stream_container.responses)
171-
{0: <google.cloud.speech.streaming.response.StreamingSpeechResponse object at 0x10f9ac9d0>}
172-
>>> print(stream_container.responses[0].results[0].alternatives[0].confidence)
173-
0.698092460632
174-
>>> print(stream_container.is_finished)
169+
>>> for response in client.stream_recognize(sample):
170+
... print(response.transcript)
171+
hello
172+
... print(response.is_final)
175173
True
176-
>>> print stream_container.get_full_text()
174+
175+
176+
By setting ``interim_results`` to ``True``, interim results (tentative hypotheses)
177+
may be returned as they become available (these interim results are indicated
178+
by ``is_final`` being ``False``). If ``False`` or omitted, only results with
179+
``is_final`` set to ``True`` are returned.
180+
181+
.. code-block:: python
182+
183+
>>> import io
184+
>>> from google.cloud import speech
185+
>>> from google.cloud.speech.encoding import Encoding
186+
>>> client = speech.Client()
187+
>>> with io.open('./hello.wav', 'rb') as stream:
188+
>>> sample = client.sample(stream=stream, encoding=Encoding.LINEAR16,
189+
... sample_rate=16000)
190+
>>> for response in client.stream_recognize(sample,
191+
... interim_results=True):
192+
... print(response.transcript)
193+
hell
194+
... print(response.is_final)
195+
False
196+
... print(response.transcript)
177197
hello
198+
... print(response.is_final)
199+
True
200+
178201
179202
By default the recognizer will perform continuous recognition
180203
(continuing to process audio even if the user pauses speaking) until the client
@@ -195,32 +218,6 @@ See: `Single Utterance`_
195218
>>> print(stream_container.get_full_text())
196219
hello
197220
198-
199-
If ``interim_results`` is set to ``True``, interim results
200-
(tentative hypotheses) may be returned as they become available.
201-
202-
.. code-block:: python
203-
204-
>>> with io.open('./hello_pause_goodbye.wav', 'rb') as stream:
205-
>>> sample = client.sample(stream=stream, encoding=Encoding.LINEAR16,
206-
... sample_rate=16000)
207-
>>> stream_container = client.stream_recognize(sample,
208-
... interim_results=True)
209-
>>> print(stream_container.get_full_text())
210-
hello
211-
212-
>>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
213-
... encoding=Encoding.FLAC,
214-
... sample_rate=44100)
215-
>>> results = client.stream_recognize(sample, interim_results=True)
216-
>>> print(stream_container.responses[0].results[0].alternatives[0].transcript)
217-
how
218-
print(stream_container.responses[1].results[0].alternatives[0].transcript)
219-
hello
220-
>>> print(stream_container.responses[1].results[2].is_final)
221-
True
222-
223-
224221
.. _Single Utterance: https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognitionconfig
225222
.. _sync_recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/syncrecognize
226223
.. _Speech Asynchronous Recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/asyncrecognize

speech/google/cloud/speech/client.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from google.cloud.speech.operation import Operation
2727
from google.cloud.speech.streaming.request import _make_request_stream
2828
from google.cloud.speech.sample import Sample
29-
from google.cloud.speech.streaming.container import StreamingResponseContainer
29+
from google.cloud.speech.streaming.response import StreamingSpeechResponse
3030

3131
try:
3232
from google.cloud.gapic.speech.v1beta1.speech_api import SpeechApi
@@ -302,12 +302,9 @@ def stream_recognize(self, sample, language_code=None,
302302
single_utterance=single_utterance,
303303
interim_results=interim_results)
304304

305-
responses = StreamingResponseContainer()
306305
for response in self.speech_api.streaming_recognize(requests):
307-
if response:
308-
responses.add_response(response)
309-
310-
return responses
306+
if hasattr(response, 'results') or interim_results:
307+
yield StreamingSpeechResponse.from_pb(response)
311308

312309
@property
313310
def speech_api(self):

speech/google/cloud/speech/streaming/container.py

Lines changed: 0 additions & 72 deletions
This file was deleted.
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
class EndpointerType(object):
2+
ENDPOINTER_EVENT_UNSPECIFIED = 0
3+
START_OF_SPEECH = 1
4+
END_OF_SPEECH = 2
5+
END_OF_AUDIO = 3
6+
END_OF_UTTERANCE = 4
7+
8+
reverse_map = {
9+
0: 'ENDPOINTER_EVENT_UNSPECIFIED',
10+
1: 'START_OF_SPEECH',
11+
2: 'END_OF_SPEECH',
12+
3: 'END_OF_AUDIO',
13+
4: 'END_OF_UTTERANCE'
14+
}

speech/google/cloud/speech/streaming/response.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
"""Representation of a GAPIC Speech API response."""
1616

17+
from google.cloud.speech.streaming.endpointer_type import EndpointerType
1718
from google.cloud.speech.streaming.result import StreamingSpeechResult
1819

1920

@@ -34,9 +35,12 @@ class StreamingSpeechResponse(object):
3435
:param result_index: Index for specific result set. Used for updating with
3536
``interim_results``.
3637
"""
37-
def __init__(self, error, endpointer_type, results, result_index):
38+
def __init__(self, error=None, endpointer_type=None, results=None,
39+
result_index=None):
40+
results = results or []
3841
self._error = error
39-
self._endpointer_type = endpointer_type # Should be enum.
42+
self._endpointer_type = EndpointerType.reverse_map.get(
43+
endpointer_type, None)
4044
self._result_index = result_index
4145
self._results = [StreamingSpeechResult.from_pb(result)
4246
for result in results]
@@ -56,7 +60,41 @@ def from_pb(cls, pb_response):
5660
endpointer_type = pb_response.endpointer_type
5761
results = pb_response.results
5862
result_index = pb_response.result_index
59-
return cls(error, endpointer_type, results, result_index)
63+
return cls(error=error, endpointer_type=endpointer_type,
64+
results=results, result_index=result_index)
65+
66+
@property
67+
def confidence(self):
68+
"""Confidence score for recognized speech.
69+
70+
:rtype: float
71+
:returns: Confidence score of recognized speech [0.0-1.0].
72+
"""
73+
if self.results and self.results[0].alternatives:
74+
return self.results[0].alternatives[0].confidence
75+
else:
76+
return 0.0
77+
78+
@property
79+
def endpointer_type(self):
80+
"""Endpointer indicating the state of the speech detection.
81+
82+
:rtype: str
83+
:returns: String derived from :class:`~endpointer_type.EndpointerType`.
84+
"""
85+
return self._endpointer_type
86+
87+
@property
88+
def is_final(self):
89+
"""Whether the first result is final (``False`` for an interim result).
90+
91+
:rtype: bool
92+
:returns: True if the result has completed its processing.
93+
"""
94+
if len(self.results):
95+
return self.results[0].is_final
96+
else:
97+
return False
6098

6199
@property
62100
def result_index(self):
@@ -75,3 +113,15 @@ def results(self):
75113
:returns: List of ``StreamingSpeechResult`` in this response.
76114
"""
77115
return self._results
116+
117+
@property
118+
def transcript(self):
119+
"""Get most likely transcript from response.
120+
121+
:rtype: str
122+
:returns: Transcript text from response.
123+
"""
124+
if self.results and self.results[0].alternatives:
125+
return self.results[0].alternatives[0].transcript
126+
else:
127+
return ''

speech/google/cloud/speech/streaming/result.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,4 @@ def is_final(self):
7070
:rtype: bool
7171
:returns: True if the result has completed its processing.
7272
"""
73-
return self._is_final
73+
return bool(self._is_final)

0 commit comments

Comments
 (0)