Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add stability information to streaming results. #2714

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@
Client <speech-client>
speech-encoding
speech-operation
speech-result
speech-sample
speech-alternative

Expand Down
7 changes: 7 additions & 0 deletions docs/speech-result.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Speech Result
=============

.. automodule:: google.cloud.speech.result
:members:
:undoc-members:
:show-inheritance:
28 changes: 18 additions & 10 deletions docs/speech-usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,10 @@ speech data to possible text alternatives on the fly.
... sample = client.sample(content=stream,
... encoding=speech.Encoding.LINEAR16,
... sample_rate=16000)
... alternatives = list(client.streaming_recognize(sample))
>>> print(alternatives[0].transcript)
... results = list(client.streaming_recognize(sample))
>>> print(results[0].alternatives[0].transcript)
'hello'
>>> print(alternatives[0].confidence)
>>> print(results[0].alternatives[0].confidence)
0.973458576


Expand All @@ -196,10 +196,10 @@ See: `Single Utterance`_
... sample_rate=16000)
... responses = client.streaming_recognize(sample,
... single_utterance=True)
... alternatives = list(responses)
>>> print(alternatives[0].transcript)
... results = list(responses)
>>> print(results[0].alternatives[0].transcript)
hello
>>> print(alternatives[0].confidence)
>>> print(results[0].alternatives[0].confidence)

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.

0.96523453546


Expand All @@ -214,20 +214,28 @@ If ``interim_results`` is set to :data:`True`, interim results
... sample = client.sample(content=stream,
... encoding=speech.Encoding.LINEAR16,
... sample_rate=16000)
... for alternatives in client.streaming_recognize(sample,
... interim_results=True):
... for result in client.streaming_recognize(sample,
... interim_results=True):
... print('=' * 20)
... print(alternatives[0].transcript)
... print(alternatives[0].confidence)
... print(result.alternatives[0].transcript)
... print(result.alternatives[0].confidence)
... print(result.is_final)
... print(result.stability)
====================
'he'
None
False
0.113245
====================
'hell'
None
False
0.132454
====================
'hello'
0.973458576
True
0.982345


.. _Single Utterance: https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognitionconfig
Expand Down
7 changes: 4 additions & 3 deletions speech/google/cloud/speech/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from google.cloud.speech.connection import Connection
from google.cloud.speech.encoding import Encoding
from google.cloud.speech.operation import Operation
from google.cloud.speech.result import StreamingSpeechResult
from google.cloud.speech.sample import Sample


Expand Down Expand Up @@ -170,7 +171,8 @@ def streaming_recognize(self, sample, language_code=None,
Streaming recognition requests are limited to 1 minute of audio.
See: https://cloud.google.com/speech/limits#content

Yields: list of :class:`~google.cloud.speech.alternative.Alternatives`
Yields: Instance of
:class:`~google.cloud.speech.result.StreamingSpeechResult`
containing results and metadata from the streaming request.

:type sample: :class:`~google.cloud.speech.sample.Sample`
Expand Down Expand Up @@ -242,8 +244,7 @@ def streaming_recognize(self, sample, language_code=None,
for response in responses:
for result in response.results:
if result.is_final or interim_results:
yield [Alternative.from_pb(alternative)
for alternative in result.alternatives]
yield StreamingSpeechResult.from_pb(result)

def sync_recognize(self, sample, language_code=None,
max_alternatives=None, profanity_filter=None,
Expand Down
54 changes: 54 additions & 0 deletions speech/google/cloud/speech/result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Speech result representations."""

from google.cloud.speech.alternative import Alternative


class StreamingSpeechResult(object):
    """Streaming speech result representation.

    Bundles the recognition alternatives for one streaming result together
    with the finality flag and stability score reported alongside them.

    :type alternatives: list
    :param alternatives: List of
                         :class:`~google.cloud.speech.alternative.Alternative`.

    :type is_final: bool
    :param is_final: Boolean indicator of results finality.

    :type stability: float
    :param stability: 0.0-1.0 stability score for the results returned.
    """
    def __init__(self, alternatives, is_final=False, stability=0.0):
        self.alternatives = alternatives
        self.is_final = is_final
        self.stability = stability

    def __repr__(self):
        # Debug-friendly representation showing every stored attribute.
        return '%s(alternatives=%r, is_final=%r, stability=%r)' % (
            type(self).__name__, self.alternatives, self.is_final,
            self.stability)

    @classmethod
    def from_pb(cls, response):
        """Factory: construct instance of ``StreamingSpeechResult``.

        Despite the parameter name, ``response`` is a single result entry
        (one element of a streaming response's ``results``), not the whole
        streaming response.

        :type response: :class:`~google.cloud.grpc.speech.v1beta1\
                        .cloud_speech_pb2.StreamingRecognitionResult`
        :param response: Instance of ``StreamingRecognitionResult`` protobuf.

        :rtype: :class:`~google.cloud.speech.result.StreamingSpeechResult`
        :returns: Instance of ``StreamingSpeechResult``.
        """
        # Convert each protobuf alternative into its wrapper class.
        alternatives = [Alternative.from_pb(alternative)
                        for alternative in response.alternatives]
        return cls(alternatives=alternatives, is_final=response.is_final,
                   stability=response.stability)
39 changes: 28 additions & 11 deletions speech/unit_tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def _make_result(alternatives=()):
)


def _make_streaming_result(alternatives=(), is_final=True):
def _make_streaming_result(alternatives=(), is_final=True, stability=1.0):
from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2

return cloud_speech_pb2.StreamingRecognitionResult(
Expand All @@ -39,6 +39,7 @@ def _make_streaming_result(alternatives=(), is_final=True):
) for alternative in alternatives
],
is_final=is_final,
stability=stability,
)


Expand Down Expand Up @@ -476,6 +477,7 @@ def test_stream_recognize_interim_results(self):

from google.cloud.speech import _gax
from google.cloud.speech.encoding import Encoding
from google.cloud.speech.client import StreamingSpeechResult

stream = BytesIO(b'Some audio data...')
credentials = _Credentials()
Expand All @@ -491,11 +493,13 @@ def test_stream_recognize_interim_results(self):
'confidence': 0.0123456,
}]
first_response = _make_streaming_response(
_make_streaming_result([], is_final=False))
_make_streaming_result([], is_final=False, stability=0.122435))
second_response = _make_streaming_response(
_make_streaming_result(alternatives, is_final=False))
_make_streaming_result(alternatives, is_final=False,
stability=0.1432343))
last_response = _make_streaming_response(
_make_streaming_result(alternatives, is_final=True))
_make_streaming_result(alternatives, is_final=True,
stability=0.9834534))
responses = [first_response, second_response, last_response]

channel_args = []
Expand All @@ -521,15 +525,28 @@ def speech_api(channel=None):

results = list(client.streaming_recognize(sample,
interim_results=True))
self.assertEqual(results[0], [])
self.assertEqual(results[1][0].transcript,

self.assertEqual(len(results), 3)
self.assertIsInstance(results[0], StreamingSpeechResult)

This comment was marked as spam.

self.assertEqual(results[0].alternatives, [])
self.assertFalse(results[0].is_final)
self.assertEqual(results[0].stability, 0.122435)
self.assertEqual(results[1].stability, 0.1432343)
self.assertFalse(results[1].is_final)
self.assertEqual(results[1].alternatives[0].transcript,
alternatives[0]['transcript'])
self.assertEqual(results[1][0].confidence,
self.assertEqual(results[1].alternatives[0].confidence,
alternatives[0]['confidence'])
self.assertEqual(results[1][1].transcript,
self.assertEqual(results[1].alternatives[1].transcript,
alternatives[1]['transcript'])
self.assertEqual(results[1][1].confidence,
self.assertEqual(results[1].alternatives[1].confidence,
alternatives[1]['confidence'])
self.assertTrue(results[2].is_final)
self.assertEqual(results[2].stability, 0.9834534)
self.assertEqual(results[2].alternatives[0].transcript,
alternatives[0]['transcript'])
self.assertEqual(results[2].alternatives[0].confidence,
alternatives[0]['confidence'])

def test_stream_recognize(self):
from io import BytesIO
Expand Down Expand Up @@ -582,9 +599,9 @@ def speech_api(channel=None):

results = list(client.streaming_recognize(sample))
self.assertEqual(len(results), 1)
self.assertEqual(results[0][0].transcript,
self.assertEqual(results[0].alternatives[0].transcript,
alternatives[0]['transcript'])
self.assertEqual(results[0][0].confidence,
self.assertEqual(results[0].alternatives[0].confidence,
alternatives[0]['confidence'])

def test_stream_recognize_no_results(self):
Expand Down
18 changes: 9 additions & 9 deletions system_tests/speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,15 @@ def _make_streaming_request(self, file_obj, single_utterance=True,
single_utterance=single_utterance,
interim_results=interim_results)

def _check_results(self, results, num_results=1):
self.assertEqual(len(results), num_results)
top_result = results[0]
def _check_results(self, alternatives, num_results=1):
self.assertEqual(len(alternatives), num_results)
top_result = alternatives[0]
self.assertIsInstance(top_result, Alternative)
self.assertEqual(top_result.transcript,
'hello ' + self.ASSERT_TEXT)
self.assertGreater(top_result.confidence, 0.90)
if num_results == 2:
second_alternative = results[1]
second_alternative = alternatives[1]
self.assertIsInstance(second_alternative, Alternative)
self.assertEqual(second_alternative.transcript, self.ASSERT_TEXT)
self.assertIsNone(second_alternative.confidence)
Expand Down Expand Up @@ -192,7 +192,7 @@ def test_stream_recognize(self):

with open(AUDIO_FILE, 'rb') as file_obj:
for results in self._make_streaming_request(file_obj):
self._check_results(results)
self._check_results(results.alternatives)

def test_stream_recognize_interim_results(self):
if not Config.USE_GAX:
Expand All @@ -207,12 +207,12 @@ def test_stream_recognize_interim_results(self):
interim_results=True)
responses = list(recognize)
for response in responses:
if response[0].transcript:
self.assertIn(response[0].transcript,
if response.alternatives[0].transcript:
self.assertIn(response.alternatives[0].transcript,
extras + self.ASSERT_TEXT)

self.assertGreater(len(responses), 5)
self._check_results(responses[-1])
self._check_results(responses[-1].alternatives)

def test_stream_recognize_single_utterance(self):
if not Config.USE_GAX:
Expand All @@ -221,4 +221,4 @@ def test_stream_recognize_single_utterance(self):
with open(AUDIO_FILE, 'rb') as file_obj:
for results in self._make_streaming_request(
file_obj, single_utterance=False):
self._check_results(results)
self._check_results(results.alternatives)