diff --git a/google-cloud-speech/google/cloud/speech/client.py b/google-cloud-speech/google/cloud/speech/client.py index 94716086b3a2..ce93ad785880 100644 --- a/google-cloud-speech/google/cloud/speech/client.py +++ b/google-cloud-speech/google/cloud/speech/client.py @@ -27,6 +27,7 @@ from google.cloud.speech.connection import Connection from google.cloud.speech.encoding import Encoding from google.cloud.speech.operation import Operation +from google.cloud.speech.result import StreamingSpeechResult from google.cloud.speech.sample import Sample @@ -170,7 +171,8 @@ def streaming_recognize(self, sample, language_code=None, Streaming recognition requests are limited to 1 minute of audio. See: https://cloud.google.com/speech/limits#content - Yields: list of :class:`~google.cloud.speech.alternative.Alternatives` + Yields: Instance of + :class:`~google.cloud.speech.result.StreamingSpeechResult` containing results and metadata from the streaming request. :type sample: :class:`~google.cloud.speech.sample.Sample` @@ -242,8 +244,7 @@ def streaming_recognize(self, sample, language_code=None, for response in responses: for result in response.results: if result.is_final or interim_results: - yield [Alternative.from_pb(alternative) - for alternative in result.alternatives] + yield StreamingSpeechResult.from_pb(result) def sync_recognize(self, sample, language_code=None, max_alternatives=None, profanity_filter=None, diff --git a/google-cloud-speech/google/cloud/speech/result.py b/google-cloud-speech/google/cloud/speech/result.py new file mode 100644 index 000000000000..11efb93626b6 --- /dev/null +++ b/google-cloud-speech/google/cloud/speech/result.py @@ -0,0 +1,54 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Speech result representations.""" + +from google.cloud.speech.alternative import Alternative + + +class StreamingSpeechResult(object): + """Streaming speech result representation. + + :type alternatives: list + :param alternatives: List of + :class:`~google.cloud.speech.alternative.Alternative`. + + :type is_final: bool + :param is_final: Boolean indicator of results finality. + + :type stability: float + :param stability: 0.0-1.0 stability score for the results returned. + """ + def __init__(self, alternatives, is_final=False, stability=0.0): + self.alternatives = alternatives + self.is_final = is_final + self.stability = stability + + @classmethod + def from_pb(cls, response): + """Factory: construct instance of ``StreamingSpeechResult``. + + :type response: :class:`~google.cloud.grpc.speech.v1beta1\ + .cloud_speech_pb2.StreamingRecognitionResult` + :param response: Instance of ``StreamingRecognitionResult`` protobuf. + + :rtype: :class:`~google.cloud.speech.result.StreamingSpeechResult` + :returns: Instance of ``StreamingSpeechResult``. 
+ """ + alternatives = [Alternative.from_pb(alternative) + for alternative in response.alternatives] + is_final = response.is_final + stability = response.stability + return cls(alternatives=alternatives, is_final=is_final, + stability=stability) diff --git a/google-cloud-speech/unit_tests/test_client.py b/google-cloud-speech/unit_tests/test_client.py index 8291540e0e4f..895cd715d39d 100644 --- a/google-cloud-speech/unit_tests/test_client.py +++ b/google-cloud-speech/unit_tests/test_client.py @@ -28,7 +28,7 @@ def _make_result(alternatives=()): ) -def _make_streaming_result(alternatives=(), is_final=True): +def _make_streaming_result(alternatives=(), is_final=True, stability=1.0): from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 return cloud_speech_pb2.StreamingRecognitionResult( @@ -39,6 +39,7 @@ def _make_streaming_result(alternatives=(), is_final=True): ) for alternative in alternatives ], is_final=is_final, + stability=stability, ) @@ -477,6 +478,7 @@ def test_stream_recognize_interim_results(self): from google.cloud.speech import _gax from google.cloud.speech.encoding import Encoding + from google.cloud.speech.client import StreamingSpeechResult stream = BytesIO(b'Some audio data...') credentials = _Credentials() @@ -492,11 +494,13 @@ def test_stream_recognize_interim_results(self): 'confidence': 0.0123456, }] first_response = _make_streaming_response( - _make_streaming_result([], is_final=False)) + _make_streaming_result([], is_final=False, stability=0.122435)) second_response = _make_streaming_response( - _make_streaming_result(alternatives, is_final=False)) + _make_streaming_result(alternatives, is_final=False, + stability=0.1432343)) last_response = _make_streaming_response( - _make_streaming_result(alternatives, is_final=True)) + _make_streaming_result(alternatives, is_final=True, + stability=0.9834534)) responses = [first_response, second_response, last_response] channel_args = [] @@ -522,15 +526,28 @@ def speech_api(channel=None): results = 
list(client.streaming_recognize(sample, interim_results=True)) - self.assertEqual(results[0], []) - self.assertEqual(results[1][0].transcript, + + self.assertEqual(len(results), 3) + self.assertIsInstance(results[0], StreamingSpeechResult) + self.assertEqual(results[0].alternatives, []) + self.assertFalse(results[0].is_final) + self.assertEqual(results[0].stability, 0.122435) + self.assertEqual(results[1].stability, 0.1432343) + self.assertFalse(results[1].is_final) + self.assertEqual(results[1].alternatives[0].transcript, alternatives[0]['transcript']) - self.assertEqual(results[1][0].confidence, + self.assertEqual(results[1].alternatives[0].confidence, alternatives[0]['confidence']) - self.assertEqual(results[1][1].transcript, + self.assertEqual(results[1].alternatives[1].transcript, alternatives[1]['transcript']) - self.assertEqual(results[1][1].confidence, + self.assertEqual(results[1].alternatives[1].confidence, alternatives[1]['confidence']) + self.assertTrue(results[2].is_final) + self.assertEqual(results[2].stability, 0.9834534) + self.assertEqual(results[2].alternatives[0].transcript, + alternatives[0]['transcript']) + self.assertEqual(results[2].alternatives[0].confidence, + alternatives[0]['confidence']) def test_stream_recognize(self): from io import BytesIO @@ -583,9 +600,9 @@ def speech_api(channel=None): results = list(client.streaming_recognize(sample)) self.assertEqual(len(results), 1) - self.assertEqual(results[0][0].transcript, + self.assertEqual(results[0].alternatives[0].transcript, alternatives[0]['transcript']) - self.assertEqual(results[0][0].confidence, + self.assertEqual(results[0].alternatives[0].confidence, alternatives[0]['confidence']) def test_stream_recognize_no_results(self):