Merge pull request #2640 from daspecster/speech-streaming-part-2

Add _make_streaming_request, formerly _make_streaming_config.
googleapis · Oct 29, 2016 · 9a56816 · 9a56816
2 parents 81fe19d + 8c360f2
commit 9a56816
Show file tree

Hide file tree

Showing 2 changed files with 166 additions and 0 deletions.
diff --git a/packages/google-cloud-python-speech/google/cloud/speech/_gax.py b/packages/google-cloud-python-speech/google/cloud/speech/_gax.py
@@ -18,6 +18,11 @@
 from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import SpeechContext
 from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import RecognitionConfig
 from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import RecognitionAudio
+from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
+    StreamingRecognitionConfig)
+from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
+    StreamingRecognizeRequest)
+
 
 from google.cloud.speech.transcript import Transcript
 
@@ -138,3 +143,84 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
                     for alternative in alternatives]
         else:
             raise ValueError('More than one result or none returned from API.')
+
+
+def _make_streaming_request(sample, language_code,
+                            max_alternatives, profanity_filter,
+                            speech_context, single_utterance,
+                            interim_results):
+    """Build the configuration-only streaming request.
+
+    Nests a ``RecognitionConfig`` inside a ``StreamingRecognitionConfig``
+    inside a ``StreamingRecognizeRequest``. Only ``streaming_config`` is
+    set on the returned request; no audio is attached by this helper.
+
+    The signature declares no defaults, so every argument must be supplied
+    by the caller, and each one is handed to the protobuf constructors
+    unchanged. Where a description below mentions a default, it describes
+    the API field (presumably what the service applies when the field is
+    left unset), not a default of this function.
+
+    :type sample: :class:`~google.cloud.speech.sample.Sample`
+    :param sample: Instance of ``Sample`` containing audio information.
+                   Only its ``encoding`` and ``sample_rate`` attributes
+                   are read here.
+
+    :type language_code: str
+    :param language_code: The language of the supplied audio as
+                          BCP-47 language tag. Example: ``'en-GB'``.
+                          If omitted, defaults to ``'en-US'``.
+
+    :type max_alternatives: int
+    :param max_alternatives: Maximum number of recognition
+                             hypotheses to be returned. The server may
+                             return fewer than ``max_alternatives``.
+                             Valid values are 0-30. A value of 0 or 1
+                             will return a maximum of 1. Defaults to 1.
+
+    :type profanity_filter: bool
+    :param profanity_filter: If True, the server will attempt to filter
+                             out profanities, replacing all but the
+                             initial character in each filtered word with
+                             asterisks, e.g. ``'f***'``. If False or
+                             omitted, profanities won't be filtered out.
+
+    :type speech_context: ``SpeechContext`` (imported in this module from
+                          ``google.cloud.grpc.speech.v1beta1.cloud_speech_pb2``)
+    :param speech_context: Words and phrases "hints" (max 50) so that the
+                           speech recognition is more likely to recognize
+                           them. This can be used to improve the accuracy
+                           for specific words and phrases. This can also
+                           be used to add new words to the vocabulary of
+                           the recognizer.
+                           NOTE(review): the value is passed to
+                           ``RecognitionConfig`` as-is; this helper does
+                           not wrap a plain list of strings in
+                           ``SpeechContext(phrases=...)``, so callers
+                           presumably must pass the message instance --
+                           confirm against callers.
+
+    :type single_utterance: bool
+    :param single_utterance: If false or omitted, the recognizer
+                             will perform continuous recognition
+                             (continuing to process audio even if the user
+                             pauses speaking) until the client closes the
+                             output stream (gRPC API) or when the maximum
+                             time limit has been reached. Multiple
+                             SpeechRecognitionResults with the is_final
+                             flag set to true may be returned.
+
+                             If true, the recognizer will detect a single
+                             spoken utterance. When it detects that the
+                             user has paused or stopped speaking, it will
+                             return an END_OF_UTTERANCE event and cease
+                             recognition. It will return no more than one
+                             SpeechRecognitionResult with the is_final flag
+                             set to true.
+
+    :type interim_results: bool
+    :param interim_results: If true, interim results (tentative
+                            hypotheses) may be returned as they become
+                            available (these interim results are indicated
+                            with the is_final=false flag). If false or
+                            omitted, only is_final=true result(s) are
+                            returned.
+
+    :rtype: ``StreamingRecognizeRequest`` (imported in this module from
+            ``google.cloud.grpc.speech.v1beta1.cloud_speech_pb2``)
+    :returns: Instance of ``StreamingRecognizeRequest`` with only its
+              ``streaming_config`` field populated.
+    """
+    # Base recognition settings; only the sample's encoding and sample rate
+    # are taken from ``sample`` (its audio content is not used here).
+    config = RecognitionConfig(
+        encoding=sample.encoding, sample_rate=sample.sample_rate,
+        language_code=language_code, max_alternatives=max_alternatives,
+        profanity_filter=profanity_filter, speech_context=speech_context)
+
+    # Streaming-specific flags wrap the base recognition config.
+    streaming_config = StreamingRecognitionConfig(
+        config=config, single_utterance=single_utterance,
+        interim_results=interim_results)
+
+    # Only ``streaming_config`` is populated; ``audio_content`` is left unset.
+    config_request = StreamingRecognizeRequest(
+        streaming_config=streaming_config)
+
+    return config_request
diff --git a/packages/google-cloud-python-speech/unit_tests/test__gax.py b/packages/google-cloud-python-speech/unit_tests/test__gax.py
@@ -0,0 +1,80 @@
+# Copyright 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+
+class TestSpeechGAX(unittest.TestCase):
+    """Unit tests for ``google.cloud.speech._gax._make_streaming_request``."""
+
+    # Fixture values shared by the tests below.
+    SAMPLE_RATE = 16000
+    HINTS = ['hi']
+    # Opaque placeholder payload; the test never decodes it.
+    AUDIO_CONTENT = '/9j/4QNURXhpZgAASUkq'
+
+    def _callFUT(self, sample, language_code, max_alternatives,
+                 profanity_filter, speech_context, single_utterance,
+                 interim_results):
+        """Import ``_make_streaming_request`` and call it by keyword.
+
+        Every argument is forwarded unchanged under the same name, and the
+        function's return value is returned to the test.
+        """
+        from google.cloud.speech._gax import _make_streaming_request
+        return _make_streaming_request(sample=sample,
+                                       language_code=language_code,
+                                       max_alternatives=max_alternatives,
+                                       profanity_filter=profanity_filter,
+                                       speech_context=speech_context,
+                                       single_utterance=single_utterance,
+                                       interim_results=interim_results)
+
+    def test_ctor(self):
+        """Check each argument lands in the right nested protobuf field.
+
+        NOTE(review): despite the name, this exercises the
+        ``_make_streaming_request`` helper function, not a constructor.
+        """
+        from google.cloud import speech
+        from google.cloud.speech.sample import Sample
+        from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
+            SpeechContext)
+        from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
+            RecognitionConfig)
+        from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
+            StreamingRecognitionConfig)
+        from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
+            StreamingRecognizeRequest)
+
+        sample = Sample(content=self.AUDIO_CONTENT,
+                        encoding=speech.Encoding.FLAC,
+                        sample_rate=self.SAMPLE_RATE)
+        # NOTE(review): 'US-en' looks like a reversed language tag (the
+        # usual order is language-region, e.g. 'en-US'). The value is only
+        # compared with itself below, so the test does not depend on it
+        # being a valid tag -- confirm whether this was intentional.
+        language_code = 'US-en'
+        max_alternatives = 2
+        profanity_filter = True
+        # The hints are wrapped in a ``SpeechContext`` message here, not
+        # passed as a bare list.
+        speech_context = SpeechContext(phrases=self.HINTS)
+        single_utterance = True
+        interim_results = False
+
+        streaming_request = self._callFUT(sample, language_code,
+                                          max_alternatives, profanity_filter,
+                                          speech_context, single_utterance,
+                                          interim_results)
+        self.assertIsInstance(streaming_request, StreamingRecognizeRequest)
+
+        # ``audio_content`` isn't set by _make_streaming_request(), so it
+        # is expected to compare equal to empty bytes.
+        # The first request can only have `streaming_config` set.
+        # The following requests can only have `audio_content` set.
+        self.assertEqual(streaming_request.audio_content, b'')
+
+        # Walk down the nesting: request -> streaming config -> config.
+        self.assertIsInstance(streaming_request.streaming_config,
+                              StreamingRecognitionConfig)
+        streaming_config = streaming_request.streaming_config
+        self.assertTrue(streaming_config.single_utterance)
+        self.assertFalse(streaming_config.interim_results)
+        config = streaming_config.config
+        self.assertIsInstance(config, RecognitionConfig)
+        self.assertEqual(config.encoding, 2)  # speech.Encoding.FLAC maps to 2.
+        self.assertEqual(config.sample_rate, self.SAMPLE_RATE)
+        self.assertEqual(config.language_code, language_code)
+        self.assertEqual(config.max_alternatives, max_alternatives)
+        self.assertTrue(config.profanity_filter)
+        self.assertEqual(config.speech_context.phrases, self.HINTS)