Skip to content

Commit

Permalink
Merge pull request #2640 from daspecster/speech-streaming-part-2
Browse files Browse the repository at this point in the history
Add _make_streaming_request, formerly _make_streaming_config.
  • Loading branch information
daspecster authored Oct 29, 2016
2 parents 81fe19d + 8c360f2 commit 9a56816
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 0 deletions.
86 changes: 86 additions & 0 deletions packages/google-cloud-python-speech/google/cloud/speech/_gax.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import SpeechContext
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import RecognitionConfig
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import RecognitionAudio
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
StreamingRecognitionConfig)
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
StreamingRecognizeRequest)


from google.cloud.speech.transcript import Transcript

Expand Down Expand Up @@ -138,3 +143,84 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
for alternative in alternatives]
else:
raise ValueError('More than one result or none returned from API.')


def _make_streaming_request(sample, language_code,
                            max_alternatives, profanity_filter,
                            speech_context, single_utterance,
                            interim_results):
    """Build the configuration-only request for a streaming recognition.

    The returned request has only ``streaming_config`` populated; no audio
    content is attached by this helper.

    :type sample: :class:`~google.cloud.speech.sample.Sample`
    :param sample: Instance of ``Sample`` containing audio information.
                   Only its ``encoding`` and ``sample_rate`` attributes
                   are read here.

    :type language_code: str
    :param language_code: The language of the supplied audio as a
                          BCP-47 language tag. Example: ``'en-GB'``.

    :type max_alternatives: int
    :param max_alternatives: Maximum number of recognition hypotheses to
                             be returned. The server may return fewer
                             than ``max_alternatives``. Valid values are
                             0-30. A value of 0 or 1 will return a
                             maximum of 1.

    :type profanity_filter: bool
    :param profanity_filter: If True, the server will attempt to filter
                             out profanities, replacing all but the
                             initial character in each filtered word
                             with asterisks, e.g. ``'f***'``. If False,
                             profanities won't be filtered out.

    :type speech_context: list
    :param speech_context: Words and phrases "hints" (max 50) so that
                           the speech recognition is more likely to
                           recognize them. This can be used to improve
                           the accuracy for specific words and phrases,
                           or to add new words to the vocabulary of the
                           recognizer.
                           NOTE(review): the value is forwarded
                           unchanged as
                           ``RecognitionConfig(speech_context=...)``;
                           callers presumably need to supply a
                           ``SpeechContext`` message rather than a bare
                           list -- confirm.

    :type single_utterance: bool
    :param single_utterance: If False, the recognizer will perform
                             continuous recognition (continuing to
                             process audio even if the user pauses
                             speaking) until the client closes the
                             output stream (gRPC API) or the maximum
                             time limit has been reached. Multiple
                             ``SpeechRecognitionResult`` messages with
                             the ``is_final`` flag set to true may be
                             returned.
                             If True, the recognizer will detect a
                             single spoken utterance. When it detects
                             that the user has paused or stopped
                             speaking, it will return an
                             ``END_OF_UTTERANCE`` event and cease
                             recognition. It will return no more than
                             one ``SpeechRecognitionResult`` with the
                             ``is_final`` flag set to true.

    :type interim_results: bool
    :param interim_results: If True, interim results (tentative
                            hypotheses) may be returned as they become
                            available (these interim results are
                            indicated with the ``is_final=false``
                            flag). If False, only ``is_final=true``
                            result(s) are returned.

    :rtype: :class:`~google.cloud.grpc.speech.v1beta1.cloud_speech_pb2.\
StreamingRecognizeRequest`
    :returns: Instance of ``StreamingRecognizeRequest``.
    """
    # Recognition settings shared with the non-streaming API.
    recognition_config = RecognitionConfig(
        encoding=sample.encoding,
        sample_rate=sample.sample_rate,
        language_code=language_code,
        max_alternatives=max_alternatives,
        profanity_filter=profanity_filter,
        speech_context=speech_context,
    )

    # Wrap them with the streaming-only switches and hand back the request.
    return StreamingRecognizeRequest(
        streaming_config=StreamingRecognitionConfig(
            config=recognition_config,
            single_utterance=single_utterance,
            interim_results=interim_results,
        ),
    )
80 changes: 80 additions & 0 deletions packages/google-cloud-python-speech/unit_tests/test__gax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest


class TestSpeechGAX(unittest.TestCase):
    """Unit tests for ``google.cloud.speech._gax._make_streaming_request``."""

    # Fixture values shared by the test methods.
    SAMPLE_RATE = 16000
    HINTS = ['hi']
    AUDIO_CONTENT = '/9j/4QNURXhpZgAASUkq'

    def _callFUT(self, sample, language_code, max_alternatives,
                 profanity_filter, speech_context, single_utterance,
                 interim_results):
        """Call the function under test, forwarding every argument by name.

        The import happens inside the helper so that the module under
        test is only loaded when a test actually runs.
        """
        from google.cloud.speech._gax import _make_streaming_request

        call_kwargs = {
            'sample': sample,
            'language_code': language_code,
            'max_alternatives': max_alternatives,
            'profanity_filter': profanity_filter,
            'speech_context': speech_context,
            'single_utterance': single_utterance,
            'interim_results': interim_results,
        }
        return _make_streaming_request(**call_kwargs)

    def test_ctor(self):
        """The built request carries the full config and no audio."""
        from google.cloud import speech
        from google.cloud.speech.sample import Sample
        from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
            SpeechContext,
            RecognitionConfig,
            StreamingRecognitionConfig,
            StreamingRecognizeRequest,
        )

        language_code = 'US-en'
        max_alternatives = 2
        sample = Sample(content=self.AUDIO_CONTENT,
                        encoding=speech.Encoding.FLAC,
                        sample_rate=self.SAMPLE_RATE)

        request = self._callFUT(
            sample=sample,
            language_code=language_code,
            max_alternatives=max_alternatives,
            profanity_filter=True,
            speech_context=SpeechContext(phrases=self.HINTS),
            single_utterance=True,
            interim_results=False,
        )
        self.assertIsInstance(request, StreamingRecognizeRequest)

        # This isn't set by _make_streaming_request().
        # The first request can only have `streaming_config` set.
        # The following requests can only have `audio_content` set.
        self.assertEqual(request.audio_content, b'')

        # Streaming-level switches.
        stream_cfg = request.streaming_config
        self.assertIsInstance(stream_cfg, StreamingRecognitionConfig)
        self.assertTrue(stream_cfg.single_utterance)
        self.assertFalse(stream_cfg.interim_results)

        # Nested recognition settings.
        recognition_cfg = stream_cfg.config
        self.assertIsInstance(recognition_cfg, RecognitionConfig)
        # speech.Encoding.FLAC maps to 2.
        self.assertEqual(recognition_cfg.encoding, 2)
        self.assertEqual(recognition_cfg.sample_rate, self.SAMPLE_RATE)
        self.assertEqual(recognition_cfg.language_code, language_code)
        self.assertEqual(recognition_cfg.max_alternatives, max_alternatives)
        self.assertTrue(recognition_cfg.profanity_filter)
        self.assertEqual(recognition_cfg.speech_context.phrases, self.HINTS)

0 comments on commit 9a56816

Please sign in to comment.