From 43c8ec6ae160011f9c1731878f8434ed2f5b9367 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Wed, 6 Jul 2016 09:38:26 -0700 Subject: [PATCH] Revert "Changing cloud speech code samples to work with v1beta1 (#399)" This reverts commit bfef34d64e3929f4d42c8fa7b1c27d0feac20a2c. --- speech/api/grpc_auth.py | 0 speech/api/requirements-speech_grpc.txt | 2 +- speech/api/speech_gcs.py | 23 +++++---- speech/api/speech_gcs_test.py | 6 +-- speech/api/speech_rest.py | 8 ++-- speech/api/speech_streaming.py | 62 ++++++++++++------------- 6 files changed, 51 insertions(+), 50 deletions(-) create mode 100644 speech/api/grpc_auth.py diff --git a/speech/api/grpc_auth.py b/speech/api/grpc_auth.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/speech/api/requirements-speech_grpc.txt b/speech/api/requirements-speech_grpc.txt index 443a444c5dea..65e0755501df 100644 --- a/speech/api/requirements-speech_grpc.txt +++ b/speech/api/requirements-speech_grpc.txt @@ -1,4 +1,4 @@ gcloud==0.17.0 grpcio==0.14.0 PyAudio==0.2.9 -grpc-google-cloud-speech-v1beta1==1.0.0 +grpc-google-cloud-speech==1.0.4 diff --git a/speech/api/speech_gcs.py b/speech/api/speech_gcs.py index 8aa0aeb72c0f..b25956c50ae7 100644 --- a/speech/api/speech_gcs.py +++ b/speech/api/speech_gcs.py @@ -18,7 +18,7 @@ import argparse from gcloud.credentials import get_credentials -from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech +from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech from grpc.beta import implementations # Keep the request alive for this many seconds @@ -48,23 +48,25 @@ def make_channel(host, port): return implementations.secure_channel(host, port, composite_channel) -def main(input_uri, encoding, sample_rate): +def main(input_uri, output_uri, encoding, sample_rate): service = cloud_speech.beta_create_Speech_stub( make_channel('speech.googleapis.com', 443)) # The method and parameters can be inferred from the proto from which the # grpc client lib was generated. See: - # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto - response = service.SyncRecognize(cloud_speech.SyncRecognizeRequest( - config=cloud_speech.RecognitionConfig( + # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto + response = service.NonStreamingRecognize(cloud_speech.RecognizeRequest( + initial_request=cloud_speech.InitialRecognizeRequest( encoding=encoding, sample_rate=sample_rate, + output_uri=output_uri, ), - audio=cloud_speech.RecognitionAudio( + audio_request=cloud_speech.AudioRequest( uri=input_uri, ) ), DEADLINE_SECS) - # Print the recognition results. - print(response.results) + # This shouldn't actually print anything, since the transcription is output + # to the GCS uri specified + print(response.responses) def _gcs_uri(text): @@ -75,10 +77,11 @@ def _gcs_uri(text): PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/' - 'google/cloud/speech/v1beta1/cloud_speech.proto') + 'google/cloud/speech/v1/cloud_speech.proto') if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('input_uri', type=_gcs_uri) + parser.add_argument('output_uri', type=_gcs_uri) parser.add_argument( '--encoding', default='FLAC', choices=[ 'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'], @@ -86,4 +89,4 @@ def _gcs_uri(text): parser.add_argument('--sample_rate', default=16000) args = parser.parse_args() - main(args.input_uri, args.encoding, args.sample_rate) + main(args.input_uri, args.output_uri, args.encoding, args.sample_rate) diff --git a/speech/api/speech_gcs_test.py b/speech/api/speech_gcs_test.py index 56f2b4a42d3e..7f03ede18ebf 100644 --- a/speech/api/speech_gcs_test.py +++ b/speech/api/speech_gcs_test.py @@ -11,7 +11,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re import sys import pytest @@ -25,11 +24,12 @@ 'https://github.com/grpc/grpc/issues/282')) def test_main(cloud_config, capsys): input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket) + output_uri = 'gs://{}/speech/audio.txt'.format(cloud_config.storage_bucket) - main(input_uri, 'FLAC', 16000) + main(input_uri, output_uri, 'FLAC', 16000) out, err = capsys.readouterr() - assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) + assert '[]\n' == out def test_gcs_uri(): diff --git a/speech/api/speech_rest.py b/speech/api/speech_rest.py index e6f83fb4a6e4..6ab5160aa8e7 100644 --- a/speech/api/speech_rest.py +++ b/speech/api/speech_rest.py @@ -40,7 +40,7 @@ def get_speech_service(): credentials.authorize(http) return discovery.build( - 'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL) + 'speech', 'v1', http=http, discoveryServiceUrl=DISCOVERY_URL) # [END authenticating] @@ -57,13 +57,13 @@ def main(speech_file): speech_content = base64.b64encode(speech.read()) service = get_speech_service() - service_request = service.speech().syncrecognize( + service_request = service.speech().recognize( body={ - 'config': { + 'initialRequest': { 'encoding': 'LINEAR16', 'sampleRate': 16000 }, - 'audio': { + 'audioRequest': { 'content': speech_content.decode('UTF-8') } }) diff --git a/speech/api/speech_streaming.py b/speech/api/speech_streaming.py index 560b072990fa..6ecac2e2804f 100644 --- a/speech/api/speech_streaming.py +++ b/speech/api/speech_streaming.py @@ -14,14 +14,12 @@ # limitations under the License. """Sample that streams audio to the Google Cloud Speech API via GRPC.""" -from __future__ import division - import contextlib import re import threading from gcloud.credentials import get_credentials -from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech +from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech from google.rpc import code_pb2 from grpc.beta import implementations import pyaudio @@ -29,7 +27,7 @@ # Audio recording parameters RATE = 16000 CHANNELS = 1 -CHUNK = int(RATE / 10) # 100ms +CHUNK = RATE // 10 # 100ms # Keep the request alive for this many seconds DEADLINE_SECS = 8 * 60 * 60 @@ -45,15 +43,15 @@ def make_channel(host, port): creds = get_credentials().create_scoped([SPEECH_SCOPE]) # Add a plugin to inject the creds into the header auth_header = ( - 'Authorization', - 'Bearer ' + creds.get_access_token().access_token) + 'Authorization', + 'Bearer ' + creds.get_access_token().access_token) auth_plugin = implementations.metadata_call_credentials( - lambda _, cb: cb([auth_header], None), - name='google_creds') + lambda _, cb: cb([auth_header], None), + name='google_creds') # compose the two together for both ssl and google auth composite_channel = implementations.composite_channel_credentials( - ssl_channel, auth_plugin) + ssl_channel, auth_plugin) return implementations.secure_channel(host, port, composite_channel) @@ -77,8 +75,7 @@ def record_audio(channels, rate, chunk): def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK): - """Yields `StreamingRecognizeRequest`s constructed from a recording audio - stream. + """Yields `RecognizeRequest`s constructed from a recording audio stream. Args: stop_audio: A threading.Event object stops the recording when set. @@ -86,31 +83,33 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK): rate: The sampling rate. chunk: Buffer audio into chunks of this size before sending to the api. """ - # The initial request must contain metadata about the stream, so the - # server knows how to interpret it. - recognition_config = cloud_speech.RecognitionConfig( - encoding='LINEAR16', sample_rate=rate) - streaming_config = cloud_speech.StreamingRecognitionConfig( - config=recognition_config, - # Note that setting interim_results to True means that you'll likely - # get multiple results for the same bit of audio, as the system - # re-interprets audio in the context of subsequent audio. However, this - # will give us quick results without having to tell the server when to - # finalize a piece of audio. - interim_results=True, single_utterance=True - ) - - yield cloud_speech.StreamingRecognizeRequest( - streaming_config=streaming_config) - with record_audio(channels, rate, chunk) as audio_stream: + # The initial request must contain metadata about the stream, so the + # server knows how to interpret it. + metadata = cloud_speech.InitialRecognizeRequest( + encoding='LINEAR16', sample_rate=rate, + # Note that setting interim_results to True means that you'll + # likely get multiple results for the same bit of audio, as the + # system re-interprets audio in the context of subsequent audio. + # However, this will give us quick results without having to tell + # the server when to finalize a piece of audio. + interim_results=True, continuous=False, + ) + data = audio_stream.read(chunk) + audio_request = cloud_speech.AudioRequest(content=data) + + yield cloud_speech.RecognizeRequest( + initial_request=metadata, + audio_request=audio_request) + while not stop_audio.is_set(): data = audio_stream.read(chunk) if not data: raise StopIteration() - # Subsequent requests can all just have the content - yield cloud_speech.StreamingRecognizeRequest(audio_content=data) + audio_request = cloud_speech.AudioRequest(content=data) + + yield cloud_speech.RecognizeRequest(audio_request=audio_request) def listen_print_loop(recognize_stream): @@ -137,8 +136,7 @@ def main(): make_channel('speech.googleapis.com', 443)) as service: try: listen_print_loop( - service.StreamingRecognize( - request_stream(stop_audio), DEADLINE_SECS)) + service.Recognize(request_stream(stop_audio), DEADLINE_SECS)) finally: # Stop the request stream once we're done with the loop - otherwise # it'll keep going in the thread that the grpc lib makes for it..