Merge pull request #352 from GoogleCloudPlatform/speech-gcs

jerjou · jerjou · commit c5c6c952ac47 · 2016-05-18T11:35:40.000-07:00
Add sample for speech api on GCS file via grpc.
diff --git a/speech/api/README.md b/speech/api/README.md
@@ -49,7 +49,7 @@ for more information.
 * If you're running the `speech_streaming.py` sample:
 
     ```sh
-    $ pip install -r requirements-speech_streaming.txt
+    $ pip install -r requirements-speech_grpc.txt
     ```
 
     The sample uses the [PyAudio][pyaudio] library to stream audio from your
diff --git a/speech/api/grpc_auth.py b/speech/api/grpc_auth.py
diff --git a/speech/api/requirements-speech_grpc.txt b/speech/api/requirements-speech_grpc.txt
diff --git a/speech/api/speech_gcs.py b/speech/api/speech_gcs.py
@@ -0,0 +1,92 @@
+#!/usr/bin/python
+# Copyright (C) 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#            http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Sample that transcribes a FLAC audio file stored in Google Cloud Storage,
+using GRPC."""
+
+import argparse
+
+from gcloud.credentials import get_credentials
+from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
+from grpc.beta import implementations
+
+# Keep the request alive for this many seconds (passed along with the
+# recognize call in main()).
+DEADLINE_SECS = 10
+# OAuth2 scope applied to the application default credentials in
+# make_channel().
+SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
+
+
+def make_channel(host, port):
+    """Build a secure channel that combines SSL with Google credentials.
+
+    Args:
+        host: Hostname handed to `implementations.secure_channel`.
+        port: Port handed to `implementations.secure_channel`.
+
+    Returns:
+        The channel produced by `implementations.secure_channel`, using
+        credentials composed from default SSL settings and an
+        'Authorization: Bearer <token>' metadata header.
+    """
+    # Default ssl credentials are enough to make an https call.
+    ssl_creds = implementations.ssl_channel_credentials(None, None, None)
+
+    # Application default credentials from the environment, restricted to
+    # the scope this sample needs.
+    scoped_creds = get_credentials().create_scoped([SPEECH_SCOPE])
+    access_token = scoped_creds.get_access_token().access_token
+    bearer_header = ('Authorization', 'Bearer ' + access_token)
+
+    def _inject_auth_header(_, callback):
+        # Hand the prepared header to grpc; there is no error to report.
+        return callback([bearer_header], None)
+
+    call_creds = implementations.metadata_call_credentials(
+            _inject_auth_header, name='google_creds')
+
+    # Both layers are needed: ssl for transport, the header for google auth.
+    combined_creds = implementations.composite_channel_credentials(
+            ssl_creds, call_creds)
+    return implementations.secure_channel(host, port, combined_creds)
+
+
+def main(input_uri, output_uri, encoding, sample_rate):
+    """Request transcription of an audio file stored in Cloud Storage.
+
+    Sends a single NonStreamingRecognize call and prints the `responses`
+    field of the reply.
+
+    Args:
+        input_uri: gs:// uri of the audio file, sent as the audio request.
+        output_uri: gs:// uri sent to the API as the transcription output.
+        encoding: Name of the audio encoding, e.g. 'FLAC'.
+        sample_rate: Sample rate of the audio file.
+    """
+    # Scope the stub with `with`, the same way the streaming sample does,
+    # so its lifetime is bounded by this call instead of being left open.
+    with cloud_speech.beta_create_Speech_stub(
+            make_channel('speech.googleapis.com', 443)) as service:
+        # The method and parameters can be inferred from the proto from
+        # which the grpc client lib was generated. See:
+        # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto
+        response = service.NonStreamingRecognize(
+            cloud_speech.RecognizeRequest(
+                initial_request=cloud_speech.InitialRecognizeRequest(
+                    encoding=encoding,
+                    sample_rate=sample_rate,
+                    output_uri=output_uri,
+                ),
+                audio_request=cloud_speech.AudioRequest(
+                    uri=input_uri,
+                )
+            ), DEADLINE_SECS)
+    # The transcription is written to the GCS uri specified, so the reply
+    # is expected to carry no results and this prints an empty list.
+    print(response.responses)
+
+
+def _gcs_uri(text):
+    """Check that `text` looks like a Cloud Storage uri.
+
+    Used as the argparse `type` for the uri arguments.
+
+    Returns:
+        `text` unchanged when it starts with 'gs://'.
+
+    Raises:
+        ValueError: if `text` does not start with 'gs://'.
+    """
+    if text.startswith('gs://'):
+        return text
+    raise ValueError(
+        'Cloud Storage uri must be of the form gs://bucket/path/')
+
+
+# Location of the proto the grpc client was generated from; referenced in
+# the --encoding help text.
+PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/'
+             'google/cloud/speech/v1/cloud_speech.proto')
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('input_uri', type=_gcs_uri)
+    parser.add_argument('output_uri', type=_gcs_uri)
+    parser.add_argument(
+        '--encoding', default='FLAC', choices=[
+            'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
+        help='How the audio file is encoded. See {}#L67'.format(PROTO_URL))
+    # Parse as int: without a type, a value given on the command line would
+    # reach main() as a string, while the default is an int.
+    parser.add_argument('--sample_rate', default=16000, type=int)
+
+    args = parser.parse_args()
+    main(args.input_uri, args.output_uri, args.encoding, args.sample_rate)
diff --git a/speech/api/speech_gcs_test.py b/speech/api/speech_gcs_test.py
@@ -0,0 +1,38 @@
+# Copyright 2016, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+
+import pytest
+from speech_gcs import _gcs_uri
+from speech_gcs import main
+
+
+@pytest.mark.skipif(
+        sys.version_info[0] >= 3,
+        reason=("grpc doesn't yet support python3 "
+                'https://github.com/grpc/grpc/issues/282'))
+def test_main(cloud_config, capsys):
+    """Run the sample against the configured bucket and check its stdout."""
+    bucket = cloud_config.storage_bucket
+    source_uri = 'gs://{}/speech/clip.flac'.format(bucket)
+    transcript_uri = 'gs://{}/speech/clip.txt'.format(bucket)
+
+    main(source_uri, transcript_uri, 'FLAC', 16000)
+
+    # The transcription goes to GCS, so only an empty list is printed.
+    stdout, _ = capsys.readouterr()
+    assert stdout == '[]\n'
+
+
+def test_gcs_uri():
+    """A gs:// uri is accepted; a local path raises ValueError."""
+    accepted, rejected = 'gs://bucket/path', '/local/path'
+    _gcs_uri(accepted)
+    with pytest.raises(ValueError):
+        _gcs_uri(rejected)
diff --git a/speech/api/speech_streaming.py b/speech/api/speech_streaming.py
@@ -1,11 +1,25 @@
 #!/usr/bin/python
+# Copyright (C) 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Sample that streams audio to the Google Cloud Speech API via GRPC."""
 
 import contextlib
 import re
 import threading
 
 from gcloud.credentials import get_credentials
-from google.cloud.speech.v1.cloud_speech_pb2 import *  # noqa
+from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
 from google.rpc import code_pb2
 from grpc.beta import implementations
 import pyaudio
@@ -70,7 +84,7 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
     with record_audio(channels, rate, chunk) as audio_stream:
         # The initial request must contain metadata about the stream, so the
         # server knows how to interpret it.
-        metadata = InitialRecognizeRequest(
+        metadata = cloud_speech.InitialRecognizeRequest(
             encoding='LINEAR16', sample_rate=rate,
             # Note that setting interim_results to True means that you'll
             # likely get multiple results for the same bit of audio, as the
@@ -80,9 +94,9 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
             interim_results=True, continuous=False,
         )
         data = audio_stream.read(chunk)
-        audio_request = AudioRequest(content=data)
+        audio_request = cloud_speech.AudioRequest(content=data)
 
-        yield RecognizeRequest(
+        yield cloud_speech.RecognizeRequest(
             initial_request=metadata,
             audio_request=audio_request)
 
@@ -91,9 +105,9 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
             if not data:
                 raise StopIteration()
             # Subsequent requests can all just have the content
-            audio_request = AudioRequest(content=data)
+            audio_request = cloud_speech.AudioRequest(content=data)
 
-            yield RecognizeRequest(audio_request=audio_request)
+            yield cloud_speech.RecognizeRequest(audio_request=audio_request)
 
 
 def listen_print_loop(recognize_stream):
@@ -116,7 +130,7 @@ def listen_print_loop(recognize_stream):
 
 def main():
     stop_audio = threading.Event()
-    with beta_create_Speech_stub(
+    with cloud_speech.beta_create_Speech_stub(
             make_channel('speech.googleapis.com', 443)) as service:
         try:
             listen_print_loop(