|
| 1 | +# Copyright 2016 Google Inc. All Rights Reserved. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +"""Basic client for Google Cloud Speech API.""" |
| 16 | + |
| 17 | +from base64 import b64encode |
| 18 | + |
| 19 | +from google.cloud._helpers import _to_bytes |
| 20 | +from google.cloud import client as client_module |
| 21 | +from google.cloud.speech.connection import Connection |
| 22 | + |
| 23 | + |
class Encoding(object):
    """Audio encoding types accepted by the Google Cloud Speech API.

    Each attribute is the exact string value expected in the
    ``RecognitionConfig.encoding`` field of an API request.

    See:
    https://cloud.google.com/speech/reference/rest/v1beta1/\
    RecognitionConfig#AudioEncoding
    """

    LINEAR16 = 'LINEAR16'
    """LINEAR16 encoding type."""

    FLAC = 'FLAC'
    """FLAC encoding type."""

    MULAW = 'MULAW'
    """MULAW encoding type."""

    AMR = 'AMR'
    """AMR encoding type."""

    AMR_WB = 'AMR_WB'
    """AMR_WB encoding type."""
| 46 | + |
| 47 | + |
class Client(client_module.Client):
    """Client to bundle configuration needed for API requests.

    :type project: str
    :param project: The project which the client acts on behalf of. Will be
                    passed when creating a dataset / job. If not passed,
                    falls back to the default inferred from the environment.

    :type credentials: :class:`oauth2client.client.OAuth2Credentials` or
                       :class:`NoneType`
    :param credentials: The OAuth2 Credentials to use for the connection
                        owned by this client. If not passed (and if no ``http``
                        object is passed), falls back to the default inferred
                        from the environment.

    :type http: :class:`httplib2.Http` or class that defines ``request()``.
    :param http: An optional HTTP object to make requests. If not passed, an
                 ``http`` object is created that is bound to the
                 ``credentials`` for the current object.
    """

    _connection_class = Connection

    def sync_recognize(self, content, source_uri, encoding, sample_rate,
                       language_code=None, max_alternatives=None,
                       profanity_filter=None,
                       speech_context=None):
        """Synchronous Speech Recognition.

        .. _sync_recognize: https://cloud.google.com/speech/reference/\
                            rest/v1beta1/speech/syncrecognize

        See `sync_recognize`_.

        :type content: bytes
        :param content: Byte stream of audio. Exactly one of ``content`` or
                        ``source_uri`` must be given.

        :type source_uri: str
        :param source_uri: URI that points to a file that contains audio
                           data bytes as specified in RecognitionConfig.
                           Currently, only Google Cloud Storage URIs are
                           supported, which must be specified in the following
                           format: gs://bucket_name/object_name

        :type encoding: str
        :param encoding: encoding of audio data sent in all RecognitionAudio
                         messages, can be one of: :attr:`~.Encoding.LINEAR16`,
                         :attr:`~.Encoding.FLAC`, :attr:`~.Encoding.MULAW`,
                         :attr:`~.Encoding.AMR`, :attr:`~.Encoding.AMR_WB`

        :type sample_rate: int
        :param sample_rate: Sample rate in Hertz of the audio data sent in all
                            requests. Valid values are: 8000-48000. 16000 is
                            optimal. For best results, set the sampling rate
                            of the audio source to 16000 Hz. If that's not
                            possible, use the native sample rate of the audio
                            source (instead of re-sampling).

        :type language_code: str
        :param language_code: (Optional) The language of the supplied audio as
                              BCP-47 language tag. Example: "en-GB".
                              If omitted, defaults to "en-US".

        :type max_alternatives: int
        :param max_alternatives: (Optional) Maximum number of recognition
                                 hypotheses to be returned. The server may
                                 return fewer than maxAlternatives.
                                 Valid values are 0-30. A value of 0 or 1
                                 will return a maximum of 1. Defaults to 1

        :type profanity_filter: bool
        :param profanity_filter: (Optional) If True, the server will attempt
                                 to filter out profanities, replacing all but
                                 the initial character in each filtered word
                                 with asterisks, e.g. "f***". If False or
                                 omitted, profanities won't be filtered out.

        :type speech_context: list
        :param speech_context: (Optional) A list of strings (max 50)
                               containing words and phrases "hints" so that
                               the speech recognition is more likely to
                               recognize them. This can be used to improve
                               the accuracy for specific words and phrases.
                               This can also be used to add new words to the
                               vocabulary of the recognizer.

        :rtype: list
        :returns: A list of dictionaries. One dict for each alternative. Each
                  dictionary typically contains two keys (though not
                  all will be present in all cases)

                  * ``transcript``: The detected text from the audio
                    recording.
                  * ``confidence``: The confidence in language detection,
                    float between 0 and 1.

        :raises ValueError: if neither or both of ``content``/``source_uri``
                            are given, if ``encoding`` or ``sample_rate`` is
                            None, or if the API response does not contain
                            exactly one result.
        """
        # Exactly one audio source must be supplied.
        if content is None and source_uri is None:
            raise ValueError('content and source_uri cannot be both '
                             'equal to None')
        if content is not None and source_uri is not None:
            raise ValueError('content and source_uri cannot be both '
                             'different from None')
        if encoding is None:
            raise ValueError('encoding cannot be None')
        if sample_rate is None:
            raise ValueError('sample_rate cannot be None')

        if content is not None:
            # b64encode() returns bytes, which the JSON encoder rejects on
            # Python 3; decode to a native str (base64 output is pure ASCII).
            encoded = b64encode(_to_bytes(content))
            audio = {'content': encoded.decode('ascii')}
        else:
            audio = {'uri': source_uri}

        config = {'encoding': encoding, 'sampleRate': sample_rate}

        # Optional fields are only sent when explicitly provided, so the
        # server-side defaults apply otherwise.
        if language_code is not None:
            config['languageCode'] = language_code
        if max_alternatives is not None:
            config['maxAlternatives'] = max_alternatives
        if profanity_filter is not None:
            config['profanityFilter'] = profanity_filter
        if speech_context is not None:
            config['speechContext'] = {'phrases': speech_context}

        data = {
            'audio': audio,
            'config': config,
        }

        api_response = self.connection.api_request(
            method='POST', path='syncrecognize', data=data)

        # Guard clause: a sync request is expected to yield exactly one
        # result; anything else indicates an unexpected API response.
        if len(api_response['results']) != 1:
            raise ValueError('result in api should have length 1')
        return api_response['results'][0]['alternatives']
0 commit comments