-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Vendor the GAPIC for Speech. * Speech Partial Veneer (#3483) * Update to docs based on @dhermes catch. * Fix incorrect variable. * Fix the docs. * Style fixes to unit tests. * More PR review from me.
- Loading branch information
1 parent
24f99cb
commit d90c659
Showing
24 changed files
with
2,435 additions
and
155 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Register this package with pkg_resources as a namespace package, so the
# package name can be shared by separately installed distributions.
__import__('pkg_resources').declare_namespace(__name__) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Register this package with pkg_resources as a namespace package, so the
# package name can be shared by separately installed distributions.
__import__('pkg_resources').declare_namespace(__name__) |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
# Copyright 2016 Google Inc. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Wrappers for protocol buffer enum types.""" | ||
|
||
|
||
class RecognitionConfig(object):
    """Groups the enum wrappers that belong to ``RecognitionConfig``."""

    class AudioEncoding(object):
        """Encoding of the audio data sent in the audio message.

        Every encoding supports only 1 channel (mono) audio. ``FLAC`` is the
        only encoding that includes a header describing the bytes of audio
        that follow it; all other encodings are raw audio bytes with no
        header.

        For best results, capture and transmit the audio source with a
        lossless encoding (``FLAC`` or ``LINEAR16``). The other codecs listed
        here are lossy, and using them to capture or transmit the audio may
        reduce recognition accuracy, particularly if background noise is
        present.

        Attributes:
          ENCODING_UNSPECIFIED (int): Not specified. Will return result
            ``google.rpc.Code.INVALID_ARGUMENT``.
          LINEAR16 (int): Uncompressed 16-bit signed little-endian samples
            (Linear PCM).
          FLAC (int): `FLAC <https://xiph.org/flac/documentation.html>`_
            (Free Lossless Audio Codec) is the recommended encoding because
            it is lossless -- therefore recognition is not compromised --
            and requires only about half the bandwidth of ``LINEAR16``.
            ``FLAC`` stream encoding supports 16-bit and 24-bit samples,
            however, not all fields in ``STREAMINFO`` are supported.
          MULAW (int): 8-bit samples that compand 14-bit audio samples using
            G.711 PCMU/mu-law.
          AMR (int): Adaptive Multi-Rate Narrowband codec.
            ``sample_rate_hertz`` must be 8000.
          AMR_WB (int): Adaptive Multi-Rate Wideband codec.
            ``sample_rate_hertz`` must be 16000.
          OGG_OPUS (int): Opus encoded audio frames in Ogg container
            (`OggOpus <https://wiki.xiph.org/OggOpus>`_).
            ``sample_rate_hertz`` must be 16000.
          SPEEX_WITH_HEADER_BYTE (int): Although the use of lossy encodings
            is not recommended, if a very low bitrate encoding is required,
            ``OGG_OPUS`` is highly preferred over Speex encoding. The
            `Speex <https://speex.org/>`_ encoding supported by Cloud Speech
            API has a header byte in each block, as in MIME type
            ``audio/x-speex-with-header-byte``. It is a variant of the RTP
            Speex encoding defined in
            `RFC 5574 <https://tools.ietf.org/html/rfc5574>`_. The stream is
            a sequence of blocks, one block per RTP packet. Each block
            starts with a byte containing the length of the block, in bytes,
            followed by one or more frames of Speex data, padded to an
            integral number of bytes (octets) as specified in RFC 5574. In
            other words, each RTP header is replaced with a single byte
            containing the block length. Only Speex wideband is supported.
            ``sample_rate_hertz`` must be 16000.
        """

        # Numeric values are spelled out explicitly, one constant per line.
        ENCODING_UNSPECIFIED = 0
        LINEAR16 = 1
        FLAC = 2
        MULAW = 3
        AMR = 4
        AMR_WB = 5
        OGG_OPUS = 6
        SPEEX_WITH_HEADER_BYTE = 7
|
||
|
||
class StreamingRecognizeResponse(object):
    """Groups the enum wrappers that belong to ``StreamingRecognizeResponse``."""

    class SpeechEventType(object):
        """Indicates the type of speech event.

        Attributes:
          SPEECH_EVENT_UNSPECIFIED (int): No speech event specified.
          END_OF_SINGLE_UTTERANCE (int): The server has detected the end of
            the user's speech utterance and expects no additional speech, so
            it will not process additional audio (although it may
            subsequently return additional results). The client should stop
            sending additional audio data, half-close the gRPC connection,
            and wait for any additional results until the server closes the
            gRPC connection. This event is only sent if ``single_utterance``
            was set to ``true``, and is not used otherwise.
        """

        SPEECH_EVENT_UNSPECIFIED = 0
        END_OF_SINGLE_UTTERANCE = 1
285 changes: 285 additions & 0 deletions
285
google-cloud-speech/google/cloud/gapic/speech/v1/speech_client.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,285 @@ | ||
# Copyright 2017, Google Inc. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
# EDITING INSTRUCTIONS | ||
# This file was generated from the file | ||
# https://github.com/google/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto, | ||
# and updates to that file get reflected here through a refresh process. | ||
# For the short term, the refresh process will only be runnable by Google engineers. | ||
# | ||
# The only allowed edits are to method and file documentation. A 3-way | ||
# merge preserves those additions if the generated source changes. | ||
"""Accesses the google.cloud.speech.v1 Speech API.""" | ||
|
||
import collections | ||
import json | ||
import os | ||
import pkg_resources | ||
import platform | ||
|
||
from google.gapic.longrunning import operations_client | ||
from google.gax import api_callable | ||
from google.gax import config | ||
from google.gax import path_template | ||
from google.gax.utils import oneof | ||
import google.gax | ||
|
||
from google.cloud.gapic.speech.v1 import enums | ||
from google.cloud.proto.speech.v1 import cloud_speech_pb2 | ||
|
||
|
||
class SpeechClient(object):
    """Client for the ``google.cloud.speech.v1`` Speech service.

    Builds a gRPC stub for the service plus a long-running-operations
    client, and exposes the ``Recognize``, ``LongRunningRecognize`` and
    ``StreamingRecognize`` RPCs as methods.
    """

    SERVICE_ADDRESS = 'speech.googleapis.com'
    """The default address of the service."""

    DEFAULT_SERVICE_PORT = 443
    """The default port of the service."""

    # OAuth2 scopes requested when the caller does not pass ``scopes``.
    _ALL_SCOPES = ('https://www.googleapis.com/auth/cloud-platform', )

    def __init__(self,
                 service_path=SERVICE_ADDRESS,
                 port=DEFAULT_SERVICE_PORT,
                 channel=None,
                 credentials=None,
                 ssl_credentials=None,
                 scopes=None,
                 client_config=None,
                 app_name=None,
                 app_version='',
                 lib_name=None,
                 lib_version='',
                 metrics_headers=()):
        """Constructor.

        Args:
          service_path (string): The domain name of the API remote host.
          port (int): The port on which to connect to the remote host.
          channel (:class:`grpc.Channel`): A ``Channel`` instance through
            which to make calls.
          credentials (object): The authorization credentials to attach to
            requests. These credentials identify this application to the
            service.
          ssl_credentials (:class:`grpc.ChannelCredentials`): A
            ``ChannelCredentials`` instance for use with an SSL-enabled
            channel.
          scopes (list[string]): A list of OAuth2 scopes to attach to
            requests. Defaults to every scope this service needs.
          client_config (dict): A dictionary of call options for each
            method. See :func:`google.gax.construct_settings` for the
            structure of this data. Falls back to the packaged default
            config if not specified or if the specified config is missing
            data points.
          app_name (string): The name of the application calling the
            service. Accepted but not read by this constructor.
          app_version (string): The version of the application calling the
            service. Accepted but not read by this constructor.
          lib_name (string): The API library software used for calling the
            service. When set, it is recorded in the metrics headers
            together with ``lib_version``. (Unless you are writing an API
            client itself, leave this as default.)
          lib_version (string): The API library software version used for
            calling the service. (Unless you are writing an API client
            itself, leave this as default.)
          metrics_headers (dict): A dictionary of values for tracking
            client library metrics. Ultimately serializes to a string
            (e.g. 'foo/1.2.3 bar/3.14.1'). This argument should be
            considered private.

        Returns:
          A SpeechClient object.
        """
        # Apply defaults for the optional arguments the caller left unset.
        scopes = self._ALL_SCOPES if scopes is None else scopes
        client_config = {} if client_config is None else client_config

        # Collect the metrics headers in insertion order: caller-supplied
        # entries first, then the Python version, the optional wrapping
        # library, and finally the GAPIC package version.
        headers = collections.OrderedDict(metrics_headers)
        headers['gl-python'] = platform.python_version()
        if lib_name:
            headers[lib_name] = lib_version
        gapic_dist = pkg_resources.get_distribution('google-cloud-speech', )
        headers['gapic'] = gapic_dist.version

        # Read the default call settings that ship next to this module and
        # merge the caller's overrides on top of them.
        packaged_json = pkg_resources.resource_string(
            __name__, 'speech_client_config.json')
        default_client_config = json.loads(packaged_json.decode())
        defaults = api_callable.construct_settings(
            'google.cloud.speech.v1.Speech',
            default_client_config,
            client_config,
            config.STATUS_CODE_NAMES,
            metrics_headers=headers, )

        self.speech_stub = config.create_stub(
            cloud_speech_pb2.SpeechStub,
            channel=channel,
            service_path=service_path,
            service_port=port,
            credentials=credentials,
            scopes=scopes,
            ssl_credentials=ssl_credentials)

        # Client used to track operations started by
        # ``long_running_recognize``; it receives the same connection
        # parameters as the stub above.
        self.operations_client = operations_client.OperationsClient(
            service_path=service_path,
            port=port,
            channel=channel,
            credentials=credentials,
            ssl_credentials=ssl_credentials,
            scopes=scopes,
            client_config=client_config,
            metrics_headers=headers, )

        # Wrap each stub method with its configured call settings.
        stub = self.speech_stub
        make_call = api_callable.create_api_call
        self._recognize = make_call(
            stub.Recognize, settings=defaults['recognize'])
        self._long_running_recognize = make_call(
            stub.LongRunningRecognize,
            settings=defaults['long_running_recognize'])
        self._streaming_recognize = make_call(
            stub.StreamingRecognize,
            settings=defaults['streaming_recognize'])

    # Service calls
    def recognize(self, config, audio, options=None):
        """Performs synchronous speech recognition.

        Results are received after all audio has been sent and processed.

        Example:
          >>> from google.cloud.gapic.speech.v1 import speech_client
          >>> from google.cloud.gapic.speech.v1 import enums
          >>> from google.cloud.proto.speech.v1 import cloud_speech_pb2
          >>> client = speech_client.SpeechClient()
          >>> encoding = enums.RecognitionConfig.AudioEncoding.FLAC
          >>> sample_rate_hertz = 44100
          >>> language_code = 'en-US'
          >>> config = cloud_speech_pb2.RecognitionConfig(encoding=encoding, sample_rate_hertz=sample_rate_hertz, language_code=language_code)
          >>> uri = 'gs://bucket_name/file_name.flac'
          >>> audio = cloud_speech_pb2.RecognitionAudio(uri=uri)
          >>> response = client.recognize(config, audio)

        Args:
          config (:class:`google.cloud.proto.speech.v1.cloud_speech_pb2.RecognitionConfig`):
            *Required* Provides information to the recognizer that
            specifies how to process the request.
          audio (:class:`google.cloud.proto.speech.v1.cloud_speech_pb2.RecognitionAudio`):
            *Required* The audio data to be recognized.
          options (:class:`google.gax.CallOptions`): Overrides the default
            settings for this call, e.g, timeout, retries etc.

        Returns:
          A :class:`google.cloud.proto.speech.v1.cloud_speech_pb2.RecognizeResponse` instance.

        Raises:
          :exc:`google.gax.errors.GaxError` if the RPC is aborted.
          :exc:`ValueError` if the parameters are invalid.
        """
        # Build the request message and hand it to the wrapped RPC.
        return self._recognize(
            cloud_speech_pb2.RecognizeRequest(config=config, audio=audio),
            options)

    def long_running_recognize(self, config, audio, options=None):
        """Performs asynchronous speech recognition.

        Results are received via the google.longrunning.Operations
        interface. Returns either an ``Operation.error`` or an
        ``Operation.response`` which contains a
        ``LongRunningRecognizeResponse`` message.

        Example:
          >>> from google.cloud.gapic.speech.v1 import speech_client
          >>> from google.cloud.gapic.speech.v1 import enums
          >>> from google.cloud.proto.speech.v1 import cloud_speech_pb2
          >>> client = speech_client.SpeechClient()
          >>> encoding = enums.RecognitionConfig.AudioEncoding.FLAC
          >>> sample_rate_hertz = 44100
          >>> language_code = 'en-US'
          >>> config = cloud_speech_pb2.RecognitionConfig(encoding=encoding, sample_rate_hertz=sample_rate_hertz, language_code=language_code)
          >>> uri = 'gs://bucket_name/file_name.flac'
          >>> audio = cloud_speech_pb2.RecognitionAudio(uri=uri)
          >>> response = client.long_running_recognize(config, audio)
          >>>
          >>> def callback(operation_future):
          >>>     # Handle result.
          >>>     result = operation_future.result()
          >>>
          >>> response.add_done_callback(callback)
          >>>
          >>> # Handle metadata.
          >>> metadata = response.metadata()

        Args:
          config (:class:`google.cloud.proto.speech.v1.cloud_speech_pb2.RecognitionConfig`):
            *Required* Provides information to the recognizer that
            specifies how to process the request.
          audio (:class:`google.cloud.proto.speech.v1.cloud_speech_pb2.RecognitionAudio`):
            *Required* The audio data to be recognized.
          options (:class:`google.gax.CallOptions`): Overrides the default
            settings for this call, e.g, timeout, retries etc.

        Returns:
          A :class:`google.gax._OperationFuture` instance.

        Raises:
          :exc:`google.gax.errors.GaxError` if the RPC is aborted.
          :exc:`ValueError` if the parameters are invalid.
        """
        request = cloud_speech_pb2.LongRunningRecognizeRequest(
            config=config, audio=audio)
        # Start the operation, then wrap the returned handle in a future
        # that uses the operations client and the response/metadata types.
        operation = self._long_running_recognize(request, options)
        return google.gax._OperationFuture(
            operation,
            self.operations_client,
            cloud_speech_pb2.LongRunningRecognizeResponse,
            cloud_speech_pb2.LongRunningRecognizeMetadata,
            options)

    def streaming_recognize(self, requests, options=None):
        """Performs bidirectional streaming speech recognition.

        Results are received while audio is being sent. This method is only
        available via the gRPC API (not REST).

        EXPERIMENTAL: This method interface might change in the future.

        Example:
          >>> from google.cloud.gapic.speech.v1 import speech_client
          >>> from google.cloud.proto.speech.v1 import cloud_speech_pb2
          >>> client = speech_client.SpeechClient()
          >>> request = cloud_speech_pb2.StreamingRecognizeRequest()
          >>> requests = [request]
          >>> for element in client.streaming_recognize(requests):
          >>>     # process element
          >>>     pass

        Args:
          requests (iterator[:class:`google.cloud.proto.speech.v1.cloud_speech_pb2.StreamingRecognizeRequest`]):
            The input objects.
          options (:class:`google.gax.CallOptions`): Overrides the default
            settings for this call, e.g, timeout, retries etc.

        Returns:
          iterator[:class:`google.cloud.proto.speech.v1.cloud_speech_pb2.StreamingRecognizeResponse`].

        Raises:
          :exc:`google.gax.errors.GaxError` if the RPC is aborted.
          :exc:`ValueError` if the parameters are invalid.
        """
        # The request iterator is passed straight through to the wrapped RPC.
        return self._streaming_recognize(requests, options)
Oops, something went wrong.