-
Notifications
You must be signed in to change notification settings - Fork 6.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding async sample code to cloud speech. (#404)
- Loading branch information
Showing
7 changed files
with
277 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
gcloud==0.17.0 | ||
grpcio==0.15.0 | ||
PyAudio==0.2.9 | ||
grpc-google-cloud-speech==1.0.4 | ||
grpc-google-cloud-speech-v1beta1==1.0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
#!/usr/bin/env python | ||
# Copyright (C) 2016 Google Inc. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Sample that transcribes a FLAC audio file stored in Google Cloud Storage, | ||
using async GRPC.""" | ||
|
||
import argparse | ||
import time | ||
|
||
from gcloud.credentials import get_credentials | ||
from google.cloud.speech.v1beta1 import cloud_speech_pb2 | ||
from google.longrunning import operations_grpc_pb2 | ||
from grpc.beta import implementations | ||
|
||
# Keep the request alive for this many seconds | ||
DEADLINE_SECS = 10 | ||
SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform' | ||
|
||
|
||
def make_channel(host, port):
    """Build a secure gRPC channel to ``host:port`` that carries Google auth.

    The returned channel combines default SSL transport credentials with a
    call-credentials plugin that adds an ``Authorization: Bearer <token>``
    header, where the token comes from the environment's default credentials
    scoped to ``SPEECH_SCOPE``.
    """
    # Default SSL settings are enough for an https-style call.
    transport_creds = implementations.ssl_channel_credentials(
        None, None, None)

    # The access token is fetched once, here, and reused for every call made
    # over the channel.
    scoped_creds = get_credentials().create_scoped([SPEECH_SCOPE])
    token = scoped_creds.get_access_token().access_token
    auth_header = ('Authorization', 'Bearer ' + token)

    # Plugin that hands the auth header to grpc as request metadata.
    call_creds = implementations.metadata_call_credentials(
        lambda _, cb: cb([auth_header], None),
        name='google_creds')

    # Combine transport security and per-call auth into one credential.
    combined_creds = implementations.composite_channel_credentials(
        transport_creds, call_creds)

    return implementations.secure_channel(host, port, combined_creds)
|
||
|
||
def main(input_uri, encoding, sample_rate):
    """Transcribe an audio file in Cloud Storage using async gRPC.

    Starts an ``AsyncRecognize`` long running operation, polls the
    operations endpoint once per second until the operation reports that it
    is done, then prints the recognition results.

    Args:
        input_uri: value sent as the ``uri`` of the ``RecognitionAudio``.
        encoding: value sent as the ``encoding`` of the ``RecognitionConfig``.
        sample_rate: value sent as the ``sample_rate`` of the
            ``RecognitionConfig``.

    Raises:
        RuntimeError: if the operation finishes with its ``error`` field set
            instead of a response.
    """
    channel = make_channel('speech.googleapis.com', 443)
    speech_service = cloud_speech_pb2.beta_create_Speech_stub(channel)
    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    operation = speech_service.AsyncRecognize(
        cloud_speech_pb2.AsyncRecognizeRequest(
            config=cloud_speech_pb2.RecognitionConfig(
                encoding=encoding,
                sample_rate=sample_rate,
            ),
            audio=cloud_speech_pb2.RecognitionAudio(
                uri=input_uri,
            )
        ), DEADLINE_SECS)

    # Print the longrunning operation handle.
    print(operation)

    # Construct a long running operation endpoint on the same channel.
    operations_service = operations_grpc_pb2.beta_create_Operations_stub(
        channel)

    name = operation.name

    while True:
        # Give the server a few seconds to process.
        print('Waiting for server processing...')
        time.sleep(1)
        # Get the long running operation with response.
        operation = operations_service.GetOperation(
            operations_grpc_pb2.GetOperationRequest(name=name),
            DEADLINE_SECS)

        if operation.done:
            break

    # A finished operation carries either `error` or `response`. Without this
    # check a failed operation would be unpacked into an empty result and
    # printed as if recognition had simply found nothing.
    if operation.HasField('error'):
        raise RuntimeError(
            'Speech operation {} failed with code {}: {}'.format(
                name, operation.error.code, operation.error.message))

    # Print the recognition results.
    results = cloud_speech_pb2.AsyncRecognizeResponse()
    operation.response.Unpack(results)
    print(results)
|
||
|
||
def _gcs_uri(text):
    """Validate a command line value as a Cloud Storage URI.

    Returns ``text`` unchanged when it starts with ``gs://``; otherwise
    raises ``argparse.ArgumentTypeError``.
    """
    if text.startswith('gs://'):
        return text
    raise argparse.ArgumentTypeError(
        'Cloud Storage uri must be of the form gs://bucket/path/')
|
||
|
||
if __name__ == '__main__':
    # Command line entry point: parse the arguments and run the sample.
    parser = argparse.ArgumentParser()
    parser.add_argument('input_uri', type=_gcs_uri)
    parser.add_argument(
        '--encoding', default='FLAC', choices=[
            'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
        help='How the audio file is encoded. See {}#L67'.format(
            'https://github.com/googleapis/googleapis/blob/master/'
            'google/cloud/speech/v1beta1/cloud_speech.proto'))
    # type=int keeps a user-supplied rate an integer like the default;
    # without it argparse passes the raw string through to main().
    parser.add_argument('--sample_rate', default=16000, type=int)

    args = parser.parse_args()
    main(args.input_uri, args.encoding, args.sample_rate)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Copyright 2016, Google, Inc. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import re | ||
import sys | ||
|
||
import pytest | ||
from speech_async_grpc import _gcs_uri | ||
from speech_async_grpc import main | ||
|
||
|
||
@pytest.mark.skipif(
    sys.version_info >= (3, 0),
    reason=("grpc doesn't yet support python3 "
            'https://github.com/grpc/grpc/issues/282'))
def test_main(cloud_config, capsys):
    """Run the gRPC sample on the bucket's audio and check the transcript."""
    bucket = cloud_config.storage_bucket
    audio_uri = 'gs://{}/speech/audio.flac'.format(bucket)

    main(audio_uri, 'FLAC', 16000)

    # Only stdout matters; the sample prints the results there.
    printed, _ = capsys.readouterr()
    expected = r'how old is the Brooklyn Bridge'
    assert re.search(expected, printed, re.DOTALL | re.I)
|
||
|
||
def test_gcs_uri():
    """Check that _gcs_uri accepts gs:// URIs and rejects other paths."""
    # Imported locally so this test does not depend on the module's imports.
    import argparse

    assert _gcs_uri('gs://bucket/path') == 'gs://bucket/path'
    # _gcs_uri raises argparse.ArgumentTypeError, which derives from
    # Exception rather than ValueError, so ValueError would never match.
    with pytest.raises(argparse.ArgumentTypeError):
        _gcs_uri('/local/path')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
#!/usr/bin/env python | ||
# Copyright 2016 Google Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Google Cloud Speech API sample application using the REST API for async | ||
batch processing.""" | ||
|
||
# [START import_libraries] | ||
import argparse | ||
import base64 | ||
import json | ||
import time | ||
|
||
from googleapiclient import discovery | ||
from oauth2client.client import GoogleCredentials | ||
# [END import_libraries] | ||
|
||
|
||
# [START authenticating] | ||
DISCOVERY_URL = ('https://{api}.googleapis.com/$discovery/rest?' | ||
'version={apiVersion}') | ||
|
||
|
||
# Application default credentials provided by env variable | ||
# GOOGLE_APPLICATION_CREDENTIALS | ||
def get_speech_service():
    """Return a discovery-built client for the ``speech`` ``v1beta1`` API.

    The client is authorized with the application default credentials,
    scoped to the cloud-platform scope.
    """
    default_creds = GoogleCredentials.get_application_default()
    scoped_creds = default_creds.create_scoped(
        ['https://www.googleapis.com/auth/cloud-platform'])

    speech_client = discovery.build(
        'speech', 'v1beta1', credentials=scoped_creds,
        discoveryServiceUrl=DISCOVERY_URL)
    return speech_client
# [END authenticating] | ||
|
||
|
||
def main(speech_file):
    """Transcribe the given audio file asynchronously.

    Sends the base64-encoded file contents in an ``asyncrecognize`` request,
    polls the returned operation once per second until it is marked done,
    then prints the recognition results as JSON.

    Args:
        speech_file: the name of the audio file.

    Raises:
        RuntimeError: if the finished operation contains an ``error`` entry.
    """
    # [START construct_request]
    with open(speech_file, 'rb') as speech:
        # Base64 encode the binary audio file for inclusion in the request.
        speech_content = base64.b64encode(speech.read())

    service = get_speech_service()
    service_request = service.speech().asyncrecognize(
        body={
            'config': {
                'encoding': 'LINEAR16',
                'sampleRate': 16000
            },
            'audio': {
                'content': speech_content.decode('UTF-8')
            }
        })
    # [END construct_request]
    # [START send_request]
    response = service_request.execute()
    print(json.dumps(response))
    # [END send_request]

    name = response['name']
    # Construct a GetOperation request.
    service_request = service.operations().get(name=name)

    while True:
        # Give the server a few seconds to process.
        print('Waiting for server processing...')
        time.sleep(1)
        # Get the long running operation with response.
        response = service_request.execute()

        if response.get('done'):
            break

    # A finished operation may hold an error instead of a response; report
    # it explicitly rather than failing below with an opaque KeyError.
    if 'error' in response:
        raise RuntimeError('Speech operation {} failed: {}'.format(
            name, json.dumps(response['error'])))

    # 'results' can be missing when nothing was recognized; print an empty
    # list in that case instead of raising.
    print(json.dumps(response.get('response', {}).get('results', [])))
|
||
|
||
# [START run_application] | ||
if __name__ == '__main__':
    # Command line entry point: one positional argument, the audio file.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'speech_file', help='Full path of audio file to be recognized')
    cli_args = arg_parser.parse_args()
    main(cli_args.speech_file)
# [END run_application] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Copyright 2016, Google, Inc. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import re | ||
|
||
from speech_async_rest import main | ||
|
||
|
||
def test_main(resource, capsys):
    """Run the REST sample on the raw audio fixture and check the output."""
    audio_path = resource('audio.raw')
    main(audio_path)

    # Only stdout matters; the sample prints the results there.
    printed, _ = capsys.readouterr()
    expected = r'how old is the Brooklyn Bridge'
    assert re.search(expected, printed, re.DOTALL | re.I)
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters