Skip to content

Commit c5c6c95

Browse files
committed
Merge pull request #352 from GoogleCloudPlatform/speech-gcs
Add sample for speech api on GCS file via grpc.
2 parents 3c093d8 + 4985070 commit c5c6c95

6 files changed

+152
-8
lines changed

speech/api/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ for more information.
4949
* If you're running the `speech_streaming.py` sample:
5050
5151
```sh
52-
$ pip install -r requirements-speech_streaming.txt
52+
$ pip install -r requirements-speech_grpc.txt
5353
```
5454
5555
The sample uses the [PyAudio][pyaudio] library to stream audio from your

speech/api/grpc_auth.py

Whitespace-only changes.

speech/api/speech_gcs.py

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#!/usr/bin/python
2+
# Copyright (C) 2016 Google Inc.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""Sample that transcribes a FLAC audio file stored in Google Cloud Storage,
16+
using GRPC."""
17+
18+
import argparse
19+
20+
from gcloud.credentials import get_credentials
21+
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
22+
from grpc.beta import implementations
23+
24+
# Keep the request alive for this many seconds
25+
DEADLINE_SECS = 10
26+
SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
27+
28+
29+
def make_channel(host, port):
    """Build a secure gRPC channel that carries Google auth credentials.

    Args:
        host: Hostname of the gRPC endpoint to connect to.
        port: Port of the gRPC endpoint to connect to.

    Returns:
        The channel returned by ``implementations.secure_channel``, created
        with SSL credentials composed with a bearer-token auth header.
    """
    # Default SSL channel credentials; required for an https call.
    ssl_creds = implementations.ssl_channel_credentials(None, None, None)

    # Application default credentials from the environment, limited to the
    # scope the speech API needs.
    scoped_creds = get_credentials().create_scoped([SPEECH_SCOPE])
    # The access token is read once, here, so the same header value is
    # handed out for every call made on the returned channel.
    token = scoped_creds.get_access_token().access_token
    auth_header = ('Authorization', 'Bearer ' + token)

    def _inject_auth_header(_, callback):
        # Metadata plugin: supply the auth header and report no error.
        return callback([auth_header], None)

    call_creds = implementations.metadata_call_credentials(
        _inject_auth_header, name='google_creds')

    # Combine transport security (ssl) with per-call Google auth.
    channel_creds = implementations.composite_channel_credentials(
        ssl_creds, call_creds)

    return implementations.secure_channel(host, port, channel_creds)
49+
50+
51+
def main(input_uri, output_uri, encoding, sample_rate):
    """Send one non-streaming recognize request for audio stored in GCS.

    Args:
        input_uri: Passed as the ``uri`` of the audio request.
        output_uri: Passed as the ``output_uri`` of the initial request.
        encoding: Passed as the ``encoding`` of the initial request.
        sample_rate: Passed as the ``sample_rate`` of the initial request.

    Prints ``response.responses``; nothing is returned.
    """
    channel = make_channel('speech.googleapis.com', 443)
    service = cloud_speech.beta_create_Speech_stub(channel)

    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto
    initial_request = cloud_speech.InitialRecognizeRequest(
        encoding=encoding,
        sample_rate=sample_rate,
        output_uri=output_uri,
    )
    audio_request = cloud_speech.AudioRequest(uri=input_uri)
    request = cloud_speech.RecognizeRequest(
        initial_request=initial_request,
        audio_request=audio_request,
    )

    # DEADLINE_SECS bounds how long the call is kept alive.
    response = service.NonStreamingRecognize(request, DEADLINE_SECS)

    # The responses are expected to be empty, since the transcription is
    # output to the GCS uri specified rather than returned inline.
    print(response.responses)
70+
71+
72+
def _gcs_uri(text):
73+
if not text.startswith('gs://'):
74+
raise ValueError(
75+
'Cloud Storage uri must be of the form gs://bucket/path/')
76+
return text
77+
78+
79+
# Proto that defines the request fields; linked from the --encoding help text.
PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/'
             'google/cloud/speech/v1/cloud_speech.proto')
if __name__ == '__main__':
    # Command-line entry point: parse the GCS uris and audio settings, then
    # hand them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument('input_uri', type=_gcs_uri)
    parser.add_argument('output_uri', type=_gcs_uri)
    parser.add_argument(
        '--encoding', default='FLAC', choices=[
            'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
        help='How the audio file is encoded. See {}#L67'.format(PROTO_URL))
    # type=int so that a value given on the command line is converted to an
    # integer, matching the type of the default instead of arriving as a str.
    parser.add_argument('--sample_rate', type=int, default=16000)

    args = parser.parse_args()
    main(args.input_uri, args.output_uri, args.encoding, args.sample_rate)

speech/api/speech_gcs_test.py

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright 2016, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import sys
15+
16+
import pytest
17+
from speech_gcs import _gcs_uri
18+
from speech_gcs import main
19+
20+
21+
@pytest.mark.skipif(
    sys.version_info >= (3, 0),
    reason=("grpc doesn't yet support python3 "
            'https://github.com/grpc/grpc/issues/282'))
def test_main(cloud_config, capsys):
    """Run main() against the configured bucket and check what it prints."""
    source = 'gs://{}/speech/clip.flac'.format(cloud_config.storage_bucket)
    destination = 'gs://{}/speech/clip.txt'.format(
        cloud_config.storage_bucket)

    main(source, destination, 'FLAC', 16000)

    # main() prints the (empty) list of responses and nothing else.
    captured_out, _ = capsys.readouterr()
    assert captured_out == '[]\n'
33+
34+
35+
def test_gcs_uri():
    """A gs:// uri is accepted; a local path raises ValueError."""
    # Rejected: no gs:// prefix.
    with pytest.raises(ValueError):
        _gcs_uri('/local/path')
    # Accepted: must not raise.
    _gcs_uri('gs://bucket/path')

speech/api/speech_streaming.py

+21-7
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,25 @@
11
#!/usr/bin/python
2+
# Copyright (C) 2016 Google Inc.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""Sample that streams audio to the Google Cloud Speech API via GRPC."""
216

317
import contextlib
418
import re
519
import threading
620

721
from gcloud.credentials import get_credentials
8-
from google.cloud.speech.v1.cloud_speech_pb2 import * # noqa
22+
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
923
from google.rpc import code_pb2
1024
from grpc.beta import implementations
1125
import pyaudio
@@ -70,7 +84,7 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
7084
with record_audio(channels, rate, chunk) as audio_stream:
7185
# The initial request must contain metadata about the stream, so the
7286
# server knows how to interpret it.
73-
metadata = InitialRecognizeRequest(
87+
metadata = cloud_speech.InitialRecognizeRequest(
7488
encoding='LINEAR16', sample_rate=rate,
7589
# Note that setting interim_results to True means that you'll
7690
# likely get multiple results for the same bit of audio, as the
@@ -80,9 +94,9 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
8094
interim_results=True, continuous=False,
8195
)
8296
data = audio_stream.read(chunk)
83-
audio_request = AudioRequest(content=data)
97+
audio_request = cloud_speech.AudioRequest(content=data)
8498

85-
yield RecognizeRequest(
99+
yield cloud_speech.RecognizeRequest(
86100
initial_request=metadata,
87101
audio_request=audio_request)
88102

@@ -91,9 +105,9 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
91105
if not data:
92106
raise StopIteration()
93107
# Subsequent requests can all just have the content
94-
audio_request = AudioRequest(content=data)
108+
audio_request = cloud_speech.AudioRequest(content=data)
95109

96-
yield RecognizeRequest(audio_request=audio_request)
110+
yield cloud_speech.RecognizeRequest(audio_request=audio_request)
97111

98112

99113
def listen_print_loop(recognize_stream):
@@ -116,7 +130,7 @@ def listen_print_loop(recognize_stream):
116130

117131
def main():
118132
stop_audio = threading.Event()
119-
with beta_create_Speech_stub(
133+
with cloud_speech.beta_create_Speech_stub(
120134
make_channel('speech.googleapis.com', 443)) as service:
121135
try:
122136
listen_print_loop(

0 commit comments

Comments
 (0)