Skip to content

Commit 11464ec

Browse files
author
Jerjou Cheng
committed
Add speech api streaming sample.
1 parent a25245c commit 11464ec

File tree

8 files changed

+231
-4
lines changed

8 files changed

+231
-4
lines changed

nox.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,11 @@ def session_tests(session, interpreter, extra_pytest_args=None):
8686
# allows users to run a particular test instead of all of them.
8787
for sample in (session.posargs or
8888
collect_sample_dirs('.', SESSION_TESTS_BLACKLIST)):
89+
# Install additional dependencies if they exist
90+
dirname = sample if os.path.isdir(sample) else os.path.dirname(sample)
91+
for reqfile in list_files(dirname, 'requirements*.txt'):
92+
session.install('-r', reqfile)
93+
8994
session.run(
9095
'py.test', sample,
9196
*pytest_args,

speech/api/README.md

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,36 @@ See the
3737
[Cloud Platform Auth Guide](https://cloud.google.com/docs/authentication#developer_workflow)
3838
for more information.
3939

40+
### Install the dependencies
41+
42+
* If you're running the `speechrest.py` sample:
43+
44+
```sh
45+
$ pip install -r speechrest-requirements.txt
46+
```
47+
48+
* If you're running the `speech_streaming.py` sample:
49+
50+
```sh
51+
$ pip install -r speech_streaming-requirements.txt
52+
```
53+
4054
## Run the example
4155
42-
```sh
43-
$ python speechrest.py resources/audio.raw
44-
```
56+
* To run the `speechrest.py` sample:
57+
58+
```sh
59+
$ python speechrest.py resources/audio.raw
60+
```
61+
62+
You should see a response with the transcription result.
63+
64+
* To run the `speech_streaming.py` sample:
65+
66+
```sh
67+
$ python speech_streaming.py
68+
```
4569
46-
You should see a response with the transcription result.
70+
The sample will run in a continuous loop, printing the data and metadata
71+
it receives from the Speech API, which includes alternative transcriptions
72+
of what it hears, and a confidence score. Say "exit" to exit the loop.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
gcloud>=0.12.0
2+
git+https://github.com/grpc/grpc.git#egg=grpcio
3+
PyAudio>=0.2.9
4+
grpc-google-cloud-speech>=1.0.0
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
google-api-python-client==1.5.0

speech/api/resources/quit.raw

160 KB
Binary file not shown.

speech/api/speech_streaming.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/usr/bin/python
2+
3+
import contextlib
4+
import threading
5+
6+
from gcloud.credentials import get_credentials
7+
8+
from google.cloud.speech.v1.cloud_speech_pb2 import * # noqa
9+
from google.rpc import code_pb2
10+
11+
from grpc.beta import implementations
12+
13+
import pyaudio
14+
15+
# Microphone capture settings, used as the defaults of `_request_stream`.
CHANNELS = 1  # number of audio channels to record
RATE = 16000  # samples per second
CHUNK = RATE // 10  # frames per buffer: a tenth of a second (100ms) of audio

# OAuth2 scope requested when scoping the application default credentials.
SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'

# Keep the request alive for this many seconds (8 hours).
DEADLINE_SECS = 60 * 60 * 8
23+
24+
25+
def _make_channel(host, port):
    """Build a secure gRPC channel carrying Google auth credentials.

    Args:
        host: Hostname of the API endpoint.
        port: Port of the API endpoint.

    Returns:
        The channel created by `implementations.secure_channel`, using
        credentials that combine default SSL settings with a bearer-token
        `Authorization` header.
    """
    # Default SSL credentials are enough for an https call.
    ssl_creds = implementations.ssl_channel_credentials(None, None, None)

    # Application default credentials from the environment, limited to the
    # scope this sample needs.
    scoped_creds = get_credentials().create_scoped([SPEECH_SCOPE])

    # The access token is fetched once, here; the plugin below hands out the
    # same header every time it is invoked.
    access_token = scoped_creds.get_access_token().access_token
    auth_header = ('Authorization', 'Bearer ' + access_token)

    def _inject_auth_header(_, callback):
        return callback([auth_header], None)

    call_creds = implementations.metadata_call_credentials(
        _inject_auth_header, name='google_creds')

    # Combine transport security (ssl) and per-call auth (google creds).
    channel_creds = implementations.composite_channel_credentials(
        ssl_creds, call_creds)

    return implementations.secure_channel(host, port, channel_creds)
45+
46+
47+
@contextlib.contextmanager
def _record_audio(channels, rate, chunk):
    """Open a 16-bit PyAudio input stream for the duration of a `with` block.

    Args:
        channels: How many audio channels to record.
        rate: The sampling rate passed to PyAudio.
        chunk: Number of frames per buffer.

    Yields:
        The opened PyAudio input stream.

    The stream is stopped and closed, and the PyAudio instance terminated,
    even when the body of the `with` block raises or when a generator that
    is suspended inside the block gets closed.
    """
    audio_interface = pyaudio.PyAudio()
    try:
        audio_stream = audio_interface.open(
            format=pyaudio.paInt16, channels=channels, rate=rate,
            input=True, frames_per_buffer=chunk,
        )
        try:
            yield audio_stream
        finally:
            # Runs on normal exit and when an exception (including
            # GeneratorExit) is thrown in at the yield point.
            audio_stream.stop_stream()
            audio_stream.close()
    finally:
        # Also reached when opening the stream itself failed.
        audio_interface.terminate()
61+
62+
63+
def _request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `RecognizeRequest`s constructed from a recording audio stream.

    The first request yielded carries both the stream metadata and the first
    chunk of audio; every later request carries audio only.

    Args:
        stop_audio: A threading.Event object; once set, no further audio is
            read and the generator finishes.
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    with _record_audio(channels, rate, chunk) as audio_stream:
        # Tell the server how to interpret the audio that follows.
        stream_config = InitialRecognizeRequest(
            encoding='LINEAR16', sample_rate=rate)
        first_audio = AudioRequest(content=audio_stream.read(chunk))

        yield RecognizeRequest(
            initial_request=stream_config,
            audio_request=first_audio)

        while True:
            if stop_audio.is_set():
                break

            # Only the audio content is needed from here on.
            next_audio = AudioRequest(content=audio_stream.read(chunk))
            yield RecognizeRequest(audio_request=next_audio)
88+
89+
90+
def listen_print_loop(recognize_stream):
    """Print recognition results until an exit keyword is heard.

    Iterates over the responses, printing the alternatives of every result.
    Returns as soon as any alternative with a confidence above 0.5 is,
    ignoring case and surrounding whitespace, one of the exit keywords
    ('exit' or 'quit'), or when the stream is exhausted.

    Args:
        recognize_stream: Iterable of responses. Each response is read for
            `error.code`, `error.message` and `results`; each result for
            `alternatives`; each alternative for `transcript` and
            `confidence`.

    Raises:
        RuntimeError: If a response carries an error code other than OK.
    """
    exit_keywords = ('exit', 'quit')

    for resp in recognize_stream:
        if resp.error.code != code_pb2.OK:
            raise RuntimeError('Server error: ' + resp.error.message)

        # Display the transcriptions & their alternatives
        for result in resp.results:
            print(result.alternatives)

        # Exit recognition if any of the transcribed phrases could be
        # one of our keywords. Compare case-insensitively so a capitalized
        # transcript such as "Quit" still ends the loop.
        heard_exit = any(
            alt.confidence > .5 and
            alt.transcript.strip().lower() in exit_keywords
            for result in resp.results
            for alt in result.alternatives)
        if heard_exit:
            print('Exiting..')
            return
107+
108+
109+
def main():
    """Stream recorded audio to the Speech API and print what comes back.

    Runs until `listen_print_loop` returns or raises, then signals the
    request generator to stop recording.
    """
    stop_audio = threading.Event()
    channel = _make_channel('speech.googleapis.com', 443)

    with beta_create_Speech_stub(channel) as service:
        try:
            responses = service.Recognize(
                _request_stream(stop_audio), DEADLINE_SECS)
            listen_print_loop(responses)
        finally:
            # Stop the request stream once we're done with the loop - otherwise
            # it'll keep going in the thread that the grpc lib makes for it..
            stop_audio.set()
120+
121+
122+
if __name__ == '__main__':
123+
main()
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright 2016, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import contextlib
15+
import io
16+
import re
17+
import sys
18+
19+
import pytest
20+
21+
import speech_streaming
22+
23+
24+
class MockAudioStream(object):
    """File-backed stand-in for a recording audio stream.

    `read` serves bytes from the audio file first and, once the file is
    exhausted, from a finite buffer of zero bytes (silence).
    """

    def __init__(self, audio_filename, trailing_silence_secs=10):
        """Remember the file to serve and prepare the trailing silence.

        Args:
            audio_filename: Path of the raw audio file to serve.
            trailing_silence_secs: Seconds of silence available after the
                file's contents run out.
        """
        self.audio_filename = audio_filename
        # Two zero bytes per 16-bit sample. A bytes literal is required:
        # io.BytesIO rejects text on Python 3.
        self.silence = io.BytesIO(b'\0\0' * speech_streaming.RATE *
                                  trailing_silence_secs)

    def __enter__(self):
        """Open the audio file and return this object as the stream."""
        # Raw audio is binary data; text mode would try to decode it (and
        # translate newlines on some platforms).
        self.audio_file = open(self.audio_filename, 'rb')
        return self

    def __exit__(self, *args):
        """Close the audio file."""
        self.audio_file.close()

    def __call__(self, *args):
        """Return this object, ignoring all arguments.

        NOTE(review): presumably this lets an instance replace a factory
        such as `_record_audio(channels, rate, chunk)` - confirm with usage.
        """
        return self

    def read(self, num_frames):
        """Return the next `num_frames` frames as bytes.

        Falls back to the silence buffer when the audio file has no more
        data; returns an empty bytes object once both are exhausted.
        """
        # audio is 16-bit samples, whereas python byte is 8-bit
        num_bytes = 2 * num_frames
        chunk = self.audio_file.read(num_bytes) or self.silence.read(num_bytes)
        return chunk
45+
46+
47+
def _mock_audio_stream(filename):
48+
@contextlib.contextmanager
49+
def mock_audio_stream(channels, rate, chunk):
50+
with open(filename, 'rb') as audio_file:
51+
yield audio_file
52+
53+
return mock_audio_stream
54+
55+
56+
@pytest.mark.skipif(
    sys.version_info >= (3, 0), reason="can't get grpc lib to work in python3")
def test_main(resource, monkeypatch, capsys):
    """Run the sample against a recorded file and expect "quit" in the output.

    The recorder is replaced with one that serves `quit.raw`, and the request
    deadline is shortened to 5 seconds.
    """
    fake_recorder = _mock_audio_stream(resource('quit.raw'))
    monkeypatch.setattr(speech_streaming, '_record_audio', fake_recorder)
    monkeypatch.setattr(speech_streaming, 'DEADLINE_SECS', 5)

    speech_streaming.main()

    captured_out, _ = capsys.readouterr()
    quit_pattern = re.compile(r'transcript.*"quit"', re.DOTALL | re.I)
    assert quit_pattern.search(captured_out)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
google-api-python-client==1.5.0

0 commit comments

Comments
 (0)