-
Notifications
You must be signed in to change notification settings - Fork 6.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Speech gapic client library #1012
Changes from 22 commits
aa0de94
c695c82
4777dc6
e77f5f8
199a748
51c4d01
0de6c7c
a594c70
9129caf
4db0f45
f09dfec
66d53aa
99b2e79
c7d1ad7
ce0d25d
3196c73
d5acd7c
cb40b7f
34ce758
0955793
e355325
a5f4c35
73d2b79
39f9b6b
efe110c
1f4cda6
bd32ab4
1f861ee
8fa2982
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
google-cloud-speech==0.26.0 | ||
google-cloud-speech==0.27.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,20 +29,30 @@ | |
def transcribe_streaming(stream_file): | ||
"""Streams transcription of the given audio file.""" | ||
from google.cloud import speech | ||
speech_client = speech.Client() | ||
from google.cloud.speech import types | ||
client = speech.SpeechClient() | ||
|
||
with io.open(stream_file, 'rb') as audio_file: | ||
audio_sample = speech_client.sample( | ||
stream=audio_file, | ||
encoding=speech.encoding.Encoding.LINEAR16, | ||
sample_rate_hertz=16000) | ||
alternatives = audio_sample.streaming_recognize('en-US') | ||
|
||
for alternative in alternatives: | ||
print('Finished: {}'.format(alternative.is_final)) | ||
print('Stability: {}'.format(alternative.stability)) | ||
print('Confidence: {}'.format(alternative.confidence)) | ||
print('Transcript: {}'.format(alternative.transcript)) | ||
content = audio_file.read() | ||
|
||
config = types.RecognitionConfig( | ||
encoding='LINEAR16', | ||
sample_rate_hertz=16000, | ||
language_code='en-US') | ||
|
||
# In practice requests should be a generator yielding chunks of audio data. | ||
requests = (types.StreamingRecognizeRequest(audio_content=c) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please use a more descriptive variable name rather than `c`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
for c in [content]) | ||
streaming_config = types.StreamingRecognitionConfig(config=config) | ||
|
||
for response in client.streaming_recognize(streaming_config, requests): | ||
for result in response.results: | ||
print('Finished: {}'.format(result.is_final)) | ||
print('Stability: {}'.format(result.stability)) | ||
alternatives = result.alternatives | ||
for alternative in alternatives: | ||
print('Confidence: {}'.format(alternative.confidence)) | ||
print('Transcript: {}'.format(alternative.transcript)) | ||
|
||
|
||
if __name__ == '__main__': | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,7 @@ | |
import sys | ||
|
||
from google.cloud import speech | ||
from google.cloud.speech import types | ||
import pyaudio | ||
from six.moves import queue | ||
# [END import_libraries] | ||
|
@@ -41,8 +42,8 @@ | |
CHUNK = int(RATE / 10) # 100ms | ||
|
||
|
||
class MicAsFile(object): | ||
"""Opens a recording stream as a file-like object.""" | ||
class MicrophoneStream(object): | ||
"""Opens a recording stream as a generator yielding the audio chunks.""" | ||
def __init__(self, rate, chunk): | ||
self._rate = rate | ||
self._chunk = chunk | ||
|
@@ -73,7 +74,8 @@ def __exit__(self, type, value, traceback): | |
self._audio_stream.stop_stream() | ||
self._audio_stream.close() | ||
self.closed = True | ||
# Flush out the read, just in case | ||
# Signal the generator to terminate so that the client's | ||
# streaming_recognize method will not block the process termination. | ||
self._buff.put(None) | ||
self._audio_interface.terminate() | ||
|
||
|
@@ -82,51 +84,64 @@ def _fill_buffer(self, in_data, frame_count, time_info, status_flags): | |
self._buff.put(in_data) | ||
return None, pyaudio.paContinue | ||
|
||
def read(self, chunk_size): | ||
if self.closed: | ||
return | ||
|
||
# Use a blocking get() to ensure there's at least one chunk of data. | ||
data = [self._buff.get()] | ||
|
||
# Now consume whatever other data's still buffered. | ||
while True: | ||
try: | ||
data.append(self._buff.get(block=False)) | ||
except queue.Empty: | ||
break | ||
|
||
if self.closed: | ||
return | ||
return b''.join(data) | ||
def generator(self): | ||
while not self.closed: | ||
# Use a blocking get() to ensure there's at least one chunk of | ||
# data, and stop iteration if the chunk is None, indicating the | ||
# end of the audio stream. | ||
chunk = self._buff.get() | ||
if chunk is None: | ||
return | ||
data = [chunk] | ||
|
||
# Now consume whatever other data's still buffered. | ||
while True: | ||
try: | ||
chunk = self._buff.get(block=False) | ||
if chunk is None: | ||
return | ||
data.append(chunk) | ||
except queue.Empty: | ||
break | ||
|
||
yield b''.join(data) | ||
# [END audio_stream] | ||
|
||
|
||
def listen_print_loop(results_gen): | ||
def listen_print_loop(responses): | ||
"""Iterates through server responses and prints them. | ||
|
||
The results_gen passed is a generator that will block until a response | ||
is provided by the server. When the transcription response comes, print it. | ||
The responses passed is a generator that will block until a response | ||
is provided by the server. | ||
|
||
Each response may contain multiple results, and each result may contain | ||
multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we | ||
print only the transcription for the top alternative of the top result. | ||
|
||
In this case, responses are provided for interim results as well. If the | ||
response is an interim one, print a carriage return at the end of it, to allow | |
the next result to overwrite it, until the response is a final one. For the | ||
final one, print a newline to preserve the finalized transcription. | ||
""" | ||
num_chars_printed = 0 | ||
for result in results_gen: | ||
for response in responses: | ||
if not response.results: | ||
continue | ||
|
||
# There could be multiple results in each response. | ||
result = response.results[0] | ||
if not result.alternatives: | ||
continue | ||
|
||
# Display the top transcription | ||
transcript = result.transcript | ||
# Display the transcription of the top alternative. | ||
transcript = result.alternatives[0].transcript | ||
|
||
# Display interim results, but with a carriage return at the end of the | ||
# line, so subsequent lines will overwrite them. | ||
# | ||
# If the previous result was longer than this one, we need to print | ||
# some extra spaces to overwrite the previous result | ||
overwrite_chars = ' ' * max(0, num_chars_printed - len(transcript)) | ||
overwrite_chars = ' ' * (num_chars_printed - len(transcript)) | ||
|
||
if not result.is_final: | ||
sys.stdout.write(transcript + overwrite_chars + '\r') | ||
|
@@ -147,21 +162,27 @@ def listen_print_loop(results_gen): | |
|
||
|
||
def main(): | ||
speech_client = speech.Client() | ||
|
||
with MicAsFile(RATE, CHUNK) as stream: | ||
audio_sample = speech_client.sample( | ||
stream=stream, | ||
encoding=speech.encoding.Encoding.LINEAR16, | ||
sample_rate_hertz=RATE) | ||
# See http://g.co/cloud/speech/docs/languages | ||
# for a list of supported languages. | ||
language_code = 'en-US' # a BCP-47 language tag | ||
results_gen = audio_sample.streaming_recognize( | ||
language_code=language_code, interim_results=True) | ||
# See http://g.co/cloud/speech/docs/languages | ||
# for a list of supported languages. | ||
language_code = 'en-US' # a BCP-47 language tag | ||
|
||
client = speech.SpeechClient() | ||
config = types.RecognitionConfig( | ||
encoding='LINEAR16', | ||
sample_rate_hertz=RATE, | ||
language_code=language_code) | ||
streaming_config = types.StreamingRecognitionConfig(config=config, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: start a new line after the opening `(`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
interim_results=True) | ||
|
||
with MicrophoneStream(RATE, CHUNK) as stream: | ||
audio_generator = stream.generator() | ||
requests = (types.StreamingRecognizeRequest(audio_content=content) | ||
for content in audio_generator) | ||
|
||
responses = client.streaming_recognize(streaming_config, requests) | ||
|
||
# Now, put the transcription responses to use. | ||
listen_print_loop(results_gen) | ||
listen_print_loop(responses) | ||
|
||
|
||
if __name__ == '__main__': | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there an enum for this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is, but it requires the additional import `from google.cloud.speech import enums`. I was keeping it consistent with the existing samples.