Speech gapic client library #1012

Merged: 29 commits, Jul 14, 2017
Showing changes from 22 of the 29 commits.

Commits:
aa0de94  Migrate quickstart to GAPIC client library (dizcology, Jun 13, 2017)
c695c82  Migrate transcribe to GAPIC client library (dizcology, Jun 13, 2017)
4777dc6  Migrate transcribe_async to GAPIC client library (dizcology, Jun 14, 2017)
e77f5f8  Migrate transcribe_streaming to GAPIC client library (dizcology, Jun 14, 2017)
199a748  clean up (dizcology, Jun 20, 2017)
51c4d01  clean up (dizcology, Jun 21, 2017)
0de6c7c  Import from google.cloud.speech (dizcology, Jun 26, 2017)
a594c70  update transcribe samples (dizcology, Jun 27, 2017)
9129caf  import in alphabetic order (dizcology, Jun 27, 2017)
4db0f45  remove unused variable (dizcology, Jun 29, 2017)
f09dfec  use strings instead of enums (dizcology, Jun 29, 2017)
66d53aa  restructure code (dizcology, Jun 30, 2017)
99b2e79  comment on sreaming requests (dizcology, Jul 5, 2017)
c7d1ad7  import style (dizcology, Jul 6, 2017)
ce0d25d  flake (dizcology, Jul 7, 2017)
3196c73  correct indent (dizcology, Jul 11, 2017)
d5acd7c  migrate transcribe_streaming_mic to gapic (dizcology, Jul 11, 2017)
cb40b7f  update google-cloud-speech version requirement (dizcology, Jul 11, 2017)
34ce758  addressing review comments (dizcology, Jul 11, 2017)
0955793  at the end of the audio stream, put None to signal to the generator (dizcology, Jul 11, 2017)
e355325  flake (dizcology, Jul 12, 2017)
a5f4c35  addressing github review comments (dizcology, Jul 12, 2017)
73d2b79  add region tags for migration guide (dizcology, Jul 13, 2017)
39f9b6b  update README (dizcology, Jul 13, 2017)
efe110c  rst format (dizcology, Jul 13, 2017)
1f4cda6  bullet (dizcology, Jul 13, 2017)
bd32ab4  addressing PR review comments (dizcology, Jul 13, 2017)
1f861ee  use enums (dizcology, Jul 13, 2017)
8fa2982  remove a word (dizcology, Jul 13, 2017)
17 changes: 10 additions & 7 deletions speech/cloud-client/quickstart.py
@@ -22,9 +22,10 @@ def run_quickstart():

# Imports the Google Cloud client library
from google.cloud import speech
from google.cloud.speech import types

# Instantiates a client
speech_client = speech.Client()
client = speech.SpeechClient()

# The name of the audio file to transcribe
file_name = os.path.join(
@@ -35,14 +36,16 @@ def run_quickstart():
# Loads the audio into memory
with io.open(file_name, 'rb') as audio_file:
content = audio_file.read()
sample = speech_client.sample(
content,
source_uri=None,
encoding='LINEAR16',
sample_rate_hertz=16000)
audio = types.RecognitionAudio(content=content)

config = types.RecognitionConfig(
encoding='LINEAR16',
[Review thread on this line]
Contributor: Is there an enum for this?
Member Author (dizcology): There is, but it requires the additional import of from google.cloud.speech import enums. I was keeping it consistent with the existing samples.

sample_rate_hertz=16000,
language_code='en-US')

# Detects speech in the audio file
alternatives = sample.recognize('en-US')
response = client.recognize(config, audio)
alternatives = response.results[0].alternatives

for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))
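The review thread above mentions an enum alternative to the 'LINEAR16' string. As a rough sketch, not part of this PR, assuming google-cloud-speech==0.27.0 where the GAPIC enums are importable via from google.cloud.speech import enums, the quickstart config could use the enum member instead; the function name and the resources/audio.raw path are illustrative, mirroring the existing sample.

import io
import os

from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types


def run_quickstart_with_enum():
    client = speech.SpeechClient()

    # Same resource layout the existing quickstart sample assumes.
    file_name = os.path.join(
        os.path.dirname(__file__), 'resources', 'audio.raw')

    with io.open(file_name, 'rb') as audio_file:
        content = audio_file.read()
    audio = types.RecognitionAudio(content=content)

    # The enum member replaces the 'LINEAR16' string used above.
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US')

    response = client.recognize(config, audio)
    for result in response.results:
        for alternative in result.alternatives:
            print('Transcript: {}'.format(alternative.transcript))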
2 changes: 1 addition & 1 deletion speech/cloud-client/requirements.txt
@@ -1 +1 @@
google-cloud-speech==0.26.0
google-cloud-speech==0.27.0
34 changes: 21 additions & 13 deletions speech/cloud-client/transcribe.py
@@ -31,33 +31,41 @@
def transcribe_file(speech_file):
"""Transcribe the given audio file."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import types
client = speech.SpeechClient()

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()
audio_sample = speech_client.sample(
content=content,
source_uri=None,
encoding='LINEAR16',
sample_rate_hertz=16000)
audio = types.RecognitionAudio(content=content)

config = types.RecognitionConfig(
encoding='LINEAR16',
sample_rate_hertz=16000,
language_code='en-US')

response = client.recognize(config, audio)
alternatives = response.results[0].alternatives

alternatives = audio_sample.recognize('en-US')
for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))


def transcribe_gcs(gcs_uri):
"""Transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import types
client = speech.SpeechClient()

audio_sample = speech_client.sample(
content=None,
source_uri=gcs_uri,
audio = types.RecognitionAudio(uri=gcs_uri)

config = types.RecognitionConfig(
encoding='FLAC',
sample_rate_hertz=16000)
sample_rate_hertz=16000,
language_code='en-US')

response = client.recognize(config, audio)
alternatives = response.results[0].alternatives

alternatives = audio_sample.recognize('en-US')
for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))

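A small usage sketch, not from this PR: recognize returns one result per consecutive portion of audio, so longer files can yield several results. A caller that wants every segment can loop over response.results instead of indexing results[0] as the samples above do; the helper name here is illustrative.

def print_all_transcripts(response):
    """Prints the top alternative of every result in a recognize response."""
    for result in response.results:
        # Alternatives are ordered by confidence; take the most likely one.
        top = result.alternatives[0]
        print('Transcript: {}'.format(top.transcript))
        print('Confidence: {}'.format(top.confidence))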
46 changes: 23 additions & 23 deletions speech/cloud-client/transcribe_async.py
@@ -30,63 +30,63 @@
def transcribe_file(speech_file):
"""Transcribe the given audio file asynchronously."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import types
client = speech.SpeechClient()

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()
audio_sample = speech_client.sample(
content,
source_uri=None,
encoding='LINEAR16',
sample_rate_hertz=16000)
audio = types.RecognitionAudio(content=content)

operation = audio_sample.long_running_recognize('en-US')
config = types.RecognitionConfig(
encoding='LINEAR16',
sample_rate_hertz=16000,
language_code='en-US')

operation = client.long_running_recognize(config, audio)

retry_count = 100
while retry_count > 0 and not operation.complete:
while retry_count > 0 and not operation.done():
retry_count -= 1
time.sleep(2)
operation.poll()

if not operation.complete:
if not operation.done():
print('Operation not complete and retry limit reached.')
return

alternatives = operation.results
alternatives = operation.result().results[0].alternatives
for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))
print('Confidence: {}'.format(alternative.confidence))
# [END send_request]


def transcribe_gcs(gcs_uri):
"""Asynchronously transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import types
client = speech.SpeechClient()

audio = types.RecognitionAudio(uri=gcs_uri)

audio_sample = speech_client.sample(
content=None,
source_uri=gcs_uri,
config = types.RecognitionConfig(
encoding='FLAC',
sample_rate_hertz=16000)
sample_rate_hertz=16000,
language_code='en-US')

operation = audio_sample.long_running_recognize('en-US')
operation = client.long_running_recognize(config, audio)

retry_count = 100
while retry_count > 0 and not operation.complete:
while retry_count > 0 and not operation.done():
retry_count -= 1
time.sleep(2)
operation.poll()

if not operation.complete:
if not operation.done():
print('Operation not complete and retry limit reached.')
return

alternatives = operation.results
alternatives = operation.result().results[0].alternatives
for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))
print('Confidence: {}'.format(alternative.confidence))
# [END send_request_gcs]


if __name__ == '__main__':
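A minimal alternative sketch, not part of this PR: because the new client's operation object already exposes result() (used above to read the response), the manual poll-and-retry loop can be replaced by blocking on result() directly. The function name and the choice to block indefinitely are assumptions of this sketch.

from google.cloud import speech
from google.cloud.speech import types


def transcribe_gcs_blocking(gcs_uri):
    """Transcribes a GCS audio file, blocking until the operation finishes."""
    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding='FLAC',
        sample_rate_hertz=16000,
        language_code='en-US')

    operation = client.long_running_recognize(config, audio)

    # result() waits for the long-running operation to complete and returns
    # the recognition response.
    response = operation.result()
    for alternative in response.results[0].alternatives:
        print('Transcript: {}'.format(alternative.transcript))
        print('Confidence: {}'.format(alternative.confidence))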
34 changes: 22 additions & 12 deletions speech/cloud-client/transcribe_streaming.py
@@ -29,20 +29,30 @@
def transcribe_streaming(stream_file):
"""Streams transcription of the given audio file."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import types
client = speech.SpeechClient()

with io.open(stream_file, 'rb') as audio_file:
audio_sample = speech_client.sample(
stream=audio_file,
encoding=speech.encoding.Encoding.LINEAR16,
sample_rate_hertz=16000)
alternatives = audio_sample.streaming_recognize('en-US')

for alternative in alternatives:
print('Finished: {}'.format(alternative.is_final))
print('Stability: {}'.format(alternative.stability))
print('Confidence: {}'.format(alternative.confidence))
print('Transcript: {}'.format(alternative.transcript))
content = audio_file.read()

config = types.RecognitionConfig(
encoding='LINEAR16',
sample_rate_hertz=16000,
language_code='en-US')

# In practice requests should be a generator yielding chunks of audio data.
requests = (types.StreamingRecognizeRequest(audio_content=c)
[Review thread on this line]
Contributor: please use a more descriptive variable name rather than c.
Member Author (dizcology): done.
for c in [content])
streaming_config = types.StreamingRecognitionConfig(config=config)

for response in client.streaming_recognize(streaming_config, requests):
for result in response.results:
print('Finished: {}'.format(result.is_final))
print('Stability: {}'.format(result.stability))
alternatives = result.alternatives
for alternative in alternatives:
print('Confidence: {}'.format(alternative.confidence))
print('Transcript: {}'.format(alternative.transcript))


if __name__ == '__main__':
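The in-diff comment notes that, in practice, requests should be a generator yielding chunks of audio data rather than the whole file in a single request. A rough sketch of that pattern follows; the chunk size and helper name are illustrative, not part of the PR.

import io

from google.cloud.speech import types


def request_stream(stream_file, chunk_size=16 * 1024):
    """Yields StreamingRecognizeRequest messages read from an audio file."""
    with io.open(stream_file, 'rb') as audio_file:
        while True:
            chunk = audio_file.read(chunk_size)
            if not chunk:
                return
            yield types.StreamingRecognizeRequest(audio_content=chunk)


# The generator plugs into the same call shown above:
#     responses = client.streaming_recognize(
#         streaming_config, request_stream(stream_file))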
101 changes: 61 additions & 40 deletions speech/cloud-client/transcribe_streaming_mic.py
@@ -32,6 +32,7 @@
import sys

from google.cloud import speech
from google.cloud.speech import types
import pyaudio
from six.moves import queue
# [END import_libraries]
@@ -41,8 +42,8 @@
CHUNK = int(RATE / 10) # 100ms


class MicAsFile(object):
"""Opens a recording stream as a file-like object."""
class MicrophoneStream(object):
"""Opens a recording stream as a generator yielding the audio chunks."""
def __init__(self, rate, chunk):
self._rate = rate
self._chunk = chunk
@@ -73,7 +74,8 @@ def __exit__(self, type, value, traceback):
self._audio_stream.stop_stream()
self._audio_stream.close()
self.closed = True
# Flush out the read, just in case
# Signal the generator to terminate so that the client's
# streaming_recognize method will not block the process termination.
self._buff.put(None)
self._audio_interface.terminate()

@@ -82,51 +84,64 @@ def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
self._buff.put(in_data)
return None, pyaudio.paContinue

def read(self, chunk_size):
if self.closed:
return

# Use a blocking get() to ensure there's at least one chunk of data.
data = [self._buff.get()]

# Now consume whatever other data's still buffered.
while True:
try:
data.append(self._buff.get(block=False))
except queue.Empty:
break

if self.closed:
return
return b''.join(data)
def generator(self):
while not self.closed:
# Use a blocking get() to ensure there's at least one chunk of
# data, and stop iteration if the chunk is None, indicating the
# end of the audio stream.
chunk = self._buff.get()
if chunk is None:
return
data = [chunk]

# Now consume whatever other data's still buffered.
while True:
try:
chunk = self._buff.get(block=False)
if chunk is None:
return
data.append(chunk)
except queue.Empty:
break

yield b''.join(data)
# [END audio_stream]


def listen_print_loop(results_gen):
def listen_print_loop(responses):
"""Iterates through server responses and prints them.

The results_gen passed is a generator that will block until a response
is provided by the server. When the transcription response comes, print it.
The responses passed is a generator that will block until a response
is provided by the server.

Each response may contain multiple results, and each result may contain
multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we
print only the transcription for the top alternative of the top result.

In this case, responses are provided for interim results as well. If the
response is an interim one, print a line feed at the end of it, to allow
the next result to overwrite it, until the response is a final one. For the
final one, print a newline to preserve the finalized transcription.
"""
num_chars_printed = 0
for result in results_gen:
for response in responses:
if not response.results:
continue

# There could be multiple results in each response.
result = response.results[0]
if not result.alternatives:
continue

# Display the top transcription
transcript = result.transcript
# Display the transcription of the top alternative.
transcript = result.alternatives[0].transcript

# Display interim results, but with a carriage return at the end of the
# line, so subsequent lines will overwrite them.
#
# If the previous result was longer than this one, we need to print
# some extra spaces to overwrite the previous result
overwrite_chars = ' ' * max(0, num_chars_printed - len(transcript))
overwrite_chars = ' ' * (num_chars_printed - len(transcript))

if not result.is_final:
sys.stdout.write(transcript + overwrite_chars + '\r')
@@ -147,21 +162,27 @@ def listen_print_loop(results_gen):


def main():
speech_client = speech.Client()

with MicAsFile(RATE, CHUNK) as stream:
audio_sample = speech_client.sample(
stream=stream,
encoding=speech.encoding.Encoding.LINEAR16,
sample_rate_hertz=RATE)
# See http://g.co/cloud/speech/docs/languages
# for a list of supported languages.
language_code = 'en-US' # a BCP-47 language tag
results_gen = audio_sample.streaming_recognize(
language_code=language_code, interim_results=True)
# See http://g.co/cloud/speech/docs/languages
# for a list of supported languages.
language_code = 'en-US' # a BCP-47 language tag

client = speech.SpeechClient()
config = types.RecognitionConfig(
encoding='LINEAR16',
sample_rate_hertz=RATE,
language_code=language_code)
streaming_config = types.StreamingRecognitionConfig(config=config,
[Review thread on this line]
Contributor: nit: start a newline at the opening ( to avoid hanging indents like this.
Member Author (dizcology): done.
interim_results=True)

with MicrophoneStream(RATE, CHUNK) as stream:
audio_generator = stream.generator()
requests = (types.StreamingRecognizeRequest(audio_content=content)
for content in audio_generator)

responses = client.streaming_recognize(streaming_config, requests)

# Now, put the transcription responses to use.
listen_print_loop(results_gen)
listen_print_loop(responses)


if __name__ == '__main__':
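A small self-contained sketch, not taken from the PR, of the None-sentinel pattern that __exit__ and generator() rely on: once None is put on the buffer, the consuming generator returns, which lets the request iterator passed to streaming_recognize end instead of blocking forever on an empty queue.

from six.moves import queue


def chunks_until_none(buff):
    """Yields buffered chunks until a None sentinel signals the end."""
    while True:
        chunk = buff.get()
        if chunk is None:
            return
        yield chunk


buff = queue.Queue()
for item in (b'one', b'two', None):
    buff.put(item)

print(list(chunks_until_none(buff)))  # prints [b'one', b'two']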