From f34c15cebff6a709f48dd90351eec8ade90c4ab8 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Thu, 3 Aug 2017 16:01:45 -0700 Subject: [PATCH] Add word time offset samples [(#1050)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1050) --- samples/snippets/README.rst | 26 ++++ samples/snippets/README.rst.in | 3 + samples/snippets/transcribe_async.py | 35 +----- samples/snippets/transcribe_async_test.py | 11 -- .../snippets/transcribe_word_time_offsets.py | 111 ++++++++++++++++++ .../transcribe_word_time_offsets_test.py | 43 +++++++ 6 files changed, 189 insertions(+), 40 deletions(-) create mode 100644 samples/snippets/transcribe_word_time_offsets.py create mode 100644 samples/snippets/transcribe_word_time_offsets_test.py diff --git a/samples/snippets/README.rst b/samples/snippets/README.rst index 761ebdf6..44721eaf 100644 --- a/samples/snippets/README.rst +++ b/samples/snippets/README.rst @@ -140,6 +140,32 @@ To run this sample: -h, --help show this help message and exit +Transcribe with word time offsets ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + +To run this sample: + +.. code-block:: bash + + $ python transcribe_word_time_offsets.py + + usage: transcribe_word_time_offsets.py [-h] path + + Google Cloud Speech API sample that demonstrates word time offsets. + + Example usage: + python transcribe_word_time_offsets.py resources/audio.raw + python transcribe_word_time_offsets.py gs://cloud-samples-tests/speech/vr.flac + + positional arguments: + path File or GCS path for audio file to be recognized + + optional arguments: + -h, --help show this help message and exit + + Transcribe Streaming +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ diff --git a/samples/snippets/README.rst.in b/samples/snippets/README.rst.in index 9b671369..c2589cd8 100644 --- a/samples/snippets/README.rst.in +++ b/samples/snippets/README.rst.in @@ -28,6 +28,9 @@ samples: - name: Transcribe async file: transcribe_async.py show_help: true +- name: Transcribe with word time offsets + file: transcribe_word_time_offsets.py + show_help: true - name: Transcribe Streaming file: transcribe_streaming.py show_help: true diff --git a/samples/snippets/transcribe_async.py b/samples/snippets/transcribe_async.py index b2512121..e8fd6115 100644 --- a/samples/snippets/transcribe_async.py +++ b/samples/snippets/transcribe_async.py @@ -24,7 +24,6 @@ import argparse import io -import time # [START def_transcribe_file] @@ -49,17 +48,10 @@ def transcribe_file(speech_file): operation = client.long_running_recognize(config, audio) # [END migration_async_request] - # Sleep and poll operation.done() - retry_count = 100 - while retry_count > 0 and not operation.done(): - retry_count -= 1 - time.sleep(2) + print('Waiting for operation to complete...') + result = operation.result(timeout=90) - if not operation.done(): - print('Operation not complete and retry limit reached.') - return - - alternatives = operation.result().results[0].alternatives + alternatives = result.results[0].alternatives for alternative in alternatives: print('Transcript: {}'.format(alternative.transcript)) print('Confidence: {}'.format(alternative.confidence)) @@ -84,28 +76,13 @@ def transcribe_gcs(gcs_uri): operation = client.long_running_recognize(config, audio) - retry_count = 100 - while retry_count > 0 and not operation.done(): - retry_count -= 1 - time.sleep(2) - - if not operation.done(): - print('Operation not complete and retry limit reached.') - return + print('Waiting for operation to complete...') + result = operation.result(timeout=90) - alternatives = operation.result().results[0].alternatives + alternatives = result.results[0].alternatives for alternative in alternatives: print('Transcript: {}'.format(alternative.transcript)) print('Confidence: {}'.format(alternative.confidence)) - - for word_info in alternative.words: - word = word_info.word - start_time = word_info.start_time - end_time = word_info.end_time - print('Word: {}, start_time: {}, end_time: {}'.format( - word, - start_time.seconds + start_time.nanos * 1e-9, - end_time.seconds + end_time.nanos * 1e-9)) # [END def_transcribe_gcs] diff --git a/samples/snippets/transcribe_async_test.py b/samples/snippets/transcribe_async_test.py index 286434d0..7d66747e 100644 --- a/samples/snippets/transcribe_async_test.py +++ b/samples/snippets/transcribe_async_test.py @@ -33,14 +33,3 @@ def test_transcribe_gcs(capsys): out, err = capsys.readouterr() assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) - - -def test_transcribe_gcs_word_time_offsets(capsys): - transcribe_async.transcribe_gcs( - 'gs://python-docs-samples-tests/speech/audio.flac') - out, err = capsys.readouterr() - - match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I) - time = float(match.group(1)) - - assert time > 0 diff --git a/samples/snippets/transcribe_word_time_offsets.py b/samples/snippets/transcribe_word_time_offsets.py new file mode 100644 index 00000000..1c98feaf --- /dev/null +++ b/samples/snippets/transcribe_word_time_offsets.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python + +# Copyright 2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Google Cloud Speech API sample that demonstrates word time offsets. + +Example usage: + python transcribe_word_time_offsets.py resources/audio.raw + python transcribe_word_time_offsets.py \ + gs://cloud-samples-tests/speech/vr.flac +""" + +import argparse +import io + + +def transcribe_file_with_word_time_offsets(speech_file): + """Transcribe the given audio file synchronously and output the word time + offsets.""" + from google.cloud import speech + from google.cloud.speech import enums + from google.cloud.speech import types + client = speech.SpeechClient() + + with io.open(speech_file, 'rb') as audio_file: + content = audio_file.read() + + audio = types.RecognitionAudio(content=content) + config = types.RecognitionConfig( + encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=16000, + language_code='en-US', + enable_word_time_offsets=True) + + response = client.recognize(config, audio) + + alternatives = response.results[0].alternatives + + for alternative in alternatives: + print('Transcript: {}'.format(alternative.transcript)) + + for word_info in alternative.words: + word = word_info.word + start_time = word_info.start_time + end_time = word_info.end_time + print('Word: {}, start_time: {}, end_time: {}'.format( + word, + start_time.seconds + start_time.nanos * 1e-9, + end_time.seconds + end_time.nanos * 1e-9)) + + +# [START def_transcribe_gcs] +def transcribe_gcs_with_word_time_offsets(gcs_uri): + """Transcribe the given audio file asynchronously and output the word time + offsets.""" + from google.cloud import speech + from google.cloud.speech import enums + from google.cloud.speech import types + client = speech.SpeechClient() + + audio = types.RecognitionAudio(uri=gcs_uri) + config = types.RecognitionConfig( + encoding=enums.RecognitionConfig.AudioEncoding.FLAC, + sample_rate_hertz=16000, + language_code='en-US', + enable_word_time_offsets=True) + + operation = client.long_running_recognize(config, audio) + + print('Waiting for operation to complete...') + result = operation.result(timeout=90) + + alternatives = result.results[0].alternatives + for alternative in alternatives: + print('Transcript: {}'.format(alternative.transcript)) + print('Confidence: {}'.format(alternative.confidence)) + + for word_info in alternative.words: + word = word_info.word + start_time = word_info.start_time + end_time = word_info.end_time + print('Word: {}, start_time: {}, end_time: {}'.format( + word, + start_time.seconds + start_time.nanos * 1e-9, + end_time.seconds + end_time.nanos * 1e-9)) +# [END def_transcribe_gcs] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument( + 'path', help='File or GCS path for audio file to be recognized') + args = parser.parse_args() + if args.path.startswith('gs://'): + transcribe_gcs_with_word_time_offsets(args.path) + else: + transcribe_file_with_word_time_offsets(args.path) diff --git a/samples/snippets/transcribe_word_time_offsets_test.py b/samples/snippets/transcribe_word_time_offsets_test.py new file mode 100644 index 00000000..e894385f --- /dev/null +++ b/samples/snippets/transcribe_word_time_offsets_test.py @@ -0,0 +1,43 @@ +# Copyright 2016, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re + +import transcribe_word_time_offsets + +RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') + + +def test_transcribe_file_with_word_time_offsets(capsys): + transcribe_word_time_offsets.transcribe_file_with_word_time_offsets( + os.path.join(RESOURCES, 'audio.raw')) + out, _ = capsys.readouterr() + + print(out) + match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I) + time = float(match.group(1)) + + assert time > 0 + + +def test_transcribe_gcs_with_word_time_offsets(capsys): + transcribe_word_time_offsets.transcribe_gcs_with_word_time_offsets( + 'gs://python-docs-samples-tests/speech/audio.flac') + out, _ = capsys.readouterr() + + print(out) + match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I) + time = float(match.group(1)) + + assert time > 0