Skip to content

Commit

Permalink
Speech region tag update (#1644)
Browse files: browse the repository at this point in the history
* update beta tags

* updates tags for recognition-metadata page

* update diarization tag to beta

* update word level confidence tags to beta

* updates region tags for async-recognize page

* updates region tag for async-time-offsets

* update region tags for sync transcribe

* updates multichannel tags to beta

* updates multilanguage tags to beta

* update streaming region tags

* updates async local tags and fixes beta tags

* updates tags for migration guide

* updates word time offsets region tag
  • Loading branch information
alixhami authored and telpirion committed Mar 13, 2023
1 parent 2f887b5 commit 19133f0
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 57 deletions.
28 changes: 14 additions & 14 deletions speech/snippets/beta_snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

def transcribe_file_with_enhanced_model():
"""Transcribe the given audio file using an enhanced model."""
# [START speech_transcribe_file_with_enhanced_model]
# [START speech_transcribe_enhanced_model_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand All @@ -60,12 +60,12 @@ def transcribe_file_with_enhanced_model():
print('-' * 20)
print('First alternative of result {}'.format(i))
print('Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_file_with_enhanced_model]
# [END speech_transcribe_enhanced_model_beta]


def transcribe_file_with_metadata():
"""Send a request that includes recognition metadata."""
# [START speech_transcribe_file_with_metadata]
# [START speech_transcribe_recognition_metadata_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand Down Expand Up @@ -105,12 +105,12 @@ def transcribe_file_with_metadata():
print('-' * 20)
print('First alternative of result {}'.format(i))
print('Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_file_with_metadata]
# [END speech_transcribe_recognition_metadata_beta]


def transcribe_file_with_auto_punctuation():
"""Transcribe the given audio file with auto punctuation enabled."""
# [START speech_transcribe_file_with_auto_punctuation]
# [START speech_transcribe_auto_punctuation_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand All @@ -134,12 +134,12 @@ def transcribe_file_with_auto_punctuation():
print('-' * 20)
print('First alternative of result {}'.format(i))
print('Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_file_with_auto_punctuation]
# [END speech_transcribe_auto_punctuation_beta]


def transcribe_file_with_diarization():
"""Transcribe the given audio file synchronously with diarization."""
# [START speech_transcribe_diarization]
# [START speech_transcribe_diarization_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand Down Expand Up @@ -172,13 +172,13 @@ def transcribe_file_with_diarization():
for word_info in words_info:
print("word: '{}', speaker_tag: {}".format(word_info.word,
word_info.speaker_tag))
# [END speech_transcribe_diarization]
# [END speech_transcribe_diarization_beta]


def transcribe_file_with_multichannel():
"""Transcribe the given audio file synchronously with
multi channel."""
# [START speech_transcribe_multichannel]
# [START speech_transcribe_multichannel_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand All @@ -204,13 +204,13 @@ def transcribe_file_with_multichannel():
print('First alternative of result {}'.format(i))
print(u'Transcript: {}'.format(alternative.transcript))
print(u'Channel Tag: {}'.format(result.channel_tag))
# [END speech_transcribe_multichannel]
# [END speech_transcribe_multichannel_beta]


def transcribe_file_with_multilanguage():
"""Transcribe the given audio file synchronously with
multi language."""
# [START speech_transcribe_multilanguage]
# [START speech_transcribe_multilanguage_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand Down Expand Up @@ -238,13 +238,13 @@ def transcribe_file_with_multilanguage():
print('-' * 20)
print('First alternative of result {}: {}'.format(i, alternative))
print(u'Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_multilanguage]
# [END speech_transcribe_multilanguage_beta]


def transcribe_file_with_word_level_confidence():
"""Transcribe the given audio file synchronously with
word level confidence."""
# [START speech_transcribe_word_level_confidence]
# [START speech_transcribe_word_level_confidence_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand All @@ -270,7 +270,7 @@ def transcribe_file_with_word_level_confidence():
print(u'Transcript: {}'.format(alternative.transcript))
print(u'First Word and Confidence: ({}, {})'.format(
alternative.words[0].word, alternative.words[0].confidence))
# [END speech_transcribe_word_level_confidence]
# [END speech_transcribe_word_level_confidence_beta]


if __name__ == '__main__':
Expand Down
8 changes: 4 additions & 4 deletions speech/snippets/quickstart.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ def run_quickstart():
import os

# Imports the Google Cloud client library
# [START migration_import]
# [START speech_python_migration_imports]
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
# [END migration_import]
# [END speech_python_migration_imports]

# Instantiates a client
# [START migration_client]
# [START speech_python_migration_client]
client = speech.SpeechClient()
# [END migration_client]
# [END speech_python_migration_client]

# The name of the audio file to transcribe
file_name = os.path.join(
Expand Down
26 changes: 12 additions & 14 deletions speech/snippets/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,20 @@
python transcribe.py gs://cloud-samples-tests/speech/brooklyn.flac
"""

# [START import_libraries]
import argparse
import io
# [END import_libraries]


# [START def_transcribe_file]
# [START speech_transcribe_sync]
def transcribe_file(speech_file):
"""Transcribe the given audio file."""
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()

# [START migration_sync_request]
# [START migration_audio_config_file]
# [START speech_python_migration_sync_request]
# [START speech_python_migration_config]
with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()

Expand All @@ -46,43 +44,43 @@ def transcribe_file(speech_file):
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000,
language_code='en-US')
# [END migration_audio_config_file]
# [END speech_python_migration_config]

# [START migration_sync_response]
# [START speech_python_migration_sync_response]
response = client.recognize(config, audio)
# [END migration_sync_request]
# [END speech_python_migration_sync_request]
# Each result is for a consecutive portion of the audio. Iterate through
# them to get the transcripts for the entire audio file.
for result in response.results:
# The first alternative is the most likely one for this portion.
print(u'Transcript: {}'.format(result.alternatives[0].transcript))
# [END migration_sync_response]
# [END def_transcribe_file]
# [END speech_python_migration_sync_response]
# [END speech_transcribe_sync]


# [START def_transcribe_gcs]
# [START speech_transcribe_sync_gcs]
def transcribe_gcs(gcs_uri):
    """Recognize speech in the Cloud Storage audio at ``gcs_uri`` and print it.

    Issues a single synchronous ``recognize`` call configured for FLAC
    audio sampled at 16000 Hz in US English, then prints the first
    alternative's transcript for every result in the response.

    Args:
        gcs_uri: URI of the audio object, passed unchanged as the
            ``uri`` of the ``RecognitionAudio`` message.
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    speech_client = speech.SpeechClient()

    # [START speech_python_migration_config_gcs]
    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code='en-US')
    # [END speech_python_migration_config_gcs]

    recognition = speech_client.recognize(config, audio)
    # Results arrive one per consecutive portion of the audio; walking all
    # of them yields the transcript of the whole file.
    for portion in recognition.results:
        # Alternatives are ordered most-likely first, so take index 0.
        best_guess = portion.alternatives[0]
        print(u'Transcript: {}'.format(best_guess.transcript))
# [END def_transcribe_gcs]
# [END speech_transcribe_sync_gcs]


if __name__ == '__main__':
Expand Down
16 changes: 8 additions & 8 deletions speech/snippets/transcribe_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@
import io


# [START def_transcribe_file]
# [START speech_transcribe_async]
def transcribe_file(speech_file):
"""Transcribe the given audio file asynchronously."""
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()

# [START migration_async_request]
# [START speech_python_migration_async_request]
with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()

Expand All @@ -44,9 +44,9 @@ def transcribe_file(speech_file):
sample_rate_hertz=16000,
language_code='en-US')

# [START migration_async_response]
# [START speech_python_migration_async_response]
operation = client.long_running_recognize(config, audio)
# [END migration_async_request]
# [END speech_python_migration_async_request]

print('Waiting for operation to complete...')
response = operation.result(timeout=90)
Expand All @@ -57,11 +57,11 @@ def transcribe_file(speech_file):
# The first alternative is the most likely one for this portion.
print(u'Transcript: {}'.format(result.alternatives[0].transcript))
print('Confidence: {}'.format(result.alternatives[0].confidence))
# [END migration_async_response]
# [END def_transcribe_file]
# [END speech_python_migration_async_response]
# [END speech_transcribe_async]


# [START def_transcribe_gcs]
# [START speech_transcribe_async_gcs]
def transcribe_gcs(gcs_uri):
"""Asynchronously transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
Expand All @@ -86,7 +86,7 @@ def transcribe_gcs(gcs_uri):
# The first alternative is the most likely one for this portion.
print(u'Transcript: {}'.format(result.alternatives[0].transcript))
print('Confidence: {}'.format(result.alternatives[0].confidence))
# [END def_transcribe_gcs]
# [END speech_transcribe_async_gcs]


if __name__ == '__main__':
Expand Down
8 changes: 4 additions & 4 deletions speech/snippets/transcribe_model_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import argparse


# [START speech_transcribe_model_selection]
# [START speech_transcribe_model_selection_beta]
def transcribe_model_selection(speech_file, model):
"""Transcribe the given audio file synchronously with
the selected model."""
Expand All @@ -52,10 +52,10 @@ def transcribe_model_selection(speech_file, model):
print('-' * 20)
print('First alternative of result {}'.format(i))
print(u'Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_model_selection]
# [END speech_transcribe_model_selection_beta]


# [START speech_transcribe_model_selection_gcs]
# [START speech_transcribe_model_selection_gcs_beta]
def transcribe_model_selection_gcs(gcs_uri, model):
"""Transcribe the given audio file asynchronously with
the selected model."""
Expand All @@ -80,7 +80,7 @@ def transcribe_model_selection_gcs(gcs_uri, model):
print('-' * 20)
print('First alternative of result {}'.format(i))
print(u'Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_model_selection_gcs]
# [END speech_transcribe_model_selection_gcs_beta]


if __name__ == '__main__':
Expand Down
14 changes: 6 additions & 8 deletions speech/snippets/transcribe_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,19 @@
python transcribe_streaming.py resources/audio.raw
"""

# [START import_libraries]
import argparse
import io
# [END import_libraries]


# [START def_transcribe_streaming]
# [START speech_transcribe_streaming]
def transcribe_streaming(stream_file):
"""Streams transcription of the given audio file."""
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()

# [START migration_streaming_request]
# [START speech_python_migration_streaming_request]
with io.open(stream_file, 'rb') as audio_file:
content = audio_file.read()

Expand All @@ -50,9 +48,9 @@ def transcribe_streaming(stream_file):
streaming_config = types.StreamingRecognitionConfig(config=config)

# streaming_recognize returns a generator.
# [START migration_streaming_response]
# [START speech_python_migration_streaming_response]
responses = client.streaming_recognize(streaming_config, requests)
# [END migration_streaming_request]
# [END speech_python_migration_streaming_request]

for response in responses:
# Once the transcription has settled, the first result will contain the
Expand All @@ -66,8 +64,8 @@ def transcribe_streaming(stream_file):
for alternative in alternatives:
print('Confidence: {}'.format(alternative.confidence))
print(u'Transcript: {}'.format(alternative.transcript))
# [END migration_streaming_response]
# [END def_transcribe_streaming]
# [END speech_python_migration_streaming_response]
# [END speech_transcribe_streaming]


if __name__ == '__main__':
Expand Down
5 changes: 2 additions & 3 deletions speech/snippets/transcribe_streaming_mic.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
python transcribe_streaming_mic.py
"""

# [START import_libraries]
# [START speech_transcribe_streaming_mic]
from __future__ import division

import re
Expand All @@ -36,7 +36,6 @@
from google.cloud.speech import types
import pyaudio
from six.moves import queue
# [END import_libraries]

# Audio recording parameters
RATE = 16000
Expand Down Expand Up @@ -106,7 +105,6 @@ def generator(self):
break

yield b''.join(data)
# [END audio_stream]


def listen_print_loop(responses):
Expand Down Expand Up @@ -191,3 +189,4 @@ def main():

if __name__ == '__main__':
main()
# [END speech_transcribe_streaming_mic]
4 changes: 2 additions & 2 deletions speech/snippets/transcribe_word_time_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def transcribe_file_with_word_time_offsets(speech_file):
end_time.seconds + end_time.nanos * 1e-9))


# [START def_transcribe_gcs]
# [START speech_transcribe_async_word_time_offsets_gcs]
def transcribe_gcs_with_word_time_offsets(gcs_uri):
"""Transcribe the given audio file asynchronously and output the word time
offsets."""
Expand Down Expand Up @@ -94,7 +94,7 @@ def transcribe_gcs_with_word_time_offsets(gcs_uri):
word,
start_time.seconds + start_time.nanos * 1e-9,
end_time.seconds + end_time.nanos * 1e-9))
# [END def_transcribe_gcs]
# [END speech_transcribe_async_word_time_offsets_gcs]


if __name__ == '__main__':
Expand Down

0 comments on commit 19133f0

Please sign in to comment.