Speech gapic client library #1012

Merged: 29 commits, Jul 14, 2017
Showing changes from 22 of the 29 commits.

Commits:
aa0de94  Migrate quickstart to GAPIC client library (dizcology, Jun 13, 2017)
c695c82  Migrate transcribe to GAPIC client library (dizcology, Jun 13, 2017)
4777dc6  Migrate transcribe_async to GAPIC client library (dizcology, Jun 14, 2017)
e77f5f8  Migrate transcribe_streaming to GAPIC client library (dizcology, Jun 14, 2017)
199a748  clean up (dizcology, Jun 20, 2017)
51c4d01  clean up (dizcology, Jun 21, 2017)
0de6c7c  Import from google.cloud.speech (dizcology, Jun 26, 2017)
a594c70  update transcribe samples (dizcology, Jun 27, 2017)
9129caf  import in alphabetic order (dizcology, Jun 27, 2017)
4db0f45  remove unused variable (dizcology, Jun 29, 2017)
f09dfec  use strings instead of enums (dizcology, Jun 29, 2017)
66d53aa  restructure code (dizcology, Jun 30, 2017)
99b2e79  comment on sreaming requests (dizcology, Jul 5, 2017)
c7d1ad7  import style (dizcology, Jul 6, 2017)
ce0d25d  flake (dizcology, Jul 7, 2017)
3196c73  correct indent (dizcology, Jul 11, 2017)
d5acd7c  migrate transcribe_streaming_mic to gapic (dizcology, Jul 11, 2017)
cb40b7f  update google-cloud-speech version requirement (dizcology, Jul 11, 2017)
34ce758  addressing review comments (dizcology, Jul 11, 2017)
0955793  at the end of the audio stream, put None to signal to the generator (dizcology, Jul 11, 2017)
e355325  flake (dizcology, Jul 12, 2017)
a5f4c35  addressing github review comments (dizcology, Jul 12, 2017)
73d2b79  add region tags for migration guide (dizcology, Jul 13, 2017)
39f9b6b  update README (dizcology, Jul 13, 2017)
efe110c  rst format (dizcology, Jul 13, 2017)
1f4cda6  bullet (dizcology, Jul 13, 2017)
bd32ab4  addressing PR review comments (dizcology, Jul 13, 2017)
1f861ee  use enums (dizcology, Jul 13, 2017)
8fa2982  remove a word (dizcology, Jul 13, 2017)
17 changes: 10 additions & 7 deletions speech/cloud-client/quickstart.py
@@ -22,9 +22,10 @@ def run_quickstart():

# Imports the Google Cloud client library
from google.cloud import speech
from google.cloud.speech import types

# Instantiates a client
speech_client = speech.Client()
client = speech.SpeechClient()

# The name of the audio file to transcribe
file_name = os.path.join(
@@ -35,14 +36,16 @@ def run_quickstart():
# Loads the audio into memory
with io.open(file_name, 'rb') as audio_file:
content = audio_file.read()
sample = speech_client.sample(
content,
source_uri=None,
encoding='LINEAR16',
sample_rate_hertz=16000)
audio = types.RecognitionAudio(content=content)

config = types.RecognitionConfig(
encoding='LINEAR16',
[Review thread on this line]
Contributor: Is there an enum for this?
Member Author (dizcology): There is, but it requires the additional import of from google.cloud.speech import enums. I was keeping it consistent with the existing samples.

sample_rate_hertz=16000,
language_code='en-US')

# Detects speech in the audio file
alternatives = sample.recognize('en-US')
response = client.recognize(config, audio)
alternatives = response.results[0].alternatives

for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))
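The review thread above mentions an enum alternative to the 'LINEAR16' string. As a rough sketch, not part of this PR, assuming google-cloud-speech==0.27.0 where the GAPIC enums are importable via from google.cloud.speech import enums, the quickstart config could use the enum member instead; the function name and the resources/audio.raw path are illustrative, mirroring the existing sample.

import io
import os

from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types


def run_quickstart_with_enum():
    client = speech.SpeechClient()

    # Same resource layout the existing quickstart sample assumes.
    file_name = os.path.join(
        os.path.dirname(__file__), 'resources', 'audio.raw')

    with io.open(file_name, 'rb') as audio_file:
        content = audio_file.read()
    audio = types.RecognitionAudio(content=content)

    # The enum member replaces the 'LINEAR16' string used above.
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US')

    response = client.recognize(config, audio)
    for result in response.results:
        for alternative in result.alternatives:
            print('Transcript: {}'.format(alternative.transcript))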
2 changes: 1 addition & 1 deletion speech/cloud-client/requirements.txt
@@ -1 +1 @@
google-cloud-speech==0.26.0
google-cloud-speech==0.27.0
34 changes: 21 additions & 13 deletions speech/cloud-client/transcribe.py
@@ -31,33 +31,41 @@
def transcribe_file(speech_file):
"""Transcribe the given audio file."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import types
client = speech.SpeechClient()

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()
audio_sample = speech_client.sample(
content=content,
source_uri=None,
encoding='LINEAR16',
sample_rate_hertz=16000)
audio = types.RecognitionAudio(content=content)

config = types.RecognitionConfig(
encoding='LINEAR16',
sample_rate_hertz=16000,
language_code='en-US')

response = client.recognize(config, audio)
alternatives = response.results[0].alternatives

alternatives = audio_sample.recognize('en-US')
for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))


def transcribe_gcs(gcs_uri):
"""Transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import types
client = speech.SpeechClient()

audio_sample = speech_client.sample(
content=None,
source_uri=gcs_uri,
audio = types.RecognitionAudio(uri=gcs_uri)

config = types.RecognitionConfig(
encoding='FLAC',
sample_rate_hertz=16000)
sample_rate_hertz=16000,
language_code='en-US')

response = client.recognize(config, audio)
alternatives = response.results[0].alternatives

alternatives = audio_sample.recognize('en-US')
for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))

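A small usage sketch, not from this PR: recognize returns one result per consecutive portion of audio, so longer files can yield several results. A caller that wants every segment can loop over response.results instead of indexing results[0] as the samples above do; the helper name here is illustrative.

def print_all_transcripts(response):
    """Prints the top alternative of every result in a recognize response."""
    for result in response.results:
        # Alternatives are ordered by confidence; take the most likely one.
        top = result.alternatives[0]
        print('Transcript: {}'.format(top.transcript))
        print('Confidence: {}'.format(top.confidence))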
46 changes: 23 additions & 23 deletions speech/cloud-client/transcribe_async.py
@@ -30,63 +30,63 @@
def transcribe_file(speech_file):
"""Transcribe the given audio file asynchronously."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import types
client = speech.SpeechClient()

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()
audio_sample = speech_client.sample(
content,
source_uri=None,
encoding='LINEAR16',
sample_rate_hertz=16000)
audio = types.RecognitionAudio(content=content)

operation = audio_sample.long_running_recognize('en-US')
config = types.RecognitionConfig(
encoding='LINEAR16',
sample_rate_hertz=16000,
language_code='en-US')

operation = client.long_running_recognize(config, audio)

retry_count = 100
while retry_count > 0 and not operation.complete:
while retry_count > 0 and not operation.done():
retry_count -= 1
time.sleep(2)
operation.poll()

if not operation.complete:
if not operation.done():
print('Operation not complete and retry limit reached.')
return

alternatives = operation.results
alternatives = operation.result().results[0].alternatives
for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))
print('Confidence: {}'.format(alternative.confidence))
# [END send_request]


def transcribe_gcs(gcs_uri):
"""Asynchronously transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import types
client = speech.SpeechClient()

audio = types.RecognitionAudio(uri=gcs_uri)

audio_sample = speech_client.sample(
content=None,
source_uri=gcs_uri,
config = types.RecognitionConfig(
encoding='FLAC',
sample_rate_hertz=16000)
sample_rate_hertz=16000,
language_code='en-US')

operation = audio_sample.long_running_recognize('en-US')
operation = client.long_running_recognize(config, audio)

retry_count = 100
while retry_count > 0 and not operation.complete:
while retry_count > 0 and not operation.done():
retry_count -= 1
time.sleep(2)
operation.poll()

if not operation.complete:
if not operation.done():
print('Operation not complete and retry limit reached.')
return

alternatives = operation.results
alternatives = operation.result().results[0].alternatives
for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))
print('Confidence: {}'.format(alternative.confidence))
# [END send_request_gcs]


if __name__ == '__main__':
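A minimal alternative sketch, not part of this PR: because the new client's operation object already exposes result() (used above to read the response), the manual poll-and-retry loop can be replaced by blocking on result() directly. The function name and the choice to block indefinitely are assumptions of this sketch.

from google.cloud import speech
from google.cloud.speech import types


def transcribe_gcs_blocking(gcs_uri):
    """Transcribes a GCS audio file, blocking until the operation finishes."""
    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding='FLAC',
        sample_rate_hertz=16000,
        language_code='en-US')

    operation = client.long_running_recognize(config, audio)

    # result() waits for the long-running operation to complete and returns
    # the recognition response.
    response = operation.result()
    for alternative in response.results[0].alternatives:
        print('Transcript: {}'.format(alternative.transcript))
        print('Confidence: {}'.format(alternative.confidence))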
34 changes: 22 additions & 12 deletions speech/cloud-client/transcribe_streaming.py
@@ -29,20 +29,30 @@
def transcribe_streaming(stream_file):
"""Streams transcription of the given audio file."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import types
client = speech.SpeechClient()

with io.open(stream_file, 'rb') as audio_file:
audio_sample = speech_client.sample(
stream=audio_file,
encoding=speech.encoding.Encoding.LINEAR16,
sample_rate_hertz=16000)
alternatives = audio_sample.streaming_recognize('en-US')

for alternative in alternatives:
print('Finished: {}'.format(alternative.is_final))
print('Stability: {}'.format(alternative.stability))
print('Confidence: {}'.format(alternative.confidence))
print('Transcript: {}'.format(alternative.transcript))
content = audio_file.read()

config = types.RecognitionConfig(
encoding='LINEAR16',
sample_rate_hertz=16000,
language_code='en-US')

# In practice requests should be a generator yielding chunks of audio data.
requests = (types.StreamingRecognizeRequest(audio_content=c)
[Review thread on this line]
Contributor: please use a more descriptive variable name rather than c.
Member Author (dizcology): done.
for c in [content])
streaming_config = types.StreamingRecognitionConfig(config=config)

for response in client.streaming_recognize(streaming_config, requests):
for result in response.results:
print('Finished: {}'.format(result.is_final))
print('Stability: {}'.format(result.stability))
alternatives = result.alternatives
for alternative in alternatives:
print('Confidence: {}'.format(alternative.confidence))
print('Transcript: {}'.format(alternative.transcript))


if __name__ == '__main__':
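The in-diff comment notes that, in practice, requests should be a generator yielding chunks of audio data rather than the whole file in a single request. A rough sketch of that pattern follows; the chunk size and helper name are illustrative, not part of the PR.

import io

from google.cloud.speech import types


def request_stream(stream_file, chunk_size=16 * 1024):
    """Yields StreamingRecognizeRequest messages read from an audio file."""
    with io.open(stream_file, 'rb') as audio_file:
        while True:
            chunk = audio_file.read(chunk_size)
            if not chunk:
                return
            yield types.StreamingRecognizeRequest(audio_content=chunk)


# The generator plugs into the same call shown above:
#     responses = client.streaming_recognize(
#         streaming_config, request_stream(stream_file))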
101 changes: 61 additions & 40 deletions speech/cloud-client/transcribe_streaming_mic.py
@@ -32,6 +32,7 @@
import sys

from google.cloud import speech
from google.cloud.speech import types
import pyaudio
from six.moves import queue
# [END import_libraries]
@@ -41,8 +42,8 @@
CHUNK = int(RATE / 10) # 100ms


class MicAsFile(object):
"""Opens a recording stream as a file-like object."""
class MicrophoneStream(object):
"""Opens a recording stream as a generator yielding the audio chunks."""
def __init__(self, rate, chunk):
self._rate = rate
self._chunk = chunk
@@ -73,7 +74,8 @@ def __exit__(self, type, value, traceback):
self._audio_stream.stop_stream()
self._audio_stream.close()
self.closed = True
# Flush out the read, just in case
# Signal the generator to terminate so that the client's
# streaming_recognize method will not block the process termination.
self._buff.put(None)
self._audio_interface.terminate()

@@ -82,51 +84,64 @@ def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
self._buff.put(in_data)
return None, pyaudio.paContinue

def read(self, chunk_size):
if self.closed:
return

# Use a blocking get() to ensure there's at least one chunk of data.
data = [self._buff.get()]

# Now consume whatever other data's still buffered.
while True:
try:
data.append(self._buff.get(block=False))
except queue.Empty:
break

if self.closed:
return
return b''.join(data)
def generator(self):
while not self.closed:
# Use a blocking get() to ensure there's at least one chunk of
# data, and stop iteration if the chunk is None, indicating the
# end of the audio stream.
chunk = self._buff.get()
if chunk is None:
return
data = [chunk]

# Now consume whatever other data's still buffered.
while True:
try:
chunk = self._buff.get(block=False)
if chunk is None:
return
data.append(chunk)
except queue.Empty:
break

yield b''.join(data)
# [END audio_stream]


def listen_print_loop(results_gen):
def listen_print_loop(responses):
"""Iterates through server responses and prints them.

The results_gen passed is a generator that will block until a response
is provided by the server. When the transcription response comes, print it.
The responses passed is a generator that will block until a response
is provided by the server.

Each response may contain multiple results, and each result may contain
multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we
print only the transcription for the top alternative of the top result.

In this case, responses are provided for interim results as well. If the
response is an interim one, print a line feed at the end of it, to allow
the next result to overwrite it, until the response is a final one. For the
final one, print a newline to preserve the finalized transcription.
"""
num_chars_printed = 0
for result in results_gen:
for response in responses:
if not response.results:
continue

# There could be multiple results in each response.
result = response.results[0]
if not result.alternatives:
continue

# Display the top transcription
transcript = result.transcript
# Display the transcription of the top alternative.
transcript = result.alternatives[0].transcript

# Display interim results, but with a carriage return at the end of the
# line, so subsequent lines will overwrite them.
#
# If the previous result was longer than this one, we need to print
# some extra spaces to overwrite the previous result
overwrite_chars = ' ' * max(0, num_chars_printed - len(transcript))
overwrite_chars = ' ' * (num_chars_printed - len(transcript))

if not result.is_final:
sys.stdout.write(transcript + overwrite_chars + '\r')
@@ -147,21 +162,27 @@ def listen_print_loop(results_gen):


def main():
speech_client = speech.Client()

with MicAsFile(RATE, CHUNK) as stream:
audio_sample = speech_client.sample(
stream=stream,
encoding=speech.encoding.Encoding.LINEAR16,
sample_rate_hertz=RATE)
# See http://g.co/cloud/speech/docs/languages
# for a list of supported languages.
language_code = 'en-US' # a BCP-47 language tag
results_gen = audio_sample.streaming_recognize(
language_code=language_code, interim_results=True)
# See http://g.co/cloud/speech/docs/languages
# for a list of supported languages.
language_code = 'en-US' # a BCP-47 language tag

client = speech.SpeechClient()
config = types.RecognitionConfig(
encoding='LINEAR16',
sample_rate_hertz=RATE,
language_code=language_code)
streaming_config = types.StreamingRecognitionConfig(config=config,
[Review thread on this line]
Contributor: nit: start a newline at the opening ( to avoid hanging indents like this.
Member Author (dizcology): done.
interim_results=True)

with MicrophoneStream(RATE, CHUNK) as stream:
audio_generator = stream.generator()
requests = (types.StreamingRecognizeRequest(audio_content=content)
for content in audio_generator)

responses = client.streaming_recognize(streaming_config, requests)

# Now, put the transcription responses to use.
listen_print_loop(results_gen)
listen_print_loop(responses)


if __name__ == '__main__':
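A small self-contained sketch, not taken from the PR, of the None-sentinel pattern that __exit__ and generator() rely on: once None is put on the buffer, the consuming generator returns, which lets the request iterator passed to streaming_recognize end instead of blocking forever on an empty queue.

from six.moves import queue


def chunks_until_none(buff):
    """Yields buffered chunks until a None sentinel signals the end."""
    while True:
        chunk = buff.get()
        if chunk is None:
            return
        yield chunk


buff = queue.Queue()
for item in (b'one', b'two', None):
    buff.put(item)

print(list(chunks_until_none(buff)))  # prints [b'one', b'two']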