diff --git a/docs/speech-usage.rst b/docs/speech-usage.rst index aedd5fa9990c..b70e80a86614 100644 --- a/docs/speech-usage.rst +++ b/docs/speech-usage.rst @@ -61,7 +61,7 @@ See: `Speech Asynchronous Recognize`_ >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac', ... encoding=speech.Encoding.LINEAR16, ... sample_rate=44100) - >>> operation = client.async_recognize(sample, max_alternatives=2) + >>> operation = sample.async_recognize(max_alternatives=2) >>> retry_count = 100 >>> while retry_count > 0 and not operation.complete: ... retry_count -= 1 @@ -94,8 +94,7 @@ Great Britian. >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac', ... encoding=speech.Encoding.FLAC, ... sample_rate=44100) - >>> operation = client.async_recognize(sample, max_alternatives=2) - >>> alternatives = client.sync_recognize( + >>> alternatives = sample.sync_recognize( ... speech.Encoding.FLAC, 16000, ... source_uri='gs://my-bucket/recording.flac', language_code='en-GB', ... max_alternatives=2) @@ -119,7 +118,7 @@ Example of using the profanity filter. >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac', ... encoding=speech.Encoding.FLAC, ... sample_rate=44100) - >>> alternatives = client.sync_recognize(sample, max_alternatives=1, + >>> alternatives = sample.sync_recognize(max_alternatives=1, ... profanity_filter=True) >>> for alternative in alternatives: ... print('=' * 20) @@ -141,7 +140,7 @@ words to the vocabulary of the recognizer. ... encoding=speech.Encoding.FLAC, ... sample_rate=44100) >>> hints = ['hi', 'good afternoon'] - >>> alternatives = client.sync_recognize(sample, max_alternatives=2, + >>> alternatives = sample.sync_recognize(max_alternatives=2, ... speech_context=hints) >>> for alternative in alternatives: ... print('=' * 20) @@ -171,7 +170,7 @@ speech data to possible text alternatives on the fly. ... sample = client.sample(content=stream, ... encoding=speech.Encoding.LINEAR16, ... sample_rate=16000) - ... 
results = list(client.streaming_recognize(sample)) + ... results = list(sample.streaming_recognize()) >>> print(results[0].alternatives[0].transcript) 'hello' >>> print(results[0].alternatives[0].confidence) @@ -194,8 +193,7 @@ See: `Single Utterance`_ ... sample = client.sample(content=stream, ... encoding=speech.Encoding.LINEAR16, ... sample_rate=16000) - ... responses = client.streaming_recognize(sample, - ... single_utterance=True) + ... responses = sample.streaming_recognize(single_utterance=True) ... results = list(responses) >>> print(results[0].alternatives[0].transcript) hello @@ -214,8 +212,7 @@ If ``interim_results`` is set to :data:`True`, interim results ... sample = client.sample(content=stream, ... encoding=speech.Encoding.LINEAR16, ... sample_rate=16000) - ... for results in client.streaming_recognize(sample, - ... interim_results=True): + ... for results in sample.streaming_recognize(interim_results=True): ... print('=' * 20) ... print(results[0].alternatives[0].transcript) ... print(results[0].alternatives[0].confidence) diff --git a/speech/google/cloud/speech/client.py b/speech/google/cloud/speech/client.py index 125722d128ef..fde4ce89309f 100644 --- a/speech/google/cloud/speech/client.py +++ b/speech/google/cloud/speech/client.py @@ -25,9 +25,7 @@ from google.cloud.speech._gax import GAPICSpeechAPI from google.cloud.speech.alternative import Alternative from google.cloud.speech.connection import Connection -from google.cloud.speech.encoding import Encoding from google.cloud.speech.operation import Operation -from google.cloud.speech.result import StreamingSpeechResult from google.cloud.speech.sample import Sample @@ -65,58 +63,7 @@ def __init__(self, credentials=None, http=None, use_gax=None): _connection_class = Connection _speech_api = None - def async_recognize(self, sample, language_code=None, - max_alternatives=None, profanity_filter=None, - speech_context=None): - """Asychronous Recognize request to Google Speech API. - - .. 
_async_recognize: https://cloud.google.com/speech/reference/\ - rest/v1beta1/speech/asyncrecognize - - See `async_recognize`_. - - :type sample: :class:`~google.cloud.speech.sample.Sample` - :param sample: Instance of ``Sample`` containing audio information. - - :type language_code: str - :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. - - :type max_alternatives: int - :param max_alternatives: (Optional) Maximum number of recognition - hypotheses to be returned. The server may - return fewer than maxAlternatives. - Valid values are 0-30. A value of 0 or 1 - will return a maximum of 1. Defaults to 1 - - :type profanity_filter: bool - :param profanity_filter: If True, the server will attempt to filter - out profanities, replacing all but the - initial character in each filtered word with - asterisks, e.g. ``'f***'``. If False or - omitted, profanities won't be filtered out. - - :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and - phrases "hints" so that the speech recognition - is more likely to recognize them. This can be - used to improve the accuracy for specific words - and phrases. This can also be used to add new - words to the vocabulary of the recognizer. - - :rtype: :class:`~google.cloud.speech.operation.Operation` - :returns: Operation for asynchronous request to Google Speech API. - """ - if sample.encoding is not Encoding.LINEAR16: - raise ValueError('Only LINEAR16 encoding is supported by ' - 'asynchronous speech requests.') - api = self.speech_api - return api.async_recognize(sample, language_code, max_alternatives, - profanity_filter, speech_context) - - @staticmethod - def sample(content=None, source_uri=None, encoding=None, + def sample(self, content=None, source_uri=None, encoding=None, sample_rate=None): """Factory: construct Sample to use when making recognize requests. 
@@ -148,7 +95,7 @@ def sample(content=None, source_uri=None, encoding=None, :returns: Instance of ``Sample``. """ return Sample(content=content, source_uri=source_uri, - encoding=encoding, sample_rate=sample_rate) + encoding=encoding, sample_rate=sample_rate, client=self) @property def speech_api(self): @@ -160,145 +107,6 @@ def speech_api(self): self._speech_api = _JSONSpeechAPI(self) return self._speech_api - def streaming_recognize(self, sample, language_code=None, - max_alternatives=None, profanity_filter=None, - speech_context=None, single_utterance=False, - interim_results=False): - """Streaming speech recognition. - - .. note:: - - Streaming recognition requests are limited to 1 minute of audio. - See: https://cloud.google.com/speech/limits#content - - Yields: Instance of - :class:`~google.cloud.speech.result.StreamingSpeechResult` - containing results and metadata from the streaming request. - - :type sample: :class:`~google.cloud.speech.sample.Sample` - :param sample: Instance of ``Sample`` containing audio information. - - :type language_code: str - :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. - - :type max_alternatives: int - :param max_alternatives: (Optional) Maximum number of recognition - hypotheses to be returned. The server may - return fewer than maxAlternatives. - Valid values are 0-30. A value of 0 or 1 - will return a maximum of 1. Defaults to 1 - - :type profanity_filter: bool - :param profanity_filter: If True, the server will attempt to filter - out profanities, replacing all but the - initial character in each filtered word with - asterisks, e.g. ``'f***'``. If False or - omitted, profanities won't be filtered out. - - :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and - phrases "hints" so that the speech recognition - is more likely to recognize them. 
This can be - used to improve the accuracy for specific words - and phrases. This can also be used to add new - words to the vocabulary of the recognizer. - - :type single_utterance: bool - :param single_utterance: (Optional) If false or omitted, the recognizer - will perform continuous recognition - (continuing to process audio even if the user - pauses speaking) until the client closes the - output stream (gRPC API) or when the maximum - time limit has been reached. Multiple - SpeechRecognitionResults with the is_final - flag set to true may be returned. - If true, the recognizer will detect a single - spoken utterance. When it detects that the - user has paused or stopped speaking, it will - return an END_OF_UTTERANCE event and cease - recognition. It will return no more than one - SpeechRecognitionResult with the is_final flag - set to true. - - :type interim_results: bool - :param interim_results: (Optional) If true, interim results (tentative - hypotheses) may be returned as they become - available (these interim results are indicated - with the ``is_final=False`` flag). If false or - omitted, only is_final=true result(s) are - returned. - - :raises: EnvironmentError if gRPC is not available. - """ - if not self._use_gax: - raise EnvironmentError('gRPC is required to use this API.') - - responses = self.speech_api.streaming_recognize(sample, language_code, - max_alternatives, - profanity_filter, - speech_context, - single_utterance, - interim_results) - for response in responses: - for result in response.results: - if result.is_final or interim_results: - yield StreamingSpeechResult.from_pb(result) - - def sync_recognize(self, sample, language_code=None, - max_alternatives=None, profanity_filter=None, - speech_context=None): - """Synchronous Speech Recognition. - - .. _sync_recognize: https://cloud.google.com/speech/reference/\ - rest/v1beta1/speech/syncrecognize - - See `sync_recognize`_. 
- - :type sample: :class:`~google.cloud.speech.sample.Sample` - :param sample: Instance of ``Sample`` containing audio information. - - :type language_code: str - :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. - - :type max_alternatives: int - :param max_alternatives: (Optional) Maximum number of recognition - hypotheses to be returned. The server may - return fewer than maxAlternatives. - Valid values are 0-30. A value of 0 or 1 - will return a maximum of 1. Defaults to 1 - - :type profanity_filter: bool - :param profanity_filter: If True, the server will attempt to filter - out profanities, replacing all but the - initial character in each filtered word with - asterisks, e.g. ``'f***'``. If False or - omitted, profanities won't be filtered out. - - :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and - phrases "hints" so that the speech recognition - is more likely to recognize them. This can be - used to improve the accuracy for specific words - and phrases. This can also be used to add new - words to the vocabulary of the recognizer. - - :rtype: list - :returns: A list of dictionaries. One dict for each alternative. Each - dictionary typically contains two keys (though not - all will be present in all cases) - - * ``transcript``: The detected text from the audio recording. - * ``confidence``: The confidence in language detection, float - between 0 and 1. - """ - api = self.speech_api - return api.sync_recognize(sample, language_code, max_alternatives, - profanity_filter, speech_context) - class _JSONSpeechAPI(object): """Speech API for interacting with the JSON/REST version of the API. 
diff --git a/speech/google/cloud/speech/sample.py b/speech/google/cloud/speech/sample.py index 17394b7b2f31..f1820e148729 100644 --- a/speech/google/cloud/speech/sample.py +++ b/speech/google/cloud/speech/sample.py @@ -15,6 +15,7 @@ """Sample class to handle content for Google Cloud Speech API.""" from google.cloud.speech.encoding import Encoding +from google.cloud.speech.result import StreamingSpeechResult class Sample(object): @@ -43,12 +44,16 @@ class Sample(object): to 16000 Hz. If that's not possible, use the native sample rate of the audio source (instead of re-sampling). + + :type client: :class:`~google.cloud.speech.client.Client` + :param client: (Optional) The client that owns this instance of sample. """ default_encoding = Encoding.FLAC default_sample_rate = 16000 def __init__(self, content=None, source_uri=None, - encoding=None, sample_rate=None): + encoding=None, sample_rate=None, client=None): + self._client = client no_source = content is None and source_uri is None both_source = content is not None and source_uri is not None @@ -112,3 +117,180 @@ def encoding(self): :returns: String value of Encoding type. """ return self._encoding + + def async_recognize(self, language_code=None, max_alternatives=None, + profanity_filter=None, speech_context=None): + """Asynchronous Recognize request to Google Speech API. + + .. _async_recognize: https://cloud.google.com/speech/reference/\ rest/v1beta1/speech/asyncrecognize + + See `async_recognize`_. + + :type language_code: str + :param language_code: (Optional) The language of the supplied audio as + BCP-47 language tag. Example: ``'en-GB'``. + If omitted, defaults to ``'en-US'``. + + :type max_alternatives: int + :param max_alternatives: (Optional) Maximum number of recognition + hypotheses to be returned. The server may + return fewer than maxAlternatives. + Valid values are 0-30. A value of 0 or 1 + will return a maximum of 1.
Defaults to 1 + + :type profanity_filter: bool + :param profanity_filter: If True, the server will attempt to filter + out profanities, replacing all but the + initial character in each filtered word with + asterisks, e.g. ``'f***'``. If False or + omitted, profanities won't be filtered out. + + :type speech_context: list + :param speech_context: A list of strings (max 50) containing words and + phrases "hints" so that the speech recognition + is more likely to recognize them. This can be + used to improve the accuracy for specific words + and phrases. This can also be used to add new + words to the vocabulary of the recognizer. + + :rtype: :class:`~google.cloud.speech.operation.Operation` + :returns: Operation for asynchronous request to Google Speech API. + """ + if self.encoding is not Encoding.LINEAR16: + raise ValueError('Only LINEAR16 encoding is supported by ' + 'asynchronous speech requests.') + api = self._client.speech_api + return api.async_recognize(self, language_code, max_alternatives, + profanity_filter, speech_context) + + def streaming_recognize(self, language_code=None, + max_alternatives=None, profanity_filter=None, + speech_context=None, single_utterance=False, + interim_results=False): + """Streaming speech recognition. + + .. note:: + + Streaming recognition requests are limited to 1 minute of audio. + See: https://cloud.google.com/speech/limits#content + + Yields: Instance of + :class:`~google.cloud.speech.result.StreamingSpeechResult` + containing results and metadata from the streaming request. + + :type language_code: str + :param language_code: (Optional) The language of the supplied audio as + BCP-47 language tag. Example: ``'en-GB'``. + If omitted, defaults to ``'en-US'``. + + :type max_alternatives: int + :param max_alternatives: (Optional) Maximum number of recognition + hypotheses to be returned. The server may + return fewer than maxAlternatives. + Valid values are 0-30. A value of 0 or 1 + will return a maximum of 1. 
Defaults to 1 + + :type profanity_filter: bool + :param profanity_filter: If True, the server will attempt to filter + out profanities, replacing all but the + initial character in each filtered word with + asterisks, e.g. ``'f***'``. If False or + omitted, profanities won't be filtered out. + + :type speech_context: list + :param speech_context: A list of strings (max 50) containing words and + phrases "hints" so that the speech recognition + is more likely to recognize them. This can be + used to improve the accuracy for specific words + and phrases. This can also be used to add new + words to the vocabulary of the recognizer. + + :type single_utterance: bool + :param single_utterance: (Optional) If false or omitted, the recognizer + will perform continuous recognition + (continuing to process audio even if the user + pauses speaking) until the client closes the + output stream (gRPC API) or when the maximum + time limit has been reached. Multiple + SpeechRecognitionResults with the is_final + flag set to true may be returned. + If true, the recognizer will detect a single + spoken utterance. When it detects that the + user has paused or stopped speaking, it will + return an END_OF_UTTERANCE event and cease + recognition. It will return no more than one + SpeechRecognitionResult with the is_final flag + set to true. + + :type interim_results: bool + :param interim_results: (Optional) If true, interim results (tentative + hypotheses) may be returned as they become + available (these interim results are indicated + with the ``is_final=False`` flag). If false or + omitted, only is_final=true result(s) are + returned. + + :raises: EnvironmentError if gRPC is not available. 
+ """ + if not self._client._use_gax: + raise EnvironmentError('gRPC is required to use this API.') + + api = self._client.speech_api + responses = api.streaming_recognize(self, language_code, + max_alternatives, profanity_filter, + speech_context, single_utterance, + interim_results) + for response in responses: + for result in response.results: + if result.is_final or interim_results: + yield StreamingSpeechResult.from_pb(result) + + def sync_recognize(self, language_code=None, max_alternatives=None, + profanity_filter=None, speech_context=None): + """Synchronous Speech Recognition. + + .. _sync_recognize: https://cloud.google.com/speech/reference/\ + rest/v1beta1/speech/syncrecognize + + See `sync_recognize`_. + + :type language_code: str + :param language_code: (Optional) The language of the supplied audio as + BCP-47 language tag. Example: ``'en-GB'``. + If omitted, defaults to ``'en-US'``. + + :type max_alternatives: int + :param max_alternatives: (Optional) Maximum number of recognition + hypotheses to be returned. The server may + return fewer than maxAlternatives. + Valid values are 0-30. A value of 0 or 1 + will return a maximum of 1. Defaults to 1 + + :type profanity_filter: bool + :param profanity_filter: If True, the server will attempt to filter + out profanities, replacing all but the + initial character in each filtered word with + asterisks, e.g. ``'f***'``. If False or + omitted, profanities won't be filtered out. + + :type speech_context: list + :param speech_context: A list of strings (max 50) containing words and + phrases "hints" so that the speech recognition + is more likely to recognize them. This can be + used to improve the accuracy for specific words + and phrases. This can also be used to add new + words to the vocabulary of the recognizer. + + :rtype: list + :returns: A list of dictionaries. One dict for each alternative. 
Each + dictionary typically contains two keys (though not + all will be present in all cases) + + * ``transcript``: The detected text from the audio recording. + * ``confidence``: The confidence in language detection, float + between 0 and 1. + """ + api = self._client.speech_api + return api.sync_recognize(self, language_code, max_alternatives, + profanity_filter, speech_context) diff --git a/speech/unit_tests/test_client.py b/speech/unit_tests/test_client.py index f0436e6d5c17..59769f259d45 100644 --- a/speech/unit_tests/test_client.py +++ b/speech/unit_tests/test_client.py @@ -122,7 +122,6 @@ def test_sync_recognize_content_with_optional_params_no_gax(self): from google.cloud import speech from google.cloud.speech.alternative import Alternative - from google.cloud.speech.sample import Sample from unit_tests._fixtures import SYNC_RECOGNIZE_RESPONSE _AUDIO_CONTENT = _to_bytes(self.AUDIO_CONTENT) @@ -151,11 +150,10 @@ def test_sync_recognize_content_with_optional_params_no_gax(self): encoding = speech.Encoding.FLAC - sample = Sample(content=self.AUDIO_CONTENT, encoding=encoding, - sample_rate=self.SAMPLE_RATE) + sample = client.sample(content=self.AUDIO_CONTENT, encoding=encoding, + sample_rate=self.SAMPLE_RATE) - response = client.sync_recognize(sample, - language_code='EN', + response = sample.sync_recognize(language_code='EN', max_alternatives=2, profanity_filter=True, speech_context=self.HINTS) @@ -177,7 +175,6 @@ def test_sync_recognize_content_with_optional_params_no_gax(self): def test_sync_recognize_source_uri_without_optional_params_no_gax(self): from google.cloud import speech from google.cloud.speech.alternative import Alternative - from google.cloud.speech.sample import Sample from unit_tests._fixtures import SYNC_RECOGNIZE_RESPONSE RETURNED = SYNC_RECOGNIZE_RESPONSE @@ -196,10 +193,10 @@ def test_sync_recognize_source_uri_without_optional_params_no_gax(self): encoding = speech.Encoding.FLAC - sample = Sample(source_uri=self.AUDIO_SOURCE_URI, 
encoding=encoding, - sample_rate=self.SAMPLE_RATE) + sample = client.sample(source_uri=self.AUDIO_SOURCE_URI, + encoding=encoding, sample_rate=self.SAMPLE_RATE) - response = client.sync_recognize(sample) + response = sample.sync_recognize() self.assertEqual(len(client._connection._requested), 1) req = client._connection._requested[0] @@ -217,26 +214,24 @@ def test_sync_recognize_source_uri_without_optional_params_no_gax(self): def test_sync_recognize_with_empty_results_no_gax(self): from google.cloud import speech - from google.cloud.speech.sample import Sample from unit_tests._fixtures import SYNC_RECOGNIZE_EMPTY_RESPONSE credentials = _Credentials() client = self._make_one(credentials=credentials, use_gax=False) client._connection = _Connection(SYNC_RECOGNIZE_EMPTY_RESPONSE) - sample = Sample(source_uri=self.AUDIO_SOURCE_URI, - encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample = client.sample(source_uri=self.AUDIO_SOURCE_URI, + encoding=speech.Encoding.FLAC, + sample_rate=self.SAMPLE_RATE) with self.assertRaises(ValueError): - client.sync_recognize(sample) + sample.sync_recognize() def test_sync_recognize_with_empty_results_gax(self): from google.cloud._testing import _Monkey from google.cloud import speech from google.cloud.speech import _gax - from google.cloud.speech.sample import Sample credentials = _Credentials() client = self._make_one(credentials=credentials, use_gax=True) @@ -268,12 +263,12 @@ def speech_api(channel=None): channel_args, [(credentials, _gax.DEFAULT_USER_AGENT, host)]) - sample = Sample(source_uri=self.AUDIO_SOURCE_URI, - encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample = client.sample(source_uri=self.AUDIO_SOURCE_URI, + encoding=speech.Encoding.FLAC, + sample_rate=self.SAMPLE_RATE) with self.assertRaises(ValueError): - client.sync_recognize(sample) + sample.sync_recognize() def test_sync_recognize_with_gax(self): from google.cloud._testing import _Monkey @@ -326,7 +321,7 @@ def 
speech_api(channel=None): channel_args, [(creds, _gax.DEFAULT_USER_AGENT, host)]) - results = client.sync_recognize(sample) + results = sample.sync_recognize() self.assertEqual(len(results), 2) self.assertEqual(results[0].transcript, @@ -340,22 +335,20 @@ def speech_api(channel=None): def test_async_supported_encodings(self): from google.cloud import speech - from google.cloud.speech.sample import Sample credentials = _Credentials() client = self._make_one(credentials=credentials) client._connection = _Connection({}) - sample = Sample(source_uri=self.AUDIO_SOURCE_URI, - encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample = client.sample(source_uri=self.AUDIO_SOURCE_URI, + encoding=speech.Encoding.FLAC, + sample_rate=self.SAMPLE_RATE) with self.assertRaises(ValueError): - client.async_recognize(sample) + sample.async_recognize() def test_async_recognize_no_gax(self): from google.cloud import speech from google.cloud.speech.operation import Operation - from google.cloud.speech.sample import Sample from unit_tests._fixtures import ASYNC_RECOGNIZE_RESPONSE RETURNED = ASYNC_RECOGNIZE_RESPONSE @@ -364,10 +357,10 @@ def test_async_recognize_no_gax(self): client = self._make_one(credentials=credentials, use_gax=False) client._connection = _Connection(RETURNED) - sample = Sample(source_uri=self.AUDIO_SOURCE_URI, - encoding=speech.Encoding.LINEAR16, - sample_rate=self.SAMPLE_RATE) - operation = client.async_recognize(sample) + sample = client.sample(source_uri=self.AUDIO_SOURCE_URI, + encoding=speech.Encoding.LINEAR16, + sample_rate=self.SAMPLE_RATE) + operation = sample.async_recognize() self.assertIsInstance(operation, Operation) self.assertIs(operation.client, client) self.assertEqual(operation.caller_metadata, @@ -416,20 +409,24 @@ def speech_api(channel=None): low_level.SERVICE_ADDRESS) self.assertEqual(channel_args, [expected]) - operation = client.async_recognize(sample) + operation = sample.async_recognize() self.assertIsInstance(operation, 
Operation) self.assertFalse(operation.complete) self.assertIsNone(operation.response) def test_streaming_depends_on_gax(self): + from google.cloud import speech from google.cloud._testing import _Monkey credentials = _Credentials() client = self._make_one(credentials=credentials, use_gax=False) client.connection = _Connection() + sample = client.sample(content=self.AUDIO_CONTENT, + encoding=speech.Encoding.LINEAR16, + sample_rate=self.SAMPLE_RATE) with self.assertRaises(EnvironmentError): - list(client.streaming_recognize({})) + list(sample.streaming_recognize()) def test_streaming_closed_stream(self): from io import BytesIO @@ -469,7 +466,7 @@ def speech_api(channel=None): client._speech_api = _gax.GAPICSpeechAPI(client) with self.assertRaises(ValueError): - list(client.streaming_recognize(sample)) + list(sample.streaming_recognize()) def test_stream_recognize_interim_results(self): from io import BytesIO @@ -478,7 +475,7 @@ def test_stream_recognize_interim_results(self): from google.cloud.speech import _gax from google.cloud.speech.encoding import Encoding - from google.cloud.speech.client import StreamingSpeechResult + from google.cloud.speech.result import StreamingSpeechResult stream = BytesIO(b'Some audio data...') credentials = _Credentials() @@ -524,8 +521,7 @@ def speech_api(channel=None): encoding=Encoding.LINEAR16, sample_rate=self.SAMPLE_RATE) - results = list(client.streaming_recognize(sample, - interim_results=True)) + results = list(sample.streaming_recognize(interim_results=True)) self.assertEqual(len(results), 3) self.assertIsInstance(results[0], StreamingSpeechResult) @@ -598,7 +594,7 @@ def speech_api(channel=None): encoding=Encoding.LINEAR16, sample_rate=self.SAMPLE_RATE) - results = list(client.streaming_recognize(sample)) + results = list(sample.streaming_recognize()) self.assertEqual(len(results), 1) self.assertEqual(results[0].alternatives[0].transcript, alternatives[0]['transcript']) @@ -642,7 +638,7 @@ def speech_api(channel=None): 
encoding=Encoding.LINEAR16, sample_rate=self.SAMPLE_RATE) - results = list(client.streaming_recognize(sample)) + results = list(sample.streaming_recognize()) self.assertEqual(results, []) def test_speech_api_with_gax(self): diff --git a/system_tests/speech.py b/system_tests/speech.py index f213e133a120..e3685502f77c 100644 --- a/system_tests/speech.py +++ b/system_tests/speech.py @@ -98,8 +98,7 @@ def _make_sync_request(self, content=None, source_uri=None, source_uri=source_uri, encoding=speech.Encoding.LINEAR16, sample_rate=16000) - return client.sync_recognize(sample, - language_code='en-US', + return sample.sync_recognize(language_code='en-US', max_alternatives=max_alternatives, profanity_filter=True, speech_context=['Google', 'cloud']) @@ -111,8 +110,7 @@ def _make_async_request(self, content=None, source_uri=None, source_uri=source_uri, encoding=speech.Encoding.LINEAR16, sample_rate=16000) - return client.async_recognize(sample, - language_code='en-US', + return sample.async_recognize(language_code='en-US', max_alternatives=max_alternatives, profanity_filter=True, speech_context=['Google', 'cloud']) @@ -123,8 +121,7 @@ def _make_streaming_request(self, file_obj, single_utterance=True, sample = client.sample(content=file_obj, encoding=speech.Encoding.LINEAR16, sample_rate=16000) - return client.streaming_recognize(sample, - single_utterance=single_utterance, + return sample.streaming_recognize(single_utterance=single_utterance, interim_results=interim_results, speech_context=['hello', 'google'])