Skip to content

Commit

Permalink
feat: Support output transcript to Google Cloud Storage for LongRunni…
Browse files Browse the repository at this point in the history
…ngRecognize (#128)


* chore: upgrade gapic-generator-python to 0.42.2

PiperOrigin-RevId: 361662015

Source-Author: Google APIs <noreply@google.com>
Source-Date: Mon Mar 8 14:47:18 2021 -0800
Source-Repo: googleapis/googleapis
Source-Sha: 28a591963253d52ce3a25a918cafbdd9928de8cf
Source-Link: googleapis/googleapis@28a5919

* feat: Support output transcript to GCS for LongRunningRecognize.

PiperOrigin-RevId: 362294447

Source-Author: Google APIs <noreply@google.com>
Source-Date: Thu Mar 11 08:07:37 2021 -0800
Source-Repo: googleapis/googleapis
Source-Sha: b6ebac16c3aecb798d4f25443d96df2f42a965ca
Source-Link: googleapis/googleapis@b6ebac1

* feat: Support output transcript to GCS for LongRunningRecognize.

PiperOrigin-RevId: 362934100

Source-Author: Google APIs <noreply@google.com>
Source-Date: Mon Mar 15 07:18:03 2021 -0700
Source-Repo: googleapis/googleapis
Source-Sha: 72326861be446be27d53af95c87e6e313367c371
Source-Link: googleapis/googleapis@7232686
  • Loading branch information
yoshi-automation authored Mar 19, 2021
1 parent a39331b commit 5fdbd67
Show file tree
Hide file tree
Showing 11 changed files with 469 additions and 101 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,41 +16,41 @@
#

from .cloud_speech import (
RecognizeRequest,
LongRunningRecognizeMetadata,
LongRunningRecognizeRequest,
StreamingRecognizeRequest,
StreamingRecognitionConfig,
LongRunningRecognizeResponse,
RecognitionAudio,
RecognitionConfig,
SpeakerDiarizationConfig,
RecognitionMetadata,
SpeechContext,
RecognitionAudio,
RecognizeRequest,
RecognizeResponse,
LongRunningRecognizeResponse,
LongRunningRecognizeMetadata,
StreamingRecognizeResponse,
StreamingRecognitionResult,
SpeechRecognitionResult,
SpeakerDiarizationConfig,
SpeechContext,
SpeechRecognitionAlternative,
SpeechRecognitionResult,
StreamingRecognitionConfig,
StreamingRecognitionResult,
StreamingRecognizeRequest,
StreamingRecognizeResponse,
WordInfo,
)

__all__ = (
"RecognizeRequest",
"LongRunningRecognizeMetadata",
"LongRunningRecognizeRequest",
"StreamingRecognizeRequest",
"StreamingRecognitionConfig",
"LongRunningRecognizeResponse",
"RecognitionAudio",
"RecognitionConfig",
"SpeakerDiarizationConfig",
"RecognitionMetadata",
"SpeechContext",
"RecognitionAudio",
"RecognizeRequest",
"RecognizeResponse",
"LongRunningRecognizeResponse",
"LongRunningRecognizeMetadata",
"StreamingRecognizeResponse",
"StreamingRecognitionResult",
"SpeechRecognitionResult",
"SpeakerDiarizationConfig",
"SpeechContext",
"SpeechRecognitionAlternative",
"SpeechRecognitionResult",
"StreamingRecognitionConfig",
"StreamingRecognitionResult",
"StreamingRecognizeRequest",
"StreamingRecognizeResponse",
"WordInfo",
)
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from .types.cloud_speech import StreamingRecognitionResult
from .types.cloud_speech import StreamingRecognizeRequest
from .types.cloud_speech import StreamingRecognizeResponse
from .types.cloud_speech import TranscriptOutputConfig
from .types.cloud_speech import WordInfo
from .types.cloud_speech_adaptation import CreateCustomClassRequest
from .types.cloud_speech_adaptation import CreatePhraseSetRequest
Expand All @@ -59,7 +60,6 @@ class SpeechClient(SpeechHelpers, SpeechClient):


__all__ = (
"AdaptationClient",
"CreateCustomClassRequest",
"CreatePhraseSetRequest",
"CustomClass",
Expand All @@ -82,15 +82,17 @@ class SpeechClient(SpeechHelpers, SpeechClient):
"RecognizeResponse",
"SpeakerDiarizationConfig",
"SpeechAdaptation",
"SpeechClient",
"SpeechContext",
"SpeechRecognitionAlternative",
"SpeechRecognitionResult",
"StreamingRecognitionConfig",
"StreamingRecognitionResult",
"StreamingRecognizeRequest",
"StreamingRecognizeResponse",
"TranscriptOutputConfig",
"UpdateCustomClassRequest",
"UpdatePhraseSetRequest",
"WordInfo",
"SpeechClient",
"AdaptationClient",
)
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package google.cloud.speech.v1p1beta1;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/speech/v1p1beta1/resource.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/any.proto";
Expand All @@ -37,8 +36,7 @@ option objc_class_prefix = "GCS";
// Service that implements Google Cloud Speech API.
service Speech {
option (google.api.default_host) = "speech.googleapis.com";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-platform";
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

// Performs synchronous speech recognition: receive results after all audio
// has been sent and processed.
Expand All @@ -56,8 +54,7 @@ service Speech {
// a `LongRunningRecognizeResponse` message.
// For more information on asynchronous speech recognition, see the
// [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
rpc LongRunningRecognize(LongRunningRecognizeRequest)
returns (google.longrunning.Operation) {
rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1p1beta1/speech:longrunningrecognize"
body: "*"
Expand All @@ -71,8 +68,8 @@ service Speech {

// Performs bidirectional streaming speech recognition: receive results while
// sending audio. This method is only available via the gRPC API (not REST).
rpc StreamingRecognize(stream StreamingRecognizeRequest)
returns (stream StreamingRecognizeResponse) {}
rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
}
}

// The top-level message sent by the client for the `Recognize` method.
Expand All @@ -94,6 +91,19 @@ message LongRunningRecognizeRequest {

// Required. The audio data to be recognized.
RecognitionAudio audio = 2 [(google.api.field_behavior) = REQUIRED];

// Optional. Specifies an optional destination for the recognition results.
TranscriptOutputConfig output_config = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Specifies an optional destination for the recognition results.
// Used as the type of the `output_config` fields on
// `LongRunningRecognizeRequest`, `LongRunningRecognizeResponse`, and
// `LongRunningRecognizeMetadata`.
message TranscriptOutputConfig {
// The kind of destination. A Cloud Storage URI is the only member
// defined in this oneof.
oneof output_type {
// Specifies a Cloud Storage URI for the recognition results. Must be
// specified in the format: `gs://bucket_name/object_name`, and the bucket
// must already exist.
string gcs_uri = 1;
}
}

// The top-level message sent by the client for the `StreamingRecognize` method.
Expand Down Expand Up @@ -171,7 +181,7 @@ message RecognitionConfig {
// a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech
// recognition can be reduced if lossy codecs are used to capture or transmit
// audio, particularly if background noise is present. Lossy codecs include
// `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, and `MP3`.
// `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, `MP3`.
//
// The `FLAC` and `WAV` audio file formats include a header that describes the
// included audio content. You can request recognition for `WAV` files that
Expand All @@ -182,8 +192,7 @@ message RecognitionConfig {
// an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
// encoding configuration must match the encoding described in the audio
// header; otherwise the request returns an
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
// code.
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
enum AudioEncoding {
// Not specified.
ENCODING_UNSPECIFIED = 0;
Expand Down Expand Up @@ -237,8 +246,7 @@ message RecognitionConfig {

// Encoding of audio data sent in all `RecognitionAudio` messages.
// This field is optional for `FLAC` and `WAV` audio files and required
// for all other audio formats. For details, see
// [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
AudioEncoding encoding = 1;

// Sample rate in Hertz of the audio data sent in all
Expand All @@ -247,8 +255,7 @@ message RecognitionConfig {
// source to 16000 Hz. If that's not possible, use the native sample rate of
// the audio source (instead of re-sampling).
// This field is optional for FLAC and WAV audio files, but is
// required for all other audio formats. For details, see
// [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
// required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
int32 sample_rate_hertz = 2;

// The number of channels in the input audio data.
Expand Down Expand Up @@ -424,8 +431,10 @@ message SpeakerDiarizationConfig {
int32 max_speaker_count = 3;

// Output only. Unused.
int32 speaker_tag = 5
[deprecated = true, (google.api.field_behavior) = OUTPUT_ONLY];
int32 speaker_tag = 5 [
deprecated = true,
(google.api.field_behavior) = OUTPUT_ONLY
];
}

// Description of audio data to be recognized.
Expand Down Expand Up @@ -589,8 +598,8 @@ message SpeechContext {

// Contains audio data in the encoding specified in the `RecognitionConfig`.
// Either `content` or `uri` must be supplied. Supplying both or neither
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
// See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
message RecognitionAudio {
// The audio source, which is either inline content or a Google Cloud
// Storage uri.
Expand All @@ -605,9 +614,8 @@ message RecognitionAudio {
// Currently, only Google Cloud Storage URIs are
// supported, which must be specified in the following format:
// `gs://bucket_name/object_name` (other URI formats return
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
// For more information, see [Request
// URIs](https://cloud.google.com/storage/docs/reference-uris).
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
// [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
string uri = 2;
}
}
Expand All @@ -630,6 +638,12 @@ message LongRunningRecognizeResponse {
// Sequential list of transcription results corresponding to
// sequential portions of audio.
repeated SpeechRecognitionResult results = 2;

// Original output config if present in the request.
TranscriptOutputConfig output_config = 6;

// If the transcript output fails, this field contains the relevant error.
google.rpc.Status output_error = 7;
}

// Describes the progress of a long-running `LongRunningRecognize` call. It is
Expand All @@ -646,9 +660,12 @@ message LongRunningRecognizeMetadata {
// Time of the most recent processing update.
google.protobuf.Timestamp last_update_time = 3;

// Output only. The URI of the audio file being transcribed. Empty if the
// audio was sent as byte content.
// Output only. The URI of the audio file being transcribed. Empty if the audio was sent
// as byte content.
string uri = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. A copy of the TranscriptOutputConfig if it was set in the request.
TranscriptOutputConfig output_config = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// `StreamingRecognizeResponse` is the only message returned to the client by
Expand Down Expand Up @@ -762,9 +779,9 @@ message StreamingRecognitionResult {
// For audio_channel_count = N, its output values can range from '1' to 'N'.
int32 channel_tag = 5;

// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
// language tag of the language in this result. This language code was
// detected to have the most likelihood of being spoken in the audio.
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
// of the language in this result. This language code was detected to have
// the most likelihood of being spoken in the audio.
string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
}

Expand All @@ -781,9 +798,9 @@ message SpeechRecognitionResult {
// For audio_channel_count = N, its output values can range from '1' to 'N'.
int32 channel_tag = 2;

// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
// language tag of the language in this result. This language code was
// detected to have the most likelihood of being spoken in the audio.
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
// of the language in this result. This language code was detected to have
// the most likelihood of being spoken in the audio.
string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ service Adaptation {
patch: "/v1p1beta1/{phrase_set.name=projects/*/locations/*/phraseSets/*}"
body: "phrase_set"
};
option (google.api.method_signature) = "phrase_set,update_mask";
}

// Delete a phrase set.
Expand Down Expand Up @@ -110,6 +111,7 @@ service Adaptation {
patch: "/v1p1beta1/{custom_class.name=projects/*/locations/*/customClasses/*}"
body: "custom_class"
};
option (google.api.method_signature) = "custom_class,update_mask";
}

// Delete a custom class.
Expand Down
Loading

0 comments on commit 5fdbd67

Please sign in to comment.