@@ -23,14 +23,15 @@ option java_multiple_files = true;
23
23
option java_outer_classname = "SpeechProto" ;
24
24
option java_package = "com.google.cloud.speech.v1" ;
25
25
26
+
26
27
// Service that implements the Google Cloud Speech API.
27
28
service Speech {
28
29
// Perform bidirectional streaming speech recognition on audio using gRPC.
29
30
rpc Recognize (stream RecognizeRequest ) returns (stream RecognizeResponse );
30
31
31
32
// Perform non-streaming speech recognition on audio using HTTPS.
32
33
rpc NonStreamingRecognize (RecognizeRequest ) returns (NonStreamingRecognizeResponse ) {
33
- option (. google .api .http ) = { post : "/v1 /speech :recognize " body : "*" };
34
+ option (google.api.http ) = { post : "/v1/speech:recognize" body: "*" };
34
35
}
35
36
}
36
37
@@ -54,7 +55,7 @@ message RecognizeRequest {
54
55
55
56
// The audio data to be recognized. For `NonStreamingRecognize`, all the
56
57
// audio data must be contained in the first (and only) `RecognizeRequest`
57
- // message. For streaming `Recognize`, sequential chunks of audio data are
58
+ // message. For streaming `Recognize`, sequential chunks of audio data are
58
59
// sent in sequential `RecognizeRequest` messages.
59
60
AudioRequest audio_request = 2 ;
60
61
}
@@ -64,7 +65,7 @@ message RecognizeRequest {
64
65
message InitialRecognizeRequest {
65
66
// Audio encoding of the data sent in the audio message.
66
67
enum AudioEncoding {
67
- // Not specified. Will return result ` INVALID_ARGUMENT` .
68
+ // Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
68
69
ENCODING_UNSPECIFIED = 0 ;
69
70
70
71
// Uncompressed 16-bit signed little-endian samples.
@@ -118,8 +119,6 @@ message InitialRecognizeRequest {
118
119
// profanities, replacing all but the initial character in each filtered word
119
120
// with asterisks, e.g. "f***". If set to `false` or omitted, profanities
120
121
// won't be filtered out.
121
- // Note that profanity filtering is not implemented for all languages.
122
- // If the language is not supported, this setting has no effect.
123
122
bool profanity_filter = 5 ;
124
123
125
124
// [Optional] If `false` or omitted, the recognizer will detect a single
@@ -146,13 +145,38 @@ message InitialRecognizeRequest {
146
145
// as they become available.
147
146
// If `false` or omitted, no `EndpointerEvents` are returned.
148
147
bool enable_endpointer_events = 8 ;
148
+
149
+ // [Optional] URI that points to a file where the recognition result should
150
+ // be stored in JSON format. If omitted or an empty string, the recognition
151
+ // result is returned in the response. Should be specified only for
152
+ // `NonStreamingRecognize`. If specified in a `Recognize` request,
153
+ // `Recognize` returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
154
+ // If specified in a `NonStreamingRecognize` request,
155
+ // `NonStreamingRecognize` returns immediately, and the output file
156
+ // is created asynchronously once the audio processing completes.
157
+ // Currently, only Google Cloud Storage URIs are supported, which must be
158
+ // specified in the following format: `gs://bucket_name/object_name`
159
+ // (other URI formats return [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
160
+ // more information, see [Request URIs](/storage/docs/reference-uris).
161
+ string output_uri = 9 ;
149
162
}
150
163
151
164
// Contains audio data in the format specified in the `InitialRecognizeRequest`.
165
+ // Either `content` or `uri` must be supplied. Supplying both or neither
166
+ // returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
152
167
message AudioRequest {
153
- // [Required] The audio data bytes encoded as specified in
154
- // `InitialRecognizeRequest`.
168
+ // The audio data bytes encoded as specified in
169
+ // `InitialRecognizeRequest`. Note: as with all bytes fields, protocol buffers
170
+ // use a pure binary representation, whereas JSON representations use base64.
155
171
bytes content = 1 ;
172
+
173
+ // URI that points to a file that contains audio data bytes as specified in
174
+ // `InitialRecognizeRequest`. Currently, only Google Cloud Storage URIs are
175
+ // supported, which must be specified in the following format:
176
+ // `gs://bucket_name/object_name` (other URI formats return
177
+ // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
178
+ // [Request URIs](/storage/docs/reference-uris).
179
+ string uri = 2 ;
156
180
}
157
181
158
182
// `NonStreamingRecognizeResponse` is the only message returned to the client by
@@ -191,10 +215,14 @@ message RecognizeResponse {
191
215
192
216
// [Output-only] If set, returns a [google.rpc.Status][] message that
193
217
// specifies the error for the operation.
194
- .google.rpc.Status error = 1 ;
195
-
196
- // [Output-only] May contain zero or one `is_final=true` result (the newly
197
- // settled portion). May also contain zero or more `is_final=false` results.
218
+ google.rpc.Status error = 1 ;
219
+
220
+ // [Output-only] For `continuous=false`, this repeated list contains zero or
221
+ // one result that corresponds to all of the audio processed so far. For
222
+ // `continuous=true`, this repeated list contains zero or more results that
223
+ // correspond to consecutive portions of the audio being processed.
224
+ // In both cases, contains zero or one `is_final=true` result (the newly
225
+ // settled portion), followed by zero or more `is_final=false` results.
198
226
repeated SpeechRecognitionResult results = 2 ;
199
227
200
228
// [Output-only] Indicates the lowest index in the `results` array that has
@@ -206,7 +234,10 @@ message RecognizeResponse {
206
234
EndpointerEvent endpoint = 4 ;
207
235
}
208
236
237
+ // A speech recognition result corresponding to a portion of the audio.
209
238
message SpeechRecognitionResult {
239
+ // [Output-only] May contain one or more recognition hypotheses (up to the
240
+ // maximum specified in `max_alternatives`).
210
241
repeated SpeechRecognitionAlternative alternatives = 1 ;
211
242
212
243
// [Output-only] Set `true` if this is the final time the speech service will
0 commit comments