// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.bigquery.storage.v1beta1;

import "google/cloud/bigquery/storage/v1beta1/avro.proto";
import "google/cloud/bigquery/storage/v1beta1/read_options.proto";
import "google/cloud/bigquery/storage/v1beta1/table_reference.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage";
option java_package = "com.google.cloud.bigquery.storage.v1beta1";

// BigQuery storage API.
//
// The BigQuery storage API can be used to read data stored in BigQuery.
service BigQueryStorage {
  // Creates a new read session. A read session divides the contents of a
  // BigQuery table into one or more streams, which can then be used to read
  // data from the table. The read session also specifies properties of the
  // data to be read, such as a list of columns or a push-down filter describing
  // the rows to be returned.
  //
  // A particular row can be read by at most one stream. When the caller has
  // reached the end of each stream in the session, then all the data in the
  // table has been read.
  //
  // Read sessions automatically expire 24 hours after they are created and do
  // not require manual clean-up by the caller.
  rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) {
  }

  // Reads rows from the table in the format prescribed by the read session.
  // Each response contains one or more table rows, up to a maximum of 10 MiB
  // per response; read requests which attempt to read individual rows larger
  // than this will fail.
  //
  // Each request also returns a set of stream statistics reflecting the
  // estimated total number of rows in the read stream. This number is computed
  // based on the total table size and the number of active streams in the read
  // session, and may change as other streams continue to read data.
  rpc ReadRows(ReadRowsRequest) returns (stream ReadRowsResponse) {
  }

  // Creates additional streams for a ReadSession. This API can be used to
  // dynamically adjust the parallelism of a batch processing task upwards by
  // adding additional workers.
  rpc BatchCreateReadSessionStreams(BatchCreateReadSessionStreamsRequest) returns (BatchCreateReadSessionStreamsResponse) {
  }

  // Triggers the graceful termination of a single stream in a ReadSession. This
  // API can be used to dynamically adjust the parallelism of a batch processing
  // task downwards without losing data.
  //
  // This API does not delete the stream -- it remains visible in the
  // ReadSession, and any data processed by the stream is not released to other
  // streams. However, no additional data will be assigned to the stream once
  // this call completes. Callers must continue reading data on the stream until
  // the end of the stream is reached so that data which has already been
  // assigned to the stream will be processed.
  //
  // This method will return an error if there are no other live streams
  // in the Session, or if SplitReadStream() has been called on the given
  // Stream.
  //
  // NOTE(review): returning google.protobuf.Empty prevents ever adding response
  // fields without a breaking change; a dedicated FinalizeStreamResponse would
  // be preferable, but changing it now would break the published v1beta1 API.
  rpc FinalizeStream(FinalizeStreamRequest) returns (google.protobuf.Empty) {
  }

  // Splits a given read stream into two Streams. These streams are referred to
  // as the primary and the residual of the split. The original stream can still
  // be read from in the same manner as before. Both of the returned streams can
  // also be read from, and the total rows return by both child streams will be
  // the same as the rows read from the original stream.
  //
  // Moreover, the two child streams will be allocated back to back in the
  // original Stream. Concretely, it is guaranteed that for streams Original,
  // Primary, and Residual, that Original[0-j] = Primary[0-j] and
  // Original[j-n] = Residual[0-m] once the streams have been read to
  // completion.
  //
  // This method is guaranteed to be idempotent.
  rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {
  }
}

// Information about a single data stream within a read session.
message Stream {
  // Name of the stream. In the form
  // `/projects/{project_id}/stream/{stream_id}`.
  string name = 1;

  // Rows in the stream.
  int64 row_count = 2;
}

// Expresses a point within a given stream using an offset position.
message StreamPosition {
  // Identifier for a given Stream.
  Stream stream = 1;

  // Position in the stream. Offsets are zero-based row indexes within the
  // stream.
  int64 offset = 2;
}

// Information returned from a `CreateReadSession` request.
message ReadSession {
  // Unique identifier for the session. In the form
  // `projects/{project_id}/sessions/{session_id}`.
  string name = 1;

  // Time at which the session becomes invalid. After this time, subsequent
  // requests to read this Session will return errors.
  google.protobuf.Timestamp expire_time = 2;

  // The schema for the read. If read_options.selected_fields is set, the
  // schema may be different from the table schema as it will only contain
  // the selected fields.
  oneof schema {
    // Avro schema.
    AvroSchema avro_schema = 5;
  }

  // Streams associated with this session.
  repeated Stream streams = 4;

  // Table that this ReadSession is reading from.
  TableReference table_reference = 7;

  // Any modifiers which are applied when reading from the specified table.
  TableModifiers table_modifiers = 8;

  // NOTE(review): field numbers 3 and 6 are unused — presumably removed
  // fields; consider adding `reserved 3, 6;` to prevent accidental reuse.
}

// Creates a new read session, which may include additional options such as
// requested parallelism, projection filters and constraints.
message CreateReadSessionRequest {
  // Required. Reference to the table to read.
  TableReference table_reference = 1;

  // Required. String of the form "projects/your-project-id" indicating the
  // project this ReadSession is associated with. This is the project that will
  // be billed for usage.
  string parent = 6;

  // Optional. Any modifiers to the Table (e.g. snapshot timestamp).
  TableModifiers table_modifiers = 2;

  // Optional. Initial number of streams. If unset or 0, we will
  // provide a value of streams so as to produce reasonable throughput. Must be
  // non-negative. The number of streams may be lower than the requested number,
  // depending on the amount of parallelism that is reasonable for the table and
  // the maximum amount of parallelism allowed by the system.
  //
  // Streams must be read starting from offset 0.
  int32 requested_streams = 3;

  // Optional. Read options for this session (e.g. column selection, filters).
  TableReadOptions read_options = 4;

  // Data output format. Currently default to Avro.
  DataFormat format = 5;
}

// Requesting row data via `ReadRows` must provide Stream position information.
message ReadRowsRequest {
  // Required. Identifier of the position in the stream to start reading from.
  // The offset requested must be less than the last row read from ReadRows.
  // Requesting a larger offset is undefined.
  StreamPosition read_position = 1;
}

// Progress information for a given Stream.
message StreamStatus {
  // Number of estimated rows in the current stream. May change over time as
  // different readers in the stream progress at rates which are relatively fast
  // or slow.
  int64 estimated_row_count = 1;
}

// Information on if the current connection is being throttled.
message ThrottleStatus {
  // How much this connection is being throttled.
  // 0 is no throttling, 100 is completely throttled.
  int32 throttle_percent = 1;
}

// Response from calling `ReadRows` may include row data, progress and
// throttling information.
message ReadRowsResponse {
  // Row data is returned in format specified during session creation.
  oneof rows {
    // Serialized row data in AVRO format.
    AvroRows avro_rows = 3;
  }

  // Estimated stream statistics.
  StreamStatus status = 2;

  // Throttling status. If unset, the latest response still describes
  // the current throttling status.
  ThrottleStatus throttle_status = 5;

  // NOTE(review): field numbers 1 and 4 are unused — presumably removed
  // fields; consider adding `reserved 1, 4;` to prevent accidental reuse.
}

// Information needed to request additional streams for an established read
// session.
message BatchCreateReadSessionStreamsRequest {
  // Required. Must be a non-expired session obtained from a call to
  // CreateReadSession. Only the name field needs to be set.
  ReadSession session = 1;

  // Required. Number of new streams requested. Must be positive.
  // Number of added streams may be less than this, see CreateReadSessionRequest
  // for more information.
  int32 requested_streams = 2;
}

// The response from `BatchCreateReadSessionStreams` returns the stream
// identifiers for the newly created streams.
message BatchCreateReadSessionStreamsResponse {
  // Newly added streams.
  repeated Stream streams = 1;
}

// Request information for invoking `FinalizeStream`.
message FinalizeStreamRequest {
  // Stream to finalize.
  //
  // NOTE(review): field number 1 is unused — presumably a removed field;
  // consider adding `reserved 1;` to prevent accidental reuse.
  Stream stream = 2;
}

// Request information for `SplitReadStream`.
message SplitReadStreamRequest {
  // Stream to split.
  Stream original_stream = 1;
}

// Response from `SplitReadStream`.
message SplitReadStreamResponse {
  // Primary stream. Will contain the beginning portion of
  // |original_stream|.
  Stream primary_stream = 1;

  // Remainder stream. Will contain the tail of |original_stream|.
  Stream remainder_stream = 2;
}

// Data format for input or output data.
enum DataFormat {
  // Data format is unspecified.
  DATA_FORMAT_UNSPECIFIED = 0;

  // Avro is a standard open source row based file format.
  // See https://avro.apache.org/ for more details.
  AVRO = 1;
}