
Commit 96225eb

Add protos as an artifact to library (#7205)
1 parent 80228ff commit 96225eb

6 files changed: +400 -10 lines changed
avro.proto: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1beta1;

option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage";
option java_outer_classname = "AvroProto";
option java_package = "com.google.cloud.bigquery.storage.v1beta1";


// Avro schema.
message AvroSchema {
  // JSON-serialized schema, as described at
  // https://avro.apache.org/docs/1.8.1/spec.html
  string schema = 1;
}

// Avro rows.
message AvroRows {
  // Binary serialized rows in a block.
  bytes serialized_binary_rows = 1;

  // The count of rows in the returned block.
  int64 row_count = 2;
}
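
For reference (not part of this commit): a minimal sketch of decoding an AvroRows block against its AvroSchema on the consumer side. It assumes Go types generated from this proto into the go_package above, and the third-party github.com/linkedin/goavro/v2 codec; both are assumptions outside this diff.

// Package avrodecode sketches consuming AvroSchema/AvroRows.
package avrodecode

import (
	"fmt"

	"github.com/linkedin/goavro/v2" // assumed third-party Avro codec
	storagepb "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1"
)

func decodeBlock(schema *storagepb.AvroSchema, rows *storagepb.AvroRows) error {
	// schema carries the JSON-serialized Avro schema.
	codec, err := goavro.NewCodec(schema.GetSchema())
	if err != nil {
		return err
	}
	// serialized_binary_rows concatenates row_count Avro-encoded records.
	buf := rows.GetSerializedBinaryRows()
	for i := int64(0); i < rows.GetRowCount(); i++ {
		var datum interface{}
		datum, buf, err = codec.NativeFromBinary(buf)
		if err != nil {
			return err
		}
		fmt.Println(datum)
	}
	return nil
}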
read_options.proto: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1beta1;

option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage";
option java_package = "com.google.cloud.bigquery.storage.v1beta1";


// Options dictating how we read a table.
message TableReadOptions {
  // Optional. Names of the fields in the table that should be read. If empty,
  // all fields will be read. If the specified field is a nested field, all of
  // its sub-fields will be selected. The output field order is unrelated to
  // the order of fields in selected_fields.
  repeated string selected_fields = 1;

  // Optional. SQL text filtering statement, similar to a WHERE clause in
  // a query. Currently, only combinations of predicates that compare a
  // column against a constant value are supported. Aggregates are not
  // supported.
  //
  // Example: "a > DATE '2014-9-27' AND (b > 5 AND c LIKE 'date')"
  string row_restriction = 2;
}
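
For reference (not part of this commit): a sketch of building TableReadOptions with the Go types assumed generated from this file; the column names and filter text are placeholders.

// Package readopts sketches a narrowed read.
package readopts

import (
	storagepb "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1"
)

// narrowRead selects two columns (nested fields pull in all sub-fields)
// and pushes a simple predicate down to the storage layer; per the proto
// comment, only column-vs-constant comparisons are supported, no aggregates.
func narrowRead() *storagepb.TableReadOptions {
	return &storagepb.TableReadOptions{
		SelectedFields: []string{"name", "address.city"},
		RowRestriction: "num_orders > 5 AND state = 'WA'",
	}
}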
storage.proto: 268 additions & 0 deletions
@@ -0,0 +1,268 @@
// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1beta1;

import "google/cloud/bigquery/storage/v1beta1/avro.proto";
import "google/cloud/bigquery/storage/v1beta1/read_options.proto";
import "google/cloud/bigquery/storage/v1beta1/table_reference.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage";
option java_package = "com.google.cloud.bigquery.storage.v1beta1";


// BigQuery storage API.
//
// The BigQuery storage API can be used to read data stored in BigQuery.
service BigQueryStorage {
  // Creates a new read session. A read session divides the contents of a
  // BigQuery table into one or more streams, which can then be used to read
  // data from the table. The read session also specifies properties of the
  // data to be read, such as a list of columns or a push-down filter
  // describing the rows to be returned.
  //
  // A particular row can be read by at most one stream. When the caller has
  // reached the end of each stream in the session, all the data in the
  // table has been read.
  //
  // Read sessions automatically expire 24 hours after they are created and do
  // not require manual clean-up by the caller.
  rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) {}

  // Reads rows from the table in the format prescribed by the read session.
  // Each response contains one or more table rows, up to a maximum of 10 MiB
  // per response; read requests which attempt to read individual rows larger
  // than this will fail.
  //
  // Each request also returns a set of stream statistics reflecting the
  // estimated total number of rows in the read stream. This number is computed
  // based on the total table size and the number of active streams in the read
  // session, and may change as other streams continue to read data.
  rpc ReadRows(ReadRowsRequest) returns (stream ReadRowsResponse) {}

  // Creates additional streams for a ReadSession. This API can be used to
  // dynamically adjust the parallelism of a batch processing task upwards by
  // adding additional workers.
  rpc BatchCreateReadSessionStreams(BatchCreateReadSessionStreamsRequest) returns (BatchCreateReadSessionStreamsResponse) {}

  // Triggers the graceful termination of a single stream in a ReadSession.
  // This API can be used to dynamically adjust the parallelism of a batch
  // processing task downwards without losing data.
  //
  // This API does not delete the stream -- it remains visible in the
  // ReadSession, and any data processed by the stream is not released to other
  // streams. However, no additional data will be assigned to the stream once
  // this call completes. Callers must continue reading data on the stream
  // until the end of the stream is reached so that data which has already
  // been assigned to the stream will be processed.
  //
  // This method will return an error if there are no other live streams
  // in the Session, or if SplitReadStream() has been called on the given
  // Stream.
  rpc FinalizeStream(FinalizeStreamRequest) returns (google.protobuf.Empty) {}

  // Splits a given read stream into two Streams. These streams are referred
  // to as the primary and the residual of the split. The original stream can
  // still be read from in the same manner as before. Both of the returned
  // streams can also be read from, and the total rows returned by both child
  // streams will be the same as the rows read from the original stream.
  //
  // Moreover, the two child streams will be allocated back to back in the
  // original Stream. Concretely, it is guaranteed that for streams Original,
  // Primary, and Residual, that Original[0-j] = Primary[0-j] and
  // Original[j-n] = Residual[0-m] once the streams have been read to
  // completion.
  //
  // This method is guaranteed to be idempotent.
  rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {}
}

// Information about a single data stream within a read session.
message Stream {
  // Name of the stream. In the form
  // `/projects/{project_id}/stream/{stream_id}`.
  string name = 1;

  // Rows in the stream.
  int64 row_count = 2;
}

// Expresses a point within a given stream using an offset position.
message StreamPosition {
  // Identifier for a given Stream.
  Stream stream = 1;

  // Position in the stream.
  int64 offset = 2;
}

// Information returned from a `CreateReadSession` request.
message ReadSession {
  // Unique identifier for the session. In the form
  // `projects/{project_id}/sessions/{session_id}`.
  string name = 1;

  // Time at which the session becomes invalid. After this time, subsequent
  // requests to read this Session will return errors.
  google.protobuf.Timestamp expire_time = 2;

  // The schema for the read. If read_options.selected_fields is set, the
  // schema may be different from the table schema as it will only contain
  // the selected fields.
  oneof schema {
    // Avro schema.
    AvroSchema avro_schema = 5;
  }

  // Streams associated with this session.
  repeated Stream streams = 4;

  // Table that this ReadSession is reading from.
  TableReference table_reference = 7;

  // Any modifiers which are applied when reading from the specified table.
  TableModifiers table_modifiers = 8;
}

// Creates a new read session, which may include additional options such as
// requested parallelism, projection filters and constraints.
message CreateReadSessionRequest {
  // Required. Reference to the table to read.
  TableReference table_reference = 1;

  // Required. String of the form "projects/your-project-id" indicating the
  // project this ReadSession is associated with. This is the project that
  // will be billed for usage.
  string parent = 6;

  // Optional. Any modifiers to the Table (e.g. snapshot timestamp).
  TableModifiers table_modifiers = 2;

  // Optional. Initial number of streams. If unset or 0, we will provide a
  // number of streams chosen to produce reasonable throughput. Must be
  // non-negative. The number of streams may be lower than the requested
  // number, depending on the amount of parallelism that is reasonable for
  // the table and the maximum amount of parallelism allowed by the system.
  //
  // Streams must be read starting from offset 0.
  int32 requested_streams = 3;

  // Optional. Read options for this session (e.g. column selection, filters).
  TableReadOptions read_options = 4;

  // Data output format. Currently defaults to Avro.
  DataFormat format = 5;
}

// A request for row data via `ReadRows` must provide Stream position
// information.
message ReadRowsRequest {
  // Required. Identifier of the position in the stream to start reading from.
  // The offset requested must be less than the last row read from ReadRows.
  // Requesting a larger offset is undefined.
  StreamPosition read_position = 1;
}

// Progress information for a given Stream.
message StreamStatus {
  // Number of estimated rows in the current stream. May change over time as
  // different readers in the stream progress at rates which are relatively
  // fast or slow.
  int64 estimated_row_count = 1;
}

// Information on whether the current connection is being throttled.
message ThrottleStatus {
  // How much this connection is being throttled.
  // 0 is no throttling, 100 is completely throttled.
  int32 throttle_percent = 1;
}

// A response from calling `ReadRows` may include row data, progress, and
// throttling information.
message ReadRowsResponse {
  // Row data is returned in the format specified during session creation.
  oneof rows {
    // Serialized row data in Avro format.
    AvroRows avro_rows = 3;
  }

  // Estimated stream statistics.
  StreamStatus status = 2;

  // Throttling status. If unset, the latest response still describes
  // the current throttling status.
  ThrottleStatus throttle_status = 5;
}

// Information needed to request additional streams for an established read
// session.
message BatchCreateReadSessionStreamsRequest {
  // Required. Must be a non-expired session obtained from a call to
  // CreateReadSession. Only the name field needs to be set.
  ReadSession session = 1;

  // Required. Number of new streams requested. Must be positive.
  // The number of added streams may be less than this; see
  // CreateReadSessionRequest for more information.
  int32 requested_streams = 2;
}

// The response from `BatchCreateReadSessionStreams` returns the stream
// identifiers for the newly created streams.
message BatchCreateReadSessionStreamsResponse {
  // Newly added streams.
  repeated Stream streams = 1;
}

// Request information for invoking `FinalizeStream`.
message FinalizeStreamRequest {
  // Stream to finalize.
  Stream stream = 2;
}

// Request information for `SplitReadStream`.
message SplitReadStreamRequest {
  // Stream to split.
  Stream original_stream = 1;
}

// Response from `SplitReadStream`.
message SplitReadStreamResponse {
  // Primary stream. Will contain the beginning portion of
  // |original_stream|.
  Stream primary_stream = 1;

  // Remainder stream. Will contain the tail of |original_stream|.
  Stream remainder_stream = 2;
}

// Data format for input or output data.
enum DataFormat {
  // Data format is unspecified.
  DATA_FORMAT_UNSPECIFIED = 0;

  // Avro is a standard open source row-based file format.
  // See https://avro.apache.org/ for more details.
  AVRO = 1;
}
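
For reference (not part of this commit): a sketch of the read flow this service defines, creating a session and then draining each stream with ReadRows. It assumes a gRPC client generated from this proto (NewBigQueryStorageClient in the go_package above) and an authenticated *grpc.ClientConn; the project, dataset, and table IDs are placeholders.

// Package bqread sketches the CreateReadSession + ReadRows flow.
package bqread

import (
	"context"
	"io"

	storagepb "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1"
	"google.golang.org/grpc"
)

func readTable(ctx context.Context, conn *grpc.ClientConn) error {
	client := storagepb.NewBigQueryStorageClient(conn)

	// One session; with requested_streams 0 the server picks a stream
	// count sized for reasonable throughput.
	session, err := client.CreateReadSession(ctx, &storagepb.CreateReadSessionRequest{
		Parent: "projects/my-project",
		TableReference: &storagepb.TableReference{
			ProjectId: "my-project",
			DatasetId: "my_dataset",
			TableId:   "my_table",
		},
		RequestedStreams: 2,
	})
	if err != nil {
		return err
	}

	// Each row is assigned to at most one stream, so reading every
	// stream to EOF reads the whole table.
	for _, s := range session.GetStreams() {
		rows, err := client.ReadRows(ctx, &storagepb.ReadRowsRequest{
			ReadPosition: &storagepb.StreamPosition{Stream: s, Offset: 0},
		})
		if err != nil {
			return err
		}
		for {
			resp, err := rows.Recv()
			if err == io.EOF {
				break
			}
			if err != nil {
				return err
			}
			_ = resp.GetAvroRows() // decode as sketched after avro.proto above
		}
	}
	return nil
}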
table_reference.proto: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1beta1;

import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage";
option java_outer_classname = "TableReferenceProto";
option java_package = "com.google.cloud.bigquery.storage.v1beta1";


// Table reference that includes just the 3 strings needed to identify a table.
message TableReference {
  // The assigned project ID of the project.
  string project_id = 1;

  // The ID of the dataset in the above project.
  string dataset_id = 2;

  // The ID of the table in the above dataset.
  string table_id = 3;
}

// All fields in this message are optional.
message TableModifiers {
  // The snapshot time of the table. If not set, interpreted as now.
  google.protobuf.Timestamp snapshot_time = 1;
}
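
For reference (not part of this commit): a sketch of pinning a read to a past snapshot via TableModifiers. Go types generated from these files are assumed, as is the timestamppb well-known-type binding for snapshot_time; all IDs are placeholders.

// Package snapshotread sketches a snapshot-pinned CreateReadSessionRequest.
package snapshotread

import (
	"time"

	storagepb "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1"
	"google.golang.org/protobuf/types/known/timestamppb"
)

// snapshotRequest reads the table as it existed one hour ago; leaving
// snapshot_time unset would mean "now".
func snapshotRequest() *storagepb.CreateReadSessionRequest {
	return &storagepb.CreateReadSessionRequest{
		Parent: "projects/my-project",
		TableReference: &storagepb.TableReference{
			ProjectId: "my-project",
			DatasetId: "my_dataset",
			TableId:   "my_table",
		},
		TableModifiers: &storagepb.TableModifiers{
			SnapshotTime: timestamppb.New(time.Now().Add(-time.Hour)),
		},
	}
}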
