// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.bigquery.storage.v1beta1;

import "google/cloud/bigquery/storage/v1beta1/avro.proto";
import "google/cloud/bigquery/storage/v1beta1/read_options.proto";
import "google/cloud/bigquery/storage/v1beta1/table_reference.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage";
option java_package = "com.google.cloud.bigquery.storage.v1beta1";

// BigQuery storage API.
//
// The BigQuery storage API can be used to read data stored in BigQuery.
service BigQueryStorage {
  // Creates a new read session. A read session divides the contents of a
  // BigQuery table into one or more streams, which can then be used to read
  // data from the table. The read session also specifies properties of the
  // data to be read, such as a list of columns or a push-down filter describing
  // the rows to be returned.
  //
  // A particular row can be read by at most one stream. When the caller has
  // reached the end of each stream in the session, then all the data in the
  // table has been read.
  //
  // Read sessions automatically expire 24 hours after they are created and do
  // not require manual clean-up by the caller.
  rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) {
  }

  // Reads rows from the table in the format prescribed by the read session.
  // Each response contains one or more table rows, up to a maximum of 10 MiB
  // per response; read requests which attempt to read individual rows larger
  // than this will fail.
  //
  // Each request also returns a set of stream statistics reflecting the
  // estimated total number of rows in the read stream. This number is computed
  // based on the total table size and the number of active streams in the read
  // session, and may change as other streams continue to read data.
  rpc ReadRows(ReadRowsRequest) returns (stream ReadRowsResponse) {
  }

  // Creates additional streams for a ReadSession. This API can be used to
  // dynamically adjust the parallelism of a batch processing task upwards by
  // adding additional workers.
  rpc BatchCreateReadSessionStreams(BatchCreateReadSessionStreamsRequest) returns (BatchCreateReadSessionStreamsResponse) {
  }

  // Triggers the graceful termination of a single stream in a ReadSession. This
  // API can be used to dynamically adjust the parallelism of a batch processing
  // task downwards without losing data.
  //
  // This API does not delete the stream -- it remains visible in the
  // ReadSession, and any data processed by the stream is not released to other
  // streams. However, no additional data will be assigned to the stream once
  // this call completes. Callers must continue reading data on the stream until
  // the end of the stream is reached so that data which has already been
  // assigned to the stream will be processed.
  //
  // This method will return an error if there are no other live streams
  // in the Session, or if SplitReadStream() has been called on the given
  // Stream.
  //
  // NOTE(review): returning google.protobuf.Empty prevents ever adding response
  // fields without a breaking change; a dedicated FinalizeStreamResponse would
  // be preferable, but changing it now would break the published v1beta1 API.
  rpc FinalizeStream(FinalizeStreamRequest) returns (google.protobuf.Empty) {
  }

  // Splits a given read stream into two Streams. These streams are referred to
  // as the primary and the residual of the split. The original stream can still
  // be read from in the same manner as before. Both of the returned streams can
  // also be read from, and the total rows return by both child streams will be
  // the same as the rows read from the original stream.
  //
  // Moreover, the two child streams will be allocated back to back in the
  // original Stream. Concretely, it is guaranteed that for streams Original,
  // Primary, and Residual, that Original[0-j] = Primary[0-j] and
  // Original[j-n] = Residual[0-m] once the streams have been read to
  // completion.
  //
  // This method is guaranteed to be idempotent.
  rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {
  }
}

// Information about a single data stream within a read session.
message Stream {
  // Name of the stream. In the form
  // `/projects/{project_id}/stream/{stream_id}`.
  string name = 1;

  // Rows in the stream.
  int64 row_count = 2;
}

// Expresses a point within a given stream using an offset position.
message StreamPosition {
  // Identifier for a given Stream.
  Stream stream = 1;

  // Position in the stream. Offsets are zero-based row indexes within the
  // stream.
  int64 offset = 2;
}

// Information returned from a `CreateReadSession` request.
message ReadSession {
  // Unique identifier for the session. In the form
  // `projects/{project_id}/sessions/{session_id}`.
  string name = 1;

  // Time at which the session becomes invalid. After this time, subsequent
  // requests to read this Session will return errors.
  google.protobuf.Timestamp expire_time = 2;

  // The schema for the read. If read_options.selected_fields is set, the
  // schema may be different from the table schema as it will only contain
  // the selected fields.
  oneof schema {
    // Avro schema.
    AvroSchema avro_schema = 5;
  }

  // Streams associated with this session.
  repeated Stream streams = 4;

  // Table that this ReadSession is reading from.
  TableReference table_reference = 7;

  // Any modifiers which are applied when reading from the specified table.
  TableModifiers table_modifiers = 8;

  // NOTE(review): field numbers 3 and 6 are unused — presumably removed
  // fields; consider adding `reserved 3, 6;` to prevent accidental reuse.
}

// Creates a new read session, which may include additional options such as
// requested parallelism, projection filters and constraints.
message CreateReadSessionRequest {
  // Required. Reference to the table to read.
  TableReference table_reference = 1;

  // Required. String of the form "projects/your-project-id" indicating the
  // project this ReadSession is associated with. This is the project that will
  // be billed for usage.
  string parent = 6;

  // Optional. Any modifiers to the Table (e.g. snapshot timestamp).
  TableModifiers table_modifiers = 2;

  // Optional. Initial number of streams. If unset or 0, we will
  // provide a value of streams so as to produce reasonable throughput. Must be
  // non-negative. The number of streams may be lower than the requested number,
  // depending on the amount of parallelism that is reasonable for the table and
  // the maximum amount of parallelism allowed by the system.
  //
  // Streams must be read starting from offset 0.
  int32 requested_streams = 3;

  // Optional. Read options for this session (e.g. column selection, filters).
  TableReadOptions read_options = 4;

  // Data output format. Currently default to Avro.
  DataFormat format = 5;
}

// Requesting row data via `ReadRows` must provide Stream position information.
message ReadRowsRequest {
  // Required. Identifier of the position in the stream to start reading from.
  // The offset requested must be less than the last row read from ReadRows.
  // Requesting a larger offset is undefined.
  StreamPosition read_position = 1;
}

// Progress information for a given Stream.
message StreamStatus {
  // Number of estimated rows in the current stream. May change over time as
  // different readers in the stream progress at rates which are relatively fast
  // or slow.
  int64 estimated_row_count = 1;
}

// Information on if the current connection is being throttled.
message ThrottleStatus {
  // How much this connection is being throttled.
  // 0 is no throttling, 100 is completely throttled.
  int32 throttle_percent = 1;
}

// Response from calling `ReadRows` may include row data, progress and
// throttling information.
message ReadRowsResponse {
  // Row data is returned in format specified during session creation.
  oneof rows {
    // Serialized row data in AVRO format.
    AvroRows avro_rows = 3;
  }

  // Estimated stream statistics.
  StreamStatus status = 2;

  // Throttling status. If unset, the latest response still describes
  // the current throttling status.
  ThrottleStatus throttle_status = 5;

  // NOTE(review): field numbers 1 and 4 are unused — presumably removed
  // fields; consider adding `reserved 1, 4;` to prevent accidental reuse.
}

// Information needed to request additional streams for an established read
// session.
message BatchCreateReadSessionStreamsRequest {
  // Required. Must be a non-expired session obtained from a call to
  // CreateReadSession. Only the name field needs to be set.
  ReadSession session = 1;

  // Required. Number of new streams requested. Must be positive.
  // Number of added streams may be less than this, see CreateReadSessionRequest
  // for more information.
  int32 requested_streams = 2;
}

// The response from `BatchCreateReadSessionStreams` returns the stream
// identifiers for the newly created streams.
message BatchCreateReadSessionStreamsResponse {
  // Newly added streams.
  repeated Stream streams = 1;
}

// Request information for invoking `FinalizeStream`.
message FinalizeStreamRequest {
  // Stream to finalize.
  //
  // NOTE(review): field number 1 is unused — presumably a removed field;
  // consider adding `reserved 1;` to prevent accidental reuse.
  Stream stream = 2;
}

// Request information for `SplitReadStream`.
message SplitReadStreamRequest {
  // Stream to split.
  Stream original_stream = 1;
}

// Response from `SplitReadStream`.
message SplitReadStreamResponse {
  // Primary stream. Will contain the beginning portion of
  // |original_stream|.
  Stream primary_stream = 1;

  // Remainder stream. Will contain the tail of |original_stream|.
  Stream remainder_stream = 2;
}

// Data format for input or output data.
enum DataFormat {
  // Data format is unspecified.
  DATA_FORMAT_UNSPECIFIED = 0;

  // Avro is a standard open source row based file format.
  // See https://avro.apache.org/ for more details.
  AVRO = 1;
}