
Commit d8a9225

BigQuery: add sample for bigquerystorage API. (GoogleCloudPlatform#1342)
* BigQuery: add sample for bigquerystorage API.
1 parent 944b26f commit d8a9225

5 files changed, +322 -0 lines changed

bigquery/bigquerystorage/README.md (+37)

# Getting Started with the BigQuery Storage API

<a href="https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/java-docs-samples&page=editor&open_in_editor=bigquery/bigquerystorage/README.md">
<img alt="Open in Cloud Shell" src="http://gstatic.com/cloudssh/images/open-btn.png"></a>

Google's BigQuery Service features a Storage API for performing reads of BigQuery-managed data at
scale. This sample demonstrates using the API to read a sample table from the BigQuery public
datasets, projecting a subset of columns, and filtering that data on the server side.

## Quickstart

Install [Maven](http://maven.apache.org/).

Build your project with:

    mvn clean package -DskipTests

You can then run a given `ClassName` via:

    mvn exec:java -Dexec.mainClass=com.example.bigquerystorage.ClassName \
        -Dexec.args="any arguments to the app"

### Reading a Table with the BigQuery Storage API

    mvn exec:java -Dexec.mainClass=com.example.bigquerystorage.StorageSample \
        -Dexec.args="project-id"

## Testing

To run the tests for this sample, first set the `GOOGLE_CLOUD_PROJECT`
environment variable.

    export GOOGLE_CLOUD_PROJECT=my-project

Then run the tests with Maven.

    mvn clean verify
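For orientation, here is a condensed sketch of what the README's description boils down to: it builds the same column projection and server-side row filter that the StorageSample.java added below passes to createReadSession. The class name ReadOptionsSketch and the project-id fallback are illustrative assumptions; the snippet only builds and prints the request, so it runs without calling the service.

package com.example.bigquerystorage;

import com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions;
import com.google.cloud.bigquery.storage.v1beta1.Storage.CreateReadSessionRequest;
import com.google.cloud.bigquery.storage.v1beta1.Storage.DataFormat;
import com.google.cloud.bigquery.storage.v1beta1.TableReferenceProto.TableReference;

public class ReadOptionsSketch {
  public static void main(String[] args) {
    // Hypothetical fallback project ID for illustration only.
    String projectId = args.length > 0 ? args[0] : "your-project-id";
    String parent = String.format("projects/%s", projectId);

    // The sample table from the BigQuery public datasets.
    TableReference table = TableReference.newBuilder()
        .setProjectId("bigquery-public-data")
        .setDatasetId("usa_names")
        .setTableId("usa_1910_current")
        .build();

    // Projection: only these columns are transmitted. Filter: rows are restricted server side.
    TableReadOptions options = TableReadOptions.newBuilder()
        .addSelectedFields("name")
        .addSelectedFields("number")
        .addSelectedFields("state")
        .setRowRestriction("state = \"WA\"")
        .build();

    CreateReadSessionRequest request = CreateReadSessionRequest.newBuilder()
        .setParent(parent)
        .setTableReference(table)
        .setReadOptions(options)
        .setRequestedStreams(1)
        .setFormat(DataFormat.AVRO)
        .build();

    // Printing the request shows what the sample sends to createReadSession().
    System.out.println(request);
  }
}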

bigquery/bigquerystorage/pom.xml (+65)

<!--
  Copyright 2019 Google LLC

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<project>
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.example.bigquerystorage</groupId>
  <artifactId>bigquery-storage-google-cloud-samples</artifactId>
  <packaging>jar</packaging>

  <!--
    The parent pom defines common style checks and testing strategies for our samples.
    Removing or replacing it should not affect the execution of the samples in any way.
  -->
  <parent>
    <groupId>com.google.cloud.samples</groupId>
    <artifactId>shared-configuration</artifactId>
    <version>1.0.10</version>
  </parent>

  <properties>
    <maven.compiler.target>1.8</maven.compiler.target>
    <maven.compiler.source>1.8</maven.compiler.source>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>com.google.cloud</groupId>
      <artifactId>google-cloud-bigquerystorage</artifactId>
      <version>0.80.0-alpha</version>
    </dependency>

    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro</artifactId>
      <version>1.8.2</version>
    </dependency>

    <!-- Test dependencies -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.13-beta-2</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.google.truth</groupId>
      <artifactId>truth</artifactId>
      <version>0.42</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
</project>
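Besides the storage client, the pom declares Avro directly because the API returns Avro-encoded row blocks together with a schema string that the client code parses and decodes itself (the StorageSample below does this with session.getAvroSchema().getSchema()). A hypothetical smoke check of that dependency follows; the class name and the schema literal are illustrative assumptions, with the schema only shaped like what the API returns for the sample's three selected fields.

package com.example.bigquerystorage;

import org.apache.avro.Schema;

public class AvroDependencyCheck {
  public static void main(String[] args) {
    // Illustrative schema resembling the one returned for the selected fields
    // name, number, and state (nullable string/long columns).
    String schemaJson = "{\"type\":\"record\",\"name\":\"__root__\",\"fields\":["
        + "{\"name\":\"name\",\"type\":[\"null\",\"string\"]},"
        + "{\"name\":\"number\",\"type\":[\"null\",\"long\"]},"
        + "{\"name\":\"state\",\"type\":[\"null\",\"string\"]}]}";

    // Parsing succeeds only if the avro artifact declared in the pom is on the classpath.
    Schema schema = new Schema.Parser().parse(schemaJson);
    System.out.println(schema.toString(true));
  }
}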
bigquery/bigquerystorage/src/main/java/com/example/bigquerystorage/StorageSample.java (+161)
/*
 * Copyright 2019 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.example.bigquerystorage;

// [START bigquerystorage_quickstart]

import com.google.api.gax.rpc.ServerStream;
import com.google.cloud.bigquery.storage.v1beta1.AvroProto.AvroRows;
import com.google.cloud.bigquery.storage.v1beta1.BigQueryStorageClient;
import com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions;
import com.google.cloud.bigquery.storage.v1beta1.Storage.CreateReadSessionRequest;
import com.google.cloud.bigquery.storage.v1beta1.Storage.DataFormat;
import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsRequest;
import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsResponse;
import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadSession;
import com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition;
import com.google.cloud.bigquery.storage.v1beta1.TableReferenceProto.TableModifiers;
import com.google.cloud.bigquery.storage.v1beta1.TableReferenceProto.TableReference;
import com.google.common.base.Preconditions;
import com.google.protobuf.Timestamp;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;

public class StorageSample {

  /*
   * SimpleRowReader handles deserialization of the Avro-encoded row blocks transmitted
   * from the storage API using a generic datum decoder.
   */
  private static class SimpleRowReader {

    private final DatumReader<GenericRecord> datumReader;

    // Decoder object will be reused to avoid re-allocation and too much garbage collection.
    private BinaryDecoder decoder = null;

    // GenericRecord object will be reused.
    private GenericRecord row = null;

    public SimpleRowReader(Schema schema) {
      Preconditions.checkNotNull(schema);
      datumReader = new GenericDatumReader<>(schema);
    }

    /**
     * Sample method for processing AVRO rows which only validates decoding.
     *
     * @param avroRows object returned from the ReadRowsResponse.
     */
    public void processRows(AvroRows avroRows) throws IOException {
      decoder = DecoderFactory.get()
          .binaryDecoder(avroRows.getSerializedBinaryRows().toByteArray(), decoder);

      while (!decoder.isEnd()) {
        // Reusing object row
        row = datumReader.read(row, decoder);
        System.out.println(row.toString());
      }
    }
  }

  public static void main(String... args) throws Exception {
    // Sets your Google Cloud Platform project ID.
    // String projectId = "YOUR_PROJECT_ID";
    String projectId = args[0];
    Integer snapshotMillis = null;
    if (args.length > 1) {
      snapshotMillis = Integer.parseInt(args[1]);
    }

    try (BigQueryStorageClient client = BigQueryStorageClient.create()) {
      String parent = String.format("projects/%s", projectId);

      // This example uses baby name data from the public datasets.
      TableReference tableReference = TableReference.newBuilder()
          .setProjectId("bigquery-public-data")
          .setDatasetId("usa_names")
          .setTableId("usa_1910_current")
          .build();

      // We specify the columns to be projected by adding them to the selected fields,
      // and set a simple filter to restrict which rows are transmitted.
      TableReadOptions options = TableReadOptions.newBuilder()
          .addSelectedFields("name")
          .addSelectedFields("number")
          .addSelectedFields("state")
          .setRowRestriction("state = \"WA\"")
          .build();

      // Begin building the session request.
      CreateReadSessionRequest.Builder builder = CreateReadSessionRequest.newBuilder()
          .setParent(parent)
          .setTableReference(tableReference)
          .setReadOptions(options)
          .setRequestedStreams(1)
          .setFormat(DataFormat.AVRO);

      // Optionally specify the snapshot time. When unspecified, snapshot time is "now".
      if (snapshotMillis != null) {
        Timestamp t = Timestamp.newBuilder()
            .setSeconds(snapshotMillis / 1000)
            .setNanos((int) ((snapshotMillis % 1000) * 1000000))
            .build();
        TableModifiers modifiers = TableModifiers.newBuilder()
            .setSnapshotTime(t)
            .build();
        builder.setTableModifiers(modifiers);
      }

      // Request the session creation.
      ReadSession session = client.createReadSession(builder.build());

      SimpleRowReader reader = new SimpleRowReader(
          new Schema.Parser().parse(session.getAvroSchema().getSchema()));

      // Assert that there are streams available in the session. An empty table may not have
      // data available. If no sessions are available for an anonymous (cached) table, consider
      // writing results of a query to a named table rather than consuming cached results directly.
      Preconditions.checkState(session.getStreamsCount() > 0);

      // Use the first stream to perform reading.
      StreamPosition readPosition = StreamPosition.newBuilder()
          .setStream(session.getStreams(0))
          .build();

      ReadRowsRequest readRowsRequest = ReadRowsRequest.newBuilder()
          .setReadPosition(readPosition)
          .build();

      // Process each block of rows as they arrive and decode using our simple row reader.
      ServerStream<ReadRowsResponse> stream = client.readRowsCallable().call(readRowsRequest);
      for (ReadRowsResponse response : stream) {
        Preconditions.checkState(response.hasAvroRows());
        reader.processRows(response.getAvroRows());
      }
      client.close();
    }
  }
}
// [END bigquerystorage_quickstart]
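The SimpleRowReader above only validates decoding by printing each record. A hypothetical drop-in variation of its processRows method, reusing the same decoder, datumReader, and row fields, shows how individual columns could be read out by name; it assumes the same three selected fields as the read options (name, number, state).

public void processRows(AvroRows avroRows) throws IOException {
  decoder = DecoderFactory.get()
      .binaryDecoder(avroRows.getSerializedBinaryRows().toByteArray(), decoder);

  while (!decoder.isEnd()) {
    row = datumReader.read(row, decoder);
    // GenericRecord.get(String) looks a field up by name. Avro strings arrive as
    // org.apache.avro.util.Utf8 and BigQuery INTEGER columns as Long; String.valueOf
    // keeps the conversion null-safe for NULLABLE columns.
    String name = String.valueOf(row.get("name"));
    String number = String.valueOf(row.get("number"));
    String state = String.valueOf(row.get("state"));
    System.out.println(name + "," + state + "," + number);
  }
}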
bigquery/bigquerystorage/src/test/java/com/example/bigquerystorage/QuickstartSampleIT.java (+58)
/*
 * Copyright 2019 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.example.bigquerystorage;

import static com.google.common.truth.Truth.assertThat;

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/** Tests for quickstart sample. */
@RunWith(JUnit4.class)
@SuppressWarnings("checkstyle:abbreviationaswordinname")
public class QuickstartSampleIT {
  private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");

  private ByteArrayOutputStream bout;
  private PrintStream out;

  @Before
  public void setUp() {
    bout = new ByteArrayOutputStream();
    out = new PrintStream(bout);
    System.setOut(out);
  }

  @After
  public void tearDown() {
    System.setOut(null);
  }

  @Test
  public void testQuickstart() throws Exception {
    StorageSample.main(PROJECT_ID);
    String got = bout.toString();
    // Ensure at least 1k of output generated and a specific token was present in the output.
    assertThat(bout.size()).isGreaterThan(1024);
    assertThat(got).contains("Zayvion");
  }
}
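One judgment call in the test above: tearDown sets System.out to null, which leaves later output with no usable stream. A common, hypothetical variation captures the original stream in setUp and restores it in tearDown, keeping the same bout and out fields as the test class; the originalOut field is an illustrative addition.

private PrintStream originalOut;

@Before
public void setUp() {
  // Remember the real stream so it can be restored after the test.
  originalOut = System.out;
  bout = new ByteArrayOutputStream();
  out = new PrintStream(bout);
  System.setOut(out);
}

@After
public void tearDown() {
  // Restore the original stream instead of nulling it out.
  System.setOut(originalOut);
}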

pom.xml (+1)

@@ -45,6 +45,7 @@
     <module>bigquery/cloud-client</module>
     <module>bigquery/datatransfer/cloud-client</module>
     <module>bigquery/rest</module>
+    <module>bigquery/bigquerystorage</module>

     <module>cdn/signed-urls</module>