Skip to content

Commit

Permalink
bigquery: add simple benchmark (#4273)
Browse files Browse the repository at this point in the history
  • Loading branch information
pongad authored and lukesneeringer committed Oct 27, 2017
1 parent 27afed5 commit 4834b3c
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 0 deletions.
8 changes: 8 additions & 0 deletions bigquery/benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# BigQuery Benchmark
This directory contains benchmarks for the BigQuery client.

## Usage
`python benchmark.py queries.json`

The BigQuery service caches requests, so the benchmark should be run
at least twice, disregarding the first result.
32 changes: 32 additions & 0 deletions bigquery/benchmark/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from google.cloud import bigquery
from datetime import datetime
import json
import sys

# Benchmark driver: runs every query listed in the JSON file named on the
# command line and, for each one, prints the number of rows and columns
# returned, the time until the first row was received, and the total time
# taken to iterate over the whole result set.
if len(sys.argv) < 2:
    raise Exception('need query file, usage: python {0} <queries.json>'.format(sys.argv[0]))

# The file is parsed as JSON and iterated below; each element is passed
# unchanged to client.query().
with open(sys.argv[1], 'r') as f:
    queries = json.load(f)

client = bigquery.Client()

for query in queries:
    # The clock starts before the job is submitted, so both timings include
    # job creation and the wait for the job to complete.
    start_time = datetime.now()
    job = client.query(query)
    rows = job.result()

    num_rows = 0
    num_cols = None
    first_byte_time = None

    for row in rows:
        if num_rows == 0:
            # The first row fixes the expected column count and marks the
            # "first byte" latency.
            num_cols = len(row)
            first_byte_time = datetime.now() - start_time
        elif num_cols != len(row):
            raise Exception('found {0} columns, expected {1}'.format(len(row), num_cols))
        num_rows += 1
    total_time = datetime.now() - start_time

    # A query that returns no rows never records a first-row time; report
    # "n/a" instead of failing on None.
    if first_byte_time is None:
        first_byte_sec = 'n/a'
    else:
        first_byte_sec = first_byte_time.total_seconds()

    # Calling print with a single parenthesised argument behaves the same on
    # Python 2 and Python 3.
    print("query {0}: {1} rows, {2} cols, first byte {3} sec, total {4} sec".format(
        query, num_rows, num_cols, first_byte_sec, total_time.total_seconds()))
10 changes: 10 additions & 0 deletions bigquery/benchmark/queries.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[
"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 10000",
"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 100000",
"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 1000000",
"SELECT title FROM `bigquery-public-data.samples.wikipedia` ORDER BY title LIMIT 1000",
"SELECT title, id, timestamp, contributor_ip FROM `bigquery-public-data.samples.wikipedia` WHERE title like 'Blo%' ORDER BY id",
"SELECT * FROM `bigquery-public-data.baseball.games_post_wide` ORDER BY gameId",
"SELECT * FROM `bigquery-public-data.samples.github_nested` WHERE repository.has_downloads ORDER BY repository.created_at LIMIT 10000",
"SELECT repo_name, path FROM `bigquery-public-data.github_repos.files` WHERE path LIKE '%.java' ORDER BY id LIMIT 1000000"
]

0 comments on commit 4834b3c

Please sign in to comment.