Skip to content

chore: update benchmark to better compare performance #157

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions create_test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def create_test_instance():

instance = CLIENT.instance(instance_id, instance_config, labels=labels)


try:
created_op = instance.create()
created_op.result(1800) # block until completion
Expand Down
101 changes: 76 additions & 25 deletions test/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@
A test suite to check Spanner dialect for SQLAlchemy performance
in comparison with the original Spanner client.
"""
import base64
import datetime
import random
from scipy.stats import sem
import statistics
import time

from google.api_core.exceptions import Aborted
from google.api_core.exceptions import NotFound
from google.cloud import spanner
from google.cloud import spanner_dbapi
from google.cloud.spanner_v1 import Client, KeySet
from sqlalchemy import (
Expand Down Expand Up @@ -64,26 +67,42 @@ class BenchmarkTestBase:
Organizes testing data preparation and cleanup.
"""

_many_rows_ids = []
_many_rows2_ids = []

def __init__(self):
self._cleanup()
self._create_table()

self._one_row = (
1,
"Pete",
"Allison",
datetime.datetime(1998, 10, 6).strftime("%Y-%m-%d"),
b"123",
)
self._one_row = {
"id": 1,
"first_name": "Pete",
"last_name": "Allison",
"birth_date": datetime.date(1998, 10, 6),
"picture": b"123",
}
self.keys = set([1])
if not self._many_rows_ids:
for i in range(99):
self._many_rows_ids.append(self._generate_id())
self._many_rows2_ids.append(self._generate_id())

def _cleanup(self):
"""Drop the test table."""
conn = spanner_dbapi.connect(INSTANCE, DATABASE)
conn.database.update_ddl(["DROP TABLE Singers"])
try:
conn.database.update_ddl(["DROP TABLE Singers"])
except NotFound:
pass
conn.close()

def _create_table(self):
"""Create a table for performace testing."""
conn = spanner_dbapi.connect(INSTANCE, DATABASE)
try:
conn.database.update_ddl(["DROP TABLE Singers"])
except NotFound:
pass
conn.database.update_ddl(
[
"""
Expand All @@ -96,10 +115,17 @@ def _create_table(self):
) PRIMARY KEY (id)
"""
]
).result(120)
).result()

conn.close()

def _generate_id(self):
    """Return a random integer id that is not yet recorded in ``self.keys``.

    The chosen id is added to ``self.keys`` before returning, so later
    calls on the same instance will not return it again.
    """
    # Start from 1 so that the loop below draws at least one random value;
    # this relies on 1 already being present in ``self.keys``
    # (``__init__`` initialises it with ``set([1])``).
    num = 1
    while num in self.keys:
        # Candidate id in the range 0..1000000 (inclusive after rounding).
        num = round(random.random() * 1000000)
    self.keys.add(num)
    return num

def run(self):
"""Execute every test case."""
measures = {}
Expand All @@ -117,7 +143,7 @@ def run(self):


class SpannerBenchmarkTest(BenchmarkTestBase):
"""The original Spanner performace testing class."""
"""The original Spanner performance testing class."""

def __init__(self):
super().__init__()
Expand All @@ -127,12 +153,20 @@ def __init__(self):

self._many_rows = []
self._many_rows2 = []
birth_date = datetime.datetime(1998, 10, 6).strftime("%Y-%m-%d")
for i in range(99):
num = round(random.random() * 1000000)
self._many_rows.append((num, "Pete", "Allison", birth_date, b"123"))
num2 = round(random.random() * 1000000)
self._many_rows2.append((num2, "Pete", "Allison", birth_date, b"123"))
birth_date = datetime.date(1998, 10, 6)
picture = base64.b64encode(u"123".encode())
for num in self._many_rows_ids:
self._many_rows.append(
{
"id": num,
"first_name": "Pete",
"last_name": "Allison",
"birth_date": birth_date,
"picture": picture,
}
)
for num in self._many_rows2_ids:
self._many_rows2.append((num, "Pete", "Allison", birth_date, picture))

# initiate a session
with self._database.snapshot():
Expand Down Expand Up @@ -192,9 +226,8 @@ def __init__(self):

self._many_rows = []
self._many_rows2 = []
birth_date = datetime.datetime(1998, 10, 6).strftime("%Y-%m-%d")
for i in range(99):
num = round(random.random() * 1000000)
birth_date = datetime.date(1998, 10, 6)
for num in self._many_rows_ids:
self._many_rows.append(
{
"id": num,
Expand All @@ -204,10 +237,10 @@ def __init__(self):
"picture": b"123",
}
)
num2 = round(random.random() * 1000000)
for num in self._many_rows2_ids:
self._many_rows2.append(
{
"id": num2,
"id": num,
"first_name": "Pete",
"last_name": "Allison",
"birth_date": birth_date,
Expand Down Expand Up @@ -255,8 +288,16 @@ def insert_one_row(transaction, one_row):
Inserts a single row into a database and then fetches it back.
"""
transaction.execute_update(
"INSERT Singers (id, first_name, last_name, birth_date, picture) "
" VALUES {}".format(str(one_row))
"INSERT INTO `Singers` (id, first_name, last_name, birth_date, picture)"
" VALUES (@id, @first_name, @last_name, @birth_date, @picture)",
params=one_row,
param_types={
"id": spanner.param_types.INT64,
"first_name": spanner.param_types.STRING,
"last_name": spanner.param_types.STRING,
"birth_date": spanner.param_types.DATE,
"picture": spanner.param_types.BYTES,
},
)
last_name = transaction.execute_sql(
"SELECT last_name FROM Singers WHERE id=1"
Expand All @@ -273,8 +314,18 @@ def insert_many_rows(transaction, many_rows):
statements = []
for row in many_rows:
statements.append(
"INSERT Singers (id, first_name, last_name, birth_date, picture) "
" VALUES {}".format(str(row))
(
"INSERT INTO `Singers` (id, first_name, last_name, birth_date, picture)"
" VALUES (@id, @first_name, @last_name, @birth_date, @picture)",
row,
{
"id": spanner.param_types.INT64,
"first_name": spanner.param_types.STRING,
"last_name": spanner.param_types.STRING,
"birth_date": spanner.param_types.DATE,
"picture": spanner.param_types.BYTES,
},
)
)
_, count = transaction.batch_update(statements)
if sum(count) != 99:
Expand Down