Google cloud spanner client fails when reading large columns #3998
Closed
Description
Consider the following table:
CREATE TABLE big_arrays (
key INT64 NOT NULL,
a_big_array ARRAY<TIMESTAMP> NOT NULL,
) PRIMARY KEY (key)
which gets populated with 100 rows, each containing a large array of 1000 timestamps.
from google.cloud import spanner
import datetime
import random
spanner_client = spanner.Client(project='my-project')
instance = spanner_client.instance('my-instance')
db = instance.database('my-db')
def random_timestamp():
return datetime.datetime.utcfromtimestamp(random.uniform(1e9, 2e9))
def random_big_array(size=1000):
return [random_timestamp() for i in range(size)]
batch_size = 10
batches = 10
for batch_index in range(batches):
with db.batch() as batch:
batch.insert_or_update(
table='big_arrays',
columns=['key', 'a_big_array'],
values = [(key, random_big_array()) for key in range(batch_index*batch_size, (batch_index+1)*batch_size)]
)
Trying to fetch these rows with db.execute_sql('select * from big_arrays limit 100').consume_all()
results in the following error and stack trace:
Traceback (most recent call last):
File "Untitled5.py", line 60, in <module>
db.execute_sql("SELECT * FROM big_arrays limit 100").consume_all()
File "/Users/myname/Repos/transformations/venv/lib/python2.7/site-packages/google/cloud/spanner/streamed.py", line 159, in consume_all
self.consume_next()
File "/Users/myname/Repos/transformations/venv/lib/python2.7/site-packages/google/cloud/spanner/streamed.py", line 148, in consume_next
values[0] = self._merge_chunk(values[0])
File "/Users/myname/Repos/transformations/venv/lib/python2.7/site-packages/google/cloud/spanner/streamed.py", line 108, in _merge_chunk
merged = _merge_by_type(self._pending_chunk, value, field.type)
File "/Users/myname/Repos/transformations/venv/lib/python2.7/site-packages/google/cloud/spanner/streamed.py", line 272, in _merge_by_type
return merger(lhs, rhs, type_)
File "/Users/myname/Repos/transformations/venv/lib/python2.7/site-packages/google/cloud/spanner/streamed.py", line 233, in _merge_array
merged = _merge_by_type(last, first, element_type)
File "/Users/myname/Repos/transformations/venv/lib/python2.7/site-packages/google/cloud/spanner/streamed.py", line 271, in _merge_by_type
merger = _MERGE_BY_TYPE[type_.code]
KeyError: 4
Running the above query with the gcloud cli works as intended
gcloud spanner databases execute-sql my-db --instance=my-instance --sql='select * from big_arrays limit 100' >/dev/null