Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly model JSON repr of complex nested records. #2787

Merged
merged 2 commits into from
Dec 2, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions bigquery/google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,11 @@ def _record_from_json(value, field):
"""Coerce 'value' to a mapping, if set or not nullable."""
if _not_null(value, field):
record = {}
for subfield, cell in zip(field.fields, value['f']):
record_iter = zip(field.fields, value['f'])
for subfield, cell in record_iter:
converter = _CELLDATA_FROM_JSON[subfield.field_type]
if field.mode == 'REPEATED':
value = [converter(item, subfield) for item in cell['v']]
if subfield.mode == 'REPEATED':

This comment was marked as spam.

value = [converter(item['v'], subfield) for item in cell['v']]
else:
value = converter(cell['v'], subfield)
record[subfield.name] = value
Expand Down Expand Up @@ -103,7 +104,7 @@ def _row_from_json(row, schema):
for field, cell in zip(schema, row['f']):
converter = _CELLDATA_FROM_JSON[field.field_type]
if field.mode == 'REPEATED':
row_data.append([converter(item, field)
row_data.append([converter(item['v'], field)
for item in cell['v']])
else:
row_data.append(converter(cell['v'], field))
Expand Down
97 changes: 94 additions & 3 deletions bigquery/unit_tests/test__helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def test_w_scalar_subfield(self):
def test_w_repeated_subfield(self):
subfield = _Field('REPEATED', 'color', 'STRING')
field = _Field('REQUIRED', fields=[subfield])
value = {'f': [{'v': ['red', 'yellow', 'blue']}]}
value = {'f': [{'v': [{'v': 'red'}, {'v': 'yellow'}, {'v': 'blue'}]}]}

This comment was marked as spam.

This comment was marked as spam.

coerced = self._call_fut(value, field)
self.assertEqual(coerced, {'color': ['red', 'yellow', 'blue']})

Expand Down Expand Up @@ -234,6 +234,97 @@ def test_w_string_value(self):
self.assertEqual(coerced, 'Wonderful!')


class Test_row_from_json(unittest.TestCase):
    """Exercise ``_row_from_json`` with nested ``f``/``v`` row payloads.

    The SQL in each test's leading comment is the query the payload is
    meant to represent. Every case checks that the decoded row compares
    equal to a tuple of native Python values.
    """

    def _call_fut(self, row, schema):
        """Call the function under test, importing it lazily."""
        from google.cloud.bigquery import _helpers
        return _helpers._row_from_json(row, schema)

    def test_w_single_scalar_column(self):
        """A single REQUIRED INTEGER cell decodes to a one-element tuple."""
        # SELECT 1 AS col
        scalar_col = _Field('REQUIRED', 'col', 'INTEGER')
        payload = {u'f': [{u'v': u'1'}]}

        decoded = self._call_fut(payload, schema=[scalar_col])

        self.assertEqual(decoded, (1,))

    def test_w_single_struct_column(self):
        """A RECORD cell decodes to a mapping keyed by subfield name."""
        # SELECT (1, 2) AS col
        subfields = [
            _Field('REQUIRED', 'sub_1', 'INTEGER'),
            _Field('REQUIRED', 'sub_2', 'INTEGER'),
        ]
        struct_col = _Field('REQUIRED', 'col', 'RECORD', fields=subfields)
        struct_cell = {u'f': [{u'v': u'1'}, {u'v': u'2'}]}
        payload = {u'f': [{u'v': struct_cell}]}

        decoded = self._call_fut(payload, schema=[struct_col])

        self.assertEqual(decoded, ({'sub_1': 1, 'sub_2': 2},))

    def test_w_single_array_column(self):
        """A REPEATED scalar cell decodes to a list of converted items."""
        # SELECT [1, 2, 3] as col
        array_col = _Field('REPEATED', 'col', 'INTEGER')
        items = [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]
        payload = {u'f': [{u'v': items}]}

        decoded = self._call_fut(payload, schema=[array_col])

        self.assertEqual(decoded, ([1, 2, 3],))

    def test_w_struct_w_nested_array_column(self):
        """REPEATED subfields inside a RECORD decode to lists."""
        # SELECT ([1, 2], 3, [4, 5]) as col
        subfields = [
            _Field('REPEATED', 'first', 'INTEGER'),
            _Field('REQUIRED', 'second', 'INTEGER'),
            _Field('REPEATED', 'third', 'INTEGER'),
        ]
        struct_col = _Field('REQUIRED', 'col', 'RECORD', fields=subfields)
        first_items = [{u'v': u'1'}, {u'v': u'2'}]
        third_items = [{u'v': u'4'}, {u'v': u'5'}]
        struct_cell = {
            u'f': [
                {u'v': first_items},
                {u'v': u'3'},
                {u'v': third_items},
            ],
        }
        payload = {u'f': [{u'v': struct_cell}]}

        decoded = self._call_fut(payload, schema=[struct_col])

        expected = ({u'first': [1, 2], u'second': 3, u'third': [4, 5]},)
        self.assertEqual(decoded, expected)

    def test_w_array_of_struct(self):
        """A REPEATED RECORD cell decodes to a list of mappings."""
        # SELECT [(1, 2, 3), (4, 5, 6)] as col
        subfields = [
            _Field('REQUIRED', 'first', 'INTEGER'),
            _Field('REQUIRED', 'second', 'INTEGER'),
            _Field('REQUIRED', 'third', 'INTEGER'),
        ]
        array_col = _Field('REPEATED', 'col', 'RECORD', fields=subfields)
        struct_one = {u'f': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}
        struct_two = {u'f': [{u'v': u'4'}, {u'v': u'5'}, {u'v': u'6'}]}
        payload = {u'f': [{u'v': [{u'v': struct_one}, {u'v': struct_two}]}]}

        decoded = self._call_fut(payload, schema=[array_col])

        expected = ([
            {u'first': 1, u'second': 2, u'third': 3},
            {u'first': 4, u'second': 5, u'third': 6},
        ],)
        self.assertEqual(decoded, expected)

    def test_w_array_of_struct_w_array(self):
        """REPEATED RECORDs may themselves hold REPEATED subfields."""
        # SELECT [([1, 2, 3], 4), ([5, 6], 7)]
        subfields = [
            _Field('REPEATED', 'first', 'INTEGER'),
            _Field('REQUIRED', 'second', 'INTEGER'),
        ]
        array_col = _Field('REPEATED', 'col', 'RECORD', fields=subfields)
        struct_one = {
            u'f': [
                {u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]},
                {u'v': u'4'},
            ],
        }
        struct_two = {
            u'f': [
                {u'v': [{u'v': u'5'}, {u'v': u'6'}]},
                {u'v': u'7'},
            ],
        }
        payload = {u'f': [{u'v': [{u'v': struct_one}, {u'v': struct_two}]}]}

        decoded = self._call_fut(payload, schema=[array_col])

        expected = ([
            {u'first': [1, 2, 3], u'second': 4},
            {u'first': [5, 6], u'second': 7},
        ],)
        self.assertEqual(decoded, expected)


class Test_rows_from_json(unittest.TestCase):

def _call_fut(self, value, field):
Expand All @@ -253,12 +344,12 @@ def test_w_record_subfield(self):
{'f': [
{'v': 'Phred Phlyntstone'},
{'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}},
{'v': ['orange', 'black']},
{'v': [{'v': 'orange'}, {'v': 'black'}]},
]},
{'f': [
{'v': 'Bharney Rhubble'},
{'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}},
{'v': ['brown']},
{'v': [{'v': 'brown'}]},
]},
{'f': [
{'v': 'Wylma Phlyntstone'},
Expand Down
15 changes: 10 additions & 5 deletions bigquery/unit_tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1173,22 +1173,27 @@ def test_fetch_data_w_repeated_fields(self):
'pageToken': TOKEN,
'rows': [
{'f': [
{'v': ['red', 'green']},
{'v': [{'f': [{'v': ['1', '2']},
{'v': ['3.1415', '1.414']}]}]},
{'v': [{'v': 'red'}, {'v': 'green'}]},
{'v': [{
'v': {
'f': [
{'v': [{'v': '1'}, {'v': '2'}]},
{'v': [{'v': '3.1415'}, {'v': '1.414'}]},
]}
}]},
]},
]
}
conn = _Connection(DATA)
client = _Client(project=self.PROJECT, connection=conn)
dataset = _Dataset(client)
full_name = SchemaField('color', 'STRING', mode='REPEATED')
color = SchemaField('color', 'STRING', mode='REPEATED')
index = SchemaField('index', 'INTEGER', 'REPEATED')
score = SchemaField('score', 'FLOAT', 'REPEATED')
struct = SchemaField('struct', 'RECORD', mode='REPEATED',
fields=[index, score])
table = self._make_one(self.TABLE_NAME, dataset=dataset,
schema=[full_name, struct])
schema=[color, struct])

iterator = table.fetch_data()
page = six.next(iterator.pages)
Expand Down
46 changes: 46 additions & 0 deletions system_tests/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,3 +478,49 @@ def _job_done(instance):
# them here. The best we can do is note that the API call didn't
# raise an error, and that the job completed (in the `retry()`
# above).

def test_sync_query_w_nested_arrays_and_structs(self):
    """Run queries returning nested arrays / structs and check the result.

    Every query is run synchronously with legacy SQL disabled and must
    produce exactly one row holding exactly one cell, whose decoded value
    equals the case's ``expected`` entry.
    """
    cases = [
        {
            'sql': 'SELECT 1',
            'expected': 1,
        },
        {
            'sql': 'SELECT (1, 2)',
            'expected': {'_field_1': 1, '_field_2': 2},
        },
        {
            'sql': 'SELECT [1, 2, 3]',
            'expected': [1, 2, 3],
        },
        {
            'sql': 'SELECT ([1, 2], 3, [4, 5])',
            'expected': {
                '_field_1': [1, 2],
                '_field_2': 3,
                '_field_3': [4, 5],
            },
        },
        {
            'sql': 'SELECT [(1, 2, 3), (4, 5, 6)]',
            'expected': [
                {'_field_1': 1, '_field_2': 2, '_field_3': 3},
                {'_field_1': 4, '_field_2': 5, '_field_3': 6},
            ],
        },
        {
            'sql': 'SELECT [([1, 2, 3], 4), ([5, 6], 7)]',
            'expected': [
                {u'_field_1': [1, 2, 3], u'_field_2': 4},
                {u'_field_1': [5, 6], u'_field_2': 7},
            ],
        },
        {
            'sql': 'SELECT ARRAY(SELECT STRUCT([1, 2]))',
            'expected': [{u'_field_1': [1, 2]}],
        },
    ]
    for case in cases:
        query = Config.CLIENT.run_sync_query(case['sql'])
        query.use_legacy_sql = False
        query.run()

        # Exactly one row ...
        rows = query.rows
        self.assertEqual(len(rows), 1)
        # ... with exactly one cell ...
        only_row = rows[0]
        self.assertEqual(len(only_row), 1)
        # ... whose decoded value matches.
        self.assertEqual(only_row[0], case['expected'])