Skip to content

Commit

Permalink
Fix schema recognition of struct field types (googleapis#9001)
Browse files Browse the repository at this point in the history
* Fix schema recognition of struct field types

A struct field can be referred to as "RECORD" or "STRUCT", and this
commit ensures that the to_api_repr() logic handles both type names correctly.

* Mark STRUCT_TYPES as private in schema.py
  • Loading branch information
plamut authored and emar-kar committed Sep 18, 2019
1 parent 0f2d377 commit 8f7e610
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 23 deletions.
5 changes: 2 additions & 3 deletions bigquery/google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
"please install google-cloud-bigquery-storage to use bqstorage features."
)

STRUCT_TYPES = ("RECORD", "STRUCT")
_PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds.


Expand Down Expand Up @@ -126,7 +125,7 @@ def bq_to_arrow_data_type(field):
return pyarrow.list_(inner_type)
return None

if field.field_type.upper() in STRUCT_TYPES:
if field.field_type.upper() in schema._STRUCT_TYPES:
return bq_to_arrow_struct_data_type(field)

data_type_constructor = BQ_TO_ARROW_SCALARS.get(field.field_type.upper())
Expand Down Expand Up @@ -168,7 +167,7 @@ def bq_to_arrow_array(series, bq_field):
arrow_type = bq_to_arrow_data_type(bq_field)
if bq_field.mode.upper() == "REPEATED":
return pyarrow.ListArray.from_pandas(series, type=arrow_type)
if bq_field.field_type.upper() in STRUCT_TYPES:
if bq_field.field_type.upper() in schema._STRUCT_TYPES:
return pyarrow.StructArray.from_pandas(series, type=arrow_type)
return pyarrow.array(series, type=arrow_type)

Expand Down
4 changes: 3 additions & 1 deletion bigquery/google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from google.cloud.bigquery_v2 import types


_STRUCT_TYPES = ("RECORD", "STRUCT")

# SQL types reference:
# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
Expand Down Expand Up @@ -150,7 +152,7 @@ def to_api_repr(self):

# If this is a RECORD type, then sub-fields are also included,
# add this to the serialized representation.
if self.field_type.upper() == "RECORD":
if self.field_type.upper() in _STRUCT_TYPES:
answer["fields"] = [f.to_api_repr() for f in self.fields]

# Done; return the serialized dictionary.
Expand Down
39 changes: 20 additions & 19 deletions bigquery/tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,25 +71,26 @@ def test_to_api_repr(self):
)

def test_to_api_repr_with_subfield(self):
subfield = self._make_one("bar", "INTEGER", "NULLABLE")
field = self._make_one("foo", "RECORD", "REQUIRED", fields=(subfield,))
self.assertEqual(
field.to_api_repr(),
{
"fields": [
{
"mode": "NULLABLE",
"name": "bar",
"type": "INTEGER",
"description": None,
}
],
"mode": "REQUIRED",
"name": "foo",
"type": "RECORD",
"description": None,
},
)
for record_type in ("RECORD", "STRUCT"):
subfield = self._make_one("bar", "INTEGER", "NULLABLE")
field = self._make_one("foo", record_type, "REQUIRED", fields=(subfield,))
self.assertEqual(
field.to_api_repr(),
{
"fields": [
{
"mode": "NULLABLE",
"name": "bar",
"type": "INTEGER",
"description": None,
}
],
"mode": "REQUIRED",
"name": "foo",
"type": record_type,
"description": None,
},
)

def test_from_api_repr(self):
field = self._get_target_class().from_api_repr(
Expand Down

0 comments on commit 8f7e610

Please sign in to comment.