Skip to content

Commit

Permalink
Call get_table in list_rows if the schema is not available (#7621)
Browse files Browse the repository at this point in the history
This is kinder than raising an error message saying to call get_table
yourself. Also, it guarantees the schema is as up-to-date as possible.

This also fixes an issue where rows could not be listed on the
TableListItem objects that are returned from list_tables.
  • Loading branch information
tswast authored Apr 1, 2019
1 parent 28ad1a8 commit 019e905
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 20 deletions.
23 changes: 12 additions & 11 deletions bigquery/google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1694,15 +1694,19 @@ def list_rows(
Args:
table (Union[ \
:class:`~google.cloud.bigquery.table.Table`, \
:class:`~google.cloud.bigquery.table.TableListItem`, \
:class:`~google.cloud.bigquery.table.TableReference`, \
str, \
]):
The table to list, or a reference to it.
The table to list, or a reference to it. When the table
object does not contain a schema and ``selected_fields`` is
not supplied, this method calls ``get_table`` to fetch the
table schema.
selected_fields (Sequence[ \
:class:`~google.cloud.bigquery.schema.SchemaField` \
]):
The fields to return. Required if ``table`` is a
:class:`~google.cloud.bigquery.table.TableReference`.
The fields to return. If not supplied, data for all columns
are downloaded.
max_results (int):
(Optional) maximum number of rows to return.
page_token (str):
Expand Down Expand Up @@ -1741,14 +1745,11 @@ def list_rows(
if selected_fields is not None:
schema = selected_fields

# Allow listing rows of an empty table by not raising if the table exists.
elif len(schema) == 0 and table.created is None:
raise ValueError(
(
"Could not determine schema for table '{}'. Call client.get_table() "
"or pass in a list of schema fields to the selected_fields argument."
).format(table)
)
# No schema, but no selected_fields. Assume the developer wants all
# columns, so get the table resource for them rather than failing.
elif len(schema) == 0:
table = self.get_table(table.reference, retry=retry)
schema = table.schema

params = {}
if selected_fields is not None:
Expand Down
4 changes: 4 additions & 0 deletions bigquery/google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1752,5 +1752,9 @@ def _table_arg_to_table(value, default_project=None):
value = TableReference.from_string(value, default_project=default_project)
if isinstance(value, TableReference):
value = Table(value)
if isinstance(value, TableListItem):
newvalue = Table(value.reference)
newvalue._properties = value._properties
value = newvalue

return value
70 changes: 61 additions & 9 deletions bigquery/tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4276,20 +4276,72 @@ def test_list_rows_w_record_schema(self):
method="GET", path="/%s" % PATH, query_params={}
)

def test_list_rows_errors(self):
from google.cloud.bigquery.table import Table
def test_list_rows_with_missing_schema(self):
from google.cloud.bigquery.table import Table, TableListItem

table_path = "/projects/{}/datasets/{}/tables/{}".format(
self.PROJECT, self.DS_ID, self.TABLE_ID
)
tabledata_path = "{}/data".format(table_path)

table_list_item_data = {
"id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID),
"tableReference": {
"projectId": self.PROJECT,
"datasetId": self.DS_ID,
"tableId": self.TABLE_ID,
},
}
table_data = copy.deepcopy(table_list_item_data)
# Intentionally make wrong, since total_rows can update during iteration.
table_data["numRows"] = 2
table_data["schema"] = {
"fields": [
{"name": "name", "type": "STRING"},
{"name": "age", "type": "INTEGER"},
]
}
rows_data = {
"totalRows": 3,
"pageToken": None,
"rows": [
{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
{"f": [{"v": "Bharney Rhubble"}, {"v": "31"}]},
{"f": [{"v": "Wylma Phlyntstone"}, {"v": None}]},
],
}

creds = _make_credentials()
http = object()
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)

# table ref with no selected fields
with self.assertRaises(ValueError):
client.list_rows(self.TABLE_REF)
schemaless_tables = (
"{}.{}".format(self.DS_ID, self.TABLE_ID),
self.TABLE_REF,
Table(self.TABLE_REF),
TableListItem(table_list_item_data),
)

# table with no schema
with self.assertRaises(ValueError):
client.list_rows(Table(self.TABLE_REF))
for table in schemaless_tables:
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
conn = client._connection = _make_connection(table_data, rows_data)

row_iter = client.list_rows(table)

conn.api_request.assert_called_once_with(method="GET", path=table_path)
conn.api_request.reset_mock()
self.assertIsNone(row_iter.total_rows, msg=repr(table))

rows = list(row_iter)
conn.api_request.assert_called_once_with(
method="GET", path=tabledata_path, query_params={}
)
self.assertEqual(row_iter.total_rows, 3, msg=repr(table))
self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table))
self.assertEqual(rows[1].age, 31, msg=repr(table))
self.assertIsNone(rows[2].age, msg=repr(table))

def test_list_rows_error(self):
client = self._make_one()

# neither Table nor tableReference
with self.assertRaises(TypeError):
Expand Down

0 comments on commit 019e905

Please sign in to comment.