Call get_table in list_rows if the schema is not available (#7621)

This is kinder than raising an error message saying to call get_table yourself. Also, it guarantees the schema is as up-to-date as possible. This also fixes an issue where rows could not be listed on the TableListItem objects that are returned from list_tables.
googleapis · Apr 1, 2019 · 019e905 · 019e905
1 parent 28ad1a8
commit 019e905
Show file tree

Hide file tree

Showing 3 changed files with 77 additions and 20 deletions.
diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py
@@ -1694,15 +1694,19 @@ def list_rows(
         Args:
             table (Union[ \
                 :class:`~google.cloud.bigquery.table.Table`, \
+                :class:`~google.cloud.bigquery.table.TableListItem`, \
                 :class:`~google.cloud.bigquery.table.TableReference`, \
                 str, \
             ]):
-                The table to list, or a reference to it.
+                The table to list, or a reference to it. When the table
+                object does not contain a schema and ``selected_fields`` is
+                not supplied, this method calls ``get_table`` to fetch the
+                table schema.
             selected_fields (Sequence[ \
                 :class:`~google.cloud.bigquery.schema.SchemaField` \
             ]):
-                The fields to return. Required if ``table`` is a
-                :class:`~google.cloud.bigquery.table.TableReference`.
+                The fields to return. If not supplied, data for all columns
+                are downloaded.
             max_results (int):
                 (Optional) maximum number of rows to return.
             page_token (str):
@@ -1741,14 +1745,11 @@ def list_rows(
         if selected_fields is not None:
             schema = selected_fields
 
-        # Allow listing rows of an empty table by not raising if the table exists.
-        elif len(schema) == 0 and table.created is None:
-            raise ValueError(
-                (
-                    "Could not determine schema for table '{}'. Call client.get_table() "
-                    "or pass in a list of schema fields to the selected_fields argument."
-                ).format(table)
-            )
+        # No schema, but no selected_fields. Assume the developer wants all
+        # columns, so get the table resource for them rather than failing.
+        elif len(schema) == 0:
+            table = self.get_table(table.reference, retry=retry)
+            schema = table.schema
 
         params = {}
         if selected_fields is not None:

diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py
@@ -1752,5 +1752,9 @@ def _table_arg_to_table(value, default_project=None):
         value = TableReference.from_string(value, default_project=default_project)
     if isinstance(value, TableReference):
         value = Table(value)
+    if isinstance(value, TableListItem):
+        newvalue = Table(value.reference)
+        newvalue._properties = value._properties
+        value = newvalue
 
     return value
diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
@@ -4276,20 +4276,72 @@ def test_list_rows_w_record_schema(self):
             method="GET", path="/%s" % PATH, query_params={}
         )
 
-    def test_list_rows_errors(self):
-        from google.cloud.bigquery.table import Table
+    def test_list_rows_with_missing_schema(self):
+        from google.cloud.bigquery.table import Table, TableListItem
+
+        table_path = "/projects/{}/datasets/{}/tables/{}".format(
+            self.PROJECT, self.DS_ID, self.TABLE_ID
+        )
+        tabledata_path = "{}/data".format(table_path)
+
+        table_list_item_data = {
+            "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+            "tableReference": {
+                "projectId": self.PROJECT,
+                "datasetId": self.DS_ID,
+                "tableId": self.TABLE_ID,
+            },
+        }
+        table_data = copy.deepcopy(table_list_item_data)
+        # Intentionally make wrong, since total_rows can update during iteration.
+        table_data["numRows"] = 2
+        table_data["schema"] = {
+            "fields": [
+                {"name": "name", "type": "STRING"},
+                {"name": "age", "type": "INTEGER"},
+            ]
+        }
+        rows_data = {
+            "totalRows": 3,
+            "pageToken": None,
+            "rows": [
+                {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+                {"f": [{"v": "Bharney Rhubble"}, {"v": "31"}]},
+                {"f": [{"v": "Wylma Phlyntstone"}, {"v": None}]},
+            ],
+        }
 
         creds = _make_credentials()
         http = object()
-        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
 
-        # table ref with no selected fields
-        with self.assertRaises(ValueError):
-            client.list_rows(self.TABLE_REF)
+        schemaless_tables = (
+            "{}.{}".format(self.DS_ID, self.TABLE_ID),
+            self.TABLE_REF,
+            Table(self.TABLE_REF),
+            TableListItem(table_list_item_data),
+        )
 
-        # table with no schema
-        with self.assertRaises(ValueError):
-            client.list_rows(Table(self.TABLE_REF))
+        for table in schemaless_tables:
+            client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+            conn = client._connection = _make_connection(table_data, rows_data)
+
+            row_iter = client.list_rows(table)
+
+            conn.api_request.assert_called_once_with(method="GET", path=table_path)
+            conn.api_request.reset_mock()
+            self.assertIsNone(row_iter.total_rows, msg=repr(table))
+
+            rows = list(row_iter)
+            conn.api_request.assert_called_once_with(
+                method="GET", path=tabledata_path, query_params={}
+            )
+            self.assertEqual(row_iter.total_rows, 3, msg=repr(table))
+            self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table))
+            self.assertEqual(rows[1].age, 31, msg=repr(table))
+            self.assertIsNone(rows[2].age, msg=repr(table))
+
+    def test_list_rows_error(self):
+        client = self._make_one()
 
         # neither Table nor tableReference
         with self.assertRaises(TypeError):