Skip to content

Commit d20afc4

Browse files
tseaver authored and tswast committed
Add 'QueryJob.referenced_tables' property. (#3801)
1 parent 9103ba0 commit d20afc4

File tree

2 files changed

+86
-2
lines changed

2 files changed

+86
-2
lines changed

bigquery/google/cloud/bigquery/job.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1387,6 +1387,36 @@ def statement_type(self):
13871387
"""
13881388
return self._job_statistics().get('statementType')
13891389

1390+
@property
1391+
def referenced_tables(self):
1392+
"""Return referenced tables from job statistics, if present.
1393+
1394+
See:
1395+
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.referencedTables
1396+
1397+
:rtype: list of :class:`~google.cloud.bigquery.table.Table`
1398+
:returns: tables referenced by the query, or an empty list
1399+
if the query has not yet completed.
1400+
"""
1401+
tables = []
1402+
client = self._require_client(None)
1403+
datasets_by_project_name = {}
1404+
1405+
for table in self._job_statistics().get('referencedTables', ()):
1406+
1407+
t_project = table['projectId']
1408+
1409+
ds_name = table['datasetId']
1410+
t_dataset = datasets_by_project_name.get((t_project, ds_name))
1411+
if t_dataset is None:
1412+
t_dataset = client.dataset(ds_name, project=t_project)
1413+
datasets_by_project_name[(t_project, ds_name)] = t_dataset
1414+
1415+
t_name = table['tableId']
1416+
tables.append(t_dataset.table(t_name))
1417+
1418+
return tables
1419+
13901420
def query_results(self):
13911421
"""Construct a QueryResults instance, bound to this job.
13921422

bigquery/tests/unit/test_job.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1833,6 +1833,60 @@ def test_statement_type(self):
18331833
query_stats['statementType'] = statement_type
18341834
self.assertEqual(job.statement_type, statement_type)
18351835

1836+
def test_referenced_tables(self):
1837+
from google.cloud.bigquery.dataset import Dataset
1838+
from google.cloud.bigquery.table import Table
1839+
1840+
ref_tables_resource = [{
1841+
'projectId': self.PROJECT,
1842+
'datasetId': 'dataset',
1843+
'tableId': 'local1',
1844+
}, {
1845+
1846+
'projectId': self.PROJECT,
1847+
'datasetId': 'dataset',
1848+
'tableId': 'local2',
1849+
}, {
1850+
1851+
'projectId': 'other-project-123',
1852+
'datasetId': 'other-dataset',
1853+
'tableId': 'other-table',
1854+
}]
1855+
client = _Client(self.PROJECT)
1856+
job = self._make_one(self.JOB_NAME, self.QUERY, client)
1857+
self.assertEqual(job.referenced_tables, [])
1858+
1859+
statistics = job._properties['statistics'] = {}
1860+
self.assertEqual(job.referenced_tables, [])
1861+
1862+
query_stats = statistics['query'] = {}
1863+
self.assertEqual(job.referenced_tables, [])
1864+
1865+
query_stats['referencedTables'] = ref_tables_resource
1866+
1867+
local1, local2, remote = job.referenced_tables
1868+
1869+
self.assertIsInstance(local1, Table)
1870+
self.assertEqual(local1.name, 'local1')
1871+
self.assertIsInstance(local1._dataset, Dataset)
1872+
self.assertEqual(local1.dataset_name, 'dataset')
1873+
self.assertEqual(local1.project, self.PROJECT)
1874+
self.assertIs(local1._dataset._client, client)
1875+
1876+
self.assertIsInstance(local2, Table)
1877+
self.assertEqual(local2.name, 'local2')
1878+
self.assertIsInstance(local2._dataset, Dataset)
1879+
self.assertEqual(local2.dataset_name, 'dataset')
1880+
self.assertEqual(local2.project, self.PROJECT)
1881+
self.assertIs(local2._dataset._client, client)
1882+
1883+
self.assertIsInstance(remote, Table)
1884+
self.assertEqual(remote.name, 'other-table')
1885+
self.assertIsInstance(remote._dataset, Dataset)
1886+
self.assertEqual(remote.dataset_name, 'other-dataset')
1887+
self.assertEqual(remote.project, 'other-project-123')
1888+
self.assertIs(remote._dataset._client, client)
1889+
18361890
def test_query_results(self):
18371891
from google.cloud.bigquery.query import QueryResults
18381892

@@ -2490,10 +2544,10 @@ def __init__(self, project='project', connection=None):
24902544
self.project = project
24912545
self._connection = connection
24922546

2493-
def dataset(self, name):
2547+
def dataset(self, name, project=None):
24942548
from google.cloud.bigquery.dataset import Dataset
24952549

2496-
return Dataset(name, client=self)
2550+
return Dataset(name, client=self, project=project)
24972551

24982552
def _get_query_results(self, job_id):
24992553
from google.cloud.bigquery.query import QueryResults

0 commit comments

Comments
 (0)