Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow string in addition to DatasetReference / TableReference in Clie… #6164

Merged
merged 2 commits into from
Oct 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
469 changes: 282 additions & 187 deletions bigquery/google/cloud/bigquery/client.py

Large diffs are not rendered by default.

42 changes: 28 additions & 14 deletions bigquery/google/cloud/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,35 +211,49 @@ def from_api_repr(cls, resource):
return cls(project, dataset_id)

@classmethod
def from_string(cls, dataset_id, default_project=None):
    """Construct a dataset reference from dataset ID string.

    Args:
        dataset_id (str):
            A dataset ID in standard SQL format. If ``default_project``
            is not specified, this must include both the project ID and
            the dataset ID, separated by ``.``.
        default_project (str):
            Optional. The project ID to use when ``dataset_id`` does not
            include a project ID.

    Returns:
        DatasetReference:
            Dataset reference parsed from ``dataset_id``.

    Examples:
        >>> DatasetReference.from_string('my-project-id.some_dataset')
        DatasetReference('my-project-id', 'some_dataset')

    Raises:
        ValueError:
            If ``dataset_id`` is not a fully-qualified dataset ID in
            standard SQL format.
    """
    # Defaults: treat the whole string as the dataset ID and fall back
    # to the caller-supplied project; both may be overridden below.
    output_dataset_id = dataset_id
    output_project_id = default_project
    parts = dataset_id.split('.')

    if len(parts) == 1 and not default_project:
        # Bare dataset ID with no project to fall back on.
        raise ValueError(
            'When default_project is not set, dataset_id must be a '
            'fully-qualified dataset ID in standard SQL format. '
            'e.g. "project.dataset_id", got {}'.format(dataset_id))
    elif len(parts) == 2:
        # "project.dataset" overrides any default project.
        output_project_id, output_dataset_id = parts
    elif len(parts) > 2:
        raise ValueError(
            'Too many parts in dataset_id. Expected a fully-qualified '
            'dataset ID in standard SQL format. e.g. '
            '"project.dataset_id", got {}'.format(dataset_id))

    return cls(output_project_id, output_dataset_id)

def to_api_repr(self):
"""Construct the API resource representation of this dataset reference
Expand Down
51 changes: 38 additions & 13 deletions bigquery/google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,37 +172,62 @@ def path(self):
self._project, self._dataset_id, self._table_id)

@classmethod
def from_string(cls, table_id, default_project=None):
    """Construct a table reference from table ID string.

    Args:
        table_id (str):
            A table ID in standard SQL format. If ``default_project``
            is not specified, this must include a project ID, dataset
            ID, and table ID, each separated by ``.``.
        default_project (str):
            Optional. The project ID to use when ``table_id`` does not
            include a project ID.

    Returns:
        TableReference: Table reference parsed from ``table_id``.

    Examples:
        >>> TableReference.from_string('my-project.mydataset.mytable')
        TableRef...(DatasetRef...('my-project', 'mydataset'), 'mytable')

    Raises:
        ValueError:
            If ``table_id`` is not a fully-qualified table ID in
            standard SQL format.
    """
    # Local import avoids a circular dependency between table.py and
    # dataset.py at module load time.
    from google.cloud.bigquery.dataset import DatasetReference

    output_project_id = default_project
    output_dataset_id = None
    output_table_id = None
    parts = table_id.split('.')

    if len(parts) < 2:
        # A lone table ID is never enough: we need at least a dataset.
        raise ValueError(
            'table_id must be a fully-qualified table ID in '
            'standard SQL format. e.g. "project.dataset.table", got '
            '{}'.format(table_id))
    elif len(parts) == 2:
        if not default_project:
            raise ValueError(
                'When default_project is not set, table_id must be a '
                'fully-qualified table ID in standard SQL format. '
                'e.g. "project.dataset_id.table_id", got {}'.format(
                    table_id))
        output_dataset_id, output_table_id = parts
    elif len(parts) == 3:
        # Fully-qualified ID overrides any default project.
        output_project_id, output_dataset_id, output_table_id = parts
    elif len(parts) > 3:
        raise ValueError(
            'Too many parts in table_id. Must be a fully-qualified table '
            'ID in standard SQL format. e.g. "project.dataset.table", '
            'got {}'.format(table_id))

    return cls(
        DatasetReference(output_project_id, output_dataset_id),
        output_table_id,
    )

@classmethod
def from_api_repr(cls, resource):
Expand Down
62 changes: 48 additions & 14 deletions bigquery/tests/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,17 +175,27 @@ def test_create_dataset(self):
self.assertEqual(dataset.project, Config.CLIENT.project)

def test_get_dataset(self):
    """System test: ``get_dataset`` accepts a reference, a bare dataset
    ID string, and a fully-qualified ``project.dataset`` string."""
    dataset_id = _make_dataset_id('get_dataset')
    client = Config.CLIENT
    dataset_arg = Dataset(client.dataset(dataset_id))
    dataset_arg.friendly_name = 'Friendly'
    dataset_arg.description = 'Description'
    # create_dataset may hit transient 403s right after project setup.
    dataset = retry_403(client.create_dataset)(dataset_arg)
    self.to_delete.append(dataset)
    dataset_ref = client.dataset(dataset_id)

    # Get with a reference.
    got = client.get_dataset(dataset_ref)
    self.assertEqual(got.friendly_name, 'Friendly')
    self.assertEqual(got.description, 'Description')

    # Get with a string.
    got = client.get_dataset(dataset_id)
    self.assertEqual(got.friendly_name, 'Friendly')
    self.assertEqual(got.description, 'Description')

    # Get with a fully-qualified string.
    got = client.get_dataset('{}.{}'.format(client.project, dataset_id))
    self.assertEqual(got.friendly_name, 'Friendly')
    self.assertEqual(got.description, 'Description')

Expand Down Expand Up @@ -281,6 +291,14 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self):
self.assertEqual(time_partitioning.field, 'transaction_time')
self.assertEqual(table.clustering_fields, ['user_email', 'store_code'])

def test_delete_dataset_with_string(self):
    """System test: ``delete_dataset`` accepts a dataset ID string."""
    # Create a throwaway dataset, then delete it by its ID string
    # (rather than by reference) to exercise the string code path.
    client = Config.CLIENT
    dataset_id = _make_dataset_id('delete_table_true')
    dataset_ref = client.dataset(dataset_id)
    retry_403(client.create_dataset)(Dataset(dataset_ref))
    self.assertTrue(_dataset_exists(dataset_ref))

    client.delete_dataset(dataset_id)

    self.assertFalse(_dataset_exists(dataset_ref))

def test_delete_dataset_delete_contents_true(self):
dataset_id = _make_dataset_id('delete_table_true')
dataset = retry_403(Config.CLIENT.create_dataset)(
Expand All @@ -304,20 +322,27 @@ def test_delete_dataset_delete_contents_false(self):
Config.CLIENT.delete_dataset(dataset)

def test_get_table_w_public_dataset(self):
    """System test: ``get_table`` accepts both a reference and a
    fully-qualified ``project.dataset.table`` string."""
    public = 'bigquery-public-data'
    dataset_id = 'samples'
    table_id = 'shakespeare'
    table_ref = DatasetReference(public, dataset_id).table(table_id)

    # Get table with reference.
    table = Config.CLIENT.get_table(table_ref)
    self.assertEqual(table.table_id, table_id)
    self.assertEqual(table.dataset_id, dataset_id)
    self.assertEqual(table.project, public)
    schema_names = [field.name for field in table.schema]
    self.assertEqual(
        schema_names, ['word', 'word_count', 'corpus', 'corpus_date'])

    # Get table with string.
    table = Config.CLIENT.get_table(
        '{}.{}.{}'.format(public, dataset_id, table_id))
    self.assertEqual(table.table_id, table_id)
    self.assertEqual(table.dataset_id, dataset_id)
    self.assertEqual(table.project, public)

def test_list_partitions(self):
table_ref = DatasetReference(
'bigquery-public-data',
Expand All @@ -327,8 +352,8 @@ def test_list_partitions(self):
self.assertGreater(len(all_rows), 1000)

def test_list_tables(self):
DATASET_ID = _make_dataset_id('list_tables')
dataset = self.temp_dataset(DATASET_ID)
dataset_id = _make_dataset_id('list_tables')
dataset = self.temp_dataset(dataset_id)
# Retrieve tables before any are created for the dataset.
iterator = Config.CLIENT.list_tables(dataset)
all_tables = list(iterator)
Expand All @@ -352,9 +377,18 @@ def test_list_tables(self):
self.assertIsNone(iterator.next_page_token)
created = [table for table in all_tables
if (table.table_id in tables_to_create and
table.dataset_id == DATASET_ID)]
table.dataset_id == dataset_id)]
self.assertEqual(len(created), len(tables_to_create))

# List tables with a string ID.
iterator = Config.CLIENT.list_tables(dataset_id)
self.assertGreater(len(list(iterator)), 0)

# List tables with a fully-qualified string ID.
iterator = Config.CLIENT.list_tables(
'{}.{}'.format(Config.CLIENT.project, dataset_id))
self.assertGreater(len(list(iterator)), 0)

def test_update_table(self):
dataset = self.temp_dataset(_make_dataset_id('update_table'))

Expand Down
Loading