Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(bigquery): add Dataset.default_partition_expiration_ms and Table.require_partition_filter properties #9464

Merged
merged 3 commits into from
Oct 15, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions bigquery/google/cloud/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ class Dataset(object):
# Python property name -> API resource field name.
# NOTE(review): presumably consulted when property names passed to update
# calls are translated into request fields -- confirm against the rest of
# the class.
_PROPERTY_TO_API_FIELD = {
    "access_entries": "access",
    "created": "creationTime",
    "default_partition_expiration_ms": "defaultPartitionExpirationMs",
    "default_table_expiration_ms": "defaultTableExpirationMs",
    "friendly_name": "friendlyName",
}
Expand Down Expand Up @@ -460,6 +461,34 @@ def self_link(self):
"""
return self._properties.get("selfLink")

@property
def default_partition_expiration_ms(self):
    """Optional[int]: The default partition expiration for all
    partitioned tables in the dataset, in milliseconds.

    Once this property is set, all newly-created partitioned tables in
    the dataset will have a ``time_partitioning.expiration_ms`` property
    set to this value, and changing the value will only affect new
    tables, not existing ones. The storage in a partition will have an
    expiration time of its partition time plus this value.

    Setting this property overrides the use of
    ``default_table_expiration_ms`` for partitioned tables: only one of
    ``default_table_expiration_ms`` and
    ``default_partition_expiration_ms`` will be used for any new
    partitioned table. If you provide an explicit
    ``time_partitioning.expiration_ms`` when creating or updating a
    partitioned table, that value takes precedence over the default
    partition expiration time indicated by this property.
    """
    return _helpers._int_or_none(
        self._properties.get("defaultPartitionExpirationMs")
    )

@default_partition_expiration_ms.setter
def default_partition_expiration_ms(self, value):
    # Stored through ``_str_or_none``; the getter above converts the stored
    # value back with ``_int_or_none``.
    self._properties["defaultPartitionExpirationMs"] = _helpers._str_or_none(value)

@property
def default_table_expiration_ms(self):
"""Union[int, None]: Default expiration time for tables in the dataset
Expand Down
23 changes: 20 additions & 3 deletions bigquery/google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ class Table(object):
"view_query": "view",
"external_data_configuration": "externalDataConfiguration",
"encryption_configuration": "encryptionConfiguration",
"require_partition_filter": "requirePartitionFilter",
}

def __init__(self, table_ref, schema=None):
Expand Down Expand Up @@ -420,6 +421,18 @@ def path(self):
self.table_id,
)

@property
def require_partition_filter(self):
    """Optional[bool]: If set to true, queries over the partitioned table
    require a partition filter that can be used for partition elimination
    to be specified.

    ``None`` is returned when the ``requirePartitionFilter`` key is absent
    from the table's properties.
    """
    return self._properties.get("requirePartitionFilter")

@require_partition_filter.setter
def require_partition_filter(self, value):
    # The value is stored as given, without conversion or validation.
    self._properties["requirePartitionFilter"] = value

@property
def schema(self):
"""List[google.cloud.bigquery.schema.SchemaField]: Table's schema.
Expand Down Expand Up @@ -1722,9 +1735,9 @@ class TimePartitioning(object):
Number of milliseconds for which to keep the storage for a
partition.
require_partition_filter (bool, optional):
If set to true, queries over the partitioned table require a
partition filter that can be used for partition elimination to be
specified.
DEPRECATED: Use
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there any annotations we can/should add to help the docs and existing consumers understand that this is no longer the preferred mechanism?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Added deprecation warning message in 84e2c3c

:attr:`~google.cloud.bigquery.table.Table.require_partition_filter`,
instead.
"""

def __init__(
Expand Down Expand Up @@ -1777,6 +1790,10 @@ def expiration_ms(self, value):
@property
def require_partition_filter(self):
    """bool: Specifies whether partition filters are required for queries.

    DEPRECATED: Use
    :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`,
    instead.
    """
    return self._properties.get("requirePartitionFilter")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .. import update_dataset_default_partition_expiration


def test_update_dataset_default_partition_expiration(capsys, client, dataset_id):
    """Run the sample and check that it reports the 90-day expiration."""
    sample = update_dataset_default_partition_expiration
    sample.update_dataset_default_partition_expiration(client, dataset_id)

    # The sample sets the default partition expiration to 90 days (in ms).
    expected_ms = 90 * 24 * 60 * 60 * 1000
    expected_message = (
        "Updated dataset {} with new default partition expiration {}".format(
            dataset_id, expected_ms
        )
    )

    captured_stdout = capsys.readouterr()[0]
    assert expected_message in captured_stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from google.cloud import bigquery
from .. import update_table_require_partition_filter


def test_update_table_require_partition_filter(capsys, client, random_table_id):
    """Create a partitioned table, run the sample, and check its output."""
    # The sample updates an existing table, so create a partitioned one first.
    partition_column = "transaction_timestamp"
    partitioned_table = bigquery.Table(
        random_table_id, [bigquery.SchemaField(partition_column, "TIMESTAMP")]
    )
    partitioned_table.time_partitioning = bigquery.TimePartitioning(
        field=partition_column
    )
    client.create_table(partitioned_table)

    sample = update_table_require_partition_filter
    sample.update_table_require_partition_filter(client, random_table_id)

    expected_message = "Updated table '{}' with require_partition_filter=True".format(
        random_table_id
    )
    captured_stdout = capsys.readouterr()[0]
    assert expected_message in captured_stdout
43 changes: 43 additions & 0 deletions bigquery/samples/update_dataset_default_partition_expiration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def update_dataset_default_partition_expiration(client, dataset_id):
    """Set a 90-day default partition expiration on a dataset and print it.

    Args:
        client: Client object providing ``get_dataset`` and
            ``update_dataset``.
        dataset_id: ID of the dataset to update, e.g.
            ``'your-project.your_dataset'``.
    """

    # [START bigquery_update_dataset_partition_expiration]
    # TODO(developer): Import the client library.
    # from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to fetch.
    # dataset_id = 'your-project.your_dataset'

    dataset = client.get_dataset(dataset_id)  # Make an API request.

    # The default partition expiration is expressed in milliseconds and only
    # applies to newly created tables. This example uses 90 days.
    ninety_days_ms = 90 * 24 * 60 * 60 * 1000
    dataset.default_partition_expiration_ms = ninety_days_ms

    # Send only the changed field to the API.
    fields_to_update = ["default_partition_expiration_ms"]
    dataset = client.update_dataset(dataset, fields_to_update)  # Make an API request.

    message = "Updated dataset {}.{} with new default partition expiration {}".format(
        dataset.project, dataset.dataset_id, dataset.default_partition_expiration_ms
    )
    print(message)
    # [END bigquery_update_dataset_partition_expiration]
41 changes: 41 additions & 0 deletions bigquery/samples/update_table_require_partition_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def update_table_require_partition_filter(client, table_id):
    """Turn on ``require_partition_filter`` for a table and print the result.

    Args:
        client: Client object providing ``get_table`` and ``update_table``.
        table_id: ID of the table to update, e.g.
            ``'your-project.your_dataset.your_table'``.
    """

    # [START bigquery_update_table_require_partition_filter]
    # TODO(developer): Import the client library.
    # from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to fetch.
    # table_id = 'your-project.your_dataset.your_table'

    table = client.get_table(table_id)  # Make an API request.
    table.require_partition_filter = True

    # Send only the changed field to the API.
    fields_to_update = ["require_partition_filter"]
    table = client.update_table(table, fields_to_update)  # Make an API request.

    # View table properties
    message_template = "Updated table '{}.{}.{}' with require_partition_filter={}."
    message = message_template.format(
        table.project,
        table.dataset_id,
        table.table_id,
        table.require_partition_filter,
    )
    print(message)
    # [END bigquery_update_table_require_partition_filter]
8 changes: 8 additions & 0 deletions bigquery/tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,14 @@ def test_access_entries_setter(self):
dataset.access_entries = [phred, bharney]
self.assertEqual(dataset.access_entries, [phred, bharney])

def test_default_partition_expiration_ms(self):
    """Property round trip: unset, then an integer, then cleared again."""
    dataset = self._make_one("proj.dset")
    # Nothing has been assigned yet, so the getter reports None.
    assert dataset.default_partition_expiration_ms is None
    dataset.default_partition_expiration_ms = 12345
    # The value comes back as the same integer that was assigned.
    assert dataset.default_partition_expiration_ms == 12345
    # Assigning None clears the value.
    dataset.default_partition_expiration_ms = None
    assert dataset.default_partition_expiration_ms is None

def test_default_table_expiration_ms_setter_bad_value(self):
dataset = self._make_one(self.DS_REF)
with self.assertRaises(ValueError):
Expand Down
11 changes: 11 additions & 0 deletions bigquery/tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,17 @@ def test__build_resource_w_custom_field_not_in__properties(self):
with self.assertRaises(ValueError):
table._build_resource(["bad"])

def test_require_partitioning_filter(self):
    """Property round trip: unset, True, False, then cleared again."""
    table = self._make_one("proj.dset.tbl")
    # Nothing has been assigned yet, so the getter reports None.
    assert table.require_partition_filter is None
    table.require_partition_filter = True
    assert table.require_partition_filter
    table.require_partition_filter = False
    # An explicit False must be distinguishable from "unset" (None).
    assert table.require_partition_filter is not None
    assert not table.require_partition_filter
    # Assigning None clears the value.
    table.require_partition_filter = None
    assert table.require_partition_filter is None

def test_time_partitioning_getter(self):
from google.cloud.bigquery.table import TimePartitioning
from google.cloud.bigquery.table import TimePartitioningType
Expand Down