Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: resource tags in dataset #2090

Merged
merged 24 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
66e09af
feat: resource tags in dataset
keunsoopark Dec 19, 2024
120abc8
fix: fix unittests
keunsoopark Dec 19, 2024
3d03e73
Delete dataset/pyvenv.cfg
Linchin Dec 20, 2024
c56d005
Update google/cloud/bigquery/dataset.py
keunsoopark Dec 23, 2024
d26539f
Update google/cloud/bigquery/dataset.py
keunsoopark Dec 23, 2024
c647485
added system tests & fix unittest for none
keunsoopark Dec 23, 2024
3d317d1
add missing assert
keunsoopark Dec 23, 2024
359a9e1
remove venv
keunsoopark Dec 23, 2024
32ce734
Merge branch 'main' into feat/resource_tags_in_dataset
Linchin Jan 2, 2025
ae83228
Merge branch 'main' into feat/resource_tags_in_dataset
Linchin Jan 2, 2025
a780412
Merge branch 'main' into feat/resource_tags_in_dataset
Linchin Jan 3, 2025
ddb3bf6
include resourcemanager in noxfile.py
keunsoopark Jan 7, 2025
d38e99a
Merge branch 'main' into feat/resource_tags_in_dataset
keunsoopark Jan 7, 2025
14e3e47
fix fixture for tag keys
keunsoopark Jan 8, 2025
8c48758
register tags before using in tests
keunsoopark Jan 8, 2025
5a22c33
Merge branch 'main' into feat/resource_tags_in_dataset
keunsoopark Jan 9, 2025
34e4857
Merge branch 'main' into feat/resource_tags_in_dataset
keunsoopark Jan 9, 2025
7e75599
handle alreadyexist error
keunsoopark Jan 9, 2025
6883dc2
Merge branch 'main' into feat/resource_tags_in_dataset
Linchin Jan 10, 2025
dd823a7
fix: tag keys & values creation & deletion
keunsoopark Jan 10, 2025
eb85e24
fix comment
keunsoopark Jan 10, 2025
233f276
make tag keys unique
keunsoopark Jan 14, 2025
fed4570
remove unused import
keunsoopark Jan 14, 2025
3fd848b
Merge branch 'main' into feat/resource_tags_in_dataset
Linchin Jan 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions google/cloud/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,7 @@ class Dataset(object):
"storage_billing_model": "storageBillingModel",
"max_time_travel_hours": "maxTimeTravelHours",
"default_rounding_mode": "defaultRoundingMode",
"resource_tags": "resourceTags",
}

def __init__(self, dataset_ref) -> None:
Expand Down Expand Up @@ -801,6 +802,28 @@ def labels(self, value):
raise ValueError("Pass a dict")
self._properties["labels"] = value

@property
def resource_tags(self):
    """Dict[str, str]: Resource tags attached to the dataset.

    Optional. Keys must use the namespaced format
    ``"<parent_id>/<short_name>"`` (e.g. ``"123456789012/environment"``,
    where ``123456789012`` is the ID of the parent organization or
    project). Values are tag-value short names, e.g. ``"Production"``.

    Reading this property initializes the underlying ``resourceTags``
    entry to an empty dict when it is absent, so in-place updates
    (``dataset.resource_tags["k"] = "v"``) are reflected in the resource.
    """
    # Materialize the entry on first read so callers can mutate it in place.
    if "resourceTags" not in self._properties:
        self._properties["resourceTags"] = {}
    return self._properties["resourceTags"]

@resource_tags.setter
def resource_tags(self, value):
    """Set the dataset's resource tags.

    Args:
        value (Optional[Dict[str, str]]): mapping of namespaced tag keys
            to tag-value short names, or ``None`` to clear all tags.

    Raises:
        ValueError: if ``value`` is neither a dict nor ``None``.
    """
    if value is None or isinstance(value, dict):
        self._properties["resourceTags"] = value
    else:
        raise ValueError("Pass a dict")

@property
def default_encryption_configuration(self):
"""google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom
Expand Down
4 changes: 4 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,9 @@ def system(session):
# Data Catalog needed for the column ACL test with a real Policy Tag.
session.install("google-cloud-datacatalog", "-c", constraints_path)

# Resource Manager needed for test with a real Resource Tag.
session.install("google-cloud-resource-manager", "-c", constraints_path)

if session.python in ["3.11", "3.12"]:
extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]"
else:
Expand Down Expand Up @@ -366,6 +369,7 @@ def prerelease_deps(session):
session.install(
"freezegun",
"google-cloud-datacatalog",
"google-cloud-resource-manager",
"google-cloud-storage",
"google-cloud-testutils",
"psutil",
Expand Down
51 changes: 49 additions & 2 deletions tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
from google.cloud import storage
from google.cloud.datacatalog_v1 import types as datacatalog_types
from google.cloud.datacatalog_v1 import PolicyTagManagerClient
from google.cloud.resourcemanager_v3 import types as resourcemanager_types
from google.cloud.resourcemanager_v3 import TagKeysClient, TagValuesClient
import psutil
import pytest
from test_utils.retry import RetryErrors
Expand Down Expand Up @@ -159,6 +161,8 @@ def setUp(self):

def tearDown(self):
policy_tag_client = PolicyTagManagerClient()
tag_keys_client = TagKeysClient()
tag_values_client = TagValuesClient()

def _still_in_use(bad_request):
return any(
Expand All @@ -178,6 +182,10 @@ def _still_in_use(bad_request):
retry_in_use(Config.CLIENT.delete_table)(doomed)
elif isinstance(doomed, datacatalog_types.Taxonomy):
policy_tag_client.delete_taxonomy(name=doomed.name)
elif isinstance(doomed, resourcemanager_types.TagKey):
tag_keys_client.delete_tag_key(name=doomed.name).result()
elif isinstance(doomed, resourcemanager_types.TagValue):
tag_values_client.delete_tag_value(name=doomed.name).result()
else:
doomed.delete()

Expand Down Expand Up @@ -278,33 +286,72 @@ def test_create_dataset_with_default_rounding_mode(self):
self.assertTrue(_dataset_exists(dataset))
self.assertEqual(dataset.default_rounding_mode, "ROUND_HALF_EVEN")

def _create_resource_tag_key_and_values(self, key, values):
    """Create a Resource Manager tag key and its tag values for tests.

    Tags must be registered with Resource Manager before a dataset can
    reference them, so system tests call this helper first.

    Args:
        key (str): short name of the tag key to create under the test
            project (e.g. ``"env"``).
        values (Sequence[str]): short names of the tag values to create
            under that key (e.g. ``["prod", "dev"]``).

    Every created key/value is pushed onto the front of
    ``self.to_delete`` so that ``tearDown`` removes the values before
    their parent key.
    """
    key_client = TagKeysClient()
    value_client = TagValuesClient()

    parent = f"projects/{Config.CLIENT.project}"
    created_key = key_client.create_tag_key(
        tag_key=resourcemanager_types.TagKey(short_name=key, parent=parent)
    ).result()
    # Prepend so later insertions (the values) precede the key at cleanup.
    self.to_delete.insert(0, created_key)

    for short_name in values:
        created_value = value_client.create_tag_value(
            tag_value=resourcemanager_types.TagValue(
                short_name=short_name, parent=created_key.name
            )
        ).result()
        self.to_delete.insert(0, created_value)

def test_update_dataset(self):
dataset = self.temp_dataset(_make_dataset_id("update_dataset"))
self.assertTrue(_dataset_exists(dataset))
self.assertIsNone(dataset.friendly_name)
self.assertIsNone(dataset.description)
self.assertEqual(dataset.labels, {})
self.assertEqual(dataset.resource_tags, {})
self.assertIs(dataset.is_case_insensitive, False)

# Tags need to be created before they can be used in a dataset.
self._create_resource_tag_key_and_values("env", ["prod", "dev"])
self._create_resource_tag_key_and_values("component", ["batch"])
self._create_resource_tag_key_and_values("project", ["atlas"])

dataset.friendly_name = "Friendly"
dataset.description = "Description"
dataset.labels = {"priority": "high", "color": "blue"}
dataset.resource_tags = {f"{Config.CLIENT.project}/env": "prod", f"{Config.CLIENT.project}/component": "batch"}
dataset.is_case_insensitive = True
ds2 = Config.CLIENT.update_dataset(
dataset, ("friendly_name", "description", "labels", "is_case_insensitive")
dataset, ("friendly_name", "description", "labels", "resource_tags", "is_case_insensitive")
)
self.assertEqual(ds2.friendly_name, "Friendly")
self.assertEqual(ds2.description, "Description")
self.assertEqual(ds2.labels, {"priority": "high", "color": "blue"})
self.assertEqual(ds2.resource_tags, {f"{Config.CLIENT.project}/env": "prod", f"{Config.CLIENT.project}/component": "batch"})
self.assertIs(ds2.is_case_insensitive, True)

ds2.labels = {
"color": "green", # change
"shape": "circle", # add
"priority": None, # delete
}
ds3 = Config.CLIENT.update_dataset(ds2, ["labels"])
ds2.resource_tags = {
f"{Config.CLIENT.project}/env": "dev", # change
f"{Config.CLIENT.project}/project": "atlas", # add
f"{Config.CLIENT.project}/component": None, # delete
}
ds3 = Config.CLIENT.update_dataset(ds2, ["labels", "resource_tags"])
self.assertEqual(ds3.labels, {"color": "green", "shape": "circle"})
self.assertEqual(ds3.resource_tags, {f"{Config.CLIENT.project}/env": "dev", f"{Config.CLIENT.project}/project": "atlas"})

# Remove all tags
ds3.resource_tags = None
ds4 = Config.CLIENT.update_table(ds3, ["resource_tags"])
self.assertEqual(ds4.resource_tags, {})

# If we try to update using d2 again, it will fail because the
# previous update changed the ETag.
Expand Down
6 changes: 6 additions & 0 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2028,6 +2028,7 @@ def test_update_dataset(self):
LABELS = {"priority": "high"}
ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}]
EXP = 17
RESOURCE_TAGS = {"123456789012/key": "value"}
RESOURCE = {
"datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
"etag": "etag",
Expand All @@ -2037,6 +2038,7 @@ def test_update_dataset(self):
"defaultTableExpirationMs": EXP,
"labels": LABELS,
"access": ACCESS,
"resourceTags": RESOURCE_TAGS,
}
creds = _make_credentials()
client = self._make_one(project=self.PROJECT, credentials=creds)
Expand All @@ -2048,12 +2050,14 @@ def test_update_dataset(self):
ds.default_table_expiration_ms = EXP
ds.labels = LABELS
ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")]
ds.resource_tags = RESOURCE_TAGS
fields = [
"description",
"friendly_name",
"location",
"labels",
"access_entries",
"resource_tags",
]

with mock.patch(
Expand All @@ -2077,6 +2081,7 @@ def test_update_dataset(self):
"location": LOCATION,
"labels": LABELS,
"access": ACCESS,
"resourceTags": RESOURCE_TAGS,
},
path="/" + PATH,
timeout=7.5,
Expand All @@ -2086,6 +2091,7 @@ def test_update_dataset(self):
self.assertEqual(ds2.location, ds.location)
self.assertEqual(ds2.labels, ds.labels)
self.assertEqual(ds2.access_entries, ds.access_entries)
self.assertEqual(ds2.resource_tags, ds.resource_tags)

# ETag becomes If-Match header.
ds._properties["etag"] = "etag"
Expand Down
5 changes: 5 additions & 0 deletions tests/unit/test_create_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
"tableId": "northern-hemisphere",
}
DEFAULT_ROUNDING_MODE = "ROUND_HALF_EVEN"
RESOURCE_TAGS = {"123456789012/foo": "bar"}
RESOURCE = {
"datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
"etag": "etag",
Expand All @@ -76,6 +77,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
"labels": LABELS,
"access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}],
"defaultRoundingMode": DEFAULT_ROUNDING_MODE,
"resourceTags": RESOURCE_TAGS,
}
conn = client._connection = make_connection(RESOURCE)
entries = [
Expand All @@ -91,6 +93,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
before.default_table_expiration_ms = 3600
before.location = LOCATION
before.labels = LABELS
before.resource_tags = RESOURCE_TAGS
before.default_rounding_mode = DEFAULT_ROUNDING_MODE
after = client.create_dataset(before)
assert after.dataset_id == DS_ID
Expand All @@ -103,6 +106,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
assert after.default_table_expiration_ms == 3600
assert after.labels == LABELS
assert after.default_rounding_mode == DEFAULT_ROUNDING_MODE
assert after.resource_tags == RESOURCE_TAGS

conn.api_request.assert_called_once_with(
method="POST",
Expand All @@ -119,6 +123,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
{"view": VIEW, "role": None},
],
"labels": LABELS,
"resourceTags": RESOURCE_TAGS,
},
timeout=DEFAULT_TIMEOUT,
)
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,6 +894,28 @@ def test_location_setter(self):
dataset.location = "LOCATION"
self.assertEqual(dataset.location, "LOCATION")

def test_resource_tags_update_in_place(self):
dataset = self._make_one(self.DS_REF)
tags = dataset.resource_tags
tags["123456789012/foo"] = "bar" # update in place
self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"})

def test_resource_tags_setter(self):
dataset = self._make_one(self.DS_REF)
dataset.resource_tags = {"123456789012/foo": "bar"}
self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"})

def test_resource_tags_setter_bad_value(self):
dataset = self._make_one(self.DS_REF)
with self.assertRaises(ValueError):
dataset.resource_tags = "invalid"
with self.assertRaises(ValueError):
dataset.resource_tags = 123

def test_resource_tags_getter_missing_value(self):
dataset = self._make_one(self.DS_REF)
self.assertEqual(dataset.resource_tags, {})

def test_labels_update_in_place(self):
dataset = self._make_one(self.DS_REF)
del dataset._properties["labels"] # don't start w/ existing dict
Expand Down