Skip to content

Commit c0f918b

Browse files
[DLP] Implemented create and update stored infoType code samples (#10158)
## Description Implemented create and update stored infoType code samples. 1. [Create stored InfoType](https://cloud.google.com/dlp/docs/creating-stored-infotypes#dlp-create-stored-dictionary-infotype-protocol) 2. [Update stored InfoType](https://cloud.google.com/dlp/docs/creating-stored-infotypes#dlp-update-stored-dictionary-infotype-protocol) I have created two new files (stored_infotype and stored_infotype_test) for code samples related to stored infoTypes, as none of the existing files seemed suitable for these samples. Fixes #<ISSUE-NUMBER> Note: Before submitting a pull request, please open an issue for discussion if you are not associated with Google. ## Checklist - [x] I have followed [Sample Guidelines from AUTHORING_GUIDE.MD](https://togithub.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md) - [ ] README is updated to include [all relevant information](https://togithub.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md#readme-file) - [x] **Tests** pass: `nox -s py-3.9` (see [Test Environment Setup](https://togithub.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md#test-environment-setup)) - [x] **Lint** pass: `nox -s lint` (see [Test Environment Setup](https://togithub.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md#test-environment-setup)) - [ ] These samples need a new **API enabled** in testing projects to pass (let us know which ones) - [ ] These samples need a new/updated **env vars** in testing projects set to pass (let us know which ones) - [ ] This sample adds a new sample directory, and I updated the [CODEOWNERS file](https://togithub.com/GoogleCloudPlatform/python-docs-samples/blob/main/.github/CODEOWNERS) with the codeowners for this sample - [ ] This sample adds a new **Product API**, and I updated the [Blunderbuss issue/PR auto-assigner](https://togithub.com/GoogleCloudPlatform/python-docs-samples/blob/main/.github/blunderbuss.yml) with the codeowners 
for this sample - [x] Please **merge** this PR for me once it is approved
1 parent ebd7b5d commit c0f918b

File tree

4 files changed

+314
-0
lines changed

4 files changed

+314
-0
lines changed

.github/header-checker-lint.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ ignoreFiles:
2323
- "dlp/snippets/resources/accounts.txt"
2424
- "dlp/snippets/resources/harmless.txt"
2525
- "dlp/snippets/resources/test.txt"
26+
- "dlp/snippets/resources/term_list.txt"
2627

2728
ignoreLicenseYear: true
2829

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
kevin2010
2+
gary1998
3+
john879
4+
james678

dlp/snippets/stored_infotype.py

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Sample app that queries the Data Loss Prevention API for stored
16+
infoTypes."""
17+
18+
19+
import argparse
20+
import os
21+
22+
23+
# [START dlp_create_stored_infotype]
import google.cloud.dlp  # noqa: F811, E402


def create_stored_infotype(
    project: str,
    stored_info_type_id: str,
    output_bucket_name: str,
) -> None:
    """Create a stored infoType via the Data Loss Prevention API.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        stored_info_type_id: The identifier for large custom dictionary.
        output_bucket_name: The name of the bucket in Google Cloud Storage
            that would store the created dictionary.
    """

    # Instantiate a client.
    dlp_client = google.cloud.dlp_v2.DlpServiceClient()

    # Term-list source: a publicly available BigQuery table
    # (bigquery-public-data.samples.github_nested) holding all GitHub
    # usernames used in commits. Either a BigQuery field or a GCS file can
    # serve as the term-list input option.
    term_list_source = {
        "table": {
            "project_id": "bigquery-public-data",
            "dataset_id": "samples",
            "table_id": "github_nested",
        },
        "field": {"name": "actor"},
    }

    # Stored infoType configuration: a large custom dictionary built from
    # the term list above, written out to the given GCS bucket.
    stored_info_type_config = {
        "display_name": "GitHub usernames",
        "description": "Dictionary of GitHub usernames used in commits",
        "large_custom_dictionary": {
            "output_path": {"path": f"gs://{output_bucket_name}"},
            "big_query_field": term_list_source,
        },
    }

    # Call the API, addressing the project by its full resource id.
    response = dlp_client.create_stored_info_type(
        request={
            "parent": f"projects/{project}",
            "config": stored_info_type_config,
            "stored_info_type_id": stored_info_type_id,
        }
    )

    # Print the result.
    print(f"Created Stored InfoType: {response.name}")


# [END dlp_create_stored_infotype]
83+
84+
85+
# [START dlp_update_stored_infotype]
import google.cloud.dlp  # noqa: F811, E402


def update_stored_infotype(
    project: str,
    stored_info_type_id: str,
    gcs_input_file_path: str,
    output_bucket_name: str,
) -> None:
    """Update a stored infoType detector via the Data Loss Prevention API,
    switching the source term list from one stored in BigQuery to one
    stored in Cloud Storage.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        stored_info_type_id: The identifier of stored infoType which is to
            be updated.
        gcs_input_file_path: The url in the format <bucket>/<path_to_file>
            for the location of the source term list.
        output_bucket_name: The name of the bucket in Google Cloud Storage
            where large dictionary is stored.
    """

    # Instantiate a client.
    dlp_client = google.cloud.dlp_v2.DlpServiceClient()

    # New stored infoType configuration: the dictionary source becomes a
    # file set in Cloud Storage instead of a BigQuery field.
    new_config = {
        "large_custom_dictionary": {
            "output_path": {"path": f"gs://{output_bucket_name}"},
            "cloud_storage_file_set": {"url": f"gs://{gcs_input_file_path}"},
        }
    }

    # Only fields named in the mask are updated. For details on building
    # field mask paths, see
    # https://protobuf.dev/reference/protobuf/google.protobuf/#field-mask
    update_mask = {
        "paths": ["large_custom_dictionary.cloud_storage_file_set.url"]
    }

    # Convert the stored infoType id into a full resource id.
    resource_name = f"projects/{project}/storedInfoTypes/{stored_info_type_id}"

    # Call the API.
    response = dlp_client.update_stored_info_type(
        request={
            "name": resource_name,
            "config": new_config,
            "update_mask": update_mask,
        }
    )

    # Print the result.
    print(f"Updated stored infoType successfully: {response.name}")


# [END dlp_update_stored_infotype]
147+
148+
149+
if __name__ == "__main__":
    # Command-line driver: `create` builds a stored infoType from the public
    # BigQuery term list; `update` repoints an existing one at a GCS file.
    default_project = os.environ.get("GOOGLE_CLOUD_PROJECT")

    parser = argparse.ArgumentParser(description=__doc__)
    subparsers = parser.add_subparsers(
        dest="content", help="Select how to submit content to the API."
    )
    subparsers.required = True

    create_parser = subparsers.add_parser("create", help="Creates a stored infoType.")
    update_parser = subparsers.add_parser("update", help="Updates the stored infoType.")

    # Arguments common to both sub-commands, in the same positional order.
    for sub_parser in (create_parser, update_parser):
        sub_parser.add_argument(
            "--project",
            help="The Google Cloud project id to use as a parent resource.",
            default=default_project,
        )
        sub_parser.add_argument(
            "stored_info_type_id",
            help="The identifier for large custom dictionary.",
        )

    create_parser.add_argument(
        "output_bucket_name",
        help="The name of the bucket in Google Cloud Storage that "
        "would store the created dictionary.",
    )

    update_parser.add_argument(
        "gcs_input_file_path",
        help="The url in the format <bucket>/<path_to_file> for the "
        "location of the source term list.",
    )
    update_parser.add_argument(
        "output_bucket_name",
        help="The name of the bucket in Google Cloud Storage that "
        "would store the created dictionary.",
    )

    args = parser.parse_args()

    if args.content == "create":
        create_stored_infotype(
            args.project,
            args.stored_info_type_id,
            args.output_bucket_name,
        )
    elif args.content == "update":
        update_stored_infotype(
            args.project,
            args.stored_info_type_id,
            args.gcs_input_file_path,
            args.output_bucket_name,
        )
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the 'License');
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an 'AS IS' BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
from typing import Iterator
17+
import uuid
18+
19+
import google.api_core.exceptions
20+
import google.cloud.dlp_v2
21+
import google.cloud.exceptions
22+
import google.cloud.storage
23+
import pytest
24+
25+
import stored_infotype
26+
27+
# Test configuration. UNIQUE_STRING makes resource names unique per test
# run so concurrent CI runs do not fight over the same GCS bucket or
# stored infoType.
GCLOUD_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT")
UNIQUE_STRING = str(uuid.uuid4()).split("-")[0]
TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test" + UNIQUE_STRING
RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), "resources")
RESOURCE_FILE_NAMES = ["term_list.txt"]
# Suffix the stored infoType id with the per-run unique string; a fixed id
# collides (AlreadyExists) across concurrent runs or with leftovers from a
# previously failed run, just like the bucket name would.
STORED_INFO_TYPE_ID = "github-usernames-" + UNIQUE_STRING

# Shared client used by the cleanup helper below.
DLP_CLIENT = google.cloud.dlp_v2.DlpServiceClient()
35+
36+
37+
@pytest.fixture(scope="module")
def bucket() -> Iterator[google.cloud.storage.bucket.Bucket]:
    """Yield a GCS bucket pre-loaded with the test resource files.

    Creates (or reuses) the bucket, uploads every file listed in
    RESOURCE_FILE_NAMES, and tears the whole bucket down afterwards.
    """
    storage_client = google.cloud.storage.Client()
    try:
        test_bucket = storage_client.get_bucket(TEST_BUCKET_NAME)
    except google.cloud.exceptions.NotFound:
        test_bucket = storage_client.create_bucket(TEST_BUCKET_NAME)

    # Upload the resource files, remembering each blob for cleanup.
    uploaded_blobs = []
    for file_name in RESOURCE_FILE_NAMES:
        blob = test_bucket.blob(file_name)
        blob.upload_from_filename(os.path.join(RESOURCE_DIRECTORY, file_name))
        uploaded_blobs.append(blob)

    # Hand the bucket to the test; everything below runs as teardown.
    yield test_bucket

    # Delete the uploaded files, tolerating ones already removed.
    for blob in uploaded_blobs:
        try:
            blob.delete()
        except google.cloud.exceptions.NotFound:
            print("Issue during teardown, missing blob")

    test_bucket.delete(force=True)
67+
68+
69+
def delete_stored_info_type(out: str) -> None:
    """Best-effort cleanup: delete any stored infoType whose full resource
    name appears in the captured sample output.

    Args:
        out: Captured stdout from the create/update samples.

    Matches both the update message ("Updated stored infoType successfully:
    <name>") and the create message ("Created Stored InfoType: <name>") —
    if the update step fails after a successful create, the captured output
    only contains the create line, and matching it prevents the freshly
    created stored infoType from leaking.
    """
    for line in str(out).split("\n"):
        if (
            "Updated stored infoType successfully" in line
            or "Created Stored InfoType" in line
        ):
            # Everything after the first colon is the full resource name
            # (projects/<project>/storedInfoTypes/<id>).
            stored_info_type_name = line.split(":", 1)[1].strip()
            DLP_CLIENT.delete_stored_info_type(name=stored_info_type_name)
74+
75+
76+
def test_create_and_update_stored_infotype(
    bucket: google.cloud.storage.bucket.Bucket, capsys: pytest.CaptureFixture
) -> None:
    """End-to-end: create a stored infoType, update its term-list source,
    and clean up whatever got created."""
    out = ""
    try:
        # Create a stored infoType backed by the public BigQuery term list.
        stored_infotype.create_stored_infotype(
            GCLOUD_PROJECT,
            STORED_INFO_TYPE_ID,
            bucket.name,
        )
        out, _ = capsys.readouterr()
        assert STORED_INFO_TYPE_ID in out

        # The first printed line ends with the full resource name.
        created_name = str(out).split("\n")[0].split(":")[1].strip()

        # Repoint the dictionary at the term-list file the fixture uploaded.
        stored_infotype.update_stored_infotype(
            GCLOUD_PROJECT,
            STORED_INFO_TYPE_ID,
            f"{bucket.name}/{RESOURCE_FILE_NAMES[0]}",
            f"{bucket.name}"
        )
        out, _ = capsys.readouterr()
        assert created_name in out
    finally:
        # Delete whichever stored infoType the captured output names.
        delete_stored_info_type(out)

0 commit comments

Comments
 (0)