Skip to content

Commit 748dcb1

Browse files
committed
Added code samples for stored infoTypes.
The samples were modeled after triggers.py and adapted to stored infoTypes. Note that this does not include code samples for actually using them - those will be added later.
1 parent 828fac6 commit 748dcb1

File tree

3 files changed

+397
-0
lines changed

3 files changed

+397
-0
lines changed

dlp/resources/dictionary.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
(223) 456-7890
2+
gary@somedomain.com

dlp/stored_info_types.py

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
# Copyright 2017 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Sample app that sets up Data Loss Prevention API stored infoTypes."""
16+
17+
from __future__ import print_function
18+
19+
import argparse
20+
import os
21+
import time
22+
23+
24+
# [START dlp_create_stored_info_type]
def create_stored_info_type_from_gcs_files(project, gcs_input_file_path,
                                           gcs_output_path, stored_info_type_id=None,
                                           display_name=None, description=None):
    """Creates a Data Loss Prevention API stored infoType whose large custom
    dictionary is built from a set of GCS input files.
    Args:
        project: The Google Cloud project id to use as a parent resource.
        gcs_input_file_path: The path specifying the input files containing
            the dictionary words.
        gcs_output_path: The path specifying where the dictionary data files
            should be stored.
        stored_info_type_id: The id of the stored infoType. If omitted, an id
            will be randomly generated.
        display_name: The optional display name of the stored infoType.
        description: The optional description of the stored infoType.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Point the large-custom-dictionary config at the GCS word-list files
    # and at the location where the processed dictionary data should be
    # written, then delegate the API call to the shared helper below.
    create_stored_info_type(
        project,
        {
            'output_path': {'path': gcs_output_path},
            'cloud_storage_file_set': {'url': gcs_input_file_path},
        },
        stored_info_type_id=stored_info_type_id,
        display_name=display_name,
        description=description)
53+
def create_stored_info_type_from_bq_table(project, bq_input_project_id,
                                          bq_input_dataset_id, bq_input_table_id,
                                          bq_input_table_field, gcs_output_path,
                                          stored_info_type_id=None, display_name=None,
                                          description=None):
    """Creates a Data Loss Prevention API stored infoType whose large custom
    dictionary is built from one column of a BigQuery table.
    Args:
        project: The Google Cloud project id to use as a parent resource.
        bq_input_project_id: The id of the project owning the input BigQuery
            table.
        bq_input_dataset_id: The dataset of the input BigQuery table.
        bq_input_table_id: The id of the input BigQuery table.
        bq_input_table_field: The name of the field of the BigQuery table_id
            containing the dictionary words.
        gcs_output_path: The path specifying where the dictionary data files
            should be stored.
        stored_info_type_id: The id of the stored infoType. If omitted, an id
            will be randomly generated.
        display_name: The optional display name of the stored infoType.
        description: The optional description of the stored infoType.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Identify the BigQuery table holding the dictionary words.
    source_table = {
        'project_id': bq_input_project_id,
        'dataset_id': bq_input_dataset_id,
        'table_id': bq_input_table_id,
    }

    # Build the large-custom-dictionary config: read words from one field of
    # the source table, write the processed dictionary data to GCS.
    dictionary_config = {
        'output_path': {'path': gcs_output_path},
        'big_query_field': {
            'table': source_table,
            'field': {'name': bq_input_table_field},
        },
    }

    # Delegate the API call to the shared helper below.
    create_stored_info_type(
        project, dictionary_config,
        stored_info_type_id=stored_info_type_id,
        display_name=display_name, description=description)
93+
94+
def create_stored_info_type(project, dictionary_config,
                            stored_info_type_id=None, display_name=None,
                            description=None):
    """Creates a Data Loss Prevention API stored infoType backed by a large
    custom dictionary.

    The dictionary source is described by dictionary_config, which may point
    either at a set of GCS files or at a BigQuery table column; see the
    create_stored_info_type_from_* wrappers above.
    Args:
        project: The Google Cloud project id to use as a parent resource.
        dictionary_config: The config for the large custom dictionary.
        stored_info_type_id: The id of the stored infoType. If omitted, an id
            will be randomly generated.
        display_name: The optional display name of the stored infoType.
        description: The optional description of the stored infoType.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library. Imported locally so the module can be used
    # (e.g. for --help) without the dependency installed.
    import google.cloud.dlp

    # Instantiate a client.
    dlp = google.cloud.dlp.DlpServiceClient()

    # Create the stored infoType config.
    stored_info_type_config = {
        'display_name': display_name,
        'description': description,
        'large_custom_dictionary': dictionary_config
    }

    # Convert the project id into a full resource id.
    parent = dlp.project_path(project)

    # Call the API.
    response = dlp.create_stored_info_type(
        parent, config=stored_info_type_config,
        stored_info_type_id=stored_info_type_id)

    print('Successfully created stored infoType {}'.format(response.name))

# [END dlp_create_stored_info_type]
134+
135+
136+
# [START dlp_list_stored_info_types]
def list_stored_info_types(project):
    """Lists all Data Loss Prevention API stored infoTypes.
    Args:
        project: The Google Cloud project id to use as a parent resource.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library
    import google.cloud.dlp

    # Instantiate a client.
    dlp = google.cloud.dlp.DlpServiceClient()

    # The API expects the fully-qualified project resource name as parent.
    parent = dlp.project_path(project)

    # Helper to render the API's "seconds since the epoch" timestamps as a
    # human-readable string.
    def human_readable_time(timestamp):
        return str(time.localtime(timestamp.seconds))

    # Helper to print the fields shared by current and pending versions.
    def print_version(version):
        print('    Created: {}'.format(
            human_readable_time(version.create_time)))
        print('    State: {}'.format(version.state))
        print('    Error count: {}'.format(len(version.errors)))

    # Call the API and walk every stored infoType in the response.
    for stored_info_type in dlp.list_stored_info_types(parent):
        print('Stored infoType {}:'.format(stored_info_type.name))
        if stored_info_type.current_version:
            print('  Current version:')
            print_version(stored_info_type.current_version)
        if stored_info_type.pending_versions:
            print('  Pending versions:')
            for pending in stored_info_type.pending_versions:
                print_version(pending)

# [END dlp_list_stored_info_types]
180+
181+
182+
# [START dlp_delete_stored_info_type]
def delete_stored_info_type(project, stored_info_type_id):
    """Deletes a Data Loss Prevention API stored infoType.
    Args:
        project: The id of the Google Cloud project which owns the stored
            infoType.
        stored_info_type_id: The id of the stored infoType to delete.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library
    import google.cloud.dlp

    # Instantiate a client.
    dlp = google.cloud.dlp.DlpServiceClient()

    # Assemble the fully-qualified resource name of the stored infoType by
    # appending its id to the project's resource path.
    stored_info_type_resource = '{}/storedInfoTypes/{}'.format(
        dlp.project_path(project), stored_info_type_id)

    # Call the API.
    dlp.delete_stored_info_type(stored_info_type_resource)

    print('Stored infoType {} successfully deleted.'.format(
        stored_info_type_resource))

# [END dlp_delete_stored_info_type]
213+
214+
215+
if __name__ == '__main__':
    # Default the various --project flags to the ambient gcloud project.
    default_project = os.environ.get('GCLOUD_PROJECT')

    parser = argparse.ArgumentParser(description=__doc__)
    subparsers = parser.add_subparsers(
        dest='action', help='Select which action to perform.')
    # required is set after creation for Python 2/3 argparse compatibility.
    subparsers.required = True

    # "create": builds a stored infoType either from GCS input files or from
    # a BigQuery table column, depending on which flags are supplied below.
    parser_create = subparsers.add_parser('create',
                                          help='Create a stored infoType.')
    parser_create.add_argument(
        '--gcs_input_file_path',
        help='GCS path of the input files containing the dictionary words.')
    parser_create.add_argument(
        '--bq_input_project_id',
        help='Project of the BigQuery table containing the dictionary words.',
        default=default_project)
    parser_create.add_argument(
        '--bq_input_dataset_id',
        help='Dataset of the BigQuery table containing the dictionary words.')
    parser_create.add_argument(
        '--bq_input_table_id',
        help='ID of the BigQuery table containing the dictionary words.')
    parser_create.add_argument(
        '--bq_input_table_field',
        help='Field of the BigQuery table containing the dictionary words.')
    parser_create.add_argument(
        '--gcs_output_path',
        help='GCS path where the output data files should be stored.')
    parser_create.add_argument(
        '--stored_info_type_id',
        help='The id of the stored infoType. If omitted, an id will be '
             'randomly generated')
    parser_create.add_argument(
        '--display_name',
        help='The optional display name of the stored infoType.')
    parser_create.add_argument(
        '--description',
        help='The optional description of the stored infoType.')
    parser_create.add_argument(
        '--project',
        help='The Google Cloud project id to use as a parent resource.',
        default=default_project)

    # "list": prints every stored infoType in the project.
    parser_list = subparsers.add_parser('list',
                                        help='List all stored infoTypes.')
    parser_list.add_argument(
        '--project',
        help='The Google Cloud project id to use as a parent resource.',
        default=default_project)

    # "delete": removes one stored infoType by id (positional argument).
    parser_delete = subparsers.add_parser('delete',
                                          help='Delete a stored infoType.')
    parser_delete.add_argument(
        'stored_info_type_id',
        help='The id of the stored infoType to delete.')
    parser_delete.add_argument(
        '--project',
        help='The Google Cloud project id to use as a parent resource.',
        default=default_project)

    args = parser.parse_args()

    if args.action == 'create':
        # A GCS input path selects the GCS-backed variant; otherwise the
        # BigQuery flags are assumed to be set and the BQ variant is used.
        if args.gcs_input_file_path:
            create_stored_info_type_from_gcs_files(
                args.project, args.gcs_input_file_path, args.gcs_output_path,
                stored_info_type_id=args.stored_info_type_id,
                display_name=args.display_name, description=args.description
            )
        else:
            create_stored_info_type_from_bq_table(
                args.project, args.bq_input_project_id,
                args.bq_input_dataset_id, args.bq_input_table_id,
                args.bq_input_table_field, args.gcs_output_path,
                stored_info_type_id=args.stored_info_type_id,
                display_name=args.display_name, description=args.description
            )
    elif args.action == 'list':
        list_stored_info_types(args.project)
    elif args.action == 'delete':
        delete_stored_info_type(args.project, args.stored_info_type_id)

0 commit comments

Comments
 (0)