|
| 1 | +# Copyright 2017 Google Inc. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +"""Sample app that sets up Data Loss Prevention API stored infoTypes.""" |
| 16 | + |
| 17 | +from __future__ import print_function |
| 18 | + |
| 19 | +import argparse |
| 20 | +import os |
| 21 | +import time |
| 22 | + |
| 23 | + |
# [START dlp_create_stored_info_type]
def create_stored_info_type_from_gcs_files(project, gcs_input_file_path,
                                           gcs_output_path,
                                           stored_info_type_id=None,
                                           display_name=None,
                                           description=None):
    """Creates a scheduled Data Loss Prevention API stored infoType from a set
    of GCS files.
    Args:
        project: The Google Cloud project id to use as a parent resource.
        gcs_input_file_path: The path specifying the input files containing the
            dictionary words.
        gcs_output_path: The path specifying where the dictionary data files
            should be stored.
        stored_info_type_id: The id of the stored infoType. If omitted, an id
            will be randomly generated.
        display_name: The optional display name of the stored infoType.
        description: The optional description of the stored infoType.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Large-custom-dictionary config: word source is a set of GCS files, and
    # the generated dictionary artifacts land under gcs_output_path.
    gcs_dictionary_config = {
        'output_path': {'path': gcs_output_path},
        'cloud_storage_file_set': {'url': gcs_input_file_path},
    }

    # Delegate the actual API call to the shared creation helper.
    create_stored_info_type(
        project,
        gcs_dictionary_config,
        stored_info_type_id=stored_info_type_id,
        display_name=display_name,
        description=description)

def create_stored_info_type_from_bq_table(project, bq_input_project_id,
                                          bq_input_dataset_id,
                                          bq_input_table_id,
                                          bq_input_table_field,
                                          gcs_output_path,
                                          stored_info_type_id=None,
                                          display_name=None,
                                          description=None):
    """Creates a scheduled Data Loss Prevention API stored infoType from a
    column of a BigQuery.
    Args:
        project: The Google Cloud project id to use as a parent resource.
        bq_input_project_id: The id of the project owning the input BigQuery
            table.
        bq_input_dataset_id: The dataset of the input BigQuery table.
        bq_input_table_id: The id of the input BigQuery table.
        bq_input_table_field: The name of the field of the BigQuery table_id
            containing the dictionary words.
        gcs_output_path: The path specifying where the dictionary data files
            should be stored.
        stored_info_type_id: The id of the stored infoType. If omitted, an id
            will be randomly generated.
        display_name: The optional display name of the stored infoType.
        description: The optional description of the stored infoType.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Identify the BigQuery table and the column holding the dictionary words.
    source_table = {
        'project_id': bq_input_project_id,
        'dataset_id': bq_input_dataset_id,
        'table_id': bq_input_table_id,
    }

    # Large-custom-dictionary config: word source is one BigQuery column, and
    # the generated dictionary artifacts land under gcs_output_path.
    bq_dictionary_config = {
        'output_path': {'path': gcs_output_path},
        'big_query_field': {
            'table': source_table,
            'field': {'name': bq_input_table_field},
        }
    }

    # Delegate the actual API call to the shared creation helper.
    create_stored_info_type(
        project,
        bq_dictionary_config,
        stored_info_type_id=stored_info_type_id,
        display_name=display_name,
        description=description)

def create_stored_info_type(project, dictionary_config,
                            stored_info_type_id=None, display_name=None,
                            description=None):
    """Creates a Data Loss Prevention API stored infoType backed by a large
    custom dictionary.

    This is the shared helper used by both the GCS-file and BigQuery-table
    creation entry points; the caller supplies the dictionary source via
    ``dictionary_config``.
    Args:
        project: The Google Cloud project id to use as a parent resource.
        dictionary_config: The config for the large custom dictionary,
            including the word source (GCS files or a BigQuery field) and the
            output path for the generated dictionary data files.
        stored_info_type_id: The id of the stored infoType. If omitted, an id
            will be randomly generated.
        display_name: The optional display name of the stored infoType.
        description: The optional description of the stored infoType.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library (kept function-local, matching the other
    # samples in this file).
    import google.cloud.dlp

    # Instantiate a client.
    dlp = google.cloud.dlp.DlpServiceClient()

    # Create the stored infoType config. display_name/description may be None;
    # the API treats unset optional fields as empty.
    stored_info_type_config = {
        'display_name': display_name,
        'description': description,
        'large_custom_dictionary': dictionary_config
    }

    # Convert the project id into a full resource id.
    parent = dlp.project_path(project)

    # Call the API.
    response = dlp.create_stored_info_type(
        parent, config=stored_info_type_config,
        stored_info_type_id=stored_info_type_id)

    print('Successfully created stored infoType {}'.format(response.name))

# [END dlp_create_stored_info_type]

# [START dlp_list_stored_info_types]
def list_stored_info_types(project):
    """Lists all Data Loss Prevention API stored infoTypes.
    Args:
        project: The Google Cloud project id to use as a parent resource.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library
    import google.cloud.dlp

    # Instantiate a client.
    dlp_client = google.cloud.dlp.DlpServiceClient()

    # Convert the project id into a full resource id.
    parent = dlp_client.project_path(project)

    # Call the API.
    stored_info_types = dlp_client.list_stored_info_types(parent)

    def print_version(version, indent):
        # Render the API's "seconds since the epoch" timestamp as a
        # human-readable string, then dump the version's key fields.
        created = str(time.localtime(version.create_time.seconds))
        print('{}Created: {}'.format(indent, created))
        print('{}State: {}'.format(indent, version.state))
        print('{}Error count: {}'.format(indent, len(version.errors)))

    for stored_info_type in stored_info_types:
        print('Stored infoType {}:'.format(stored_info_type.name))
        if stored_info_type.current_version:
            print('  Current version:')
            print_version(stored_info_type.current_version, '    ')
        if stored_info_type.pending_versions:
            print('  Pending versions:')
            for pending in stored_info_type.pending_versions:
                print_version(pending, '      ')

# [END dlp_list_stored_info_types]

# [START dlp_delete_stored_info_type]
def delete_stored_info_type(project, stored_info_type_id):
    """Deletes a Data Loss Prevention API stored infoType.
    Args:
        project: The id of the Google Cloud project which owns the stored
            infoType.
        stored_info_type_id: The id of the stored infoType to delete.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library
    import google.cloud.dlp

    # Instantiate a client.
    dlp_client = google.cloud.dlp.DlpServiceClient()

    # Build the full resource name by combining the project resource id with
    # the stored infoType id.
    parent = dlp_client.project_path(project)
    stored_info_type_resource = '{}/storedInfoTypes/{}'.format(
        parent, stored_info_type_id)

    # Call the API.
    dlp_client.delete_stored_info_type(stored_info_type_resource)

    print('Stored infoType {} successfully deleted.'.format(
        stored_info_type_resource))

# [END dlp_delete_stored_info_type]

if __name__ == '__main__':
    # Fall back to the ambient gcloud project when --project is not given.
    default_project = os.environ.get('GCLOUD_PROJECT')

    parser = argparse.ArgumentParser(description=__doc__)
    subparsers = parser.add_subparsers(
        dest='action', help='Select which action to perform.')
    subparsers.required = True

    # "create" accepts either a GCS word-file source or a BigQuery column
    # source; the dispatch below picks based on --gcs_input_file_path.
    create_parser = subparsers.add_parser(
        'create', help='Create a stored infoType.')
    create_parser.add_argument(
        '--gcs_input_file_path',
        help='GCS path of the input files containing the dictionary words.')
    create_parser.add_argument(
        '--bq_input_project_id',
        help='Project of the BigQuery table containing the dictionary words.',
        default=default_project)
    create_parser.add_argument(
        '--bq_input_dataset_id',
        help='Dataset of the BigQuery table containing the dictionary words.')
    create_parser.add_argument(
        '--bq_input_table_id',
        help='ID of the BigQuery table containing the dictionary words.')
    create_parser.add_argument(
        '--bq_input_table_field',
        help='Field of the BigQuery table containing the dictionary words.')
    create_parser.add_argument(
        '--gcs_output_path',
        help='GCS path where the output data files should be stored.')
    create_parser.add_argument(
        '--stored_info_type_id',
        help='The id of the stored infoType. If omitted, an id will be '
             'randomly generated')
    create_parser.add_argument(
        '--display_name',
        help='The optional display name of the stored infoType.')
    create_parser.add_argument(
        '--description',
        help='The optional description of the stored infoType.')
    create_parser.add_argument(
        '--project',
        help='The Google Cloud project id to use as a parent resource.',
        default=default_project)

    list_parser = subparsers.add_parser(
        'list', help='List all stored infoTypes.')
    list_parser.add_argument(
        '--project',
        help='The Google Cloud project id to use as a parent resource.',
        default=default_project)

    delete_parser = subparsers.add_parser(
        'delete', help='Delete a stored infoType.')
    delete_parser.add_argument(
        'stored_info_type_id',
        help='The id of the stored infoType to delete.')
    delete_parser.add_argument(
        '--project',
        help='The Google Cloud project id to use as a parent resource.',
        default=default_project)

    args = parser.parse_args()

    if args.action == 'create':
        if args.gcs_input_file_path:
            create_stored_info_type_from_gcs_files(
                args.project,
                args.gcs_input_file_path,
                args.gcs_output_path,
                stored_info_type_id=args.stored_info_type_id,
                display_name=args.display_name,
                description=args.description)
        else:
            create_stored_info_type_from_bq_table(
                args.project,
                args.bq_input_project_id,
                args.bq_input_dataset_id,
                args.bq_input_table_id,
                args.bq_input_table_field,
                args.gcs_output_path,
                stored_info_type_id=args.stored_info_type_id,
                display_name=args.display_name,
                description=args.description)
    elif args.action == 'list':
        list_stored_info_types(args.project)
    elif args.action == 'delete':
        delete_stored_info_type(args.project, args.stored_info_type_id)