From 47fc04f74c77db3bd5397459cf9242dc11521c37 Mon Sep 17 00:00:00 2001 From: mwdaub Date: Thu, 14 Jun 2018 10:51:15 -0700 Subject: [PATCH] Share logic for building custom info types --- dlp/inspect_content.py | 103 +++++++++++------------------------------ 1 file changed, 28 insertions(+), 75 deletions(-) diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index acdd7f3f274b..b2da99c4f5c4 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -53,21 +53,8 @@ def inspect_string(project, content_string, info_types, # Prepare custom_info_types by parsing the dictionary word lists and # regex patterns. - if custom_dictionaries is None: - custom_dictionaries = [] - dictionaries = [{ - 'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)}, - 'dictionary': { - 'word_list': {'words': custom_dictionaries[i].split(',')} - } - } for i in range(len(custom_dictionaries))] - if custom_regexes is None: - custom_regexes = [] - regexes = [{ - 'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)}, - 'regex': {'pattern': custom_regexes[i]} - } for i in range(len(custom_regexes))] - custom_info_types = dictionaries + regexes + custom_info_types = build_custom_info_types(custom_dictionaries, + custom_regexes) # Construct the configuration dictionary. Keys which are None may # optionally be omitted entirely. @@ -141,21 +128,8 @@ def inspect_file(project, filename, info_types, min_likelihood=None, # Prepare custom_info_types by parsing the dictionary word lists and # regex patterns. 
- if custom_dictionaries is None: - custom_dictionaries = [] - dictionaries = [{ - 'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)}, - 'dictionary': { - 'word_list': {'words': custom_dictionaries[i].split(',')} - } - } for i in range(len(custom_dictionaries))] - if custom_regexes is None: - custom_regexes = [] - regexes = [{ - 'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)}, - 'regex': {'pattern': custom_regexes[i]} - } for i in range(len(custom_regexes))] - custom_info_types = dictionaries + regexes + custom_info_types = build_custom_info_types(custom_dictionaries, + custom_regexes) # Construct the configuration dictionary. Keys which are None may # optionally be omitted entirely. @@ -254,21 +228,8 @@ def inspect_gcs_file(project, bucket, filename, topic_id, subscription_id, # Prepare custom_info_types by parsing the dictionary word lists and # regex patterns. - if custom_dictionaries is None: - custom_dictionaries = [] - dictionaries = [{ - 'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)}, - 'dictionary': { - 'word_list': {'words': custom_dictionaries[i].split(',')} - } - } for i in range(len(custom_dictionaries))] - if custom_regexes is None: - custom_regexes = [] - regexes = [{ - 'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)}, - 'regex': {'pattern': custom_regexes[i]} - } for i in range(len(custom_regexes))] - custom_info_types = dictionaries + regexes + custom_info_types = build_custom_info_types(custom_dictionaries, + custom_regexes) # Construct the configuration dictionary. Keys which are None may # optionally be omitted entirely. @@ -400,21 +361,8 @@ def inspect_datastore(project, datastore_project, kind, # Prepare custom_info_types by parsing the dictionary word lists and # regex patterns. 
- if custom_dictionaries is None: - custom_dictionaries = [] - dictionaries = [{ - 'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)}, - 'dictionary': { - 'word_list': {'words': custom_dictionaries[i].split(',')} - } - } for i in range(len(custom_dictionaries))] - if custom_regexes is None: - custom_regexes = [] - regexes = [{ - 'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)}, - 'regex': {'pattern': custom_regexes[i]} - } for i in range(len(custom_regexes))] - custom_info_types = dictionaries + regexes + custom_info_types = build_custom_info_types(custom_dictionaries, + custom_regexes) # Construct the configuration dictionary. Keys which are None may # optionally be omitted entirely. @@ -551,21 +499,8 @@ def inspect_bigquery(project, bigquery_project, dataset_id, table_id, # Prepare custom_info_types by parsing the dictionary word lists and # regex patterns. - if custom_dictionaries is None: - custom_dictionaries = [] - dictionaries = [{ - 'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)}, - 'dictionary': { - 'word_list': {'words': custom_dictionaries[i].split(',')} - } - } for i in range(len(custom_dictionaries))] - if custom_regexes is None: - custom_regexes = [] - regexes = [{ - 'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)}, - 'regex': {'pattern': custom_regexes[i]} - } for i in range(len(custom_regexes))] - custom_info_types = dictionaries + regexes + custom_info_types = build_custom_info_types(custom_dictionaries, + custom_regexes) # Construct the configuration dictionary. Keys which are None may # optionally be omitted entirely. 
@@ -651,6 +586,24 @@ def callback(message): # [END dlp_inspect_bigquery] +def build_custom_info_types(custom_dictionaries, custom_regexes): + if custom_dictionaries is None: + custom_dictionaries = [] + dictionaries = [{ + 'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)}, + 'dictionary': { + 'word_list': {'words': words.split(',')} + } + } for i, words in enumerate(custom_dictionaries)] + if custom_regexes is None: + custom_regexes = [] + regexes = [{ + 'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)}, + 'regex': {'pattern': pattern} + } for i, pattern in enumerate(custom_regexes)] + return dictionaries + regexes + + + if __name__ == '__main__': default_project = os.environ.get('GCLOUD_PROJECT')