Skip to content

Commit 47fc04f

Browse files
authored
Share logic for building custom info types
1 parent 5258658 commit 47fc04f

File tree

1 file changed

+28
-75
lines changed

1 file changed

+28
-75
lines changed

dlp/inspect_content.py

+28-75
Original file line numberDiff line numberDiff line change
@@ -53,21 +53,8 @@ def inspect_string(project, content_string, info_types,
5353

5454
# Prepare custom_info_types by parsing the dictionary word lists and
5555
# regex patterns.
56-
if custom_dictionaries is None:
57-
custom_dictionaries = []
58-
dictionaries = [{
59-
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
60-
'dictionary': {
61-
'word_list': {'words': custom_dictionaries[i].split(',')}
62-
}
63-
} for i in range(len(custom_dictionaries))]
64-
if custom_regexes is None:
65-
custom_regexes = []
66-
regexes = [{
67-
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
68-
'regex': {'pattern': custom_regexes[i]}
69-
} for i in range(len(custom_regexes))]
70-
custom_info_types = dictionaries + regexes
56+
custom_info_types = build_custom_info_types(custom_dictionaries,
57+
custom_regexes)
7158

7259
# Construct the configuration dictionary. Keys which are None may
7360
# optionally be omitted entirely.
@@ -141,21 +128,8 @@ def inspect_file(project, filename, info_types, min_likelihood=None,
141128

142129
# Prepare custom_info_types by parsing the dictionary word lists and
143130
# regex patterns.
144-
if custom_dictionaries is None:
145-
custom_dictionaries = []
146-
dictionaries = [{
147-
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
148-
'dictionary': {
149-
'word_list': {'words': custom_dictionaries[i].split(',')}
150-
}
151-
} for i in range(len(custom_dictionaries))]
152-
if custom_regexes is None:
153-
custom_regexes = []
154-
regexes = [{
155-
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
156-
'regex': {'pattern': custom_regexes[i]}
157-
} for i in range(len(custom_regexes))]
158-
custom_info_types = dictionaries + regexes
131+
custom_info_types = build_custom_info_types(custom_dictionaries,
132+
custom_regexes)
159133

160134
# Construct the configuration dictionary. Keys which are None may
161135
# optionally be omitted entirely.
@@ -254,21 +228,8 @@ def inspect_gcs_file(project, bucket, filename, topic_id, subscription_id,
254228

255229
# Prepare custom_info_types by parsing the dictionary word lists and
256230
# regex patterns.
257-
if custom_dictionaries is None:
258-
custom_dictionaries = []
259-
dictionaries = [{
260-
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
261-
'dictionary': {
262-
'word_list': {'words': custom_dictionaries[i].split(',')}
263-
}
264-
} for i in range(len(custom_dictionaries))]
265-
if custom_regexes is None:
266-
custom_regexes = []
267-
regexes = [{
268-
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
269-
'regex': {'pattern': custom_regexes[i]}
270-
} for i in range(len(custom_regexes))]
271-
custom_info_types = dictionaries + regexes
231+
custom_info_types = build_custom_info_types(custom_dictionaries,
232+
custom_regexes)
272233

273234
# Construct the configuration dictionary. Keys which are None may
274235
# optionally be omitted entirely.
@@ -400,21 +361,8 @@ def inspect_datastore(project, datastore_project, kind,
400361

401362
# Prepare custom_info_types by parsing the dictionary word lists and
402363
# regex patterns.
403-
if custom_dictionaries is None:
404-
custom_dictionaries = []
405-
dictionaries = [{
406-
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
407-
'dictionary': {
408-
'word_list': {'words': custom_dictionaries[i].split(',')}
409-
}
410-
} for i in range(len(custom_dictionaries))]
411-
if custom_regexes is None:
412-
custom_regexes = []
413-
regexes = [{
414-
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
415-
'regex': {'pattern': custom_regexes[i]}
416-
} for i in range(len(custom_regexes))]
417-
custom_info_types = dictionaries + regexes
364+
custom_info_types = build_custom_info_types(custom_dictionaries,
365+
custom_regexes)
418366

419367
# Construct the configuration dictionary. Keys which are None may
420368
# optionally be omitted entirely.
@@ -551,21 +499,8 @@ def inspect_bigquery(project, bigquery_project, dataset_id, table_id,
551499

552500
# Prepare custom_info_types by parsing the dictionary word lists and
553501
# regex patterns.
554-
if custom_dictionaries is None:
555-
custom_dictionaries = []
556-
dictionaries = [{
557-
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
558-
'dictionary': {
559-
'word_list': {'words': custom_dictionaries[i].split(',')}
560-
}
561-
} for i in range(len(custom_dictionaries))]
562-
if custom_regexes is None:
563-
custom_regexes = []
564-
regexes = [{
565-
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
566-
'regex': {'pattern': custom_regexes[i]}
567-
} for i in range(len(custom_regexes))]
568-
custom_info_types = dictionaries + regexes
502+
custom_info_types = build_custom_info_types(custom_dictionaries,
503+
custom_regexes)
569504

570505
# Construct the configuration dictionary. Keys which are None may
571506
# optionally be omitted entirely.
@@ -651,6 +586,24 @@ def callback(message):
651586
# [END dlp_inspect_bigquery]
652587

653588

589+
def build_custom_info_types(custom_dictionaries, custom_regexes):
    """Build the list of custom info type definitions.

    Args:
        custom_dictionaries: Iterable of strings, each a comma-separated
            word list, or None for no custom dictionaries.
        custom_regexes: Iterable of regex pattern strings, or None for no
            custom regexes.

    Returns:
        A list of dicts: one 'dictionary' entry per word list (named
        CUSTOM_DICTIONARY_<index>), followed by one 'regex' entry per
        pattern (named CUSTOM_REGEX_<index>). Indexes start at 0 and are
        counted separately for each kind.
    """
    if custom_dictionaries is None:
        custom_dictionaries = []
    # enumerate() rather than range(len()) so any iterable is accepted,
    # not only sized, indexable sequences.
    dictionaries = [{
        'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
        'dictionary': {
            # Words are split on ',' only; surrounding whitespace is kept.
            'word_list': {'words': word_list.split(',')}
        }
    } for i, word_list in enumerate(custom_dictionaries)]

    if custom_regexes is None:
        custom_regexes = []
    regexes = [{
        'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
        'regex': {'pattern': pattern}
    } for i, pattern in enumerate(custom_regexes)]

    return dictionaries + regexes
605+
606+
654607
if __name__ == '__main__':
655608
default_project = os.environ.get('GCLOUD_PROJECT')
656609

0 commit comments

Comments
 (0)