@@ -53,21 +53,8 @@ def inspect_string(project, content_string, info_types,
5353
5454 # Prepare custom_info_types by parsing the dictionary word lists and
5555 # regex patterns.
56- if custom_dictionaries is None :
57- custom_dictionaries = []
58- dictionaries = [{
59- 'info_type' : {'name' : 'CUSTOM_DICTIONARY_{}' .format (i )},
60- 'dictionary' : {
61- 'word_list' : {'words' : custom_dictionaries [i ].split (',' )}
62- }
63- } for i in range (len (custom_dictionaries ))]
64- if custom_regexes is None :
65- custom_regexes = []
66- regexes = [{
67- 'info_type' : {'name' : 'CUSTOM_REGEX_{}' .format (i )},
68- 'regex' : {'pattern' : custom_regexes [i ]}
69- } for i in range (len (custom_regexes ))]
70- custom_info_types = dictionaries + regexes
56+ custom_info_types = build_custom_info_types (custom_dictionaries ,
57+ custom_regexes )
7158
7259 # Construct the configuration dictionary. Keys which are None may
7360 # optionally be omitted entirely.
@@ -141,21 +128,8 @@ def inspect_file(project, filename, info_types, min_likelihood=None,
141128
142129 # Prepare custom_info_types by parsing the dictionary word lists and
143130 # regex patterns.
144- if custom_dictionaries is None :
145- custom_dictionaries = []
146- dictionaries = [{
147- 'info_type' : {'name' : 'CUSTOM_DICTIONARY_{}' .format (i )},
148- 'dictionary' : {
149- 'word_list' : {'words' : custom_dictionaries [i ].split (',' )}
150- }
151- } for i in range (len (custom_dictionaries ))]
152- if custom_regexes is None :
153- custom_regexes = []
154- regexes = [{
155- 'info_type' : {'name' : 'CUSTOM_REGEX_{}' .format (i )},
156- 'regex' : {'pattern' : custom_regexes [i ]}
157- } for i in range (len (custom_regexes ))]
158- custom_info_types = dictionaries + regexes
131+ custom_info_types = build_custom_info_types (custom_dictionaries ,
132+ custom_regexes )
159133
160134 # Construct the configuration dictionary. Keys which are None may
161135 # optionally be omitted entirely.
@@ -254,21 +228,8 @@ def inspect_gcs_file(project, bucket, filename, topic_id, subscription_id,
254228
255229 # Prepare custom_info_types by parsing the dictionary word lists and
256230 # regex patterns.
257- if custom_dictionaries is None :
258- custom_dictionaries = []
259- dictionaries = [{
260- 'info_type' : {'name' : 'CUSTOM_DICTIONARY_{}' .format (i )},
261- 'dictionary' : {
262- 'word_list' : {'words' : custom_dictionaries [i ].split (',' )}
263- }
264- } for i in range (len (custom_dictionaries ))]
265- if custom_regexes is None :
266- custom_regexes = []
267- regexes = [{
268- 'info_type' : {'name' : 'CUSTOM_REGEX_{}' .format (i )},
269- 'regex' : {'pattern' : custom_regexes [i ]}
270- } for i in range (len (custom_regexes ))]
271- custom_info_types = dictionaries + regexes
231+ custom_info_types = build_custom_info_types (custom_dictionaries ,
232+ custom_regexes )
272233
273234 # Construct the configuration dictionary. Keys which are None may
274235 # optionally be omitted entirely.
@@ -400,21 +361,8 @@ def inspect_datastore(project, datastore_project, kind,
400361
401362 # Prepare custom_info_types by parsing the dictionary word lists and
402363 # regex patterns.
403- if custom_dictionaries is None :
404- custom_dictionaries = []
405- dictionaries = [{
406- 'info_type' : {'name' : 'CUSTOM_DICTIONARY_{}' .format (i )},
407- 'dictionary' : {
408- 'word_list' : {'words' : custom_dictionaries [i ].split (',' )}
409- }
410- } for i in range (len (custom_dictionaries ))]
411- if custom_regexes is None :
412- custom_regexes = []
413- regexes = [{
414- 'info_type' : {'name' : 'CUSTOM_REGEX_{}' .format (i )},
415- 'regex' : {'pattern' : custom_regexes [i ]}
416- } for i in range (len (custom_regexes ))]
417- custom_info_types = dictionaries + regexes
364+ custom_info_types = build_custom_info_types (custom_dictionaries ,
365+ custom_regexes )
418366
419367 # Construct the configuration dictionary. Keys which are None may
420368 # optionally be omitted entirely.
@@ -551,21 +499,8 @@ def inspect_bigquery(project, bigquery_project, dataset_id, table_id,
551499
552500 # Prepare custom_info_types by parsing the dictionary word lists and
553501 # regex patterns.
554- if custom_dictionaries is None :
555- custom_dictionaries = []
556- dictionaries = [{
557- 'info_type' : {'name' : 'CUSTOM_DICTIONARY_{}' .format (i )},
558- 'dictionary' : {
559- 'word_list' : {'words' : custom_dictionaries [i ].split (',' )}
560- }
561- } for i in range (len (custom_dictionaries ))]
562- if custom_regexes is None :
563- custom_regexes = []
564- regexes = [{
565- 'info_type' : {'name' : 'CUSTOM_REGEX_{}' .format (i )},
566- 'regex' : {'pattern' : custom_regexes [i ]}
567- } for i in range (len (custom_regexes ))]
568- custom_info_types = dictionaries + regexes
502+ custom_info_types = build_custom_info_types (custom_dictionaries ,
503+ custom_regexes )
569504
570505 # Construct the configuration dictionary. Keys which are None may
571506 # optionally be omitted entirely.
@@ -651,6 +586,24 @@ def callback(message):
651586# [END dlp_inspect_bigquery]
652587
653588
def build_custom_info_types(custom_dictionaries, custom_regexes):
    """Build the list of custom info type dicts from word lists and regexes.

    Args:
        custom_dictionaries: Optional list of strings, each a
            comma-separated list of words. Every string becomes one
            dictionary-based custom info type named
            ``CUSTOM_DICTIONARY_<index>``. ``None`` is treated as empty.
        custom_regexes: Optional list of regex pattern strings. Every
            pattern becomes one regex-based custom info type named
            ``CUSTOM_REGEX_<index>``. ``None`` is treated as empty.

    Returns:
        A list of dicts: all dictionary-based entries (in input order)
        followed by all regex-based entries (in input order).
    """
    if custom_dictionaries is None:
        custom_dictionaries = []
    if custom_regexes is None:
        custom_regexes = []

    # Words are split on ',' only; surrounding whitespace is kept as given.
    dictionaries = [
        {
            'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(index)},
            'dictionary': {'word_list': {'words': word_csv.split(',')}},
        }
        for index, word_csv in enumerate(custom_dictionaries)
    ]
    regexes = [
        {
            'info_type': {'name': 'CUSTOM_REGEX_{}'.format(index)},
            'regex': {'pattern': pattern},
        }
        for index, pattern in enumerate(custom_regexes)
    ]
    return dictionaries + regexes
605+
606+
654607if __name__ == '__main__' :
655608 default_project = os .environ .get ('GCLOUD_PROJECT' )
656609
0 commit comments