From 9c95491bb4ca753ad2f5898cb0a38e4afca4e1b3 Mon Sep 17 00:00:00 2001 From: mwdaub Date: Thu, 20 Dec 2018 15:28:21 -0800 Subject: [PATCH] samples: Added custom dictionary and regex code samples (#204) --- dlp/inspect.js | 73 ++++++++++++++++++++++++++++++--- dlp/system-test/inspect.test.js | 38 +++++++++++++++++ 2 files changed, 105 insertions(+), 6 deletions(-) diff --git a/dlp/inspect.js b/dlp/inspect.js index a9bf5de752..28190f2c7b 100644 --- a/dlp/inspect.js +++ b/dlp/inspect.js @@ -21,6 +21,7 @@ async function inspectString( minLikelihood, maxFindings, infoTypes, + customInfoTypes, includeQuote ) { // [START dlp_inspect_string] @@ -45,6 +46,10 @@ async function inspectString( // The infoTypes of information to match // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: { pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}' }}]; + // Whether to include the matching string // const includeQuote = true; @@ -56,6 +61,7 @@ async function inspectString( parent: dlp.projectPath(callingProjectId), inspectConfig: { infoTypes: infoTypes, + customInfoTypes: customInfoTypes, minLikelihood: minLikelihood, includeQuote: includeQuote, limits: { @@ -94,6 +100,7 @@ async function inspectFile( minLikelihood, maxFindings, infoTypes, + customInfoTypes, includeQuote ) { // [START dlp_inspect_file] @@ -122,6 +129,10 @@ async function inspectFile( // The infoTypes of information to match // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: { pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}' }}]; + // Whether to include the matching string // const 
includeQuote = true; @@ -143,6 +154,7 @@ async function inspectFile( parent: dlp.projectPath(callingProjectId), inspectConfig: { infoTypes: infoTypes, + customInfoTypes: customInfoTypes, minLikelihood: minLikelihood, includeQuote: includeQuote, limits: { @@ -182,7 +194,8 @@ async function inspectGCSFile( subscriptionId, minLikelihood, maxFindings, - infoTypes + infoTypes, + customInfoTypes ) { // [START dlp_inspect_gcs] // Import the Google Cloud client libraries @@ -212,6 +225,10 @@ async function inspectGCSFile( // The infoTypes of information to match // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: { pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}' }}]; + // The name of the Pub/Sub topic to notify once the job completes // TODO(developer): create a Pub/Sub topic to use for this // const topicId = 'MY-PUBSUB-TOPIC' @@ -234,6 +251,7 @@ async function inspectGCSFile( inspectJob: { inspectConfig: { infoTypes: infoTypes, + customInfoTypes: customInfoTypes, minLikelihood: minLikelihood, limits: { maxFindingsPerRequest: maxFindings, @@ -316,7 +334,8 @@ async function inspectDatastore( subscriptionId, minLikelihood, maxFindings, - infoTypes + infoTypes, + customInfoTypes ) { // [START dlp_inspect_datastore] // Import the Google Cloud client libraries @@ -350,6 +369,10 @@ async function inspectDatastore( // The infoTypes of information to match // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: { pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}' }}]; + // The name of the Pub/Sub topic to notify once the job completes // 
TODO(developer): create a Pub/Sub topic to use for this // const topicId = 'MY-PUBSUB-TOPIC' @@ -378,6 +401,7 @@ async function inspectDatastore( inspectJob: { inspectConfig: { infoTypes: infoTypes, + customInfoTypes: customInfoTypes, minLikelihood: minLikelihood, limits: { maxFindingsPerRequest: maxFindings, @@ -458,7 +482,8 @@ async function inspectBigquery( subscriptionId, minLikelihood, maxFindings, - infoTypes + infoTypes, + customInfoTypes ) { // [START dlp_inspect_bigquery] // Import the Google Cloud client libraries @@ -491,6 +516,10 @@ async function inspectBigquery( // The infoTypes of information to match // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: { pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}' }}]; + // The name of the Pub/Sub topic to notify once the job completes // TODO(developer): create a Pub/Sub topic to use for this // const topicId = 'MY-PUBSUB-TOPIC' @@ -517,6 +546,7 @@ async function inspectBigquery( inspectJob: { inspectConfig: { infoTypes: infoTypes, + customInfoTypes: customInfoTypes, minLikelihood: minLikelihood, limits: { maxFindingsPerRequest: maxFindings, @@ -602,6 +632,7 @@ const cli = require(`yargs`) // eslint-disable-line opts.minLikelihood, opts.maxFindings, opts.infoTypes, + opts.customDictionaries.concat(opts.customRegexes), opts.includeQuote ) ) @@ -616,6 +647,7 @@ const cli = require(`yargs`) // eslint-disable-line opts.minLikelihood, opts.maxFindings, opts.infoTypes, + opts.customDictionaries.concat(opts.customRegexes), opts.includeQuote ) ) @@ -632,7 +664,8 @@ const cli = require(`yargs`) // eslint-disable-line opts.subscriptionId, opts.minLikelihood, opts.maxFindings, - opts.infoTypes + opts.infoTypes, + opts.customDictionaries.concat(opts.customRegexes) ) ) .command( @@ -649,7 +682,8 @@ 
const cli = require(`yargs`) // eslint-disable-line opts.subscriptionId, opts.minLikelihood, opts.maxFindings, - opts.infoTypes + opts.infoTypes, + opts.customDictionaries.concat(opts.customRegexes) ); } ) @@ -673,7 +707,8 @@ const cli = require(`yargs`) // eslint-disable-line opts.subscriptionId, opts.minLikelihood, opts.maxFindings, - opts.infoTypes + opts.infoTypes, + opts.customDictionaries.concat(opts.customRegexes) ) ) .option('m', { @@ -722,6 +757,32 @@ const cli = require(`yargs`) // eslint-disable-line return {name: type}; }), }) + .option('d', { + alias: 'customDictionaries', + default: [], + type: 'array', + global: true, + coerce: customDictionaries => + customDictionaries.map((dict, idx) => { + return { + infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())}, + dictionary: {wordList: {words: dict.split(',')}}, + }; + }), + }) + .option('r', { + alias: 'customRegexes', + default: [], + type: 'array', + global: true, + coerce: customRegexes => + customRegexes.map((rgx, idx) => { + return { + infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())}, + regex: {pattern: rgx}, + }; + }), + }) .option('n', { alias: 'notificationTopic', type: 'string', diff --git a/dlp/system-test/inspect.test.js b/dlp/system-test/inspect.test.js index f604950a94..648210e203 100644 --- a/dlp/system-test/inspect.test.js +++ b/dlp/system-test/inspect.test.js @@ -56,6 +56,25 @@ it('should inspect a string', async () => { assert.strictEqual(new RegExp(/Info type: EMAIL_ADDRESS/).test(output), true); }); +it('should inspect a string with custom dictionary', async () => { + const output = await tools.runAsync( + `${cmd} string "I'm Gary and my email is gary@example.com" -d "Gary,email"`, + cwd + ); + assert.strictEqual(new RegExp(/Info type: CUSTOM_DICT_0/).test(output), true); +}); + +it('should inspect a string with custom regex', async () => { + const output = await tools.runAsync( + `${cmd} string "I'm Gary and my email is gary@example.com" -r "gary@example\\.com"`, + cwd + ); 
+ assert.strictEqual( + new RegExp(/Info type: CUSTOM_REGEX_0/).test(output), + true + ); +}); + it('should handle a string with no sensitive data', async () => { const output = await tools.runAsync(`${cmd} string "foo"`, cwd); assert.strictEqual(output, 'No findings.'); @@ -76,6 +95,25 @@ it('should inspect a local text file', async () => { assert.strictEqual(new RegExp(/Info type: EMAIL_ADDRESS/).test(output), true); }); +it('should inspect a local text file with custom dictionary', async () => { + const output = await tools.runAsync( + `${cmd} file resources/test.txt -d "gary@somedomain.com"`, + cwd + ); + assert.strictEqual(new RegExp(/Info type: CUSTOM_DICT_0/).test(output), true); +}); + +it('should inspect a local text file with custom regex', async () => { + const output = await tools.runAsync( + `${cmd} file resources/test.txt -r "\\(\\d{3}\\) \\d{3}-\\d{4}"`, + cwd + ); + assert.strictEqual( + new RegExp(/Info type: CUSTOM_REGEX_0/).test(output), + true + ); +}); + it('should inspect a local image file', async () => { const output = await tools.runAsync(`${cmd} file resources/test.png`, cwd); assert.strictEqual(new RegExp(/Info type: EMAIL_ADDRESS/).test(output), true);