Skip to content

Commit f1e9dd8

Browse files
jmorrisedzlier-gcp
authored and committed
add replace with info type examples, and update README (GoogleCloudPlatform#1299)
1 parent 1e34f80 commit f1e9dd8

File tree

3 files changed

+113
-24
lines changed

3 files changed

+113
-24
lines changed

dlp/README.md

+26-23
Original file line numberDiff line numberDiff line change
@@ -44,43 +44,46 @@ Note: image scanning is not currently supported on Google Cloud Storage.
4444
For more information, refer to the [API documentation](https://cloud.google.com/dlp/docs).
4545
Optional flags are explained in [this resource](https://cloud.google.com/dlp/docs/reference/rest/v2beta1/content/inspect#InspectConfig).
4646
```
47-
Commands:
48-
-s <string> Inspect a string using the Data Loss Prevention API.
49-
-f <filepath> Inspects a local text, PNG, or JPEG file using the Data Loss Prevention API.
50-
-gcs -bucketName <bucketName> -fileName <fileName> Inspects a text file stored on Google Cloud Storage using the Data Loss
51-
Prevention API.
52-
-ds -projectId [projectId] -namespace [namespace] - kind <kind> Inspect a Datastore instance using the Data Loss Prevention API.
53-
54-
Options:
55-
--help Show help
56-
-minLikelihood [string] [choices: "LIKELIHOOD_UNSPECIFIED", "VERY_UNLIKELY", "UNLIKELY", "POSSIBLE", "LIKELY", "VERY_LIKELY"]
57-
[default: "LIKELIHOOD_UNSPECIFIED"]
58-
specifies the minimum reporting likelihood threshold.
59-
-f, --maxFindings [number] [default: 0]
60-
maximum number of results to retrieve
61-
-q, --includeQuote [boolean] [default: true] include matching string in results
62-
-t, --infoTypes set of infoTypes to search for [eg. PHONE_NUMBER US_PASSPORT]
63-
-customDictionaries set of comma-separated dictionary words to search for as customInfoTypes
64-
-customRegexes set of regex patterns to search for as customInfoTypes
47+
usage: com.example.dlp.Inspect
48+
-bq,--Google BigQuery inspect BigQuery table
49+
-bucketName <arg>
50+
-customDictionaries <arg>
51+
-customRegexes <arg>
52+
-datasetId <arg>
53+
-ds,--Google Datastore inspect Datastore kind
54+
-f,--file path <arg> inspect input file path
55+
-fileName <arg>
56+
-gcs,--Google Cloud Storage inspect GCS file
57+
-includeQuote <arg>
58+
-infoTypes <arg>
59+
-kind <arg>
60+
-maxFindings <arg>
61+
-minLikelihood <arg>
62+
-namespace <arg>
63+
-projectId <arg>
64+
-s,--string <arg> inspect string
65+
-subscriptionId <arg>
66+
-tableId <arg>
67+
-topicId <arg>
6568
```
6669
### Examples
6770
- Inspect a string:
6871
```
69-
java -cp dlp/target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -s "My phone number is (123) 456-7890 and my email address is me@somedomain.com" --infoTypes PHONE_NUMBER EMAIL_ADDRESS
72+
java -cp dlp/target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -s "My phone number is (123) 456-7890 and my email address is me@somedomain.com" -infoTypes PHONE_NUMBER EMAIL_ADDRESS
7073
java -cp dlp/target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -s "My phone number is (123) 456-7890 and my email address is me@somedomain.com" -customDictionaries me@somedomain.com -customRegexes "\(\d{3}\) \d{3}-\d{4}"
7174
```
7275
- Inspect a local file (text / image):
7376
```
74-
java -cp dlp/target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -f src/test/resources/test.txt --infoTypes PHONE_NUMBER EMAIL_ADDRESS
75-
java -cp dlp/target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -f src/test/resources/test.png --infoTypes PHONE_NUMBER EMAIL_ADDRESS
77+
java -cp dlp/target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -f src/test/resources/test.txt -infoTypes PHONE_NUMBER EMAIL_ADDRESS
78+
java -cp dlp/target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -f src/test/resources/test.png -infoTypes PHONE_NUMBER EMAIL_ADDRESS
7679
```
7780
- Inspect a file on Google Cloud Storage:
7881
```
79-
java -cp dlp/target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -gcs -bucketName my-bucket -fileName my-file.txt --infoTypes PHONE_NUMBER EMAIL_ADDRESS
82+
java -cp dlp/target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -gcs -bucketName my-bucket -fileName my-file.txt -infoTypes PHONE_NUMBER EMAIL_ADDRESS
8083
```
8184
- Inspect a Google Cloud Datastore kind:
8285
```
83-
java -cp dlp/target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -ds -kind my-kind --infoTypes PHONE_NUMBER EMAIL_ADDRESS
86+
java -cp dlp/target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -ds -kind my-kind -infoTypes PHONE_NUMBER EMAIL_ADDRESS
8487
```
8588

8689
## Automatic redaction of sensitive data from images

dlp/src/main/java/com/example/dlp/DeIdentification.java

+75-1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import com.google.privacy.dlp.v2.RecordTransformations;
4343
import com.google.privacy.dlp.v2.ReidentifyContentRequest;
4444
import com.google.privacy.dlp.v2.ReidentifyContentResponse;
45+
import com.google.privacy.dlp.v2.ReplaceWithInfoTypeConfig;
4546
import com.google.privacy.dlp.v2.Table;
4647
import com.google.privacy.dlp.v2.Value;
4748
import com.google.protobuf.ByteString;
@@ -71,6 +72,71 @@
7172

7273
public class DeIdentification {
7374

75+
// [START dlp_deidentify_replace_with_info_type]
76+
/**
77+
* Deidentify a string by replacing sensitive information with its info type using the DLP API.
78+
*
79+
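* @param string The string to deidentify.
* @param infoTypes The info types to inspect the string for; each match is replaced with its info type name.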
80+
* @param projectId ID of Google Cloud project to run the API under.
81+
*/
82+
private static void deIdentifyReplaceWithInfoType(
83+
String string,
84+
List<InfoType> infoTypes,
85+
String projectId) {
86+
87+
// instantiate a client
88+
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
89+
90+
ContentItem contentItem = ContentItem.newBuilder().setValue(string).build();
91+
92+
// Create the deidentification transformation configuration
93+
PrimitiveTransformation primitiveTransformation =
94+
PrimitiveTransformation.newBuilder()
95+
.setReplaceWithInfoTypeConfig(ReplaceWithInfoTypeConfig.getDefaultInstance())
96+
.build();
97+
98+
InfoTypeTransformation infoTypeTransformationObject =
99+
InfoTypeTransformation.newBuilder()
100+
.setPrimitiveTransformation(primitiveTransformation)
101+
.build();
102+
103+
InfoTypeTransformations infoTypeTransformationArray =
104+
InfoTypeTransformations.newBuilder()
105+
.addTransformations(infoTypeTransformationObject)
106+
.build();
107+
108+
InspectConfig inspectConfig =
109+
InspectConfig.newBuilder()
110+
.addAllInfoTypes(infoTypes)
111+
.build();
112+
113+
DeidentifyConfig deidentifyConfig =
114+
DeidentifyConfig.newBuilder()
115+
.setInfoTypeTransformations(infoTypeTransformationArray)
116+
.build();
117+
118+
// Create the deidentification request object
119+
DeidentifyContentRequest request =
120+
DeidentifyContentRequest.newBuilder()
121+
.setParent(ProjectName.of(projectId).toString())
122+
.setInspectConfig(inspectConfig)
123+
.setDeidentifyConfig(deidentifyConfig)
124+
.setItem(contentItem)
125+
.build();
126+
127+
// Execute the deidentification request
128+
DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request);
129+
130+
// Print the redacted input value
131+
// e.g. "My SSN is 123456789" --> "My SSN is [US_SOCIAL_SECURITY_NUMBER]"
132+
String result = response.getItem().getValue();
133+
System.out.println(result);
134+
} catch (Exception e) {
135+
System.out.println("Error in deIdentifyReplaceWithInfoType: " + e.getMessage());
136+
}
137+
}
138+
// [END dlp_deidentify_replace_with_info_type]
139+
74140
// [START dlp_deidentify_masking]
75141
/**
76142
* Deidentify a string by masking sensitive information with a character using the DLP API.
@@ -512,6 +578,10 @@ public static void main(String[] args) throws Exception {
512578
OptionGroup optionsGroup = new OptionGroup();
513579
optionsGroup.setRequired(true);
514580

581+
Option deidentifyReplaceWithInfoTypeOption =
582+
new Option("it", "info_type_replace", true, "Deidentify by replacing with info type.");
583+
optionsGroup.addOption(deidentifyReplaceWithInfoTypeOption);
584+
515585
Option deidentifyMaskingOption =
516586
new Option("m", "mask", true, "Deidentify with character masking.");
517587
optionsGroup.addOption(deidentifyMaskingOption);
@@ -606,7 +676,11 @@ public static void main(String[] args) throws Exception {
606676
}
607677
}
608678

609-
if (cmd.hasOption("m")) {
679+
if (cmd.hasOption("it")) {
680+
// replace with info type
681+
String val = cmd.getOptionValue(deidentifyReplaceWithInfoTypeOption.getOpt());
682+
deIdentifyReplaceWithInfoType(val, infoTypesList, projectId);
683+
} else if (cmd.hasOption("m")) {
610684
// deidentification with character masking
611685
int numberToMask = Integer.parseInt(cmd.getOptionValue(numberToMaskOption.getOpt(), "0"));
612686
char maskingCharacter = cmd.getOptionValue(maskingCharacterOption.getOpt(), "*").charAt(0);

dlp/src/test/java/com/example/dlp/DeIdentificationIT.java

+12
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,18 @@ public void setUp() {
5757
assertNotNull(System.getenv("DLP_DEID_KEY_NAME"));
5858
}
5959

60+
@Test
61+
public void testDeidReplaceWithInfoType() throws Exception {
62+
String text = "\"My SSN is 372819127\"";
63+
DeIdentification.main(
64+
new String[] {
65+
"-it", text,
66+
"-infoTypes", "US_SOCIAL_SECURITY_NUMBER"
67+
});
68+
String output = bout.toString();
69+
assertThat(output, containsString("My SSN is [US_SOCIAL_SECURITY_NUMBER]"));
70+
}
71+
6072
@Test
6173
public void testDeidStringMasksCharacters() throws Exception {
6274
String text = "\"My SSN is 372819127\"";

0 commit comments

Comments
 (0)