Skip to content

Commit

Permalink
fix: correct dataset name, use env var for project (#2621)
Browse files Browse the repository at this point in the history
* fix: correct dataset name, use env var for project

* Add uuids to tests

* add uuids and fixtures for bq

* Add logic to delete job

* Ran black

* Run black with line length

* Add utf encoding for python 2 tests

* Add skips for now

* Ran black

* Remove skips, adjust job tests

* Fix lint and skips

* Cleanup commented things

Co-authored-by: Kurtis Van Gent <31518063+kurtisvg@users.noreply.github.com>
  • Loading branch information
leahecole and kurtisvg authored Feb 10, 2020
1 parent d26b380 commit d87e01d
Show file tree
Hide file tree
Showing 16 changed files with 393 additions and 130 deletions.
51 changes: 38 additions & 13 deletions dlp/deid.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ def deidentify_with_mask(
parent = dlp.project_path(project)

# Construct inspect configuration dictionary
inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}
inspect_config = {
"info_types": [{"name": info_type} for info_type in info_types]
}

# Construct deidentify configuration dictionary
deidentify_config = {
Expand Down Expand Up @@ -131,17 +133,24 @@ def deidentify_with_fpe(
# Construct FPE configuration dictionary
crypto_replace_ffx_fpe_config = {
"crypto_key": {
"kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name}
"kms_wrapped": {
"wrapped_key": wrapped_key,
"crypto_key_name": key_name,
}
},
"common_alphabet": alphabet,
}

# Add surrogate type
if surrogate_type:
crypto_replace_ffx_fpe_config["surrogate_info_type"] = {"name": surrogate_type}
crypto_replace_ffx_fpe_config["surrogate_info_type"] = {
"name": surrogate_type
}

# Construct inspect configuration dictionary
inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}
inspect_config = {
"info_types": [{"name": info_type} for info_type in info_types]
}

# Construct deidentify configuration dictionary
deidentify_config = {
Expand Down Expand Up @@ -176,7 +185,12 @@ def deidentify_with_fpe(

# [START dlp_reidentify_fpe]
def reidentify_with_fpe(
project, string, alphabet=None, surrogate_type=None, key_name=None, wrapped_key=None
project,
string,
alphabet=None,
surrogate_type=None,
key_name=None,
wrapped_key=None,
):
"""Uses the Data Loss Prevention API to reidentify sensitive data in a
string that was encrypted by Format Preserving Encryption (FPE).
Expand Down Expand Up @@ -333,7 +347,11 @@ def map_data(value):
try:
date = datetime.strptime(value, "%m/%d/%Y")
return {
"date_value": {"year": date.year, "month": date.month, "day": date.day}
"date_value": {
"year": date.year,
"month": date.month,
"day": date.day,
}
}
except ValueError:
return {"string_value": value}
Expand Down Expand Up @@ -426,7 +444,8 @@ def write_data(data):

mask_parser = subparsers.add_parser(
"deid_mask",
help="Deidentify sensitive data in a string by masking it with a " "character.",
help="Deidentify sensitive data in a string by masking it with a "
"character.",
)
mask_parser.add_argument(
"--info_types",
Expand All @@ -438,7 +457,8 @@ def write_data(data):
default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"],
)
mask_parser.add_argument(
"project", help="The Google Cloud project id to use as a parent resource."
"project",
help="The Google Cloud project id to use as a parent resource.",
)
mask_parser.add_argument("item", help="The string to deidentify.")
mask_parser.add_argument(
Expand Down Expand Up @@ -471,11 +491,13 @@ def write_data(data):
default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"],
)
fpe_parser.add_argument(
"project", help="The Google Cloud project id to use as a parent resource."
"project",
help="The Google Cloud project id to use as a parent resource.",
)
fpe_parser.add_argument(
"item",
help="The string to deidentify. " "Example: string = 'My SSN is 372819127'",
help="The string to deidentify. "
"Example: string = 'My SSN is 372819127'",
)
fpe_parser.add_argument(
"key_name",
Expand Down Expand Up @@ -513,11 +535,13 @@ def write_data(data):
"Encryption (FPE).",
)
reid_parser.add_argument(
"project", help="The Google Cloud project id to use as a parent resource."
"project",
help="The Google Cloud project id to use as a parent resource.",
)
reid_parser.add_argument(
"item",
help="The string to deidentify. " "Example: string = 'My SSN is 372819127'",
help="The string to deidentify. "
"Example: string = 'My SSN is 372819127'",
)
reid_parser.add_argument(
"surrogate_type",
Expand Down Expand Up @@ -553,7 +577,8 @@ def write_data(data):
help="Deidentify dates in a CSV file by pseudorandomly shifting them.",
)
date_shift_parser.add_argument(
"project", help="The Google Cloud project id to use as a parent resource."
"project",
help="The Google Cloud project id to use as a parent resource.",
)
date_shift_parser.add_argument(
"input_csv_file",
Expand Down
5 changes: 4 additions & 1 deletion dlp/deid_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,10 @@ def test_deidentify_with_mask_masking_character_specified(capsys):

def test_deidentify_with_mask_masking_number_specified(capsys):
deid.deidentify_with_mask(
GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"], number_to_mask=7
GCLOUD_PROJECT,
HARMFUL_STRING,
["US_SOCIAL_SECURITY_NUMBER"],
number_to_mask=7,
)

out, _ = capsys.readouterr()
Expand Down
17 changes: 12 additions & 5 deletions dlp/inspect_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,9 @@ def inspect_table(
headers = [{"name": val} for val in data["header"]]
rows = []
for row in data["rows"]:
rows.append({"values": [{"string_value": cell_val} for cell_val in row]})
rows.append(
{"values": [{"string_value": cell_val} for cell_val in row]}
)

table = {}
table["headers"] = headers
Expand Down Expand Up @@ -978,7 +980,9 @@ def callback(message):
)

parser_file = subparsers.add_parser("file", help="Inspect a local file.")
parser_file.add_argument("filename", help="The path to the file to inspect.")
parser_file.add_argument(
"filename", help="The path to the file to inspect."
)
parser_file.add_argument(
"--project",
help="The Google Cloud project id to use as a parent resource.",
Expand Down Expand Up @@ -1121,10 +1125,12 @@ def callback(message):
"datastore", help="Inspect files on Google Datastore."
)
parser_datastore.add_argument(
"datastore_project", help="The Google Cloud project id of the target Datastore."
"datastore_project",
help="The Google Cloud project id of the target Datastore.",
)
parser_datastore.add_argument(
"kind", help='The kind of the Datastore entity to inspect, e.g. "Person".'
"kind",
help='The kind of the Datastore entity to inspect, e.g. "Person".',
)
parser_datastore.add_argument(
"topic_id",
Expand Down Expand Up @@ -1200,7 +1206,8 @@ def callback(message):
"bigquery", help="Inspect files on Google BigQuery."
)
parser_bigquery.add_argument(
"bigquery_project", help="The Google Cloud project id of the target table."
"bigquery_project",
help="The Google Cloud project id of the target table.",
)
parser_bigquery.add_argument(
"dataset_id", help="The ID of the target BigQuery dataset."
Expand Down
37 changes: 26 additions & 11 deletions dlp/inspect_content_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import os
import uuid

from gcp_devrel.testing import eventually_consistent
from gcp_devrel.testing.flaky import flaky
Expand All @@ -26,16 +27,18 @@
import pytest
import inspect_content

UNIQUE_STRING = str(uuid.uuid4()).split("-")[0]

GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT")
TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test"
TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test" + UNIQUE_STRING
RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), "resources")
RESOURCE_FILE_NAMES = ["test.txt", "test.png", "harmless.txt", "accounts.txt"]
TOPIC_ID = "dlp-test"
SUBSCRIPTION_ID = "dlp-test-subscription"
TOPIC_ID = "dlp-test" + UNIQUE_STRING
SUBSCRIPTION_ID = "dlp-test-subscription" + UNIQUE_STRING
DATASTORE_KIND = "DLP test kind"
BIGQUERY_DATASET_ID = "dlp_test_dataset"
BIGQUERY_TABLE_ID = "dlp_test_table"
DATASTORE_NAME = "DLP test object" + UNIQUE_STRING
BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING
BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -91,7 +94,9 @@ def subscription_id(topic_id):
# Subscribes to a topic.
subscriber = google.cloud.pubsub.SubscriberClient()
topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id)
subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID)
subscription_path = subscriber.subscription_path(
GCLOUD_PROJECT, SUBSCRIPTION_ID
)
try:
subscriber.create_subscription(subscription_path, topic_path)
except google.api_core.exceptions.AlreadyExists:
Expand All @@ -108,7 +113,7 @@ def datastore_project():
datastore_client = google.cloud.datastore.Client()

kind = DATASTORE_KIND
name = "DLP test object"
name = DATASTORE_NAME
key = datastore_client.key(kind, name)
item = google.cloud.datastore.Entity(key=key)
item["payload"] = "My name is Gary Smith and my email is gary@example.com"
Expand Down Expand Up @@ -159,7 +164,10 @@ def test_inspect_string(capsys):
test_string = "My name is Gary Smith and my email is gary@example.com"

inspect_content.inspect_string(
GCLOUD_PROJECT, test_string, ["FIRST_NAME", "EMAIL_ADDRESS"], include_quote=True
GCLOUD_PROJECT,
test_string,
["FIRST_NAME", "EMAIL_ADDRESS"],
include_quote=True,
)

out, _ = capsys.readouterr()
Expand Down Expand Up @@ -211,7 +219,10 @@ def test_inspect_string_no_results(capsys):
test_string = "Nothing to see here"

inspect_content.inspect_string(
GCLOUD_PROJECT, test_string, ["FIRST_NAME", "EMAIL_ADDRESS"], include_quote=True
GCLOUD_PROJECT,
test_string,
["FIRST_NAME", "EMAIL_ADDRESS"],
include_quote=True,
)

out, _ = capsys.readouterr()
Expand Down Expand Up @@ -320,7 +331,9 @@ def test_inspect_gcs_file_with_custom_info_types(


@flaky
def test_inspect_gcs_file_no_results(bucket, topic_id, subscription_id, capsys):
def test_inspect_gcs_file_no_results(
bucket, topic_id, subscription_id, capsys
):
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
Expand Down Expand Up @@ -367,7 +380,9 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):


@flaky
def test_inspect_datastore(datastore_project, topic_id, subscription_id, capsys):
def test_inspect_datastore(
datastore_project, topic_id, subscription_id, capsys
):
@eventually_consistent.call
def _():
inspect_content.inspect_datastore(
Expand Down
19 changes: 14 additions & 5 deletions dlp/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ def list_dlp_jobs(project, filter_string=None, job_type=None):

# Job type dictionary
job_type_to_int = {
"DLP_JOB_TYPE_UNSPECIFIED": google.cloud.dlp.enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED,
"DLP_JOB_TYPE_UNSPECIFIED":
google.cloud.dlp.enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED,
"INSPECT_JOB": google.cloud.dlp.enums.DlpJobType.INSPECT_JOB,
"RISK_ANALYSIS_JOB": google.cloud.dlp.enums.DlpJobType.RISK_ANALYSIS_JOB,
}
Expand Down Expand Up @@ -122,7 +123,8 @@ def delete_dlp_job(project, job_name):

list_parser = subparsers.add_parser(
"list",
help="List Data Loss Prevention API jobs corresponding to a given " "filter.",
help="List Data Loss Prevention API jobs corresponding to a given "
"filter.",
)
list_parser.add_argument(
"project", help="The project id to use as a parent resource."
Expand All @@ -135,7 +137,11 @@ def delete_dlp_job(project, job_name):
list_parser.add_argument(
"-t",
"--type",
choices=["DLP_JOB_TYPE_UNSPECIFIED", "INSPECT_JOB", "RISK_ANALYSIS_JOB"],
choices=[
"DLP_JOB_TYPE_UNSPECIFIED",
"INSPECT_JOB",
"RISK_ANALYSIS_JOB",
],
help='The type of job. API defaults to "INSPECT"',
)

Expand All @@ -147,12 +153,15 @@ def delete_dlp_job(project, job_name):
)
delete_parser.add_argument(
"job_name",
help="The name of the DlpJob resource to be deleted. " "Example: X-#####",
help="The name of the DlpJob resource to be deleted. "
"Example: X-#####",
)

args = parser.parse_args()

if args.content == "list":
list_dlp_jobs(args.project, filter_string=args.filter, job_type=args.type)
list_dlp_jobs(
args.project, filter_string=args.filter, job_type=args.type
)
elif args.content == "delete":
delete_dlp_job(args.project, args.job_name)
Loading

0 comments on commit d87e01d

Please sign in to comment.