Skip to content

Commit 00d3d16

Browse files
committed
SK-1758 Add detect support in Python SDK v2
- SK-2042 Add public interfaces for DeIdentify and ReIdentify text - SK-1758 Add detect controller
1 parent 2818d31 commit 00d3d16

21 files changed

+458
-6
lines changed

skyflow/client/skyflow.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from skyflow.vault.client.client import VaultClient
99
from skyflow.vault.controller import Vault
1010
from skyflow.vault.controller import Connection
11+
from skyflow.vault.controller import Detect
1112

1213
class Skyflow:
1314
def __init__(self, builder):
@@ -65,11 +66,15 @@ def update_log_level(self, log_level):
6566

6667
def vault(self, vault_id = None) -> Vault:
    """Return the vault controller registered for ``vault_id``.

    Args:
        vault_id: Identifier forwarded unchanged to the builder's
            ``get_vault_config``. Defaults to ``None``; how ``None`` is
            resolved is decided by the builder, not here.

    Returns:
        The object stored under ``"vault_controller"`` in the vault config
        (``None`` if that key is absent, assuming the config is a dict).
    """
    vault_config = self.__builder.get_vault_config(vault_id)
    # The builder's __add_vault_config stores the Vault controller under
    # "vault_controller" (next to "detect_controller"), so the lookup key
    # must be "vault_controller" and not the former "controller".
    return vault_config.get("vault_controller")
6970

7071
def connection(self, connection_id = None) -> Connection:
    """Return the connection controller registered for ``connection_id``.

    Args:
        connection_id: Identifier forwarded unchanged to the builder's
            ``get_connection_config``. Defaults to ``None``.

    Returns:
        The object stored under ``"controller"`` in the connection config.
    """
    return self.__builder.get_connection_config(connection_id).get("controller")
74+
75+
def detect(self, vault_id = None) -> Detect:
    """Return the detect controller registered for ``vault_id``.

    Args:
        vault_id: Identifier forwarded unchanged to the builder's
            ``get_vault_config``. Defaults to ``None``.

    Returns:
        The object stored under ``"detect_controller"`` in the vault config.
    """
    return self.__builder.get_vault_config(vault_id).get("detect_controller")
7378

7479
class Builder:
7580
def __init__(self):
@@ -182,7 +187,8 @@ def __add_vault_config(self, config):
182187
vault_client = VaultClient(config)
183188
self.__vault_configs[vault_id] = {
184189
"vault_client": vault_client,
185-
"controller": Vault(vault_client)
190+
"vault_controller": Vault(vault_client),
191+
"detect_controller": Detect(vault_client)
186192
}
187193
log_info(SkyflowMessages.Info.VAULT_CONTROLLER_INITIALIZED.value.format(config.get("vault_id")), self.__logger)
188194

skyflow/utils/_skyflow_messages.py

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,13 +147,59 @@ class Error(Enum):
147147
FILE_INVALID_JSON = f"{error_prefix} Initialization failed. File at {{}} is not in valid JSON format. Verify the file contents."
148148
INVALID_JSON_FORMAT_IN_CREDENTIALS_ENV = f"{error_prefix} Validation error. Invalid JSON format in SKYFLOW_CREDENTIALS environment variable."
149149

150+
# Detect error messages.
# These must be bound with "=": inside an Enum body, "NAME: <expr>" is only a
# variable annotation, creates no member, and makes
# SkyflowMessages.Error.NAME raise AttributeError at the call site.

# De-identify text validation.
INVALID_TEXT_IN_DEIDENTIFY = f"{error_prefix} Validation error. The text field is required and must be a non-empty string. Specify a valid text."
INVALID_ENTITIES_IN_DEIDENTIFY = f"{error_prefix} Validation error. The entities field must be an array of DetectEntities enums. Specify a valid entities."
INVALID_ALLOW_REGEX_LIST = f"{error_prefix} Validation error. The allowRegexList field must be an array of strings. Specify a valid allowRegexList."
INVALID_RESTRICT_REGEX_LIST = f"{error_prefix} Validation error. The restrictRegexList field must be an array of strings. Specify a valid restrictRegexList."
INVALID_TOKEN_FORMAT = f"{error_prefix} Validation error. The tokenFormat key must be an instance of TokenFormat. Specify a valid token format."
INVALID_TRANSFORMATIONS = f"{error_prefix} Validation error. The transformations key must be an instance of Transformations. Specify a valid transformations."

# Re-identify text validation.
# NOTE: INVALID_TEXT_IN_REIDENTIFY has the same value as
# INVALID_TEXT_IN_DEIDENTIFY, so Enum treats it as an alias of that member;
# ".value" is unaffected.
INVALID_TEXT_IN_REIDENTIFY = f"{error_prefix} Validation error. The text field is required and must be a non-empty string. Specify a valid text."
INVALID_REDACTED_ENTITIES_IN_REIDENTIFY = f"{error_prefix} Validation error. The redactedEntities field must be an array of DetectEntities enums. Specify a valid redactedEntities."
INVALID_MASKED_ENTITIES_IN_REIDENTIFY = f"{error_prefix} Validation error. The maskedEntities field must be an array of DetectEntities enums. Specify a valid maskedEntities."
INVALID_PLAIN_TEXT_ENTITIES_IN_REIDENTIFY = f"{error_prefix} Validation error. The plainTextEntities field must be an array of DetectEntities enums. Specify a valid plainTextEntities."

# De-identify file / detect run validation.
INVALID_DEIDENTIFY_FILE_REQUEST = f"{error_prefix} Validation error. Invalid deidentify file request. Specify a valid deidentify file request."
EMPTY_FILE_OBJECT = f"{error_prefix} Validation error. File object cannot be empty. Specify a valid file object."
INVALID_FILE_FORMAT = f"{error_prefix} Validation error. Invalid file format. Specify a valid file format."
MISSING_FILE_SOURCE = f"{error_prefix} Validation error. Provide exactly one of filePath, base64, or fileObject."
INVALID_FILE_OBJECT = f"{error_prefix} Validation error. Invalid file object. Specify a valid file object."
INVALID_BASE64_STRING = f"{error_prefix} Validation error. Invalid base64 string. Specify a valid base64 string."
INVALID_DEIDENTIFY_FILE_OPTIONS = f"{error_prefix} Validation error. Invalid deidentify file options. Specify a valid deidentify file options."
INVALID_ENTITIES = f"{error_prefix} Validation error. Invalid entities. Specify valid entities as string array."
EMPTY_ENTITIES = f"{error_prefix} Validation error. Entities cannot be empty. Specify valid entities."
EMPTY_ALLOW_REGEX_LIST = f"{error_prefix} Validation error. Allow regex list cannot be empty. Specify valid allow regex list."
INVALID_ALLOW_REGEX = f"{error_prefix} Validation error. Invalid allow regex. Specify valid allow regex at index {{}}."
EMPTY_RESTRICT_REGEX_LIST = f"{error_prefix} Validation error. Restrict regex list cannot be empty. Specify valid restrict regex list."
INVALID_RESTRICT_REGEX = f"{error_prefix} Validation error. Invalid restrict regex. Specify valid restrict regex at index {{}}."
INVALID_OUTPUT_PROCESSED_IMAGE = f"{error_prefix} Validation error. Invalid output processed image. Specify valid output processed image as string."
INVALID_OUTPUT_OCR_TEXT = f"{error_prefix} Validation error. Invalid output ocr text. Specify valid output ocr text as string."
INVALID_MASKING_METHOD = f"{error_prefix} Validation error. Invalid masking method. Specify valid masking method as string."
INVALID_PIXEL_DENSITY = f"{error_prefix} Validation error. Invalid pixel density. Specify valid pixel density as string."
INVALID_OUTPUT_TRANSCRIPTION = f"{error_prefix} Validation error. Invalid output transcription. Specify valid output transcription as string."
INVALID_OUTPUT_PROCESSED_AUDIO = f"{error_prefix} Validation error. Invalid output processed audio. Specify valid output processed audio as string."
INVALID_MAX_RESOLUTION = f"{error_prefix} Validation error. Invalid max resolution. Specify valid max resolution as string."
INVALID_BLEEP = f"{error_prefix} Validation error. Invalid bleep. Specify valid bleep as object."
INVALID_FILE_OR_ENCODED_FILE = f"{error_prefix} . Error while decoding base64 and saving file"
INVALID_FILE_TYPE = f"{error_prefix} Validation error. Invalid file type. Specify a valid file type."
INVALID_FILE_NAME = f"{error_prefix} Validation error. Invalid file name. Specify a valid file name."
FILE_READ_ERROR = f"{error_prefix} Validation error. Unable to read file. Verify the file path."
INVALID_BASE64_HEADER = f"{error_prefix} Validation error. Invalid base64 header. Specify a valid base64 header."
INVALID_WAIT_TIME = f"{error_prefix} Validation error. Invalid wait time. Specify a valid wait time as number and should not be greater than 20 secs."
INVALID_OUTPUT_DIRECTORY = f"{error_prefix} Validation error. Invalid output directory. Specify a valid output directory as string."
INVALID_OUTPUT_DIRECTORY_PATH = f"{error_prefix} Validation error. Invalid output directory path. Specify a valid output directory path as string."
EMPTY_RUN_ID = f"{error_prefix} Validation error. Run id cannot be empty. Specify a valid run id."
INVALID_RUN_ID = f"{error_prefix} Validation error. Invalid run id. Specify a valid run id as string."
INTERNAL_SERVER_ERROR = f"{error_prefix}. Internal server error. {{}}."
194+
150195
class Info(Enum):
151196
CLIENT_INITIALIZED = f"{INFO}: [{error_prefix}] Initialized skyflow client."
152197
VALIDATING_VAULT_CONFIG = f"{INFO}: [{error_prefix}] Validating vault config."
153198
VALIDATING_CONNECTION_CONFIG = f"{INFO}: [{error_prefix}] Validating connection config."
154199
UNABLE_TO_GENERATE_SDK_METRIC = f"{INFO}: [{error_prefix}] Unable to generate {{}} metric."
155200
VAULT_CONTROLLER_INITIALIZED = f"{INFO}: [{error_prefix}] Initialized vault controller with vault ID {{}}."
156201
CONNECTION_CONTROLLER_INITIALIZED = f"{INFO}: [{error_prefix}] Initialized connection controller with connection ID {{}}."
202+
DETECT_CONTROLLER_INITIALIZED = f"{INFO}: [{error_prefix}] Initialized detect controller with vault ID {{}}."
157203
VAULT_CONFIG_EXISTS = f"{INFO}: [{error_prefix}] Vault config with vault ID {{}} already exists."
158204
VAULT_CONFIG_DOES_NOT_EXIST = f"{INFO}: [{error_prefix}] Vault config with vault ID {{}} doesn't exist."
159205
CONNECTION_CONFIG_EXISTS = f"{INFO}: [{error_prefix}] Connection config with connection ID {{}} already exists."
@@ -166,10 +212,9 @@ class Info(Enum):
166212
GET_BEARER_TOKEN_SUCCESS = f"{INFO}: [{error_prefix}] Bearer token generated."
167213
GET_SIGNED_DATA_TOKENS_TRIGGERED = f"{INFO}: [{error_prefix}] generate_signed_data_tokens method triggered."
168214
GET_SIGNED_DATA_TOKEN_SUCCESS = f"{INFO}: [{error_prefix}] Signed data tokens generated."
169-
GENERATE_BEARER_TOKEN_FROM_CREDENTIALS_STRING_TRIGGERED = f"{INFO}: [{error_prefix}] generate bearer_token_from_credential_string method triggered ."
215+
GENERATE_BEARER_TOKEN_FROM_CREDENTIALS_STRING_TRIGGERED = f"{INFO}: [{error_prefix}] generate bearer_token_from_credential_string method triggered."
170216
REUSE_BEARER_TOKEN = f"{INFO}: [{error_prefix}] Reusing bearer token."
171217

172-
173218
VALIDATE_INSERT_REQUEST = f"{INFO}: [{error_prefix}] Validating insert request."
174219
INSERT_TRIGGERED = f"{INFO}: [{error_prefix}] Insert method triggered."
175220
INSERT_SUCCESS = f"{INFO}: [{error_prefix}] Data inserted."
@@ -210,6 +255,28 @@ class Info(Enum):
210255
INVOKE_CONNECTION_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Invoke connection request resolved."
211256
INVOKE_CONNECTION_SUCCESS = f"{INFO}: [{error_prefix}] Invoke Connection Success."
212257

258+
DEIDENTIFY_TEXT_TRIGGERED = f"{INFO}: [{error_prefix}] Deidentify text method triggered."
259+
VALIDATING_DEIDENTIFY_TEXT_INPUT = f"{INFO}: [{error_prefix}] Validating deidentify text input."
260+
DEIDENTIFY_TEXT_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Deidentify text request is resolved."
261+
DEIDENTIFY_TEXT_SUCCESS = f"{INFO}: [{error_prefix}] data deidentified."
262+
263+
REIDENTIFY_TEXT_TRIGGERED = f"{INFO}: [{error_prefix}] Reidentify text method triggered."
264+
VALIDATING_REIDENTIFY_TEXT_INPUT = f"{INFO}: [{error_prefix}] Validating reidentify text input."
265+
REIDENTIFY_TEXT_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Reidentify text request is resolved."
266+
REIDENTIFY_TEXT_SUCCESS = f"{INFO}: [{error_prefix}] data reidentified."
267+
268+
DEIDENTIFY_FILE_TRIGGERED = f"{INFO}: [{error_prefix}] Deidentify file triggered."
269+
VALIDATING_DETECT_FILE_INPUT = f"{INFO}: [{error_prefix}] Validating deidentify file input."
270+
DEIDENTIFY_FILE_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Deidentify file request is resolved."
271+
DEIDENTIFY_FILE_SUCCESS = f"{INFO}: [{error_prefix}] file deidentified."
272+
273+
GET_DETECT_RUN_TRIGGERED = f"{INFO}: [{error_prefix}] Get detect run triggered."
274+
VALIDATING_GET_DETECT_RUN_INPUT = f"{INFO}: [{error_prefix}] Validating get detect run input."
275+
GET_DETECT_RUN_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Get detect run request is resolved."
276+
GET_DETECT_RUN_SUCCESS = f"{INFO}: [{error_prefix}] get detect run success." # will see for a better message
277+
278+
DETECT_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Detect request is resolved." # Whats this for?
279+
213280
class ErrorLogs(Enum):
214281
VAULTID_IS_REQUIRED = f"{ERROR}: [{error_prefix}] Invalid vault config. Vault ID is required."
215282
EMPTY_VAULTID = f"{ERROR}: [{error_prefix}] Invalid vault config. Vault ID can not be empty."
@@ -280,6 +347,13 @@ class ErrorLogs(Enum):
280347
QUERY_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] Query request resulted in failure."
281348
GET_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] Get request resulted in failure."
282349
INVOKE_CONNECTION_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] Invoke connection request resulted in failure."
350+
351+
EMPTY_RUN_ID = f"{ERROR}: [{error_prefix}] Validation error. Run id cannot be empty. Specify a valid run id."
352+
INVALID_RUN_ID = f"{ERROR}: [{error_prefix}] Validation error. Invalid run id. Specify a valid run id as string."
353+
DEIDENTIFY_FILE_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] Deidentify file resulted in failure."
354+
DETECT_RUN_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] Detect get run resulted in failure."
355+
DEIDENTIFY_TEXT_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] Deidentify text resulted in failure."
356+
REIDENTIFY_TEXT_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] Reidentify text resulted in failure."
283357

284358
class Interface(Enum):
285359
INSERT = "INSERT"

skyflow/utils/_utils.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@
1313
from urllib.parse import quote
1414
from skyflow.error import SkyflowError
1515
from skyflow.generated.rest import V1UpdateRecordResponse, V1BulkDeleteRecordResponse, \
16-
V1DetokenizeResponse, V1TokenizeResponse, V1GetQueryResponse, V1BulkGetRecordResponse
16+
V1DetokenizeResponse, V1TokenizeResponse, V1GetQueryResponse, V1BulkGetRecordResponse, \
17+
DeidentifyStringResponse, ReidentifyStringResponse
1718
from skyflow.generated.rest.core.http_response import HttpResponse
1819
from skyflow.utils.logger import log_error_log
20+
from skyflow.vault.detect import DeidentifyTextResponse, ReidentifyTextResponse
1921
from . import SkyflowMessages, SDK_VERSION
2022
from .constants import PROTOCOL
2123
from .enums import Env, ContentType, EnvUrls
@@ -364,6 +366,16 @@ def parse_invoke_connection_response(api_response: requests.Response):
364366
message = SkyflowMessages.Error.RESPONSE_NOT_JSON.value.format(content)
365367
raise SkyflowError(message, status_code)
366368

369+
def parse_deidentify_text_response(api_response: DeidentifyStringResponse):
    """Build a ``DeidentifyTextResponse`` from a de-identify API response.

    Args:
        api_response: Response object exposing ``processed_text``,
            ``entities``, ``word_count`` and ``character_count``.

    Returns:
        A ``DeidentifyTextResponse`` constructed positionally from those four
        attributes, in that order. Values are passed through unmodified.
    """
    return DeidentifyTextResponse(
        api_response.processed_text,
        api_response.entities,
        api_response.word_count,
        api_response.character_count,
    )
375+
376+
def parse_reidentify_text_response(api_response: ReidentifyStringResponse):
    """Build a ``ReidentifyTextResponse`` from a re-identify API response.

    Args:
        api_response: Response object exposing ``processed_text``.

    Returns:
        A ``ReidentifyTextResponse`` wrapping the response's
        ``processed_text``, passed through unmodified.
    """
    processed_text = api_response.processed_text
    return ReidentifyTextResponse(processed_text)
378+
367379
def log_and_reject_error(description, status_code, request_id, http_status=None, grpc_code=None, details=None, logger = None):
368380
raise SkyflowError(description, status_code, request_id, grpc_code, http_status, details)
369381

skyflow/utils/enums/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from .env import Env, EnvUrls
22
from .log_level import LogLevel
33
from .content_types import ContentType
4+
from .detect_entities import DetectEntities
45
from .token_mode import TokenMode
6+
from .token_type import TokenType
57
from .request_method import RequestMethod
68
from .redaction_type import RedactionType
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
from enum import Enum
2+
3+
class DetectEntities(Enum):
    """Entity types that can be named in detect requests.

    Each member's value is the lower-case snake_case form of its name.
    The SDK's validation messages refer to this enum for the ``entities``
    field of de-identify requests and for the ``redactedEntities``,
    ``maskedEntities`` and ``plainTextEntities`` fields of re-identify
    requests.

    Members are kept in alphabetical order; iteration order follows
    definition order.
    """
    ACCOUNT_NUMBER = "account_number"
    AGE = "age"
    ALL = "all"
    BANK_ACCOUNT = "bank_account"
    BLOOD_TYPE = "blood_type"
    CONDITION = "condition"
    CORPORATE_ACTION = "corporate_action"
    CREDIT_CARD = "credit_card"
    CREDIT_CARD_EXPIRATION = "credit_card_expiration"
    CVV = "cvv"
    DATE = "date"
    DATE_INTERVAL = "date_interval"
    DOB = "dob"
    DOSE = "dose"
    DRIVER_LICENSE = "driver_license"
    DRUG = "drug"
    DURATION = "duration"
    EMAIL_ADDRESS = "email_address"
    EVENT = "event"
    FILENAME = "filename"
    FINANCIAL_METRIC = "financial_metric"
    GENDER_SEXUALITY = "gender_sexuality"
    HEALTHCARE_NUMBER = "healthcare_number"
    INJURY = "injury"
    IP_ADDRESS = "ip_address"
    LANGUAGE = "language"
    LOCATION = "location"
    LOCATION_ADDRESS = "location_address"
    LOCATION_ADDRESS_STREET = "location_address_street"
    LOCATION_CITY = "location_city"
    LOCATION_COORDINATE = "location_coordinate"
    LOCATION_COUNTRY = "location_country"
    LOCATION_STATE = "location_state"
    LOCATION_ZIP = "location_zip"
    MARITAL_STATUS = "marital_status"
    MEDICAL_CODE = "medical_code"
    MEDICAL_PROCESS = "medical_process"
    MONEY = "money"
    NAME = "name"
    NAME_FAMILY = "name_family"
    NAME_GIVEN = "name_given"
    NAME_MEDICAL_PROFESSIONAL = "name_medical_professional"
    NUMERICAL_PII = "numerical_pii"
    OCCUPATION = "occupation"
    ORGANIZATION = "organization"
    ORGANIZATION_MEDICAL_FACILITY = "organization_medical_facility"
    ORIGIN = "origin"
    PASSPORT_NUMBER = "passport_number"
    PASSWORD = "password"
    PHONE_NUMBER = "phone_number"
    PHYSICAL_ATTRIBUTE = "physical_attribute"
    POLITICAL_AFFILIATION = "political_affiliation"
    PRODUCT = "product"
    RELIGION = "religion"
    ROUTING_NUMBER = "routing_number"
    SSN = "ssn"
    STATISTICS = "statistics"
    TIME = "time"
    TREND = "trend"
    URL = "url"
    USERNAME = "username"
    VEHICLE_ID = "vehicle_id"
    ZODIAC_SIGN = "zodiac_sign"

skyflow/utils/enums/token_type.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from enum import Enum
2+
3+
class TokenType(Enum):
    """Token type identifiers, each mapped to a lower-case string value.

    NOTE(review): presumably consumed by the detect ``TokenFormat`` option
    when choosing how detected entities are tokenized -- confirm against the
    detect controller.
    """
    VAULT_TOKEN = "vault_token"
    # NOTE(review): the value is abbreviated ("unq") while the member name
    # spells out UNIQUE -- presumably this mirrors the API's wire value;
    # confirm before "correcting" it.
    ENTITY_UNIQUE_COUNTER = "entity_unq_counter"
    ENTITY_ONLY = "entity_only"

skyflow/utils/validations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,6 @@
1313
validate_detokenize_request,
1414
validate_tokenize_request,
1515
validate_invoke_connection_params,
16+
validate_deidentify_text_request,
17+
validate_reidentify_text_request
1618
)

0 commit comments

Comments
 (0)