Skip to content

Commit 45795d3

Browse files
committed
SK-1758 Add detect support in Python SDK
- Add deidentify and reidentify text support in Python SDK
1 parent 00d3d16 commit 45795d3

File tree

8 files changed

+116
-84
lines changed

8 files changed

+116
-84
lines changed

skyflow/utils/_skyflow_messages.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -258,22 +258,22 @@ class Info(Enum):
258258
DEIDENTIFY_TEXT_TRIGGERED = f"{INFO}: [{error_prefix}] Deidentify text method triggered."
259259
VALIDATING_DEIDENTIFY_TEXT_INPUT = f"{INFO}: [{error_prefix}] Validating deidentify text input."
260260
DEIDENTIFY_TEXT_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Deidentify text request is resolved."
261-
DEIDENTIFY_TEXT_SUCCESS = f"{INFO}: [{error_prefix}] data deidentified."
261+
DEIDENTIFY_TEXT_SUCCESS = f"{INFO}: [{error_prefix}] Data deidentified."
262262

263263
REIDENTIFY_TEXT_TRIGGERED = f"{INFO}: [{error_prefix}] Reidentify text method triggered."
264264
VALIDATING_REIDENTIFY_TEXT_INPUT = f"{INFO}: [{error_prefix}] Validating reidentify text input."
265265
REIDENTIFY_TEXT_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Reidentify text request is resolved."
266-
REIDENTIFY_TEXT_SUCCESS = f"{INFO}: [{error_prefix}] data reidentified."
266+
REIDENTIFY_TEXT_SUCCESS = f"{INFO}: [{error_prefix}] Data reidentified."
267267

268268
DEIDENTIFY_FILE_TRIGGERED = f"{INFO}: [{error_prefix}] Deidentify file triggered."
269269
VALIDATING_DETECT_FILE_INPUT = f"{INFO}: [{error_prefix}] Validating deidentify file input."
270270
DEIDENTIFY_FILE_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Deidentify file request is resolved."
271-
DEIDENTIFY_FILE_SUCCESS = f"{INFO}: [{error_prefix}] file deidentified."
271+
DEIDENTIFY_FILE_SUCCESS = f"{INFO}: [{error_prefix}] File deidentified."
272272

273273
GET_DETECT_RUN_TRIGGERED = f"{INFO}: [{error_prefix}] Get detect run triggered."
274274
VALIDATING_GET_DETECT_RUN_INPUT = f"{INFO}: [{error_prefix}] Validating get detect run input."
275275
GET_DETECT_RUN_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Get detect run request is resolved."
276-
GET_DETECT_RUN_SUCCESS = f"{INFO}: [{error_prefix}] get detect run success." # will see for a better message
276+
GET_DETECT_RUN_SUCCESS = f"{INFO}: [{error_prefix}] Get detect run success." # will see for a better message
277277

278278
DETECT_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Detect request is resolved." # Whats this for?
279279

skyflow/utils/_utils.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from skyflow.generated.rest.core.http_response import HttpResponse
1919
from skyflow.utils.logger import log_error_log
2020
from skyflow.vault.detect import DeidentifyTextResponse, ReidentifyTextResponse
21+
from skyflow.vault.detect import EntityInfo, TextIndex
2122
from . import SkyflowMessages, SDK_VERSION
2223
from .constants import PROTOCOL
2324
from .enums import Env, ContentType, EnvUrls
@@ -84,6 +85,30 @@ def to_lowercase_keys(dict):
8485

8586
return result
8687

88+
def convert_to_entity_type(detect_entities):
    """Return the ``value`` attribute of every entry in ``detect_entities``.

    Args:
        detect_entities: A sized collection of objects that expose a
            ``value`` attribute, or ``None``.

    Returns:
        A list with each entry's ``value`` in input order, or ``None`` when
        ``detect_entities`` is ``None`` or has length zero.
    """
    # None and empty input both collapse to None (not to an empty list).
    if detect_entities is None or len(detect_entities) == 0:
        return None
    return [entity.value for entity in detect_entities]
95+
96+
def _convert_detected_entity_to_entity_info(detected_entity):
    """Build an ``EntityInfo`` from a single detected-entity object.

    Args:
        detected_entity: Object exposing ``token``, ``value``,
            ``entity_type``, ``entity_scores`` and a ``location`` that in
            turn exposes ``start_index``, ``end_index``,
            ``start_index_processed`` and ``end_index_processed``.

    Returns:
        An ``EntityInfo`` whose ``text_index`` wraps the plain start/end
        indices and whose ``processed_index`` wraps the ``*_processed``
        start/end indices of ``detected_entity.location``.
    """
    location = detected_entity.location
    return EntityInfo(
        token=detected_entity.token,
        value=detected_entity.value,
        text_index=TextIndex(
            start=location.start_index,
            end=location.end_index,
        ),
        processed_index=TextIndex(
            start=location.start_index_processed,
            end=location.end_index_processed,
        ),
        entity=detected_entity.entity_type,
        scores=detected_entity.entity_scores,
    )
111+
87112
def construct_invoke_connection_request(request, connection_url, logger) -> PreparedRequest:
88113
url = parse_path_params(connection_url.rstrip('/'), request.path_params)
89114

@@ -367,11 +392,13 @@ def parse_invoke_connection_response(api_response: requests.Response):
367392
raise SkyflowError(message, status_code)
368393

369394
def parse_deidentify_text_response(api_response: DeidentifyStringResponse):
370-
processed_text = api_response.processed_text
371-
entities = api_response.entities
372-
word_count = api_response.word_count
373-
character_count = api_response.character_count
374-
return DeidentifyTextResponse(processed_text, entities, word_count, character_count)
395+
entities = [_convert_detected_entity_to_entity_info(entity) for entity in api_response.entities]
396+
return DeidentifyTextResponse(
397+
processed_text=api_response.processed_text,
398+
entities=entities,
399+
word_count=api_response.word_count,
400+
char_count=api_response.character_count
401+
)
375402

376403
def parse_reidentify_text_response(api_response: ReidentifyStringResponse) -> ReidentifyTextResponse:
    """Wrap ``api_response.processed_text`` in a ``ReidentifyTextResponse``."""
    return ReidentifyTextResponse(api_response.processed_text)

skyflow/vault/controller/_detect.py

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import json
2+
from skyflow.generated.rest.types.token_type import TokenType
3+
from skyflow.generated.rest.types.transformations import Transformations
4+
from skyflow.generated.rest.types.transformations_shift_dates import TransformationsShiftDates
25
from skyflow.utils._skyflow_messages import SkyflowMessages
3-
from skyflow.utils._utils import get_metrics, handle_exception, parse_deidentify_text_response, parse_reidentify_text_response
6+
from skyflow.utils._utils import convert_to_entity_type, get_metrics, handle_exception, parse_deidentify_text_response, parse_reidentify_text_response
47
from skyflow.utils.constants import SKY_META_DATA_HEADER
58
from skyflow.utils.logger import log_info, log_error_log
69
from skyflow.utils.validations._validations import validate_deidentify_text_request, validate_reidentify_text_request
@@ -23,29 +26,42 @@ def __get_headers(self):
2326

2427
def ___build_deidentify_text_body(self, request: DeidentifyTextRequest) -> Dict[str, Any]:
2528
deidentify_text_body = {}
26-
entity_types = []
27-
deidentify_text_body['text'] = request.text
28-
29-
for entity in request.entities:
30-
entity_type = entity.value
31-
entity_types.append(entity_type)
29+
parsed_entity_types = convert_to_entity_type(request.entities)
3230

33-
deidentify_text_body['entity_types'] = entity_types
34-
deidentify_text_body['token_type'] = request.token_format
31+
parsed_token_type = TokenType(
32+
default = request.token_format.default,
33+
vault_token = convert_to_entity_type(request.token_format.vault_token),
34+
entity_unq_counter = convert_to_entity_type(request.token_format.entity_unique_counter),
35+
entity_only = convert_to_entity_type(request.token_format.entity_only)
36+
)
37+
parsed_transformations = None
38+
if request.transformations is not None:
39+
parsed_transformations = Transformations(
40+
shift_dates = TransformationsShiftDates(
41+
max_days = request.transformations.shift_days.max,
42+
min_days = request.transformations.shift_days.min,
43+
entity_types = convert_to_entity_type(request.transformations.shift_days.entities)
44+
)
45+
)
46+
47+
deidentify_text_body['text'] = request.text
48+
deidentify_text_body['entity_types'] = parsed_entity_types
49+
deidentify_text_body['token_type'] = parsed_token_type
3550
deidentify_text_body['allow_regex'] = request.allow_regex_list
3651
deidentify_text_body['restrict_regex'] = request.restrict_regex_list
37-
deidentify_text_body['transformations'] = request.transformations
52+
deidentify_text_body['transformations'] = parsed_transformations
53+
3854
return deidentify_text_body
3955

4056
def ___build_reidentify_text_body(self, request: ReidentifyTextRequest) -> Dict[str, Any]:
41-
format_obj = ReidentifyStringRequestFormat(
42-
redacted=request.redacted_entities,
43-
masked=request.masked_entities,
44-
plaintext=request.plain_text_entities
57+
parsed_format = ReidentifyStringRequestFormat(
58+
redacted=convert_to_entity_type(request.redacted_entities),
59+
masked=convert_to_entity_type(request.masked_entities),
60+
plaintext=convert_to_entity_type(request.plain_text_entities)
4561
)
46-
reidentify_text_body = []
62+
reidentify_text_body = {}
4763
reidentify_text_body['text'] = request.text
48-
reidentify_text_body['format'] = format_obj
64+
reidentify_text_body['format'] = parsed_format
4965
return reidentify_text_body
5066

5167
def deidentify_text(self, request: DeidentifyTextRequest) -> DeidentifyTextResponse:
@@ -59,12 +75,12 @@ def deidentify_text(self, request: DeidentifyTextRequest) -> DeidentifyTextRespo
5975
try:
6076
log_info(SkyflowMessages.Info.DEIDENTIFY_TEXT_TRIGGERED.value, self.__vault_client.get_logger())
6177
api_response = detect_api.deidentify_string(
62-
self.__vault_client.get_vault_id(),
63-
request.text,
64-
entity_types=deidentify_text_body['entities'],
65-
allows_regex=deidentify_text_body['allow_regex_list'],
66-
restrict_regex=deidentify_text_body['restrict_regex_list'],
67-
token_type=deidentify_text_body['token_format'],
78+
vault_id=self.__vault_client.get_vault_id(),
79+
text=deidentify_text_body['text'],
80+
entity_types=deidentify_text_body['entity_types'],
81+
allow_regex=deidentify_text_body['allow_regex'],
82+
restrict_regex=deidentify_text_body['restrict_regex'],
83+
token_type=deidentify_text_body['token_type'],
6884
transformations=deidentify_text_body['transformations'],
6985
request_options=self.__get_headers()
7086
)
@@ -78,7 +94,7 @@ def deidentify_text(self, request: DeidentifyTextRequest) -> DeidentifyTextRespo
7894

7995
def reidentify_text(self, request: ReidentifyTextRequest) -> ReidentifyTextResponse:
8096
log_info(SkyflowMessages.Info.VALIDATING_REIDENTIFY_TEXT_INPUT.value, self.__vault_client.get_logger())
81-
validate_deidentify_text_request(self.__vault_client.get_logger(), request)
97+
validate_reidentify_text_request(self.__vault_client.get_logger(), request)
8298
log_info(SkyflowMessages.Info.REIDENTIFY_TEXT_REQUEST_RESOLVED.value, self.__vault_client.get_logger())
8399
self.__initialize()
84100
detect_api = self.__vault_client.get_detect_text_api()
@@ -87,8 +103,8 @@ def reidentify_text(self, request: ReidentifyTextRequest) -> ReidentifyTextRespo
87103
try:
88104
log_info(SkyflowMessages.Info.REIDENTIFY_TEXT_TRIGGERED.value, self.__vault_client.get_logger())
89105
api_response = detect_api.reidentify_string(
90-
text=reidentify_text_body['text'],
91106
vault_id=self.__vault_client.get_vault_id(),
107+
text=reidentify_text_body['text'],
92108
format=reidentify_text_body['format'],
93109
request_options=self.__get_headers()
94110
)
@@ -97,5 +113,5 @@ def reidentify_text(self, request: ReidentifyTextRequest) -> ReidentifyTextRespo
97113
return reidentify_text_response
98114

99115
except Exception as e:
100-
log_error_log(SkyflowMessages.ErrorLogs.DEIDENTIFY_TEXT_REQUEST_REJECTED.value, self.__vault_client.get_logger())
116+
log_error_log(SkyflowMessages.ErrorLogs.REIDENTIFY_TEXT_REQUEST_REJECTED.value, self.__vault_client.get_logger())
101117
handle_exception(e, self.__vault_client.get_logger())
Lines changed: 16 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,19 @@
1-
from dataclasses import dataclass
2-
from typing import List, Dict
3-
from ._text_index import TextIndex
1+
from typing import List
42
from ._entity_info import EntityInfo
53

6-
@dataclass
74
class DeidentifyTextResponse:
8-
processed_text: str
9-
entities: List[EntityInfo]
10-
word_count: int
11-
char_count: int
12-
13-
@property
14-
def processed_text(self) -> str:
15-
return self._processed_text
16-
17-
@processed_text.setter
18-
def processed_text(self, value: str):
19-
self._processed_text = value
20-
21-
@property
22-
def entities(self) -> List[EntityInfo]:
23-
return self._entities
24-
25-
@entities.setter
26-
def entities(self, value: List[EntityInfo]):
27-
self._entities = value
28-
29-
@property
30-
def word_count(self) -> int:
31-
return self._word_count
32-
33-
@word_count.setter
34-
def word_count(self, value: int):
35-
self._word_count = value
36-
37-
@property
38-
def char_count(self) -> int:
39-
return self._char_count
40-
41-
@char_count.setter
42-
def char_count(self, value: int):
43-
self._char_count = value
44-
45-
def __init__(self, processed_text: str, entities: List[EntityInfo],
46-
word_count: int, char_count: int):
47-
self._processed_text = processed_text
48-
self._entities = entities
49-
self._word_count = word_count
50-
self._char_count = char_count
5+
def __init__(self,
             processed_text: str,
             entities: "List[EntityInfo]",
             word_count: int,
             char_count: int):
    """Store the four response fields as plain instance attributes.

    Args:
        processed_text: Stored as ``self.processed_text``.
        entities: Stored as ``self.entities``.
        word_count: Stored as ``self.word_count``.
        char_count: Stored as ``self.char_count``.
    """
    self.processed_text = processed_text
    self.entities = entities
    self.word_count = word_count
    self.char_count = char_count
14+
15+
def __repr__(self):
    """Return a constructor-style string listing all four stored fields."""
    return (
        f"DeidentifyTextResponse(processed_text='{self.processed_text}', "
        f"entities={self.entities}, word_count={self.word_count}, "
        f"char_count={self.char_count})"
    )
17+
18+
def __str__(self):
    """Return the same text as ``__repr__``."""
    return self.__repr__()

skyflow/vault/detect/_entity_info.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,11 @@ def __init__(self, token: str, value: str, text_index: TextIndex,
1010
self.processed_index = processed_index
1111
self.entity = entity
1212
self.scores = scores
13+
14+
def __repr__(self) -> str:
    """Return a constructor-style string listing every stored field."""
    return (f"EntityInfo(token='{self.token}', value='{self.value}', "
            f"text_index={self.text_index}, processed_index={self.processed_index}, "
            f"entity='{self.entity}', scores={self.scores})")
18+
19+
def __str__(self) -> str:
    """Return the same text as ``__repr__``."""
    return self.__repr__()

skyflow/vault/detect/_reidentify_text_request.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33

44
class ReidentifyTextRequest:
55
def __init__(self, text: str,
6-
redacted_entities: Optional[List[DetectEntities]] = None,
7-
masked_entities: Optional[List[DetectEntities]] = None,
8-
plain_text_entities: Optional[List[DetectEntities]] = None):
6+
redacted_entities: Optional[List[DetectEntities]] = None,
7+
masked_entities: Optional[List[DetectEntities]] = None,
8+
plain_text_entities: Optional[List[DetectEntities]] = None):
99
self.text = text
1010
self.redacted_entities = redacted_entities
1111
self.masked_entities = masked_entities
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
11
class ReidentifyTextResponse:
    """Container for the ``processed_text`` of a reidentify-text response."""

    def __init__(self, processed_text: str):
        """Store ``processed_text`` as an instance attribute."""
        self.processed_text = processed_text

    def __repr__(self) -> str:
        """Return a constructor-style string showing ``processed_text``."""
        return f"ReidentifyTextResponse(processed_text='{self.processed_text}')"

    def __str__(self) -> str:
        """Return the same text as ``__repr__``."""
        return self.__repr__()

skyflow/vault/detect/_text_index.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,9 @@ class TextIndex:
22
def __init__(self, start: int, end: int):
    """Store ``start`` and ``end`` as instance attributes."""
    self.start = start
    self.end = end
5+
6+
def __repr__(self):
    """Return a constructor-style string showing ``start`` and ``end``."""
    return f"TextIndex(start={self.start}, end={self.end})"
8+
9+
def __str__(self):
    """Return the same text as ``__repr__``."""
    return self.__repr__()

0 commit comments

Comments
 (0)