Skip to content

Commit e3279f0

Browse files
SK-2142 return None in case of empty error for Data APIs and include file path in deidentify file request (#188)
1 parent e63345e commit e3279f0

19 files changed

+401
-128
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
if sys.version_info < (3, 8):
99
raise RuntimeError("skyflow requires Python 3.8+")
10-
current_version = '2.0.0b6'
10+
current_version = '2.0.0b7'
1111

1212
setup(
1313
name='skyflow',

skyflow/utils/_skyflow_messages.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ class Error(Enum):
169169
INVALID_PLAIN_TEXT_ENTITIES_IN_REIDENTIFY= f"{error_prefix} Validation error. The plainTextEntities field must be an array of DetectEntities enums. Specify a valid plainTextEntities."
170170

171171
INVALID_DEIDENTIFY_FILE_REQUEST= f"{error_prefix} Validation error. Invalid deidentify file request. Specify a valid deidentify file request."
172+
INVALID_DEIDENTIFY_FILE_INPUT= f"{error_prefix} Validation error. Invalid deidentify file input. Please provide either a file or a file path."
172173
EMPTY_FILE_OBJECT= f"{error_prefix} Validation error. File object cannot be empty. Specify a valid file object."
173174
INVALID_FILE_FORMAT= f"{error_prefix} Validation error. Invalid file format. Specify a valid file format."
174175
MISSING_FILE_SOURCE= f"{error_prefix} Validation error. Provide exactly one of filePath, base64, or fileObject."
@@ -197,7 +198,7 @@ class Error(Enum):
197198
INVALID_FILE_OR_ENCODED_FILE= f"{error_prefix} . Error while decoding base64 and saving file"
198199
INVALID_FILE_TYPE = f"{error_prefix} Validation error. Invalid file type. Specify a valid file type."
199200
INVALID_FILE_NAME= f"{error_prefix} Validation error. Invalid file name. Specify a valid file name."
200-
FILE_READ_ERROR= f"{error_prefix} Validation error. Unable to read file. Verify the file path."
201+
INVALID_DEIDENTIFY_FILE_PATH= f"{error_prefix} Validation error. Invalid file path. Specify a valid file path."
201202
INVALID_BASE64_HEADER= f"{error_prefix} Validation error. Invalid base64 header. Specify a valid base64 header."
202203
INVALID_WAIT_TIME= f"{error_prefix} Validation error. Invalid wait time. Specify a valid wait time as number and should not be greater than 64 secs."
203204
INVALID_OUTPUT_DIRECTORY= f"{error_prefix} Validation error. Invalid output directory. Specify a valid output directory as string."

skyflow/utils/_utils.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,6 @@ def get_metrics():
211211
}
212212
return details_dic
213213

214-
215214
def parse_insert_response(api_response, continue_on_error):
216215
# Retrieve the headers and data from the API response
217216
api_response_headers = api_response.headers
@@ -239,13 +238,13 @@ def parse_insert_response(api_response, continue_on_error):
239238
error = {
240239
'request_index': idx,
241240
'request_id': request_id,
242-
'error': response['Body']['error']
241+
'error': response['Body']['error'],
242+
'http_code': response['Status'],
243243
}
244244
errors.append(error)
245245

246246
insert_response.inserted_fields = inserted_fields
247-
insert_response.errors = errors
248-
247+
insert_response.errors = errors if len(errors) > 0 else None
249248
else:
250249
for record in api_response_data.records:
251250
field_data = {
@@ -257,6 +256,7 @@ def parse_insert_response(api_response, continue_on_error):
257256

258257
inserted_fields.append(field_data)
259258
insert_response.inserted_fields = inserted_fields
259+
insert_response.errors = None
260260

261261
return insert_response
262262

@@ -275,21 +275,17 @@ def parse_delete_response(api_response: V1BulkDeleteRecordResponse):
275275
delete_response = DeleteResponse()
276276
deleted_ids = api_response.record_id_response
277277
delete_response.deleted_ids = deleted_ids
278-
delete_response.errors = []
278+
delete_response.errors = None
279279
return delete_response
280280

281-
282281
def parse_get_response(api_response: V1BulkGetRecordResponse):
    """Convert a bulk-get API response into a ``GetResponse``.

    Each entry of ``api_response.records`` contributes one plain dict built
    from that record's ``fields`` mapping; the dicts are stored, in order,
    on ``GetResponse.data``.
    """
    get_response = GetResponse()
    # One shallow copy of the field mapping per record, preserving order.
    get_response.data = [
        dict(record.fields.items()) for record in api_response.records
    ]
    return get_response
294290

295291
def parse_detokenize_response(api_response: HttpResponse[V1DetokenizeResponse]):
@@ -320,7 +316,7 @@ def parse_detokenize_response(api_response: HttpResponse[V1DetokenizeResponse]):
320316
errors = errors
321317
detokenize_response = DetokenizeResponse()
322318
detokenize_response.detokenized_fields = detokenized_fields
323-
detokenize_response.errors = errors
319+
detokenize_response.errors = errors if len(errors) > 0 else None
324320

325321
return detokenize_response
326322

@@ -357,7 +353,7 @@ def parse_invoke_connection_response(api_response: requests.Response):
357353
if 'x-request-id' in api_response.headers:
358354
metadata['request_id'] = api_response.headers['x-request-id']
359355

360-
return InvokeConnectionResponse(data=data, metadata=metadata)
356+
return InvokeConnectionResponse(data=data, metadata=metadata, errors=None)
361357
except Exception as e:
362358
raise SkyflowError(SkyflowMessages.Error.RESPONSE_NOT_JSON.value.format(content), status_code)
363359
except HTTPError:

skyflow/utils/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
SDK_VERSION = '2.0.0b6'
1+
SDK_VERSION = '2.0.0b7'

skyflow/utils/validations/_validations.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from skyflow.utils.logger import log_info, log_error_log
1010
from skyflow.vault.detect import DeidentifyTextRequest, ReidentifyTextRequest, TokenFormat, Transformations, \
1111
GetDetectRunRequest, Bleep, DeidentifyFileRequest
12+
from skyflow.vault.detect._file_input import FileInput
1213

1314
valid_vault_config_keys = ["vault_id", "cluster_id", "credentials", "env"]
1415
valid_connection_config_keys = ["connection_id", "connection_url", "credentials"]
@@ -257,9 +258,42 @@ def validate_update_connection_config(logger, config):
257258

258259
return True
259260

261+
def validate_file_from_request(file_input: FileInput):
    """Validate the file source supplied with a deidentify-file request.

    Exactly one of ``file_input.file`` and ``file_input.file_path`` must be
    set (not ``None``).

    * A file object must expose a non-blank string ``name`` whose stem
      (the part before the extension) is also non-blank.
    * A file path must be a non-blank string naming an existing regular file.

    Raises:
        SkyflowError: with ``invalid_input_error_code`` when any check fails.
    """
    if file_input is None:
        raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_INPUT.value, invalid_input_error_code)

    candidate_file = getattr(file_input, 'file', None)
    candidate_path = getattr(file_input, 'file_path', None)
    file_given = candidate_file is not None
    path_given = candidate_path is not None

    # Both set or both missing: the caller must provide exactly one source.
    if file_given == path_given:
        raise SkyflowError(SkyflowMessages.Error.INVALID_DEIDENTIFY_FILE_INPUT.value, invalid_input_error_code)

    if file_given:
        # The file object needs a usable, non-blank string name.
        name = getattr(candidate_file, 'name', None)
        if not isinstance(name, str) or not name.strip():
            raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_TYPE.value, invalid_input_error_code)

        # The stem must not be empty or whitespace only.
        stem, _extension = os.path.splitext(name)
        if not stem.strip():
            raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_NAME.value, invalid_input_error_code)
        return

    # Only the path was supplied: it must be a non-blank string ...
    if not isinstance(candidate_path, str) or not candidate_path.strip():
        raise SkyflowError(SkyflowMessages.Error.INVALID_DEIDENTIFY_FILE_PATH.value, invalid_input_error_code)

    # ... that points at an existing regular file (isfile is False when the
    # path does not exist at all).
    if not os.path.isfile(candidate_path):
        raise SkyflowError(SkyflowMessages.Error.INVALID_DEIDENTIFY_FILE_PATH.value, invalid_input_error_code)
290+
260291
def validate_deidentify_file_request(logger, request: DeidentifyFileRequest):
261292
if not hasattr(request, 'file') or request.file is None:
262293
raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_INPUT.value, invalid_input_error_code)
294+
295+
# Validate file input first
296+
validate_file_from_request(request.file)
263297

264298
# Optional: entities
265299
if hasattr(request, 'entities') and request.entities is not None:
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
class InvokeConnectionResponse:
    """Result of invoking a connection.

    Attributes are plain values set by the constructor:

    * ``data``     - the response payload, stored as given.
    * ``metadata`` - the supplied metadata, or a new empty dict when the
      argument is falsy (``None`` or empty).
    * ``errors``   - the supplied errors, or ``None`` when the argument is
      falsy (``None`` or an empty collection).
    """

    def __init__(self, data=None, metadata=None, errors=None):
        self.data = data
        self.metadata = metadata if metadata else {}
        # An empty error collection is reported as None, not as [] / {}.
        self.errors = errors if errors else None

    def __repr__(self):
        # The closing parenthesis belongs after the last field only; the
        # previous text closed it after 'metadata' and again at the end,
        # producing an unbalanced representation.
        return f"ConnectionResponse('data'={self.data},'metadata'={self.metadata}, 'errors'={self.errors})"

    def __str__(self):
        return self.__repr__()

skyflow/vault/controller/_detect.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import io
12
import json
23
import os
34
from skyflow.error import SkyflowError
@@ -20,6 +21,7 @@
2021
from skyflow.vault.detect import DeidentifyTextRequest, DeidentifyTextResponse, ReidentifyTextRequest, \
2122
ReidentifyTextResponse, DeidentifyFileRequest, DeidentifyFileResponse, GetDetectRunRequest
2223

24+
2325
class Detect:
2426
def __init__(self, vault_client):
2527
self.__vault_client = vault_client
@@ -124,10 +126,22 @@ def output_to_dict_list(output):
124126
word_count = getattr(word_character_count, "word_count", None)
125127
char_count = getattr(word_character_count, "character_count", None)
126128

129+
base64_string = first_output.get("file", None)
130+
extension = first_output.get("extension", None)
131+
132+
file_obj = None
133+
if base64_string is not None:
134+
file_bytes = base64.b64decode(base64_string)
135+
file_obj = io.BytesIO(file_bytes)
136+
file_obj.name = f"deidentified.{extension}" if extension else "processed_file"
137+
else:
138+
file_obj = None
139+
127140
return DeidentifyFileResponse(
128-
file=first_output.get("file", None),
141+
file_base64=base64_string,
142+
file=file_obj, # File class will be instantiated in DeidentifyFileResponse
129143
type=first_output.get("type", None),
130-
extension=first_output.get("extension", None),
144+
extension=extension,
131145
word_count=word_count,
132146
char_count=char_count,
133147
size_in_kb=size,
@@ -137,7 +151,7 @@ def output_to_dict_list(output):
137151
entities=entities,
138152
run_id=run_id_val,
139153
status=status_val,
140-
errors=[]
154+
errors=None
141155
)
142156

143157
def __get_token_format(self, request):
@@ -216,16 +230,26 @@ def reidentify_text(self, request: ReidentifyTextRequest) -> ReidentifyTextRespo
216230
log_error_log(SkyflowMessages.ErrorLogs.REIDENTIFY_TEXT_REQUEST_REJECTED.value, self.__vault_client.get_logger())
217231
handle_exception(e, self.__vault_client.get_logger())
218232

233+
def __get_file_from_request(self, request: DeidentifyFileRequest):
    """Return a readable binary file object for the request's file input.

    ``request.file`` may carry either an already-open file object (``file``)
    or a filesystem path (``file_path``). The file object wins when both
    are present.

    For a path, the content is read eagerly and returned as an in-memory
    ``io.BytesIO`` so that no OS file handle is left open after this call
    (the previous implementation returned the result of ``open()`` and
    nothing ever closed it). The buffer's ``name`` is set to the path, so
    callers that derive the file name/extension from ``.name`` see the same
    value an open file would report.

    Returns:
        The caller-supplied file object, an ``io.BytesIO`` holding the
        content of ``file_path``, or ``None`` when neither is set.
    """
    file_input = request.file

    # Prefer an explicitly supplied file object; its lifetime is the
    # caller's responsibility.
    supplied_file = getattr(file_input, 'file', None)
    if supplied_file is not None:
        return supplied_file

    # Otherwise load from the path, closing the handle before returning.
    file_path = getattr(file_input, 'file_path', None)
    if file_path is not None:
        with open(file_path, 'rb') as source:
            buffered = io.BytesIO(source.read())
        buffered.name = file_path
        return buffered

    return None
243+
219244
def deidentify_file(self, request: DeidentifyFileRequest):
220245
log_info(SkyflowMessages.Info.DETECT_FILE_TRIGGERED.value, self.__vault_client.get_logger())
221246
validate_deidentify_file_request(self.__vault_client.get_logger(), request)
222247
self.__initialize()
223248
files_api = self.__vault_client.get_detect_file_api().with_raw_response
224-
file_obj = request.file
249+
file_obj = self.__get_file_from_request(request)
225250
file_name = getattr(file_obj, 'name', None)
226251
file_extension = self._get_file_extension(file_name) if file_name else None
227252
file_content = file_obj.read()
228-
229253
base64_string = base64.b64encode(file_content).decode('utf-8')
230254

231255
try:
@@ -375,7 +399,7 @@ def deidentify_file(self, request: DeidentifyFileRequest):
375399
file_name_only = 'processed-'+os.path.basename(file_name)
376400
output_file_path = f"{request.output_directory}/{file_name_only}"
377401
with open(output_file_path, 'wb') as output_file:
378-
output_file.write(base64.b64decode(parsed_response.file))
402+
output_file.write(base64.b64decode(parsed_response.file_base64))
379403
log_info(SkyflowMessages.Info.DETECT_FILE_SUCCESS.value, self.__vault_client.get_logger())
380404
return parsed_response
381405

skyflow/vault/data/_insert_response.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
class InsertResponse:
22
def __init__(self, inserted_fields = None, errors=None):
3-
if errors is None:
4-
errors = list()
53
self.inserted_fields = inserted_fields
64
self.errors = errors
75

skyflow/vault/data/_query_response.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
class QueryResponse:
22
def __init__(self):
33
self.fields = []
4-
self.errors = []
4+
self.errors = None
55

66
def __repr__(self):
77
return f"QueryResponse(fields={self.fields}, errors={self.errors})"

skyflow/vault/data/_update_response.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
class UpdateResponse:
22
def __init__(self, updated_field = None, errors=None):
33
self.updated_field = updated_field
4-
self.errors = errors if errors is not None else []
4+
self.errors = errors
55

66
def __repr__(self):
77
return f"UpdateResponse(updated_field={self.updated_field}, errors={self.errors})"

0 commit comments

Comments
 (0)