|
10 | 10 | from skyflow.utils import get_credentials, SkyflowMessages, get_vault_url, construct_invoke_connection_request, \ |
11 | 11 | parse_insert_response, parse_update_record_response, parse_delete_response, parse_get_response, \ |
12 | 12 | parse_detokenize_response, parse_tokenize_response, parse_query_response, parse_invoke_connection_response, \ |
13 | | - handle_exception, validate_api_key, encode_column_values |
| 13 | + handle_exception, validate_api_key, encode_column_values, parse_deidentify_text_response, \ |
| 14 | + parse_reidentify_text_response, convert_to_entity_type, convert_detected_entity_to_entity_info |
14 | 15 | from skyflow.utils._utils import parse_path_params, to_lowercase_keys, get_metrics |
15 | 16 | from skyflow.utils.enums import EnvUrls, Env, ContentType |
16 | 17 | from skyflow.vault.connection import InvokeConnectionResponse |
@@ -418,3 +419,133 @@ def test_encode_column_values(self): |
418 | 419 |
|
419 | 420 | result = encode_column_values(get_request) |
420 | 421 | self.assertEqual(result, expected_encoded_values) |
| 422 | + |
| 423 | + def test_parse_deidentify_text_response(self): |
| 424 | + """Test parsing deidentify text response with a single entity.""" |
| 425 | + mock_entity = Mock() |
| 426 | + mock_entity.token = "token123" |
| 427 | + mock_entity.value = "sensitive_value" |
| 428 | + mock_entity.entity_type = "EMAIL" |
| 429 | + mock_entity.entity_scores = {"EMAIL": 0.95} |
| 430 | + mock_entity.location = Mock( |
| 431 | + start_index=10, |
| 432 | + end_index=20, |
| 433 | + start_index_processed=15, |
| 434 | + end_index_processed=25 |
| 435 | + ) |
| 436 | + |
| 437 | + mock_api_response = Mock() |
| 438 | + mock_api_response.processed_text = "Sample processed text" |
| 439 | + mock_api_response.entities = [mock_entity] |
| 440 | + mock_api_response.word_count = 3 |
| 441 | + mock_api_response.character_count = 20 |
| 442 | + |
| 443 | + result = parse_deidentify_text_response(mock_api_response) |
| 444 | + |
| 445 | + self.assertEqual(result.processed_text, "Sample processed text") |
| 446 | + self.assertEqual(result.word_count, 3) |
| 447 | + self.assertEqual(result.char_count, 20) |
| 448 | + self.assertEqual(len(result.entities), 1) |
| 449 | + |
| 450 | + entity = result.entities[0] |
| 451 | + self.assertEqual(entity.token, "token123") |
| 452 | + self.assertEqual(entity.value, "sensitive_value") |
| 453 | + self.assertEqual(entity.entity, "EMAIL") |
| 454 | + self.assertEqual(entity.scores, {"EMAIL": 0.95}) |
| 455 | + self.assertEqual(entity.text_index.start, 10) |
| 456 | + self.assertEqual(entity.text_index.end, 20) |
| 457 | + self.assertEqual(entity.processed_index.start, 15) |
| 458 | + self.assertEqual(entity.processed_index.end, 25) |
| 459 | + |
| 460 | + def test_parse_deidentify_text_response_no_entities(self): |
| 461 | + """Test parsing deidentify text response with no entities.""" |
| 462 | + mock_api_response = Mock() |
| 463 | + mock_api_response.processed_text = "Sample processed text" |
| 464 | + mock_api_response.entities = [] |
| 465 | + mock_api_response.word_count = 3 |
| 466 | + mock_api_response.character_count = 20 |
| 467 | + |
| 468 | + result = parse_deidentify_text_response(mock_api_response) |
| 469 | + |
| 470 | + self.assertEqual(result.processed_text, "Sample processed text") |
| 471 | + self.assertEqual(result.word_count, 3) |
| 472 | + self.assertEqual(result.char_count, 20) |
| 473 | + self.assertEqual(len(result.entities), 0) |
| 474 | + |
| 475 | + def test_parse_reidentify_text_response(self): |
| 476 | + """Test parsing reidentify text response.""" |
| 477 | + mock_api_response = Mock() |
| 478 | + mock_api_response.processed_text = "Reidentified text with actual values" |
| 479 | + |
| 480 | + result = parse_reidentify_text_response(mock_api_response) |
| 481 | + |
| 482 | + self.assertEqual(result.processed_text, "Reidentified text with actual values") |
| 483 | + |
| 484 | + def test_convert_to_entity_type_with_valid_entities(self): |
| 485 | + """Test converting entity types with valid input.""" |
| 486 | + from skyflow.utils.enums import DetectEntities |
| 487 | + |
| 488 | + detect_entities = [DetectEntities.EMAIL_ADDRESS, DetectEntities.PHONE_NUMBER] |
| 489 | + result = convert_to_entity_type(detect_entities) |
| 490 | + |
| 491 | + self.assertEqual(result, ["email_address", "phone_number"]) |
| 492 | + |
| 493 | + def test_convert_to_entity_type_with_empty_list(self): |
| 494 | + """Test converting entity types with empty list.""" |
| 495 | + result = convert_to_entity_type([]) |
| 496 | + self.assertIsNone(result) |
| 497 | + |
| 498 | + def test_convert_to_entity_type_with_none(self): |
| 499 | + """Test converting entity types with None input.""" |
| 500 | + result = convert_to_entity_type(None) |
| 501 | + self.assertIsNone(result) |
| 502 | + |
| 503 | + def test__convert_detected_entity_to_entity_info(self): |
| 504 | + """Test converting detected entity to EntityInfo object.""" |
| 505 | + mock_detected_entity = Mock() |
| 506 | + mock_detected_entity.token = "token123" |
| 507 | + mock_detected_entity.value = "sensitive_value" |
| 508 | + mock_detected_entity.entity_type = "EMAIL" |
| 509 | + mock_detected_entity.entity_scores = {"EMAIL": 0.95} |
| 510 | + mock_detected_entity.location = Mock( |
| 511 | + start_index=10, |
| 512 | + end_index=20, |
| 513 | + start_index_processed=15, |
| 514 | + end_index_processed=25 |
| 515 | + ) |
| 516 | + |
| 517 | + result = convert_detected_entity_to_entity_info(mock_detected_entity) |
| 518 | + |
| 519 | + self.assertEqual(result.token, "token123") |
| 520 | + self.assertEqual(result.value, "sensitive_value") |
| 521 | + self.assertEqual(result.entity, "EMAIL") |
| 522 | + self.assertEqual(result.scores, {"EMAIL": 0.95}) |
| 523 | + self.assertEqual(result.text_index.start, 10) |
| 524 | + self.assertEqual(result.text_index.end, 20) |
| 525 | + self.assertEqual(result.processed_index.start, 15) |
| 526 | + self.assertEqual(result.processed_index.end, 25) |
| 527 | + |
| 528 | + def test__convert_detected_entity_to_entity_info_with_minimal_data(self): |
| 529 | + """Test converting detected entity with minimal required data.""" |
| 530 | + mock_detected_entity = Mock() |
| 531 | + mock_detected_entity.token = "token123" |
| 532 | + mock_detected_entity.value = None |
| 533 | + mock_detected_entity.entity_type = "UNKNOWN" |
| 534 | + mock_detected_entity.entity_scores = {} |
| 535 | + mock_detected_entity.location = Mock( |
| 536 | + start_index=0, |
| 537 | + end_index=0, |
| 538 | + start_index_processed=0, |
| 539 | + end_index_processed=0 |
| 540 | + ) |
| 541 | + |
| 542 | + result = convert_detected_entity_to_entity_info(mock_detected_entity) |
| 543 | + |
| 544 | + self.assertEqual(result.token, "token123") |
| 545 | + self.assertIsNone(result.value) |
| 546 | + self.assertEqual(result.entity, "UNKNOWN") |
| 547 | + self.assertEqual(result.scores, {}) |
| 548 | + self.assertEqual(result.text_index.start, 0) |
| 549 | + self.assertEqual(result.text_index.end, 0) |
| 550 | + self.assertEqual(result.processed_index.start, 0) |
| 551 | + self.assertEqual(result.processed_index.end, 0) |
0 commit comments