From 8de3d09e4139c026607dcee84828208ab2186dbf Mon Sep 17 00:00:00 2001 From: David Pollack Date: Tue, 19 May 2020 13:16:27 +0200 Subject: [PATCH] fix rebase errors --- .../recognizers_store_api.py | 5 +- .../tests/test_analyzer_engine.py | 125 ++---------------- 2 files changed, 18 insertions(+), 112 deletions(-) diff --git a/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizers_store_api.py b/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizers_store_api.py index e0ee3b138f..1615b6cf63 100644 --- a/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizers_store_api.py +++ b/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizers_store_api.py @@ -3,7 +3,10 @@ import grpc from presidio_analyzer.protobuf_models import recognizers_store_pb2, recognizers_store_pb2_grpc -from presidio_analyzer import PatternRecognizer, Pattern +from presidio_analyzer import PatternRecognizer, Pattern, PresidioLogger + + +logger = PresidioLogger("presidio") class RecognizerStoreApi: diff --git a/presidio-analyzer/tests/test_analyzer_engine.py b/presidio-analyzer/tests/test_analyzer_engine.py index 3f2634f8e6..3a649ea9fc 100644 --- a/presidio-analyzer/tests/test_analyzer_engine.py +++ b/presidio-analyzer/tests/test_analyzer_engine.py @@ -131,7 +131,7 @@ def test_analyze_with_predefined_recognizers_return_results( language = "en" entities = ["CREDIT_CARD"] results = loaded_analyzer_engine.analyze( - unit_test_guid, text, entities, language, all_fields=False + correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False ) assert len(results) == 1 @@ -149,7 +149,7 @@ def test_analyze_with_multiple_predefined_recognizers( registry=loaded_registry, nlp_engine=nlp_engine ) results = analyzer_engine_with_spacy.analyze( - unit_test_guid, text, entities, language, all_fields=False + correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False ) assert len(results) == 2 @@ -160,110 +160,13 @@ def test_analyze_with_multiple_predefined_recognizers( assert_result(results[1], "PHONE_NUMBER", 48, 59, expected_score) -def test_analyze_without_entities(loaded_analyzer_engine, unit_test_guid): - with pytest.raises(ValueError): - language = "en" -<<<<<<< HEAD - entities = ["CREDIT_CARD", "PHONE_NUMBER"] - - # This analyzer engine is different from the global one, as this one - # also loads SpaCy so it can use the context words - - analyzer_engine_with_spacy = AnalyzerEngine( - registry=self.loaded_registry, nlp_engine=loaded_spacy_nlp_engine) - results = analyzer_engine_with_spacy.analyze(self.unit_test_guid, text, - entities, language, - all_fields=False) - - assert len(results) == 2 - medium_regex_score = 0.5 # UsPhoneRecognizer.PATTERN_GROUPS[1][2] - context_similarity_factor = 0.35 # PatternRecognizer.CONTEXT_SIMILARITY_FACTOR - assert_result(results[0], "CREDIT_CARD", 14, - 33, EntityRecognizer.MAX_SCORE) - expected_score = medium_regex_score + context_similarity_factor - assert_result(results[1], "PHONE_NUMBER", 48, 59, expected_score) - - def test_analyze_without_entities(self): - with pytest.raises(ValueError): - language = "en" - text = " Credit card: 4095-2609-9393-4932, my name is John Oliver, DateTime: September 18 Domain: microsoft.com" - entities = [] - self.loaded_analyzer_engine.analyze(self.unit_test_guid, - text, entities, language, - all_fields=False) - - def test_analyze_with_empty_text(self): -======= -@pytest.fixture(scope="module") -def loaded_registry(): - return MockRecognizerRegistry(RecognizerStoreApiMock()) - -@pytest.fixture(scope="module") -def app_tracer(): - return AppTracerMock(enable_interpretability=True) - -@pytest.fixture(scope="module") -def loaded_analyzer_engine(loaded_registry, app_tracer): - mock_nlp_artifacts = NlpArtifacts([], [], [], [], None, "en") - analyzer_engine = AnalyzerEngine( - loaded_registry, - MockNlpEngine( - stopwords=[], - punct_words=[], - nlp_artifacts=mock_nlp_artifacts - ), - app_tracer=app_tracer, - enable_trace_pii=True - ) - return analyzer_engine - -@pytest.fixture(scope="module") -def unit_test_guid(): - return "00000000-0000-0000-0000-000000000000" - -def test_analyze_with_predefined_recognizers_return_results(loaded_analyzer_engine, unit_test_guid): - text = " Credit card: 4095-2609-9393-4932, my phone is 425 8829090" - language = "en" - entities = ["CREDIT_CARD"] - results = loaded_analyzer_engine.analyze( - unit_test_guid, - text, entities, language, all_fields=False) - - assert len(results) == 1 - assert_result(results[0], "CREDIT_CARD", 14, - 33, EntityRecognizer.MAX_SCORE) - -def test_analyze_with_multiple_predefined_recognizers(loaded_registry, unit_test_guid, nlp_engines): - text = " Credit card: 4095-2609-9393-4932, my phone is 425 8829090" - language = "en" - entities = ["CREDIT_CARD", "PHONE_NUMBER"] - - # This analyzer engine is different from the global one, as this one - # also loads SpaCy so it can use the context words - - nlp_engine = nlp_engines["spacy_en"] - - analyzer_engine_with_spacy = AnalyzerEngine( - registry=loaded_registry, nlp_engine=nlp_engine) - results = analyzer_engine_with_spacy.analyze(unit_test_guid, text, - entities, language, - all_fields=False) - - assert len(results) == 2 - medium_regex_score = 0.5 # UsPhoneRecognizer.PATTERN_GROUPS[1][2] - context_similarity_factor = 0.35 # PatternRecognizer.CONTEXT_SIMILARITY_FACTOR - assert_result(results[0], "CREDIT_CARD", 14, - 33, EntityRecognizer.MAX_SCORE) - expected_score = medium_regex_score + context_similarity_factor - assert_result(results[1], "PHONE_NUMBER", 48, 59, expected_score) - def test_analyze_without_entities(loaded_analyzer_engine, unit_test_guid): with pytest.raises(ValueError): language = "en" text = " Credit card: 4095-2609-9393-4932, my name is John Oliver, DateTime: September 18 Domain: microsoft.com" # noqa E501 entities = [] loaded_analyzer_engine.analyze( - unit_test_guid, text, entities, language, all_fields=False + correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False ) @@ -272,7 +175,7 @@ def test_analyze_with_empty_text(loaded_analyzer_engine, unit_test_guid): text = "" entities = ["CREDIT_CARD", "PHONE_NUMBER"] results = loaded_analyzer_engine.analyze( - unit_test_guid, text, entities, language, all_fields=False + correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False ) assert len(results) == 0 @@ -284,7 +187,7 @@ def test_analyze_with_unsupported_language(loaded_analyzer_engine, unit_test_gui text = "" entities = ["CREDIT_CARD", "PHONE_NUMBER"] loaded_analyzer_engine.analyze( - unit_test_guid, text, entities, language, all_fields=False + correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False ) @@ -376,7 +279,7 @@ def test_added_pattern_recognizer_works(unit_test_guid): entities = ["CREDIT_CARD", "ROCKET"] results = analyze_engine.analyze( - unit_test_guid, text=text, entities=entities, language="en", all_fields=False + correlation_id=unit_test_guid, text=text, entities=entities, language="en", all_fields=False ) assert len(results) == 0 @@ -386,7 +289,7 @@ def test_added_pattern_recognizer_works(unit_test_guid): # Check that the entity is recognized: results = analyze_engine.analyze( - unit_test_guid, text=text, entities=entities, language="en", all_fields=False + correlation_id=unit_test_guid, text=text, entities=entities, language="en", all_fields=False ) assert len(results) == 1 @@ -409,7 +312,7 @@ def test_removed_pattern_recognizer_doesnt_work(unit_test_guid): entities = ["CREDIT_CARD", "SPACESHIP"] results = analyze_engine.analyze( - unit_test_guid, text=text, entities=entities, language="en", all_fields=False + correlation_id=unit_test_guid, text=text, entities=entities, language="en", all_fields=False ) assert len(results) == 0 @@ -418,7 +321,7 @@ def test_removed_pattern_recognizer_doesnt_work(unit_test_guid): recognizers_store_api_mock.add_custom_pattern_recognizer(pattern_recognizer) # Check that the entity is recognized: results = analyze_engine.analyze( - unit_test_guid, text=text, entities=entities, language="en", all_fields=False + correlation_id=unit_test_guid, text=text, entities=entities, language="en", all_fields=False ) assert len(results) == 1 assert_result(results[0], "SPACESHIP", 0, 10, 0.8) @@ -427,7 +330,7 @@ def test_removed_pattern_recognizer_doesnt_work(unit_test_guid): recognizers_store_api_mock.remove_recognizer("Spaceship recognizer") # Test again to see we didn't get any results results = analyze_engine.analyze( - unit_test_guid, text=text, entities=entities, language="en", all_fields=False + correlation_id=unit_test_guid, text=text, entities=entities, language="en", all_fields=False ) assert len(results) == 0 @@ -547,7 +450,7 @@ def test_when_threshold_is_zero_all_results_pass(loaded_registry, unit_test_guid registry=loaded_registry, nlp_engine=NlpEngineMock() ) results = analyzer_engine.analyze( - unit_test_guid, text, entities, language, all_fields=False, score_threshold=0 + correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False, score_threshold=0 ) assert len(results) == 2 @@ -567,7 +470,7 @@ def test_when_threshold_is_more_than_half_only_credit_card_passes( registry=loaded_registry, nlp_engine=NlpEngineMock() ) results = analyzer_engine.analyze( - unit_test_guid, text, entities, language, all_fields=False, score_threshold=0.51 + correlation_id=unit_test_guid, text=text, entities=entities, language=language, score_threshold=0.51, all_fields=False ) assert len(results) == 1 @@ -589,7 +492,7 @@ def test_when_default_threshold_is_more_than_half_only_one_passes( default_score_threshold=0.7, ) results = analyzer_engine.analyze( - unit_test_guid, text, entities, language, all_fields=False + correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False ) assert len(results) == 1 @@ -609,7 +512,7 @@ def test_when_default_threshold_is_zero_all_results_pass( registry=loaded_registry, nlp_engine=NlpEngineMock() ) results = analyzer_engine.analyze( - unit_test_guid, text, entities, language, all_fields=False + correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False ) assert len(results) == 2