Skip to content

Commit

Permalink
fix rebase errors
Browse files (browse the repository at this point in the history)
  • Loading branch information
David Pollack committed May 19, 2020
1 parent ab5553b commit 8de3d09
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 112 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,10 @@
import grpc
from presidio_analyzer.protobuf_models import recognizers_store_pb2, recognizers_store_pb2_grpc

from presidio_analyzer import PatternRecognizer, Pattern
from presidio_analyzer import PatternRecognizer, Pattern, PresidioLogger


logger = PresidioLogger("presidio")


class RecognizerStoreApi:
Expand Down
125 changes: 14 additions & 111 deletions presidio-analyzer/tests/test_analyzer_engine.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_analyze_with_predefined_recognizers_return_results(
language = "en"
entities = ["CREDIT_CARD"]
results = loaded_analyzer_engine.analyze(
unit_test_guid, text, entities, language, all_fields=False
correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False
)

assert len(results) == 1
Expand All @@ -149,7 +149,7 @@ def test_analyze_with_multiple_predefined_recognizers(
registry=loaded_registry, nlp_engine=nlp_engine
)
results = analyzer_engine_with_spacy.analyze(
unit_test_guid, text, entities, language, all_fields=False
correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False
)

assert len(results) == 2
Expand All @@ -160,110 +160,13 @@ def test_analyze_with_multiple_predefined_recognizers(
assert_result(results[1], "PHONE_NUMBER", 48, 59, expected_score)


def test_analyze_without_entities(loaded_analyzer_engine, unit_test_guid):
with pytest.raises(ValueError):
language = "en"
<<<<<<< HEAD
entities = ["CREDIT_CARD", "PHONE_NUMBER"]

# This analyzer engine is different from the global one, as this one
# also loads SpaCy so it can use the context words

analyzer_engine_with_spacy = AnalyzerEngine(
registry=self.loaded_registry, nlp_engine=loaded_spacy_nlp_engine)
results = analyzer_engine_with_spacy.analyze(self.unit_test_guid, text,
entities, language,
all_fields=False)

assert len(results) == 2
medium_regex_score = 0.5 # UsPhoneRecognizer.PATTERN_GROUPS[1][2]
context_similarity_factor = 0.35 # PatternRecognizer.CONTEXT_SIMILARITY_FACTOR
assert_result(results[0], "CREDIT_CARD", 14,
33, EntityRecognizer.MAX_SCORE)
expected_score = medium_regex_score + context_similarity_factor
assert_result(results[1], "PHONE_NUMBER", 48, 59, expected_score)

def test_analyze_without_entities(self):
with pytest.raises(ValueError):
language = "en"
text = " Credit card: 4095-2609-9393-4932, my name is John Oliver, DateTime: September 18 Domain: microsoft.com"
entities = []
self.loaded_analyzer_engine.analyze(self.unit_test_guid,
text, entities, language,
all_fields=False)

def test_analyze_with_empty_text(self):
=======
@pytest.fixture(scope="module")
def loaded_registry():
return MockRecognizerRegistry(RecognizerStoreApiMock())

@pytest.fixture(scope="module")
def app_tracer():
return AppTracerMock(enable_interpretability=True)

@pytest.fixture(scope="module")
def loaded_analyzer_engine(loaded_registry, app_tracer):
mock_nlp_artifacts = NlpArtifacts([], [], [], [], None, "en")
analyzer_engine = AnalyzerEngine(
loaded_registry,
MockNlpEngine(
stopwords=[],
punct_words=[],
nlp_artifacts=mock_nlp_artifacts
),
app_tracer=app_tracer,
enable_trace_pii=True
)
return analyzer_engine

@pytest.fixture(scope="module")
def unit_test_guid():
return "00000000-0000-0000-0000-000000000000"

def test_analyze_with_predefined_recognizers_return_results(loaded_analyzer_engine, unit_test_guid):
text = " Credit card: 4095-2609-9393-4932, my phone is 425 8829090"
language = "en"
entities = ["CREDIT_CARD"]
results = loaded_analyzer_engine.analyze(
unit_test_guid,
text, entities, language, all_fields=False)

assert len(results) == 1
assert_result(results[0], "CREDIT_CARD", 14,
33, EntityRecognizer.MAX_SCORE)

def test_analyze_with_multiple_predefined_recognizers(loaded_registry, unit_test_guid, nlp_engines):
text = " Credit card: 4095-2609-9393-4932, my phone is 425 8829090"
language = "en"
entities = ["CREDIT_CARD", "PHONE_NUMBER"]

# This analyzer engine is different from the global one, as this one
# also loads SpaCy so it can use the context words

nlp_engine = nlp_engines["spacy_en"]

analyzer_engine_with_spacy = AnalyzerEngine(
registry=loaded_registry, nlp_engine=nlp_engine)
results = analyzer_engine_with_spacy.analyze(unit_test_guid, text,
entities, language,
all_fields=False)

assert len(results) == 2
medium_regex_score = 0.5 # UsPhoneRecognizer.PATTERN_GROUPS[1][2]
context_similarity_factor = 0.35 # PatternRecognizer.CONTEXT_SIMILARITY_FACTOR
assert_result(results[0], "CREDIT_CARD", 14,
33, EntityRecognizer.MAX_SCORE)
expected_score = medium_regex_score + context_similarity_factor
assert_result(results[1], "PHONE_NUMBER", 48, 59, expected_score)

def test_analyze_without_entities(loaded_analyzer_engine, unit_test_guid):
with pytest.raises(ValueError):
language = "en"
text = " Credit card: 4095-2609-9393-4932, my name is John Oliver, DateTime: September 18 Domain: microsoft.com" # noqa E501
entities = []
loaded_analyzer_engine.analyze(
unit_test_guid, text, entities, language, all_fields=False
correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False
)


Expand All @@ -272,7 +175,7 @@ def test_analyze_with_empty_text(loaded_analyzer_engine, unit_test_guid):
text = ""
entities = ["CREDIT_CARD", "PHONE_NUMBER"]
results = loaded_analyzer_engine.analyze(
unit_test_guid, text, entities, language, all_fields=False
correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False
)

assert len(results) == 0
Expand All @@ -284,7 +187,7 @@ def test_analyze_with_unsupported_language(loaded_analyzer_engine, unit_test_gui
text = ""
entities = ["CREDIT_CARD", "PHONE_NUMBER"]
loaded_analyzer_engine.analyze(
unit_test_guid, text, entities, language, all_fields=False
correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False
)


Expand Down Expand Up @@ -376,7 +279,7 @@ def test_added_pattern_recognizer_works(unit_test_guid):
entities = ["CREDIT_CARD", "ROCKET"]

results = analyze_engine.analyze(
unit_test_guid, text=text, entities=entities, language="en", all_fields=False
correlation_id=unit_test_guid, text=text, entities=entities, language="en", all_fields=False
)

assert len(results) == 0
Expand All @@ -386,7 +289,7 @@ def test_added_pattern_recognizer_works(unit_test_guid):

# Check that the entity is recognized:
results = analyze_engine.analyze(
unit_test_guid, text=text, entities=entities, language="en", all_fields=False
correlation_id=unit_test_guid, text=text, entities=entities, language="en", all_fields=False
)

assert len(results) == 1
Expand All @@ -409,7 +312,7 @@ def test_removed_pattern_recognizer_doesnt_work(unit_test_guid):
entities = ["CREDIT_CARD", "SPACESHIP"]

results = analyze_engine.analyze(
unit_test_guid, text=text, entities=entities, language="en", all_fields=False
correlation_id=unit_test_guid, text=text, entities=entities, language="en", all_fields=False
)

assert len(results) == 0
Expand All @@ -418,7 +321,7 @@ def test_removed_pattern_recognizer_doesnt_work(unit_test_guid):
recognizers_store_api_mock.add_custom_pattern_recognizer(pattern_recognizer)
# Check that the entity is recognized:
results = analyze_engine.analyze(
unit_test_guid, text=text, entities=entities, language="en", all_fields=False
correlation_id=unit_test_guid, text=text, entities=entities, language="en", all_fields=False
)
assert len(results) == 1
assert_result(results[0], "SPACESHIP", 0, 10, 0.8)
Expand All @@ -427,7 +330,7 @@ def test_removed_pattern_recognizer_doesnt_work(unit_test_guid):
recognizers_store_api_mock.remove_recognizer("Spaceship recognizer")
# Test again to see we didn't get any results
results = analyze_engine.analyze(
unit_test_guid, text=text, entities=entities, language="en", all_fields=False
correlation_id=unit_test_guid, text=text, entities=entities, language="en", all_fields=False
)

assert len(results) == 0
Expand Down Expand Up @@ -547,7 +450,7 @@ def test_when_threshold_is_zero_all_results_pass(loaded_registry, unit_test_guid
registry=loaded_registry, nlp_engine=NlpEngineMock()
)
results = analyzer_engine.analyze(
unit_test_guid, text, entities, language, all_fields=False, score_threshold=0
correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False, score_threshold=0
)

assert len(results) == 2
Expand All @@ -567,7 +470,7 @@ def test_when_threshold_is_more_than_half_only_credit_card_passes(
registry=loaded_registry, nlp_engine=NlpEngineMock()
)
results = analyzer_engine.analyze(
unit_test_guid, text, entities, language, all_fields=False, score_threshold=0.51
correlation_id=unit_test_guid, text=text, entities=entities, language=language, score_threshold=0.51, all_fields=False
)

assert len(results) == 1
Expand All @@ -589,7 +492,7 @@ def test_when_default_threshold_is_more_than_half_only_one_passes(
default_score_threshold=0.7,
)
results = analyzer_engine.analyze(
unit_test_guid, text, entities, language, all_fields=False
correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False
)

assert len(results) == 1
Expand All @@ -609,7 +512,7 @@ def test_when_default_threshold_is_zero_all_results_pass(
registry=loaded_registry, nlp_engine=NlpEngineMock()
)
results = analyzer_engine.analyze(
unit_test_guid, text, entities, language, all_fields=False
correlation_id=unit_test_guid, text=text, entities=entities, language=language, all_fields=False
)

assert len(results) == 2
Expand Down

0 comments on commit 8de3d09

Please sign in to comment.