From f0336ae825a818df042bb0145e4c660c5b43d285 Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Fri, 22 Mar 2019 16:19:18 +0100 Subject: [PATCH 1/3] Improve handling of ambiguous utterances in DeterministicIntentParser --- .../deterministic_intent_parser.py | 17 +++++- .../tests/test_deterministic_intent_parser.py | 57 +++++++++++++++---- 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/snips_nlu/intent_parser/deterministic_intent_parser.py b/snips_nlu/intent_parser/deterministic_intent_parser.py index 29d78dfa9..a01a68f14 100644 --- a/snips_nlu/intent_parser/deterministic_intent_parser.py +++ b/snips_nlu/intent_parser/deterministic_intent_parser.py @@ -19,7 +19,8 @@ from snips_nlu.constants import ( DATA, END, ENTITIES, ENTITY, INTENTS, LANGUAGE, RES_INTENT, RES_INTENT_NAME, - RES_MATCH_RANGE, RES_SLOTS, RES_VALUE, SLOT_NAME, START, TEXT, UTTERANCES) + RES_MATCH_RANGE, RES_SLOTS, RES_VALUE, SLOT_NAME, START, TEXT, UTTERANCES, + RES_PROBA) from snips_nlu.dataset import validate_and_format_dataset from snips_nlu.entity_parser.builtin_entity_parser import is_builtin_entity from snips_nlu.exceptions import IntentNotFoundError, LoadingError @@ -198,6 +199,9 @@ def parse(self, text, intents=None, top_n=None): if top_intents: intent = top_intents[0][RES_INTENT] slots = top_intents[0][RES_SLOTS] + if intent[RES_PROBA] < 1.0: + # return None in case of ambiguity + return empty_result(text, probability=1.0) return parsing_result(text, intent, slots) return empty_result(text, probability=1.0) return self._parse_top_intents(text, top_n=top_n, intents=intents) @@ -239,8 +243,15 @@ def placeholder_fn(entity_name): if res is not None: results.append(res) break - if len(results) == top_n: - return results + + confidence_score = 1. + if results: + confidence_score = 1. / float(len(results)) + + results = results[:top_n] + + for res in results: + res[RES_INTENT][RES_PROBA] = confidence_score return results diff --git a/snips_nlu/tests/test_deterministic_intent_parser.py b/snips_nlu/tests/test_deterministic_intent_parser.py index 094b74e8c..7a0def5f0 100644 --- a/snips_nlu/tests/test_deterministic_intent_parser.py +++ b/snips_nlu/tests/test_deterministic_intent_parser.py @@ -120,24 +120,33 @@ def test_should_parse_top_intents(self): type: intent name: intent1 utterances: - - hello world + - meeting tomorrow --- type: intent name: intent2 utterances: - - foo bar""") + - meeting [time:snips/datetime](today)""") dataset = Dataset.from_yaml_files("en", [dataset_stream]).json parser = DeterministicIntentParser().fit(dataset) - text = "hello world" + text = "meeting tomorrow" # When results = parser.parse(text, top_n=3) # Then - expected_intent = intent_classification_result( - intent_name="intent1", probability=1.0) - expected_results = [extraction_result(expected_intent, [])] + expected_results = [ + extraction_result(intent_classification_result( + intent_name="intent1", probability=0.5), []), + extraction_result(intent_classification_result( + intent_name="intent2", probability=0.5), + [{"entity": "snips/datetime", + "range": {"end": 16, "start": 8}, + "slotName": "time", + "value": "tomorrow"}]) + ] + expected_results = sorted(expected_results, + key=lambda r: r[RES_INTENT][RES_INTENT_NAME]) self.assertEqual(expected_results, results) @patch("snips_nlu.intent_parser.deterministic_intent_parser" @@ -225,6 +234,30 @@ def test_should_ignore_ambiguous_utterances(self): # Then self.assertEqual(empty_result(text, 1.0), res) + def test_should_ignore_subtly_ambiguous_utterances(self): + # Given + dataset_stream = io.StringIO(""" +--- +type: intent +name: intent_1 +utterances: + - meeting tomorrow + +--- +type: intent +name: intent_2 +utterances: + - meeting [time:snips/datetime](today)""") + dataset = Dataset.from_yaml_files("en", [dataset_stream]).json + parser = DeterministicIntentParser().fit(dataset) + text = "meeting tomorrow" + + # When + res = parser.parse(text) + + # Then + self.assertEqual(empty_result(text, 1.0), res) + def test_should_not_parse_when_not_fitted(self): # Given parser = DeterministicIntentParser() @@ -565,7 +598,7 @@ def test_should_parse_naughty_strings(self): - this is [slot2:entity2](second_entity)""") dataset = Dataset.from_yaml_files("en", [dataset_stream]).json naughty_strings_path = TEST_PATH / "resources" / "naughty_strings.txt" - with naughty_strings_path.open(encoding='utf8') as f: + with naughty_strings_path.open(encoding="utf8") as f: naughty_strings = [line.strip("\n") for line in f.readlines()] # When @@ -579,7 +612,7 @@ def test_should_parse_naughty_strings(self): def test_should_fit_with_naughty_strings_no_tags(self): # Given naughty_strings_path = TEST_PATH / "resources" / "naughty_strings.txt" - with naughty_strings_path.open(encoding='utf8') as f: + with naughty_strings_path.open(encoding="utf8") as f: naughty_strings = [line.strip("\n") for line in f.readlines()] utterances = [{DATA: [{TEXT: naughty_string}]} for naughty_string in @@ -635,13 +668,13 @@ def test_should_fit_and_parse_with_non_ascii_tags(self): parsing = parser.parse("string0") expected_slot = { - 'entity': 'non_ascìi_entïty', - 'range': { + "entity": "non_ascìi_entïty", + "range": { "start": 0, "end": 7 }, - 'slotName': u'non_ascìi_slöt', - 'value': u'string0' + "slotName": u"non_ascìi_slöt", + "value": u"string0" } intent_name = parsing[RES_INTENT][RES_INTENT_NAME] self.assertEqual("naughty_intent", intent_name) From 699890f6f50411fe3a390bef2f07d5edcd8bf27c Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Fri, 22 Mar 2019 18:08:01 +0100 Subject: [PATCH 2/3] Fix linting --- snips_nlu/tests/test_deterministic_intent_parser.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/snips_nlu/tests/test_deterministic_intent_parser.py b/snips_nlu/tests/test_deterministic_intent_parser.py index 7a0def5f0..0353dca7a 100644 --- a/snips_nlu/tests/test_deterministic_intent_parser.py +++ b/snips_nlu/tests/test_deterministic_intent_parser.py @@ -135,15 +135,17 @@ def test_should_parse_top_intents(self): results = parser.parse(text, top_n=3) # Then + slot = { + "entity": "snips/datetime", + "range": {"end": 16, "start": 8}, + "slotName": "time", + "value": "tomorrow" + } expected_results = [ extraction_result(intent_classification_result( intent_name="intent1", probability=0.5), []), extraction_result(intent_classification_result( - intent_name="intent2", probability=0.5), - [{"entity": "snips/datetime", - "range": {"end": 16, "start": 8}, - "slotName": "time", - "value": "tomorrow"}]) + intent_name="intent2", probability=0.5), [slot]) ] expected_results = sorted(expected_results, key=lambda r: r[RES_INTENT][RES_INTENT_NAME]) From f26f53eaff0b1db299864b2b774366d16afa35cd Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Fri, 22 Mar 2019 19:05:44 +0100 Subject: [PATCH 3/3] Fix stochastic test --- snips_nlu/tests/test_deterministic_intent_parser.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/snips_nlu/tests/test_deterministic_intent_parser.py b/snips_nlu/tests/test_deterministic_intent_parser.py index 0353dca7a..39750075d 100644 --- a/snips_nlu/tests/test_deterministic_intent_parser.py +++ b/snips_nlu/tests/test_deterministic_intent_parser.py @@ -147,8 +147,7 @@ def test_should_parse_top_intents(self): extraction_result(intent_classification_result( intent_name="intent2", probability=0.5), [slot]) ] - expected_results = sorted(expected_results, - key=lambda r: r[RES_INTENT][RES_INTENT_NAME]) + results = sorted(results, key=lambda r: r[RES_INTENT][RES_INTENT_NAME]) self.assertEqual(expected_results, results) @patch("snips_nlu.intent_parser.deterministic_intent_parser"