From f0336ae825a818df042bb0145e4c660c5b43d285 Mon Sep 17 00:00:00 2001
From: Adrien Ball <adrien.ball@snips.ai>
Date: Fri, 22 Mar 2019 16:19:18 +0100
Subject: [PATCH 1/3] Improve handling of ambiguous utterances in
 DeterministicIntentParser

---
 .../deterministic_intent_parser.py            | 17 +++++-
 .../tests/test_deterministic_intent_parser.py | 57 +++++++++++++++----
 2 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/snips_nlu/intent_parser/deterministic_intent_parser.py b/snips_nlu/intent_parser/deterministic_intent_parser.py
index 29d78dfa9..a01a68f14 100644
--- a/snips_nlu/intent_parser/deterministic_intent_parser.py
+++ b/snips_nlu/intent_parser/deterministic_intent_parser.py
@@ -19,7 +19,8 @@
 from snips_nlu.constants import (
     DATA, END, ENTITIES, ENTITY,
     INTENTS, LANGUAGE, RES_INTENT, RES_INTENT_NAME,
-    RES_MATCH_RANGE, RES_SLOTS, RES_VALUE, SLOT_NAME, START, TEXT, UTTERANCES)
+    RES_MATCH_RANGE, RES_SLOTS, RES_VALUE, SLOT_NAME, START, TEXT, UTTERANCES,
+    RES_PROBA)
 from snips_nlu.dataset import validate_and_format_dataset
 from snips_nlu.entity_parser.builtin_entity_parser import is_builtin_entity
 from snips_nlu.exceptions import IntentNotFoundError, LoadingError
@@ -198,6 +199,9 @@ def parse(self, text, intents=None, top_n=None):
             if top_intents:
                 intent = top_intents[0][RES_INTENT]
                 slots = top_intents[0][RES_SLOTS]
+                if intent[RES_PROBA] < 1.0:
+                    # return an empty result in case of ambiguity
+                    return empty_result(text, probability=1.0)
                 return parsing_result(text, intent, slots)
             return empty_result(text, probability=1.0)
         return self._parse_top_intents(text, top_n=top_n, intents=intents)
@@ -239,8 +243,15 @@ def placeholder_fn(entity_name):
                 if res is not None:
                     results.append(res)
                     break
-            if len(results) == top_n:
-                return results
+
+        confidence_score = 1.
+        if results:
+            confidence_score = 1. / float(len(results))
+
+        results = results[:top_n]
+
+        for res in results:
+            res[RES_INTENT][RES_PROBA] = confidence_score
 
         return results
 
diff --git a/snips_nlu/tests/test_deterministic_intent_parser.py b/snips_nlu/tests/test_deterministic_intent_parser.py
index 094b74e8c..7a0def5f0 100644
--- a/snips_nlu/tests/test_deterministic_intent_parser.py
+++ b/snips_nlu/tests/test_deterministic_intent_parser.py
@@ -120,24 +120,33 @@ def test_should_parse_top_intents(self):
 type: intent
 name: intent1
 utterances:
-  - hello world
+  - meeting tomorrow
   
 ---
 type: intent
 name: intent2
 utterances:
-  - foo bar""")
+  - meeting [time:snips/datetime](today)""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
         parser = DeterministicIntentParser().fit(dataset)
-        text = "hello world"
+        text = "meeting tomorrow"
 
         # When
         results = parser.parse(text, top_n=3)
 
         # Then
-        expected_intent = intent_classification_result(
-            intent_name="intent1", probability=1.0)
-        expected_results = [extraction_result(expected_intent, [])]
+        expected_results = [
+            extraction_result(intent_classification_result(
+                intent_name="intent1", probability=0.5), []),
+            extraction_result(intent_classification_result(
+                intent_name="intent2", probability=0.5),
+                [{"entity": "snips/datetime",
+                  "range": {"end": 16, "start": 8},
+                  "slotName": "time",
+                  "value": "tomorrow"}])
+        ]
+        expected_results = sorted(expected_results,
+                                  key=lambda r: r[RES_INTENT][RES_INTENT_NAME])
         self.assertEqual(expected_results, results)
 
     @patch("snips_nlu.intent_parser.deterministic_intent_parser"
@@ -225,6 +234,30 @@ def test_should_ignore_ambiguous_utterances(self):
         # Then
         self.assertEqual(empty_result(text, 1.0), res)
 
+    def test_should_ignore_subtly_ambiguous_utterances(self):
+        # Given
+        dataset_stream = io.StringIO("""
+---
+type: intent
+name: intent_1
+utterances:
+  - meeting tomorrow
+
+---
+type: intent
+name: intent_2
+utterances:
+  - meeting [time:snips/datetime](today)""")
+        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
+        parser = DeterministicIntentParser().fit(dataset)
+        text = "meeting tomorrow"
+
+        # When
+        res = parser.parse(text)
+
+        # Then
+        self.assertEqual(empty_result(text, 1.0), res)
+
     def test_should_not_parse_when_not_fitted(self):
         # Given
         parser = DeterministicIntentParser()
@@ -565,7 +598,7 @@ def test_should_parse_naughty_strings(self):
 - this is [slot2:entity2](second_entity)""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
         naughty_strings_path = TEST_PATH / "resources" / "naughty_strings.txt"
-        with naughty_strings_path.open(encoding='utf8') as f:
+        with naughty_strings_path.open(encoding="utf8") as f:
             naughty_strings = [line.strip("\n") for line in f.readlines()]
 
         # When
@@ -579,7 +612,7 @@ def test_should_parse_naughty_strings(self):
     def test_should_fit_with_naughty_strings_no_tags(self):
         # Given
         naughty_strings_path = TEST_PATH / "resources" / "naughty_strings.txt"
-        with naughty_strings_path.open(encoding='utf8') as f:
+        with naughty_strings_path.open(encoding="utf8") as f:
             naughty_strings = [line.strip("\n") for line in f.readlines()]
 
         utterances = [{DATA: [{TEXT: naughty_string}]} for naughty_string in
@@ -635,13 +668,13 @@ def test_should_fit_and_parse_with_non_ascii_tags(self):
             parsing = parser.parse("string0")
 
             expected_slot = {
-                'entity': 'non_ascìi_entïty',
-                'range': {
+                "entity": "non_ascìi_entïty",
+                "range": {
                     "start": 0,
                     "end": 7
                 },
-                'slotName': u'non_ascìi_slöt',
-                'value': u'string0'
+                "slotName": u"non_ascìi_slöt",
+                "value": u"string0"
             }
             intent_name = parsing[RES_INTENT][RES_INTENT_NAME]
             self.assertEqual("naughty_intent", intent_name)

From 699890f6f50411fe3a390bef2f07d5edcd8bf27c Mon Sep 17 00:00:00 2001
From: Adrien Ball <adrien.ball@snips.ai>
Date: Fri, 22 Mar 2019 18:08:01 +0100
Subject: [PATCH 2/3] Fix linting

---
 snips_nlu/tests/test_deterministic_intent_parser.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/snips_nlu/tests/test_deterministic_intent_parser.py b/snips_nlu/tests/test_deterministic_intent_parser.py
index 7a0def5f0..0353dca7a 100644
--- a/snips_nlu/tests/test_deterministic_intent_parser.py
+++ b/snips_nlu/tests/test_deterministic_intent_parser.py
@@ -135,15 +135,17 @@ def test_should_parse_top_intents(self):
         results = parser.parse(text, top_n=3)
 
         # Then
+        slot = {
+            "entity": "snips/datetime",
+            "range": {"end": 16, "start": 8},
+            "slotName": "time",
+            "value": "tomorrow"
+        }
         expected_results = [
             extraction_result(intent_classification_result(
                 intent_name="intent1", probability=0.5), []),
             extraction_result(intent_classification_result(
-                intent_name="intent2", probability=0.5),
-                [{"entity": "snips/datetime",
-                  "range": {"end": 16, "start": 8},
-                  "slotName": "time",
-                  "value": "tomorrow"}])
+                intent_name="intent2", probability=0.5), [slot])
         ]
         expected_results = sorted(expected_results,
                                   key=lambda r: r[RES_INTENT][RES_INTENT_NAME])

From f26f53eaff0b1db299864b2b774366d16afa35cd Mon Sep 17 00:00:00 2001
From: Adrien Ball <adrien.ball@snips.ai>
Date: Fri, 22 Mar 2019 19:05:44 +0100
Subject: [PATCH 3/3] Fix stochastic test

---
 snips_nlu/tests/test_deterministic_intent_parser.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/snips_nlu/tests/test_deterministic_intent_parser.py b/snips_nlu/tests/test_deterministic_intent_parser.py
index 0353dca7a..39750075d 100644
--- a/snips_nlu/tests/test_deterministic_intent_parser.py
+++ b/snips_nlu/tests/test_deterministic_intent_parser.py
@@ -147,8 +147,7 @@ def test_should_parse_top_intents(self):
             extraction_result(intent_classification_result(
                 intent_name="intent2", probability=0.5), [slot])
         ]
-        expected_results = sorted(expected_results,
-                                  key=lambda r: r[RES_INTENT][RES_INTENT_NAME])
+        results = sorted(results, key=lambda r: r[RES_INTENT][RES_INTENT_NAME])
         self.assertEqual(expected_results, results)
 
     @patch("snips_nlu.intent_parser.deterministic_intent_parser"