Merge pull request #791 from snipsco/task/slots-rescoring

Re-score ambiguous DeterministicIntentParser results based on slots
snipsco · Apr 29, 2019 · d548c62 · d548c62
2 parents b39538e + fe31eda
commit d548c62
Show file tree

Hide file tree

Showing 3 changed files with 53 additions and 15 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,10 @@
 # Changelog
 All notable changes to this project will be documented in this file.
 
+## [Unreleased]
+### Changed
+- Re-score ambiguous `DeterministicIntentParser` results based on slots [#791](https://github.com/snipsco/snips-nlu/pull/791)
+
 ## [0.19.6]
 ### Fixed
 - Raise an error when using unknown intents in intents filter [#788](https://github.com/snipsco/snips-nlu/pull/788)

diff --git a/snips_nlu/intent_parser/deterministic_intent_parser.py b/snips_nlu/intent_parser/deterministic_intent_parser.py
@@ -249,16 +249,17 @@ def placeholder_fn(entity_name):
                     results.append(res)
                     break
 
-        confidence_score = 1.
-        if results:
-            confidence_score = 1. / float(len(results))
+        # In some rare cases, several intents can ambiguously match the input.
+        # In such cases, priority is given to results containing fewer slots.
+        weights = [1.0 / (1.0 + len(res[RES_SLOTS])) for res in results]
+        total_weight = sum(weights)
 
-        results = results[:top_n]
+        for res, weight in zip(results, weights):
+            res[RES_INTENT][RES_PROBA] = weight / total_weight
 
-        for res in results:
-            res[RES_INTENT][RES_PROBA] = confidence_score
+        results = sorted(results, key=lambda r: -r[RES_INTENT][RES_PROBA])
 
-        return results
+        return results[:top_n]
 
     @fitted_required
     def get_intents(self, text):

diff --git a/snips_nlu/tests/test_deterministic_intent_parser.py b/snips_nlu/tests/test_deterministic_intent_parser.py
@@ -119,14 +119,27 @@ def test_should_parse_top_intents(self):
 ---
 type: intent
 name: intent1
+utterances:
+  - meeting [time:snips/datetime](today)
+
+---
+type: intent
+name: intent2
 utterances:
   - meeting tomorrow
   
 ---
 type: intent
-name: intent2
+name: intent3
 utterances:
-  - meeting [time:snips/datetime](today)""")
+  - "[event_type](call) [time:snips/datetime](at 9pm)"
+
+---
+type: entity
+name: event_type
+values:
+  - meeting
+  - feedback session""")
         dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
         parser = DeterministicIntentParser().fit(dataset)
         text = "meeting tomorrow"
@@ -135,19 +148,39 @@ def test_should_parse_top_intents(self):
         results = parser.parse(text, top_n=3)
 
         # Then
-        slot = {
+        time_slot = {
             "entity": "snips/datetime",
             "range": {"end": 16, "start": 8},
             "slotName": "time",
             "value": "tomorrow"
         }
+        event_slot = {
+            "entity": "event_type",
+            "range": {"end": 7, "start": 0},
+            "slotName": "event_type",
+            "value": "meeting"
+        }
+        weight_intent_1 = 1. / 2.
+        weight_intent_2 = 1.
+        weight_intent_3 = 1. / 3.
+        total_weight = weight_intent_1 + weight_intent_2 + weight_intent_3
+        proba_intent2 = weight_intent_2 / total_weight
+        proba_intent1 = weight_intent_1 / total_weight
+        proba_intent3 = weight_intent_3 / total_weight
         expected_results = [
-            extraction_result(intent_classification_result(
-                intent_name="intent1", probability=0.5), []),
-            extraction_result(intent_classification_result(
-                intent_name="intent2", probability=0.5), [slot])
+            extraction_result(
+                intent_classification_result(
+                    intent_name="intent2", probability=proba_intent2),
+                slots=[]),
+            extraction_result(
+                intent_classification_result(
+                    intent_name="intent1", probability=proba_intent1),
+                slots=[time_slot]),
+            extraction_result(
+                intent_classification_result(
+                    intent_name="intent3", probability=proba_intent3),
+                slots=[event_slot, time_slot])
         ]
-        results = sorted(results, key=lambda r: r[RES_INTENT][RES_INTENT_NAME])
         self.assertEqual(expected_results, results)
 
     @patch("snips_nlu.intent_parser.deterministic_intent_parser"