Merge pull request zake7749#20 from zake7749/custom_rules_dev

Custom rules dev
wuyunxiangwyx · Nov 12, 2016 · 58fc061 · 58fc061
2 parents 7ee01c5 + ff39bc7
commit 58fc061
Show file tree

Hide file tree

Showing 5 changed files with 112 additions and 45 deletions.
diff --git a/Chatbot/QuestionAnswering/Matcher/fuzzyMatcher.py b/Chatbot/QuestionAnswering/Matcher/fuzzyMatcher.py
@@ -42,13 +42,14 @@ def tieBreak(self, query, i, j):
         else:
             return (raw2,j)
 
-    def match(self, query):
+    def match(self, query, custom_title=None):
         """
         讀入使用者 query，若語料庫中存在類似的句子，便回傳該句子與標號
 
         Args:
             - query: 使用者欲查詢的語句
             - removeStopWords: 清除 stopwords
+            - custom_title: 使用者欲比對的問題集
         """
         ratio  = -1
         target = ""
@@ -60,7 +61,10 @@ def match(self, query):
             mQuery = "".join(mQuery)
             title_list = self.segTitles
         else:
-            title_list = self.titles
+            if custom_title is None:
+                title_list = self.titles
+            else:
+                title_list = custom_title
             mQuery = query
 
         for index,title in enumerate(title_list):

diff --git a/Chatbot/QuestionAnswering/qaBase.py b/Chatbot/QuestionAnswering/qaBase.py
@@ -29,35 +29,49 @@ def moduleTest(self):
 
     def getResponse(self, sentence, api_key=None):
 
-        if api_key is not None:
-            response = self.getCustomQA(sentence,api_key)
-        else:
+        if api_key is None:
             response = self.getGeneralQA(sentence)
+        else:
+            response = self.getCustomQA(sentence,api_key)
         return response
 
-    def getGeneralQA(self,query,threshold=50):
+    def getGeneralQA(self,query,threshold=0):
 
         title,index = self.matcher.match(query)
         sim = self.matcher.getSimilarity()
         if sim < threshold:
-            return None
+            return None,0
         else:
             res = json.load(open(os.path.join(self.path+"/data/processed/reply/",str(int(index/1000))+'.json'),
                             'r',encoding='utf-8'))
             targetId = index % 1000
             candiates = self.evaluator.getBestResponse(res[targetId],topk=3)
             reply = self.randomPick(candiates)
-            return reply
+            return reply,sim
 
     def randomPick(self, answers):
+
         try:
             answer = answers[random.randrange(0,len(answers))][0]
         except:
             answer = None
         return answer
 
-    def getCustomQA(self, sentence, api_key):
+    def getCustomQA(self, sentence, api_key, threshold=50):
 
         #TODO GET USER'S QA BY api_key
+        #FIXME REPLACE THE TESTING DATA WITH THE FORMAL DATA (FETCHED FROM THE DATABASE).
+        #i.e. IMPLEMENT getUserQA(api_key)
         #customqa_list = json.loads(getUserQA(api_key))
-        return None
+
+        data = '[{"Question":"你媽長得像魚人","Answers":["你媽也長得像魚人","你比痲瘋地精還臭"]}]'
+        customqa_list = json.loads(data)
+
+        # Load question to a list.
+        q_list = [qa["Question"] for qa in customqa_list]
+        #TODO  customized threshold.
+        title,index = self.matcher.match(sentence,custom_title=q_list)
+        sim = self.matcher.getSimilarity()
+        if sim < threshold:
+            return None,0
+        return customqa_list[index]["Answers"][random.randrange(0,len(customqa_list[index]["Answers"]))],sim
diff --git a/Chatbot/RuleMatcher/customRuleBase.py b/Chatbot/RuleMatcher/customRuleBase.py
@@ -1,8 +1,8 @@
 # coding=utf-8
-
 import json
+import random
 
-from .rulebase import RuleBase
+from .rulebase import RuleBase,Rule
 
 class CustomRuleBase(RuleBase):
 
@@ -12,25 +12,39 @@ class CustomRuleBase(RuleBase):
 
     #TODO 客製化的「階段式對話」
 
-    def customMatch(self, sentence, apiKey):
+    def customMatch(self, sentence, api_key, threshold):
 
         """
         比對 sentence 與用戶自定義的規則
 
         Args:
             - sentence : 用戶輸入
-            - apiKey   : 該名會員的聊天機器人金鑰
+            - api_key   : 該名會員的聊天機器人金鑰
+
+        Return: response, 暫時目標 FIXME
+            - response : 匹配出最適合的主題後，從用戶於該主題定義的句子中隨機挑一句回覆
         """
         # 清空之前讀入的規則
         self.rules.clear()
 
         # 重新建構規則表
-        customRules = self.getCustomDomainRules(apiKey)
-        customRules = json.loads(customRules)
-        self.buildCustomRules(customRules)
+        custom_rules = self.getCustomDomainRules(api_key)
+        custom_rules = json.loads(custom_rules)
+        self.buildCustomRules(custom_rules)
 
         # 進行比對
-        return self.match(sentence, threshold=customThreshold, root=apiKey)
+        result_list,path = self.match(sentence, threshold=0.4, root=api_key)
+
+        # 確認最佳回應的有效性
+        if result_list[0][0] < threshold:
+            return None
+
+        # 取出最佳主題的自訂回覆集, 並隨機挑一句回覆
+        best_domain = result_list[0][1]
+        target_rule = self.rules[best_domain]
+        res_num = target_rule.has_response()
+
+        return target_rule.response[random.randrange(0,res_num)]
 
     def buildCustomRules(self, rules):
 
@@ -52,14 +66,17 @@ def buildCustomRules(self, rules):
             if domain not in self.rules:
                 rule = Rule(domain, concepts_list, children_list, response, self.model)
                 self.rules[domain] = rule
-                if is_root:
-                    self.forest_base_roots.append(rule)
             else:
+                #TODO Block invalid rule types on the front end.
                 print("[Rules]: Detect a duplicate domain name '%s'." % domain)
 
     def getCustomDomainRules(self, key):
         """
         依照 apiKey 取得該用戶的規則集
         """
         #TODO
-        return None
+        #FIXME 採用正規方式驗證
+
+        data = '[{"domain": "TESTING","response": ["這是個測試客製化規則的回覆1","這是個測試客製化規則的回覆2"],"concepts": ["測試"],"children": []}]'
+
+        return data
diff --git a/Chatbot/RuleMatcher/rule/rule.json b/Chatbot/RuleMatcher/rule/rule.json
@@ -44,7 +44,7 @@
 
   		],
   		"concepts": [
-  			"吃","餓","喝","好玩","逛"
+  			"吃喝玩樂","逛街"
   		],
   		"children": [
   			"吃",
@@ -172,7 +172,7 @@
         "domain": "病症",
         "response": [],
         "concepts": [
-            "病症","症狀","病徵","嘔吐"
+            "病症","症狀","病徵"
         ],
         "children": [
             "過敏",

diff --git a/Chatbot/chatbot.py b/Chatbot/chatbot.py
@@ -9,7 +9,7 @@
 
 class Chatbot(object):
 
-    def __init__(self, name="NCKU"):
+    def __init__(self, name="MianBot"):
         self.name = name             # The name of chatbot.
 
         self.speech = ''             # The lastest user's input
@@ -52,7 +52,7 @@ def waiting_loop(self):
             res = self.listen(speech)
             print(res[0])
 
-    def listen(self, sentence, target=None, api_key=None):
+    def listen(self, sentence, target=None, api_key=None, qa_threshold=50, qa_block_threshold=80):
 
         """
         listen function is to encapsulate the following getResponse methods:
@@ -90,8 +90,16 @@ def listen(self, sentence, target=None, api_key=None):
         # 區隔 custom rule 與 root rule 匹配的原因是 custom rule 並不支援多段式應答
         # 若後續在模組上進行更動，可考慮將兩者合併，透過辨識 api_key 的有無來修改操作
 
+        # First of all,
+        # Assume this sentence is for qa, but use a very high threshold.
+        # FIXME Remove api_key TESTING VALUE
+        qa_response, qa_sim = self.getResponseForQA(sentence,"TESTING",qa_threshold)
+        if qa_sim > qa_block_threshold:
+            return qa_response,None,None,None,None
+
         # matching on custom rules.
-        response = self.getResponseOnCustomDomain(sentence, api_key)
+        # FIXME Remove api_key TESTING VALUE
+        response = self.getResponseOnCustomDomain(sentence, api_key="TESTING")
         if response is not None:
             return response,None,None,None
 
@@ -104,14 +112,11 @@ def listen(self, sentence, target=None, api_key=None):
             return response,stauts,target,candiates
 
         # The result based on custom rules and general rules are not confident.
-        # Assume that there are no intent in the sentence, do query matching for
-        # question answering.
+        # Assume that there is no intent in the sentence, so treat it as a QA
+        # question again, but this time use a smaller threshold.
         else:
-            response = self.getResponseForCustomQA(sentence,api_key)
-            if response is None:
-                response = self.getResponseForGeneralQA(sentence)
-            if response is not None:
-                return response,None,None,None
+            if qa_sim > 60:
+                return qa_response,None,None,None
             else:
                 # This query has too low similarity for all matching methods.
                 # We can only send back a default response.
@@ -142,7 +147,7 @@ def getResponseOnRootDomains(self, target=None):
         status   = None
         response = None
 
-        handler = self.get_task_handler()
+        handler = self._get_task_handler()
 
         try:
             status,response = handler.get_response(self.speech, self.speech_domain, target)
@@ -166,29 +171,55 @@ def getResponseOnRootDomains(self, target=None):
             handler.debug(self.extract_attr_log)
             return [response,status,target,candiates]
 
-    def getResponseOnCustomDomain(self, sentence, api_key):
+    def getResponseOnCustomDomain(self, sentence, api_key, threshold=.4):
         """
         Fetch user's custom rules by api_key and then match the sentence with
         custom rules.
 
         Args:
             - sentence: user's raw input. (not segmented)
-            - api_key
+            - api_key : a string that identifies the user and selects the rules he/she defined.
+            - threshold : a value between 0 and 1; a response whose similarity
+              is lower than the threshold is blocked.
         """
         if api_key is None:
             return None
-        else:
-            #TODO 根據 api_key 調適 self.custom_rulebase
-            pass
+
+        #TODO 調適為能夠進行「多段式對話」
+        return self.custom_rulebase.customMatch(sentence, api_key, threshold)
+
+    def getResponseForQA(self, sentence, api_key, threshold):
+        """
+        Encapsulate getResponseForGeneralQA, getResponseForCustomQA
+
+        Return:
+            - response, similarity
+            If no similarity exceeds the threshold, None,0 is returned.
+        """
+
+        #FIXME Remove this flag when everything is done.
+        if self.github_qa_unupdated:
+            return None, 0
+
+        cqa_response,cqa_sim = self.getResponseForCustomQA(sentence,api_key)
+        if cqa_sim > threshold:
+            return cqa_response,cqa_sim
+        gqa_response,gqa_sim = self.getResponseForGeneralQA(sentence)
+        if gqa_sim > threshold:
+            return gqa_response,gqa_sim
+        return None,0
 
     def getResponseForGeneralQA(self, sentence):
 
         """
         Listen user's input and return a response which is based on our
         knowledge base.
+
+        Return:
+            answer, similarity
         """
         if self.github_qa_unupdated:
-            return None
+            return None, 0
 
         return self.answerer.getResponse(sentence)
 
@@ -197,12 +228,13 @@ def getResponseForCustomQA(self,sentence,api_key):
         """
         Listen user's input and return a response which is based on a cutsom
         knowledge base.
-        """
-        if self.github_qa_unupdated:
-            return None
 
+        Return:
+            answer, similarity
+        """
         if api_key is None:
-            return None
+            return None, 0
+
         return self.answerer.getResponse(sentence,api_key)
 
     def getLoggerData(self):
@@ -261,7 +293,7 @@ def _set_root_domain(self):
         else:
             self.root_domain = self.last_path.split('>')[0]
 
-    def get_task_handler(self, domain=None):
+    def _get_task_handler(self, domain=None):
 
         """
         Get the instance of task handler based on the given domain.