diff --git a/Chatbot/QuestionAnswering/Matcher/fuzzyMatcher.py b/Chatbot/QuestionAnswering/Matcher/fuzzyMatcher.py index 62ff6c5..f3db1d2 100644 --- a/Chatbot/QuestionAnswering/Matcher/fuzzyMatcher.py +++ b/Chatbot/QuestionAnswering/Matcher/fuzzyMatcher.py @@ -42,13 +42,14 @@ def tieBreak(self, query, i, j): else: return (raw2,j) - def match(self, query): + def match(self, query, custom_title=None): """ 讀入使用者 query,若語料庫中存在類似的句子,便回傳該句子與標號 Args: - query: 使用者欲查詢的語句 - removeStopWords: 清除 stopwords + - custom_title: 使用者欲比對的問題集 """ ratio = -1 target = "" @@ -60,7 +61,10 @@ def match(self, query): mQuery = "".join(mQuery) title_list = self.segTitles else: - title_list = self.titles + if custom_title is None: + title_list = self.titles + else: + title_list = custom_title mQuery = query for index,title in enumerate(title_list): diff --git a/Chatbot/QuestionAnswering/qaBase.py b/Chatbot/QuestionAnswering/qaBase.py index aad913f..0bbdf90 100644 --- a/Chatbot/QuestionAnswering/qaBase.py +++ b/Chatbot/QuestionAnswering/qaBase.py @@ -29,35 +29,49 @@ def moduleTest(self): def getResponse(self, sentence, api_key=None): - if api_key is not None: - response = self.getCustomQA(sentence,api_key) - else: + if api_key is None: response = self.getGeneralQA(sentence) + else: + response = self.getCustomQA(sentence,api_key) return response - def getGeneralQA(self,query,threshold=50): + def getGeneralQA(self,query,threshold=0): title,index = self.matcher.match(query) sim = self.matcher.getSimilarity() if sim < threshold: - return None + return None,0 else: res = json.load(open(os.path.join(self.path+"/data/processed/reply/",str(int(index/1000))+'.json'), 'r',encoding='utf-8')) targetId = index % 1000 candiates = self.evaluator.getBestResponse(res[targetId],topk=3) reply = self.randomPick(candiates) - return reply + return reply,sim def randomPick(self, answers): + try: answer = answers[random.randrange(0,len(answers))][0] except: answer = None return answer - def getCustomQA(self, sentence, api_key): + def getCustomQA(self, sentence, api_key, threshold=50): #TODO GET USER'S QA BY api_key + #FIXME REPLACE TESTING DATA TO FORMAL ONE(GET BY DATABASE). + #i.e IMPLEMENT getUserQA(api_key) #customqa_list = json.loads(getUserQA(api_key)) - return None + + data = '[{"Question":"你媽長得像魚人","Answers":["你媽也長得像魚人","你比痲瘋地精還臭"]}]' + customqa_list = json.loads(data) + + # Load question to a list. + q_list = [qa["Question"] for qa in customqa_list] + #TODO customized threshold. + title,index = self.matcher.match(sentence,custom_title=q_list) + sim = self.matcher.getSimilarity() + if sim < threshold: + return None,0 + return customqa_list[index]["Answers"][random.randrange(0,len(customqa_list[index]["Answers"]))],sim diff --git a/Chatbot/RuleMatcher/customRuleBase.py b/Chatbot/RuleMatcher/customRuleBase.py index 171897b..9aae783 100644 --- a/Chatbot/RuleMatcher/customRuleBase.py +++ b/Chatbot/RuleMatcher/customRuleBase.py @@ -1,8 +1,8 @@ # coding=utf-8 - import json +import random -from .rulebase import RuleBase +from .rulebase import RuleBase,Rule class CustomRuleBase(RuleBase): @@ -12,25 +12,39 @@ class CustomRuleBase(RuleBase): #TODO 客製化的「階段式對話」 - def customMatch(self, sentence, apiKey): + def customMatch(self, sentence, api_key, threshold): """ 比對 sentence 與用戶自定義的規則 Args: - sentence : 用戶輸入 - - apiKey : 該名會員的聊天機器人金鑰 + - api_key : 該名會員的聊天機器人金鑰 + + Return: response, 暫時目標 FIXME + - response : 批配出最適合的主題後,挑選用戶於該主題定義的句子隨機挑一回覆 """ # 清空之前讀入的規則 self.rules.clear() # 重新建構規則表 - customRules = self.getCustomDomainRules(apiKey) - customRules = json.loads(customRules) - self.buildCustomRules(customRules) + custom_rules = self.getCustomDomainRules(api_key) + custom_rules = json.loads(custom_rules) + self.buildCustomRules(custom_rules) # 進行比對 - return self.match(sentence, threshold=customThreshold, root=apiKey) + result_list,path = self.match(sentence, threshold=0.4, root=api_key) + + # 確認最佳回應的有效性 + if result_list[0][0] < threshold: + return None + + # 取出最佳主題的自訂回覆集, 並隨機挑一句回覆 + best_domain = result_list[0][1] + target_rule = self.rules[best_domain] + res_num = target_rule.has_response() + + return target_rule.response[random.randrange(0,res_num)] def buildCustomRules(self, rules): @@ -52,9 +66,8 @@ def buildCustomRules(self, rules): if domain not in self.rules: rule = Rule(domain, concepts_list, children_list, response, self.model) self.rules[domain] = rule - if is_root: - self.forest_base_roots.append(rule) else: + #TODO Block invalided rule type on front end. print("[Rules]: Detect a duplicate domain name '%s'." % domain) def getCustomDomainRules(self, key): @@ -62,4 +75,8 @@ def getCustomDomainRules(self, key): 依照 apiKey 取得該用戶的規則集 """ #TODO - return None + #FIXME 採用正規方式驗證 + + data = '[{"domain": "TESTING","response": ["這是個測試客製化規則的回覆1","這是個測試客製化規則的回覆2"],"concepts": ["測試"],"children": []}]' + + return data diff --git a/Chatbot/RuleMatcher/rule/rule.json b/Chatbot/RuleMatcher/rule/rule.json index 8d9952a..99017c4 100644 --- a/Chatbot/RuleMatcher/rule/rule.json +++ b/Chatbot/RuleMatcher/rule/rule.json @@ -44,7 +44,7 @@ ], "concepts": [ - "吃","餓","喝","好玩","逛" + "吃喝玩樂","逛街" ], "children": [ "吃", @@ -172,7 +172,7 @@ "domain": "病症", "response": [], "concepts": [ - "病症","症狀","病徵","嘔吐" + "病症","症狀","病徵" ], "children": [ "過敏", diff --git a/Chatbot/chatbot.py b/Chatbot/chatbot.py index 180de79..eb470e8 100644 --- a/Chatbot/chatbot.py +++ b/Chatbot/chatbot.py @@ -9,7 +9,7 @@ class Chatbot(object): - def __init__(self, name="NCKU"): + def __init__(self, name="MianBot"): self.name = name # The name of chatbot. self.speech = '' # The lastest user's input @@ -52,7 +52,7 @@ def waiting_loop(self): res = self.listen(speech) print(res[0]) - def listen(self, sentence, target=None, api_key=None): + def listen(self, sentence, target=None, api_key=None, qa_threshold=50, qa_block_threshold=80): """ listen function is to encapsulate the following getResponse methods: @@ -90,8 +90,16 @@ def listen(self, sentence, target=None, api_key=None): # 區隔 custom rule 與 root rule 匹配的原因是 custom rule 並不支援多段式應答 # 若後續在模組上進行更動,可考慮將兩者合併,透過辨識 api_key 的有無來修改操作 + # First of all, + # Assume this sentence is for qa, but use a very high threshold. + # FIXME Remove api_key TESTING VALUE + qa_response, qa_sim = self.getResponseForQA(sentence,"TESTING",qa_threshold) + if qa_sim > qa_block_threshold: + return qa_response,None,None,None,None + # matching on custom rules. - response = self.getResponseOnCustomDomain(sentence, api_key) + # FIXME Remove api_key TESTING VALUE + response = self.getResponseOnCustomDomain(sentence, api_key="TESTING") if response is not None: return response,None,None,None @@ -104,14 +112,11 @@ def listen(self, sentence, target=None, api_key=None): return response,stauts,target,candiates # The result based on custom rules and general rules are not confident. - # Assume that there are no intent in the sentence, do query matching for - # question answering. + # Assume that there are no intent in the sentence, consider this questions + # is qa again, but this time use a smaller threshold. else: - response = self.getResponseForCustomQA(sentence,api_key) - if response is None: - response = self.getResponseForGeneralQA(sentence) - if response is not None: - return response,None,None,None + if qa_sim > 60: + return qa_response,None,None,None else: # This query has too low similarity for all matching methods. # We can only send back a default response. @@ -142,7 +147,7 @@ def getResponseOnRootDomains(self, target=None): status = None response = None - handler = self.get_task_handler() + handler = self._get_task_handler() try: status,response = handler.get_response(self.speech, self.speech_domain, target) @@ -166,29 +171,55 @@ def getResponseOnRootDomains(self, target=None): handler.debug(self.extract_attr_log) return [response,status,target,candiates] - def getResponseOnCustomDomain(self, sentence, api_key): + def getResponseOnCustomDomain(self, sentence, api_key, threshold=.4): """ Fetch user's custom rules by api_key and then match the sentence with custom rules. Args: - sentence: user's raw input. (not segmented) - - api_key + - api_key : a string to recognize the user and get rules defined by him/she. + - threshold : a value between 0 to 1, to block the response which + has a similarity lower than threshold. """ if api_key is None: return None - else: - #TODO 根據 api_key 調適 self.custom_rulebase - pass + + #TODO 調適為能夠進行「多段式對話」 + return self.custom_rulebase.customMatch(sentence, api_key, threshold) + + def getResponseForQA(self, sentence, api_key, threshold): + """ + Encapsulate getResponseForGeneralQA, getResponseForCustomQA + + Return: + - response, similarity + if the similarity < threshold will return None,0. + """ + + #FIXME Remove this flag when all have done. + if self.github_qa_unupdated: + return None, 0 + + cqa_response,cqa_sim = self.getResponseForCustomQA(sentence,api_key) + if cqa_sim > threshold: + return cqa_response,cqa_sim + gqa_response,gqa_sim = self.getResponseForGeneralQA(sentence) + if gqa_sim > threshold: + return gqa_response,gqa_sim + return None,0 def getResponseForGeneralQA(self, sentence): """ Listen user's input and return a response which is based on our knowledge base. + + Return: + answer, similarity """ if self.github_qa_unupdated: - return None + return None, 0 return self.answerer.getResponse(sentence) @@ -197,12 +228,13 @@ def getResponseForCustomQA(self,sentence,api_key): """ Listen user's input and return a response which is based on a cutsom knowledge base. - """ - if self.github_qa_unupdated: - return None + Return: + answer, similarity + """ if api_key is None: - return None + return None, 0 + return self.answerer.getResponse(sentence,api_key) def getLoggerData(self): @@ -261,7 +293,7 @@ def _set_root_domain(self): else: self.root_domain = self.last_path.split('>')[0] - def get_task_handler(self, domain=None): + def _get_task_handler(self, domain=None): """ Get the instance of task handler based on the given domain.