From 45daf7c4c642843ebb135df6168bde69db73c5a0 Mon Sep 17 00:00:00 2001 From: F74026284 Date: Wed, 9 Nov 2016 23:43:42 +0800 Subject: [PATCH 01/12] Change workflow. --- Chatbot/QuestionAnswering/qaBase.py | 8 +++--- Chatbot/chatbot.py | 38 ++++++++++++++++++++--------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/Chatbot/QuestionAnswering/qaBase.py b/Chatbot/QuestionAnswering/qaBase.py index aad913f..894cafd 100644 --- a/Chatbot/QuestionAnswering/qaBase.py +++ b/Chatbot/QuestionAnswering/qaBase.py @@ -40,14 +40,14 @@ def getGeneralQA(self,query,threshold=50): title,index = self.matcher.match(query) sim = self.matcher.getSimilarity() if sim < threshold: - return None + return None,0 else: res = json.load(open(os.path.join(self.path+"/data/processed/reply/",str(int(index/1000))+'.json'), 'r',encoding='utf-8')) targetId = index % 1000 candiates = self.evaluator.getBestResponse(res[targetId],topk=3) reply = self.randomPick(candiates) - return reply + return reply,sim def randomPick(self, answers): try: @@ -56,8 +56,8 @@ def randomPick(self, answers): answer = None return answer - def getCustomQA(self, sentence, api_key): + def getCustomQA(self, sentence, api_key, threshold=50): #TODO GET USER'S QA BY api_key #customqa_list = json.loads(getUserQA(api_key)) - return None + return None,0 diff --git a/Chatbot/chatbot.py b/Chatbot/chatbot.py index 180de79..f7934fb 100644 --- a/Chatbot/chatbot.py +++ b/Chatbot/chatbot.py @@ -33,7 +33,7 @@ def __init__(self, name="NCKU"): self.custom_rulebase.model = self.console.rb.model # pass word2vec model # For QA - self.github_qa_unupdated = True + self.github_qa_unupdated = False if not self.github_qa_unupdated: self.answerer = qa.Answerer() @@ -52,7 +52,7 @@ def waiting_loop(self): res = self.listen(speech) print(res[0]) - def listen(self, sentence, target=None, api_key=None): + def listen(self, sentence, target=None, api_key=None, qa_block_threshold=80): """ listen function is to encapsulate the following getResponse methods: @@ -90,6 +90,15 @@ def listen(self, sentence, target=None, api_key=None): # 區隔 custom rule 與 root rule 匹配的原因是 custom rule 並不支援多段式應答 # 若後續在模組上進行更動,可考慮將兩者合併,透過辨識 api_key 的有無來修改操作 + # First of all, + # Assume this sentence is for qa, but use a very high threshold. + cqa_response,cqa_sim = self.getResponseForCustomQA(sentence,api_key) + if cqa_sim > qa_block_threshold: + return cus_response,None,None,None + gqa_response,gqa_sim = self.getResponseForGeneralQA(sentence) + if gqa_sim > qa_block_threshold: + return gqa_response,None,None,None + # matching on custom rules. response = self.getResponseOnCustomDomain(sentence, api_key) if response is not None: @@ -104,14 +113,13 @@ def listen(self, sentence, target=None, api_key=None): return response,stauts,target,candiates # The result based on custom rules and general rules are not confident. - # Assume that there are no intent in the sentence, do query matching for - # question answering. + # Assume that there are no intent in the sentence, consider this questions + # is qa again, but this time use a smaller threshold. else: - response = self.getResponseForCustomQA(sentence,api_key) - if response is None: - response = self.getResponseForGeneralQA(sentence) - if response is not None: - return response,None,None,None + if cqa_sim > 50: + return cus_response,None,None,None + elif gqa_sim > 50: + return gqa_response,None,None,None else: # This query has too low similarity for all matching methods. # We can only send back a default response. @@ -186,9 +194,12 @@ def getResponseForGeneralQA(self, sentence): """ Listen user's input and return a response which is based on our knowledge base. + + Return: + answer, similarity """ if self.github_qa_unupdated: - return None + return None, 0 return self.answerer.getResponse(sentence) @@ -197,12 +208,15 @@ def getResponseForCustomQA(self,sentence,api_key): """ Listen user's input and return a response which is based on a cutsom knowledge base. + + Return: + answer, similarity """ if self.github_qa_unupdated: - return None + return None, 0 if api_key is None: - return None + return None, 0 return self.answerer.getResponse(sentence,api_key) def getLoggerData(self): From aa7b662e7ac2c7a1c1854f27fd9775ca8ed3e1ce Mon Sep 17 00:00:00 2001 From: F74026284 Date: Wed, 9 Nov 2016 23:55:45 +0800 Subject: [PATCH 02/12] Modify QA matching method. * Remove threshold in Answer. * Enclaspate the two getQAResponse method to one. * Only return the best result. --- Chatbot/QuestionAnswering/qaBase.py | 2 +- Chatbot/chatbot.py | 24 +++++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/Chatbot/QuestionAnswering/qaBase.py b/Chatbot/QuestionAnswering/qaBase.py index 894cafd..8ac5e88 100644 --- a/Chatbot/QuestionAnswering/qaBase.py +++ b/Chatbot/QuestionAnswering/qaBase.py @@ -35,7 +35,7 @@ def getResponse(self, sentence, api_key=None): response = self.getGeneralQA(sentence) return response - def getGeneralQA(self,query,threshold=50): + def getGeneralQA(self,query,threshold=0): title,index = self.matcher.match(query) sim = self.matcher.getSimilarity() diff --git a/Chatbot/chatbot.py b/Chatbot/chatbot.py index f7934fb..c3847fb 100644 --- a/Chatbot/chatbot.py +++ b/Chatbot/chatbot.py @@ -52,7 +52,7 @@ def waiting_loop(self): res = self.listen(speech) print(res[0]) - def listen(self, sentence, target=None, api_key=None, qa_block_threshold=80): + def listen(self, sentence, target=None, api_key=None, qa_threshold=50, qa_block_threshold=80): """ listen function is to encapsulate the following getResponse methods: @@ -92,12 +92,7 @@ def listen(self, sentence, target=None, api_key=None, qa_block_threshold=80): # First of all, # Assume this sentence is for qa, but use a very high threshold. - cqa_response,cqa_sim = self.getResponseForCustomQA(sentence,api_key) - if cqa_sim > qa_block_threshold: - return cus_response,None,None,None - gqa_response,gqa_sim = self.getResponseForGeneralQA(sentence) - if gqa_sim > qa_block_threshold: - return gqa_response,None,None,None + # matching on custom rules. response = self.getResponseOnCustomDomain(sentence, api_key) @@ -189,6 +184,21 @@ def getResponseOnCustomDomain(self, sentence, api_key): #TODO 根據 api_key 調適 self.custom_rulebase pass + def getResponseForQA(self, sentence, api_key, threshold): + """ + Encapsulate getResponseForGeneralQA, getResponseForCustomQA + """ + cqa_response,cqa_sim = self.getResponseForCustomQA(sentence,api_key) + if cqa_sim > threshold: + return cus_response,cqa_sim + + gqa_response,gqa_sim = self.getResponseForGeneralQA(sentence) + if gqa_sim > threshold: + return gqa_response,gqa_sim + elif: + return None,0 + + def getResponseForGeneralQA(self, sentence): """ From 3d771291140577b33ae4289e0ef5ad042b58aeef Mon Sep 17 00:00:00 2001 From: F74026284 Date: Thu, 10 Nov 2016 00:40:19 +0800 Subject: [PATCH 03/12] Update workflow of listen. --- Chatbot/chatbot.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/Chatbot/chatbot.py b/Chatbot/chatbot.py index c3847fb..4247528 100644 --- a/Chatbot/chatbot.py +++ b/Chatbot/chatbot.py @@ -52,7 +52,7 @@ def waiting_loop(self): res = self.listen(speech) print(res[0]) - def listen(self, sentence, target=None, api_key=None, qa_threshold=50, qa_block_threshold=80): + def listen(self, sentence, target=None, api_key=None, qa_threshold=50, qa_block_threshold=75): """ listen function is to encapsulate the following getResponse methods: @@ -92,7 +92,9 @@ def listen(self, sentence, target=None, api_key=None, qa_threshold=50, qa_block_ # First of all, # Assume this sentence is for qa, but use a very high threshold. - + qa_response, qa_sim = self.getResponseForQA(sentence,api_key,qa_threshold) + if qa_sim > qa_block_threshold: + return qa_response,None,None,None,None # matching on custom rules. response = self.getResponseOnCustomDomain(sentence, api_key) @@ -111,10 +113,8 @@ def listen(self, sentence, target=None, api_key=None, qa_threshold=50, qa_block_ # Assume that there are no intent in the sentence, consider this questions # is qa again, but this time use a smaller threshold. else: - if cqa_sim > 50: - return cus_response,None,None,None - elif gqa_sim > 50: - return gqa_response,None,None,None + if qa_sim > 60: + return qa_response,None,None,None else: # This query has too low similarity for all matching methods. # We can only send back a default response. @@ -187,17 +187,18 @@ def getResponseOnCustomDomain(self, sentence, api_key): def getResponseForQA(self, sentence, api_key, threshold): """ Encapsulate getResponseForGeneralQA, getResponseForCustomQA + + Return: + - response, similarity + if the similarity < threshold will return None,0. """ cqa_response,cqa_sim = self.getResponseForCustomQA(sentence,api_key) if cqa_sim > threshold: return cus_response,cqa_sim - gqa_response,gqa_sim = self.getResponseForGeneralQA(sentence) if gqa_sim > threshold: return gqa_response,gqa_sim - elif: - return None,0 - + return None,0 def getResponseForGeneralQA(self, sentence): From d52134cd3442b4651017f3f87e1207bc60dd0799 Mon Sep 17 00:00:00 2001 From: F74026284 Date: Thu, 10 Nov 2016 23:43:20 +0800 Subject: [PATCH 04/12] Clean rules's concept. --- Chatbot/RuleMatcher/rule/rule.json | 4 ++-- Chatbot/chatbot.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Chatbot/RuleMatcher/rule/rule.json b/Chatbot/RuleMatcher/rule/rule.json index 8d9952a..99017c4 100644 --- a/Chatbot/RuleMatcher/rule/rule.json +++ b/Chatbot/RuleMatcher/rule/rule.json @@ -44,7 +44,7 @@ ], "concepts": [ - "吃","餓","喝","好玩","逛" + "吃喝玩樂","逛街" ], "children": [ "吃", @@ -172,7 +172,7 @@ "domain": "病症", "response": [], "concepts": [ - "病症","症狀","病徵","嘔吐" + "病症","症狀","病徵" ], "children": [ "過敏", diff --git a/Chatbot/chatbot.py b/Chatbot/chatbot.py index 4247528..247887e 100644 --- a/Chatbot/chatbot.py +++ b/Chatbot/chatbot.py @@ -9,7 +9,7 @@ class Chatbot(object): - def __init__(self, name="NCKU"): + def __init__(self, name="MianBot"): self.name = name # The name of chatbot. self.speech = '' # The lastest user's input @@ -52,7 +52,7 @@ def waiting_loop(self): res = self.listen(speech) print(res[0]) - def listen(self, sentence, target=None, api_key=None, qa_threshold=50, qa_block_threshold=75): + def listen(self, sentence, target=None, api_key=None, qa_threshold=50, qa_block_threshold=80): """ listen function is to encapsulate the following getResponse methods: @@ -192,6 +192,11 @@ def getResponseForQA(self, sentence, api_key, threshold): - response, similarity if the similarity < threshold will return None,0. """ + + #FIXME Remove this flag when all have done. + if self.github_qa_unupdated: + return None, 0 + cqa_response,cqa_sim = self.getResponseForCustomQA(sentence,api_key) if cqa_sim > threshold: return cus_response,cqa_sim @@ -223,11 +228,9 @@ def getResponseForCustomQA(self,sentence,api_key): Return: answer, similarity """ - if self.github_qa_unupdated: - return None, 0 - if api_key is None: return None, 0 + return self.answerer.getResponse(sentence,api_key) def getLoggerData(self): From 4dfd6b7c18888433aa60afbc9049587435098b74 Mon Sep 17 00:00:00 2001 From: F74026284 Date: Thu, 10 Nov 2016 23:58:52 +0800 Subject: [PATCH 05/12] Add basic features of custom rule matching. --- Chatbot/RuleMatcher/customRuleBase.py | 12 ++++++++++-- Chatbot/chatbot.py | 6 +++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Chatbot/RuleMatcher/customRuleBase.py b/Chatbot/RuleMatcher/customRuleBase.py index 171897b..7f3f271 100644 --- a/Chatbot/RuleMatcher/customRuleBase.py +++ b/Chatbot/RuleMatcher/customRuleBase.py @@ -1,6 +1,6 @@ # coding=utf-8 - import json +import random from .rulebase import RuleBase @@ -20,6 +20,9 @@ def customMatch(self, sentence, apiKey): Args: - sentence : 用戶輸入 - apiKey : 該名會員的聊天機器人金鑰 + + Return: response, 暫時目標 FIXME + - response : 批配出最適合的主題後,挑選用戶於該主題定義的句子隨機挑一回覆 """ # 清空之前讀入的規則 self.rules.clear() @@ -30,7 +33,12 @@ def customMatch(self, sentence, apiKey): self.buildCustomRules(customRules) # 進行比對 - return self.match(sentence, threshold=customThreshold, root=apiKey) + result_list,path = self.match(sentence, threshold=0.4, root=apiKey) + + # 取出最佳主題的自訂回覆集, 並隨機挑一句回覆 + bestResult = customRules[result_list[0]] + return bestResult["response"][random.randrange(0,len(bestResult["response"]))] + def buildCustomRules(self, rules): diff --git a/Chatbot/chatbot.py b/Chatbot/chatbot.py index 247887e..6638cff 100644 --- a/Chatbot/chatbot.py +++ b/Chatbot/chatbot.py @@ -180,9 +180,9 @@ def getResponseOnCustomDomain(self, sentence, api_key): """ if api_key is None: return None - else: - #TODO 根據 api_key 調適 self.custom_rulebase - pass + + #TODO 調適為能夠進行「多段式對話」 + return customMatch(sentence, api_key) def getResponseForQA(self, sentence, api_key, threshold): """ From 33f5e320cf8b11100d3f50f0908b8aed6c9e1d43 Mon Sep 17 00:00:00 2001 From: F74026284 Date: Fri, 11 Nov 2016 23:03:01 +0800 Subject: [PATCH 06/12] Add the offline-testing for custom rules matching. --- Chatbot/RuleMatcher/customRuleBase.py | 9 ++++++--- Chatbot/chatbot.py | 5 +++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Chatbot/RuleMatcher/customRuleBase.py b/Chatbot/RuleMatcher/customRuleBase.py index 7f3f271..e0b33b9 100644 --- a/Chatbot/RuleMatcher/customRuleBase.py +++ b/Chatbot/RuleMatcher/customRuleBase.py @@ -60,9 +60,8 @@ def buildCustomRules(self, rules): if domain not in self.rules: rule = Rule(domain, concepts_list, children_list, response, self.model) self.rules[domain] = rule - if is_root: - self.forest_base_roots.append(rule) else: + #TODO Block invalided rule type on front end. print("[Rules]: Detect a duplicate domain name '%s'." % domain) def getCustomDomainRules(self, key): @@ -70,4 +69,8 @@ def getCustomDomainRules(self, key): 依照 apiKey 取得該用戶的規則集 """ #TODO - return None + #FIXME 採用正規方式驗證 + + data = '[{"domain": "TESTING","response": ["這是個測試客製化規則的回覆1","這是個測試客製化規則的回覆2"],"concepts": ["測試"],"children": []}]' + + return data diff --git a/Chatbot/chatbot.py b/Chatbot/chatbot.py index 6638cff..e66a161 100644 --- a/Chatbot/chatbot.py +++ b/Chatbot/chatbot.py @@ -97,7 +97,8 @@ def listen(self, sentence, target=None, api_key=None, qa_threshold=50, qa_block_ return qa_response,None,None,None,None # matching on custom rules. - response = self.getResponseOnCustomDomain(sentence, api_key) + # FIXME Remove api_key TESTING VALUE + response = self.getResponseOnCustomDomain(sentence, api_key="TESTING") if response is not None: return response,None,None,None @@ -182,7 +183,7 @@ def getResponseOnCustomDomain(self, sentence, api_key): return None #TODO 調適為能夠進行「多段式對話」 - return customMatch(sentence, api_key) + return self.custom_rulebase.customMatch(sentence, api_key) def getResponseForQA(self, sentence, api_key, threshold): """ From 583997f06492ed657bd2f44157d6470d76f1fad9 Mon Sep 17 00:00:00 2001 From: F74026284 Date: Fri, 11 Nov 2016 23:18:55 +0800 Subject: [PATCH 07/12] Complete offline custom rule match testing. --- Chatbot/RuleMatcher/customRuleBase.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Chatbot/RuleMatcher/customRuleBase.py b/Chatbot/RuleMatcher/customRuleBase.py index e0b33b9..f9fee02 100644 --- a/Chatbot/RuleMatcher/customRuleBase.py +++ b/Chatbot/RuleMatcher/customRuleBase.py @@ -2,7 +2,7 @@ import json import random -from .rulebase import RuleBase +from .rulebase import RuleBase,Rule class CustomRuleBase(RuleBase): @@ -12,14 +12,14 @@ class CustomRuleBase(RuleBase): #TODO 客製化的「階段式對話」 - def customMatch(self, sentence, apiKey): + def customMatch(self, sentence, api_key): """ 比對 sentence 與用戶自定義的規則 Args: - sentence : 用戶輸入 - - apiKey : 該名會員的聊天機器人金鑰 + - api_key : 該名會員的聊天機器人金鑰 Return: response, 暫時目標 FIXME - response : 批配出最適合的主題後,挑選用戶於該主題定義的句子隨機挑一回覆 @@ -28,17 +28,19 @@ def customMatch(self, sentence, apiKey): self.rules.clear() # 重新建構規則表 - customRules = self.getCustomDomainRules(apiKey) - customRules = json.loads(customRules) - self.buildCustomRules(customRules) + custom_rules = self.getCustomDomainRules(api_key) + custom_rules = json.loads(custom_rules) + self.buildCustomRules(custom_rules) # 進行比對 - result_list,path = self.match(sentence, threshold=0.4, root=apiKey) + result_list,path = self.match(sentence, threshold=0.4, root=api_key) # 取出最佳主題的自訂回覆集, 並隨機挑一句回覆 - bestResult = customRules[result_list[0]] - return bestResult["response"][random.randrange(0,len(bestResult["response"]))] + best_domain = result_list[0][1] + target_rule = self.rules[best_domain] + res_num = target_rule.has_response() + return target_rule.response[random.randrange(0,res_num)] def buildCustomRules(self, rules): From 53b10f42ee775d3f4e3ccd2ec0efa1d8c3b726c5 Mon Sep 17 00:00:00 2001 From: F74026284 Date: Fri, 11 Nov 2016 23:29:39 +0800 Subject: [PATCH 08/12] Add a threshold for custom rule matching. * TODO: Maybe the threshold is able to define by user ? --- Chatbot/RuleMatcher/customRuleBase.py | 6 +++++- Chatbot/chatbot.py | 12 +++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Chatbot/RuleMatcher/customRuleBase.py b/Chatbot/RuleMatcher/customRuleBase.py index f9fee02..9aae783 100644 --- a/Chatbot/RuleMatcher/customRuleBase.py +++ b/Chatbot/RuleMatcher/customRuleBase.py @@ -12,7 +12,7 @@ class CustomRuleBase(RuleBase): #TODO 客製化的「階段式對話」 - def customMatch(self, sentence, api_key): + def customMatch(self, sentence, api_key, threshold): """ 比對 sentence 與用戶自定義的規則 @@ -35,6 +35,10 @@ def customMatch(self, sentence, api_key): # 進行比對 result_list,path = self.match(sentence, threshold=0.4, root=api_key) + # 確認最佳回應的有效性 + if result_list[0][0] < threshold: + return None + # 取出最佳主題的自訂回覆集, 並隨機挑一句回覆 best_domain = result_list[0][1] target_rule = self.rules[best_domain] diff --git a/Chatbot/chatbot.py b/Chatbot/chatbot.py index e66a161..2ad9f0f 100644 --- a/Chatbot/chatbot.py +++ b/Chatbot/chatbot.py @@ -146,7 +146,7 @@ def getResponseOnRootDomains(self, target=None): status = None response = None - handler = self.get_task_handler() + handler = self._get_task_handler() try: status,response = handler.get_response(self.speech, self.speech_domain, target) @@ -170,20 +170,22 @@ def getResponseOnRootDomains(self, target=None): handler.debug(self.extract_attr_log) return [response,status,target,candiates] - def getResponseOnCustomDomain(self, sentence, api_key): + def getResponseOnCustomDomain(self, sentence, api_key, threshold=.4): """ Fetch user's custom rules by api_key and then match the sentence with custom rules. Args: - sentence: user's raw input. (not segmented) - - api_key + - api_key : a string to recognize the user and get rules defined by him/she. + - threshold : a value between 0 to 1, to block the response which + has a similarity lower than threshold. """ if api_key is None: return None #TODO 調適為能夠進行「多段式對話」 - return self.custom_rulebase.customMatch(sentence, api_key) + return self.custom_rulebase.customMatch(sentence, api_key, threshold) def getResponseForQA(self, sentence, api_key, threshold): """ @@ -290,7 +292,7 @@ def _set_root_domain(self): else: self.root_domain = self.last_path.split('>')[0] - def get_task_handler(self, domain=None): + def _get_task_handler(self, domain=None): """ Get the instance of task handler based on the given domain. From f997e7060d8f78aa2776fd51608f595f4b8ee2d4 Mon Sep 17 00:00:00 2001 From: F74026284 Date: Sat, 12 Nov 2016 23:11:28 +0800 Subject: [PATCH 09/12] Add testing cqa module. --- Chatbot/QuestionAnswering/qaBase.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Chatbot/QuestionAnswering/qaBase.py b/Chatbot/QuestionAnswering/qaBase.py index 8ac5e88..1c341b2 100644 --- a/Chatbot/QuestionAnswering/qaBase.py +++ b/Chatbot/QuestionAnswering/qaBase.py @@ -29,10 +29,10 @@ def moduleTest(self): def getResponse(self, sentence, api_key=None): - if api_key is not None: - response = self.getCustomQA(sentence,api_key) - else: + if api_key is None: response = self.getGeneralQA(sentence) + else: + response = self.getCustomQA(sentence,api_key) return response def getGeneralQA(self,query,threshold=0): @@ -59,5 +59,11 @@ def randomPick(self, answers): def getCustomQA(self, sentence, api_key, threshold=50): #TODO GET USER'S QA BY api_key + #FIXME REPLACE TESTING DATA TO FORMAL ONE(GET BY DATABASE). + #i.e IMPLEMENT getUserQA(api_key) #customqa_list = json.loads(getUserQA(api_key)) + + data = '[{"Question":"你媽長得像魚人","Answers":["你媽也長得像魚人","你比痲瘋地精還臭"]}]' + customqa_list = json.load(data) + return None,0 From 68b2ee2f9ff80cb158fc13be8639126662ae48a1 Mon Sep 17 00:00:00 2001 From: F74026284 Date: Sat, 12 Nov 2016 23:15:01 +0800 Subject: [PATCH 10/12] Add custom question list for match() in FuzzyMatcher. --- Chatbot/QuestionAnswering/Matcher/fuzzyMatcher.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Chatbot/QuestionAnswering/Matcher/fuzzyMatcher.py b/Chatbot/QuestionAnswering/Matcher/fuzzyMatcher.py index 62ff6c5..f3db1d2 100644 --- a/Chatbot/QuestionAnswering/Matcher/fuzzyMatcher.py +++ b/Chatbot/QuestionAnswering/Matcher/fuzzyMatcher.py @@ -42,13 +42,14 @@ def tieBreak(self, query, i, j): else: return (raw2,j) - def match(self, query): + def match(self, query, custom_title=None): """ 讀入使用者 query,若語料庫中存在類似的句子,便回傳該句子與標號 Args: - query: 使用者欲查詢的語句 - removeStopWords: 清除 stopwords + - custom_title: 使用者欲比對的問題集 """ ratio = -1 target = "" @@ -60,7 +61,10 @@ def match(self, query): mQuery = "".join(mQuery) title_list = self.segTitles else: - title_list = self.titles + if custom_title is None: + title_list = self.titles + else: + title_list = custom_title mQuery = query for index,title in enumerate(title_list): From 8d0f326e3c570c61629a0a512750af147a915967 Mon Sep 17 00:00:00 2001 From: F74026284 Date: Sat, 12 Nov 2016 23:43:29 +0800 Subject: [PATCH 11/12] Complete offline custom qa module. --- Chatbot/QuestionAnswering/qaBase.py | 12 ++++++++++-- Chatbot/chatbot.py | 5 +++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Chatbot/QuestionAnswering/qaBase.py b/Chatbot/QuestionAnswering/qaBase.py index 1c341b2..0bbdf90 100644 --- a/Chatbot/QuestionAnswering/qaBase.py +++ b/Chatbot/QuestionAnswering/qaBase.py @@ -50,6 +50,7 @@ def getGeneralQA(self,query,threshold=0): return reply,sim def randomPick(self, answers): + try: answer = answers[random.randrange(0,len(answers))][0] except: @@ -64,6 +65,13 @@ def getCustomQA(self, sentence, api_key, threshold=50): #customqa_list = json.loads(getUserQA(api_key)) data = '[{"Question":"你媽長得像魚人","Answers":["你媽也長得像魚人","你比痲瘋地精還臭"]}]' - customqa_list = json.load(data) + customqa_list = json.loads(data) - return None,0 + # Load question to a list. + q_list = [qa["Question"] for qa in customqa_list] + #TODO customized threshold. + title,index = self.matcher.match(sentence,custom_title=q_list) + sim = self.matcher.getSimilarity() + if sim < threshold: + return None,0 + return customqa_list[index]["Answers"][random.randrange(0,len(customqa_list[index]["Answers"]))],sim diff --git a/Chatbot/chatbot.py b/Chatbot/chatbot.py index 2ad9f0f..ac43652 100644 --- a/Chatbot/chatbot.py +++ b/Chatbot/chatbot.py @@ -92,7 +92,8 @@ def listen(self, sentence, target=None, api_key=None, qa_threshold=50, qa_block_ # First of all, # Assume this sentence is for qa, but use a very high threshold. - qa_response, qa_sim = self.getResponseForQA(sentence,api_key,qa_threshold) + # FIXME Remove api_key TESTING VALUE + qa_response, qa_sim = self.getResponseForQA(sentence,"TESTING",qa_threshold) if qa_sim > qa_block_threshold: return qa_response,None,None,None,None @@ -202,7 +203,7 @@ def getResponseForQA(self, sentence, api_key, threshold): cqa_response,cqa_sim = self.getResponseForCustomQA(sentence,api_key) if cqa_sim > threshold: - return cus_response,cqa_sim + return cqa_response,cqa_sim gqa_response,gqa_sim = self.getResponseForGeneralQA(sentence) if gqa_sim > threshold: return gqa_response,gqa_sim From ff39bc75607b1359170700f423645a28c93a0711 Mon Sep 17 00:00:00 2001 From: F74026284 Date: Sat, 12 Nov 2016 23:43:38 +0800 Subject: [PATCH 12/12] Add unupdated flag. --- Chatbot/chatbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Chatbot/chatbot.py b/Chatbot/chatbot.py index ac43652..eb470e8 100644 --- a/Chatbot/chatbot.py +++ b/Chatbot/chatbot.py @@ -33,7 +33,7 @@ def __init__(self, name="MianBot"): self.custom_rulebase.model = self.console.rb.model # pass word2vec model # For QA - self.github_qa_unupdated = False + self.github_qa_unupdated = True if not self.github_qa_unupdated: self.answerer = qa.Answerer()