Skip to content

Commit

Permalink
Merge pull request zake7749#20 from zake7749/custom_rules_dev
Browse files Browse the repository at this point in the history
Custom rules dev
  • Loading branch information
zake7749 authored Nov 12, 2016
2 parents 7ee01c5 + ff39bc7 commit 58fc061
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 45 deletions.
8 changes: 6 additions & 2 deletions Chatbot/QuestionAnswering/Matcher/fuzzyMatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,14 @@ def tieBreak(self, query, i, j):
else:
return (raw2,j)

def match(self, query):
def match(self, query, custom_title=None):
"""
讀入使用者 query,若語料庫中存在類似的句子,便回傳該句子與標號
Args:
- query: 使用者欲查詢的語句
- removeStopWords: 清除 stopwords
- custom_title: 使用者欲比對的問題集
"""
ratio = -1
target = ""
Expand All @@ -60,7 +61,10 @@ def match(self, query):
mQuery = "".join(mQuery)
title_list = self.segTitles
else:
title_list = self.titles
if custom_title is None:
title_list = self.titles
else:
title_list = custom_title
mQuery = query

for index,title in enumerate(title_list):
Expand Down
30 changes: 22 additions & 8 deletions Chatbot/QuestionAnswering/qaBase.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,35 +29,49 @@ def moduleTest(self):

def getResponse(self, sentence, api_key=None):

if api_key is not None:
response = self.getCustomQA(sentence,api_key)
else:
if api_key is None:
response = self.getGeneralQA(sentence)
else:
response = self.getCustomQA(sentence,api_key)
return response

def getGeneralQA(self,query,threshold=50):
def getGeneralQA(self,query,threshold=0):

title,index = self.matcher.match(query)
sim = self.matcher.getSimilarity()
if sim < threshold:
return None
return None,0
else:
res = json.load(open(os.path.join(self.path+"/data/processed/reply/",str(int(index/1000))+'.json'),
'r',encoding='utf-8'))
targetId = index % 1000
candiates = self.evaluator.getBestResponse(res[targetId],topk=3)
reply = self.randomPick(candiates)
return reply
return reply,sim

def randomPick(self, answers):
    """
    Pick one entry of `answers` at random and return its first element.

    Args:
        - answers: an indexable collection of indexable entries; only
          element [0] of the chosen entry is returned.
    Return:
        - element [0] of the chosen entry, or None when `answers` is
          None, empty, or not indexable in the expected way.
    """
    try:
        return random.choice(answers)[0]
    except (TypeError, IndexError, KeyError, ValueError):
        # Best-effort lookup: a missing or malformed candidate list yields
        # "no answer" instead of an error, but unrelated exceptions
        # (e.g. KeyboardInterrupt) are no longer swallowed.
        return None

def getCustomQA(self, sentence, api_key):
def getCustomQA(self, sentence, api_key, threshold=50):

#TODO GET USER'S QA BY api_key
#FIXME REPLACE TESTING DATA TO FORMAL ONE(GET BY DATABASE).
#i.e IMPLEMENT getUserQA(api_key)
#customqa_list = json.loads(getUserQA(api_key))
return None

data = '[{"Question":"你媽長得像魚人","Answers":["你媽也長得像魚人","你比痲瘋地精還臭"]}]'
customqa_list = json.loads(data)

# Load question to a list.
q_list = [qa["Question"] for qa in customqa_list]
#TODO customized threshold.
title,index = self.matcher.match(sentence,custom_title=q_list)
sim = self.matcher.getSimilarity()
if sim < threshold:
return None,0
return customqa_list[index]["Answers"][random.randrange(0,len(customqa_list[index]["Answers"]))],sim
39 changes: 28 additions & 11 deletions Chatbot/RuleMatcher/customRuleBase.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# coding=utf-8

import json
import random

from .rulebase import RuleBase
from .rulebase import RuleBase,Rule

class CustomRuleBase(RuleBase):

Expand All @@ -12,25 +12,39 @@ class CustomRuleBase(RuleBase):

#TODO 客製化的「階段式對話」

def customMatch(self, sentence, apiKey):
def customMatch(self, sentence, api_key, threshold):

"""
比對 sentence 與用戶自定義的規則
Args:
- sentence : 用戶輸入
- apiKey : 該名會員的聊天機器人金鑰
- api_key : 該名會員的聊天機器人金鑰
Return: response, 暫時目標 FIXME
- response : 批配出最適合的主題後,挑選用戶於該主題定義的句子隨機挑一回覆
"""
# 清空之前讀入的規則
self.rules.clear()

# 重新建構規則表
customRules = self.getCustomDomainRules(apiKey)
customRules = json.loads(customRules)
self.buildCustomRules(customRules)
custom_rules = self.getCustomDomainRules(api_key)
custom_rules = json.loads(custom_rules)
self.buildCustomRules(custom_rules)

# 進行比對
return self.match(sentence, threshold=customThreshold, root=apiKey)
result_list,path = self.match(sentence, threshold=0.4, root=api_key)

# 確認最佳回應的有效性
if result_list[0][0] < threshold:
return None

# 取出最佳主題的自訂回覆集, 並隨機挑一句回覆
best_domain = result_list[0][1]
target_rule = self.rules[best_domain]
res_num = target_rule.has_response()

return target_rule.response[random.randrange(0,res_num)]

def buildCustomRules(self, rules):

Expand All @@ -52,14 +66,17 @@ def buildCustomRules(self, rules):
if domain not in self.rules:
rule = Rule(domain, concepts_list, children_list, response, self.model)
self.rules[domain] = rule
if is_root:
self.forest_base_roots.append(rule)
else:
#TODO Block invalided rule type on front end.
print("[Rules]: Detect a duplicate domain name '%s'." % domain)

def getCustomDomainRules(self, key):
"""
依照 apiKey 取得該用戶的規則集
"""
#TODO
return None
#FIXME 採用正規方式驗證

data = '[{"domain": "TESTING","response": ["這是個測試客製化規則的回覆1","這是個測試客製化規則的回覆2"],"concepts": ["測試"],"children": []}]'

return data
4 changes: 2 additions & 2 deletions Chatbot/RuleMatcher/rule/rule.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@

],
"concepts": [
"","","","好玩",""
"吃喝玩樂","逛街"
],
"children": [
"",
Expand Down Expand Up @@ -172,7 +172,7 @@
"domain": "病症",
"response": [],
"concepts": [
"病症","症狀","病徵","嘔吐"
"病症","症狀","病徵"
],
"children": [
"過敏",
Expand Down
76 changes: 54 additions & 22 deletions Chatbot/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class Chatbot(object):

def __init__(self, name="NCKU"):
def __init__(self, name="MianBot"):
self.name = name # The name of chatbot.

self.speech = '' # The lastest user's input
Expand Down Expand Up @@ -52,7 +52,7 @@ def waiting_loop(self):
res = self.listen(speech)
print(res[0])

def listen(self, sentence, target=None, api_key=None):
def listen(self, sentence, target=None, api_key=None, qa_threshold=50, qa_block_threshold=80):

"""
listen function is to encapsulate the following getResponse methods:
Expand Down Expand Up @@ -90,8 +90,16 @@ def listen(self, sentence, target=None, api_key=None):
# 區隔 custom rule 與 root rule 匹配的原因是 custom rule 並不支援多段式應答
# 若後續在模組上進行更動,可考慮將兩者合併,透過辨識 api_key 的有無來修改操作

# First of all,
# Assume this sentence is for qa, but use a very high threshold.
# FIXME Remove api_key TESTING VALUE
qa_response, qa_sim = self.getResponseForQA(sentence,"TESTING",qa_threshold)
if qa_sim > qa_block_threshold:
return qa_response,None,None,None,None

# matching on custom rules.
response = self.getResponseOnCustomDomain(sentence, api_key)
# FIXME Remove api_key TESTING VALUE
response = self.getResponseOnCustomDomain(sentence, api_key="TESTING")
if response is not None:
return response,None,None,None

Expand All @@ -104,14 +112,11 @@ def listen(self, sentence, target=None, api_key=None):
return response,stauts,target,candiates

# The result based on custom rules and general rules are not confident.
# Assume that there are no intent in the sentence, do query matching for
# question answering.
# Assume that there is no intent in the sentence; treat this sentence as a
# QA query again, but this time use a smaller threshold.
else:
response = self.getResponseForCustomQA(sentence,api_key)
if response is None:
response = self.getResponseForGeneralQA(sentence)
if response is not None:
return response,None,None,None
if qa_sim > 60:
return qa_response,None,None,None
else:
# This query has too low similarity for all matching methods.
# We can only send back a default response.
Expand Down Expand Up @@ -142,7 +147,7 @@ def getResponseOnRootDomains(self, target=None):
status = None
response = None

handler = self.get_task_handler()
handler = self._get_task_handler()

try:
status,response = handler.get_response(self.speech, self.speech_domain, target)
Expand All @@ -166,29 +171,55 @@ def getResponseOnRootDomains(self, target=None):
handler.debug(self.extract_attr_log)
return [response,status,target,candiates]

def getResponseOnCustomDomain(self, sentence, api_key):
def getResponseOnCustomDomain(self, sentence, api_key, threshold=.4):
"""
Fetch user's custom rules by api_key and then match the sentence with
custom rules.
Args:
- sentence: user's raw input. (not segmented)
- api_key
- api_key : a string that identifies the user and selects the rules defined by that user.
- threshold : a value between 0 to 1, to block the response which
has a similarity lower than threshold.
"""
if api_key is None:
return None
else:
#TODO 根據 api_key 調適 self.custom_rulebase
pass

#TODO 調適為能夠進行「多段式對話」
return self.custom_rulebase.customMatch(sentence, api_key, threshold)

def getResponseForQA(self, sentence, api_key, threshold):
"""
Try the custom QA lookup first, then the general QA lookup, and return
the first answer whose similarity is strictly greater than `threshold`.
Args:
- sentence : the user's input, forwarded to both QA lookups.
- api_key : forwarded to getResponseForCustomQA.
- threshold : the similarity a candidate must exceed to be returned.
Return:
- response, similarity
Returns None,0 when neither lookup exceeds `threshold` (a similarity
equal to `threshold` is also rejected), or when
self.github_qa_unupdated is set.
"""

#FIXME Remove this flag when all have done.
# Early out: no QA lookup is attempted while this flag is set.
if self.github_qa_unupdated:
return None, 0

# The custom QA result takes priority over the general one.
cqa_response,cqa_sim = self.getResponseForCustomQA(sentence,api_key)
if cqa_sim > threshold:
return cqa_response,cqa_sim
# Fall back to the general QA lookup.
gqa_response,gqa_sim = self.getResponseForGeneralQA(sentence)
if gqa_sim > threshold:
return gqa_response,gqa_sim
return None,0

def getResponseForGeneralQA(self, sentence):

"""
Listen user's input and return a response which is based on our
knowledge base.
Return:
answer, similarity
"""
if self.github_qa_unupdated:
return None
return None, 0

return self.answerer.getResponse(sentence)

Expand All @@ -197,12 +228,13 @@ def getResponseForCustomQA(self,sentence,api_key):
"""
Listen user's input and return a response which is based on a cutsom
knowledge base.
"""
if self.github_qa_unupdated:
return None
Return:
answer, similarity
"""
if api_key is None:
return None
return None, 0

return self.answerer.getResponse(sentence,api_key)

def getLoggerData(self):
Expand Down Expand Up @@ -261,7 +293,7 @@ def _set_root_domain(self):
else:
self.root_domain = self.last_path.split('>')[0]

def get_task_handler(self, domain=None):
def _get_task_handler(self, domain=None):

"""
Get the instance of task handler based on the given domain.
Expand Down

0 comments on commit 58fc061

Please sign in to comment.