Skip to content

Commit

Permalink
Update module for custom matching.
Browse files Browse the repository at this point in the history
  • Loading branch information
zake7749 committed Oct 29, 2016
1 parent 3508c23 commit a5a6ac3
Show file tree
Hide file tree
Showing 26 changed files with 97 additions and 1,088 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ __pycache__
*.model
*.log
.DS_Store
*log.txt
*log.txt
63 changes: 63 additions & 0 deletions Chatbot/RuleMatcher/customRuleBase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import json

from .rulebase import RuleBase

class CustomRuleBase(RuleBase):

    """
    Rule base used for per-user (custom) matching.

    The rule table is emptied and rebuilt from the user's own rule set on
    every call to customMatch, so rules loaded for one request are never
    reused by the next one.
    """

    # TODO: support customised multi-stage dialogues.

    def customMatch(self, sentence, apiKey, threshold=0):

        """
        Match sentence against the rules defined by the owner of apiKey.
        Args:
            - sentence  : the user input, forwarded unchanged to match().
            - apiKey    : the chatbot key of the member. It is used to fetch
                          the member's rule set and is also handed to match()
                          as the root to search from.
            - threshold : forwarded to match() as its threshold argument.
                          Defaults to 0, the same default match() declares.
        Return:
            whatever match() returns for this sentence.
        Raises:
            ValueError: when no rule set could be fetched for apiKey, or when
                        json.loads rejects the fetched text.
        """
        # Drop the rules loaded by the previous request.
        self.rules.clear()

        # Rebuild the rule table from this user's rule set.
        raw_rules = self.getCustomDomainRules(apiKey)
        if raw_rules is None:
            # json.loads(None) would fail with an opaque TypeError; say what
            # actually went wrong instead.
            raise ValueError("No custom rules are available for the given api key.")
        self.buildCustomRules(json.loads(raw_rules))

        # NOTE(review): root=apiKey presumably relies on the rule set holding
        # a domain named after the key -- confirm once the fetch is implemented.
        return self.match(sentence, threshold=threshold, root=apiKey)

    def buildCustomRules(self, rules, is_root=False):

        """
        Convert rules from plain dictionaries into Rule objects.
        Args:
            - rules   : an iterable of dictionaries (as produced by json.loads),
                        each holding the keys "domain", "concepts", "children"
                        and "response".
            - is_root : when True, every newly built rule is also appended to
                        self.forest_base_roots. customMatch leaves this False
                        because it always searches from an explicit root.
        """
        assert self.model is not None, "Please load the model before loading rules."

        # Rule is not imported at the top of this file.
        # NOTE(review): presumably it is available from .rulebase next to
        # RuleBase -- confirm the module that really provides it.
        from .rulebase import Rule

        for entry in rules:

            domain = entry["domain"]
            concepts_list = entry["concepts"]
            children_list = entry["children"]
            response = entry["response"]

            if domain in self.rules:
                # The first definition of a domain wins; later ones are reported.
                print("[Rules]: Detect a duplicate domain name '%s'." % domain)
                continue

            built = Rule(domain, concepts_list, children_list, response, self.model)
            self.rules[domain] = built
            if is_root:
                self.forest_base_roots.append(built)

    def getCustomDomainRules(self, key):
        """
        Fetch the rule set that belongs to the user identified by key.

        Not implemented yet: always returns None, which customMatch reports
        as a ValueError.
        """
        # TODO: fetch the user's rules.
        return None
4 changes: 2 additions & 2 deletions Chatbot/RuleMatcher/rule/rule.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@

],
"concepts": [
"","","好吃","","","","好玩","","","好看","",""
"","","","好玩",""
],
"children": [
"",
Expand Down Expand Up @@ -172,7 +172,7 @@
"domain": "病症",
"response": [],
"concepts": [
"病症","症狀","病徵"
"病症","症狀","病徵","嘔吐"
],
"children": [
"過敏",
Expand Down
11 changes: 11 additions & 0 deletions Chatbot/RuleMatcher/rulebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ def match(self, sentence, topk=1, threshold=0, root=None):
Return:
a list holds the top k-th rules and the classification tree travel path.
"""

log = open("matching_log.txt",'w',encoding='utf-8')

assert self.model is not None, "Please load the model before any match."

result_list = []
Expand All @@ -225,6 +228,14 @@ def match(self, sentence, topk=1, threshold=0, root=None):
result_list = sorted(result_list, reverse=True , key=lambda k: k[0])
top_domain = result_list[0][1] # get the best matcher's term.

#Output matching_log.
log.write("---")
for result in result_list:
s,d,m = result
log.write("Sim: %f, Domain: %s, Matchee: %s\n" % (s,d,m))
log.write("---")


if self.rules[top_domain].has_child():
result_list = []
term_trans += top_domain+'>'
Expand Down
5 changes: 0 additions & 5 deletions Chatbot/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,11 +158,6 @@ def get_task_handler(self, domain=None):

return handler

def getCustomDomainRules(self, key):
    """
    Placeholder for fetching custom domain rules; currently always returns None.

    Args:
        - key : not used by the current body.
                NOTE(review): presumably the user's api key -- confirm.
    """
    # TODO: not implemented yet.
    return None

def getCustomQARules(self, key):
"""
Expand Down
28 changes: 19 additions & 9 deletions Chatbot/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,26 @@

import jieba
import jieba.analyse

import RuleMatcher.rulebase as rulebase
import RuleMatcher.customRuleBase as crb

def main():
    """Build a Console and enter its interactive mode."""
    Console().listen()

class Console(object):

"""
Build some nlp function as an package.
"""

def __init__(self,model_path="model/ch-corpus-3sg.bin",
rule_path="RuleMatcher/rule/",
stopword="jieba_dict/stopword.txt",
jieba_dic="jieba_dict/dict.txt.big",
jieba_user_dic="jieba_dict/userdict.txt"):
print("[Console] Building a console...")
print("*********************************")
try:

cur_dir = os.getcwd()
Expand All @@ -34,11 +39,8 @@ def __init__(self,model_path="model/ch-corpus-3sg.bin",
self.rb = rulebase.RuleBase()
print("[Console] Loading vector model...")
self.rb.load_model(model_path)
print("[Console] Vector model has loaded.")
print("[Console] Loading pre-defined rules.")
self.rb.load_rules_from_dic(rule_path)
print("[Console] Rules have loaded.")
print("*********************************")
print("[Console] Initialized successfully :>")

os.chdir(cur_dir)
Expand All @@ -48,6 +50,9 @@ def __init__(self,model_path="model/ch-corpus-3sg.bin",
print(repr(e))
exit()

self.cusRuleBase = crb.CustomRuleBase()
self.cusRuleBase.model = self.rb.model

def listen(self):
#into interactive console
while True:
Expand Down Expand Up @@ -141,7 +146,7 @@ def word_segment(self, sentence):
keyword.append(word)
return keyword

def rule_match(self, sentence, best_only=False, search_from=None, segmented=False):
def rule_match(self, sentence, best_only=False, search_from=None, segmented=False, api_key=None):

"""
Match the sentence with rules.
Expand All @@ -152,6 +157,7 @@ def rule_match(self, sentence, best_only=False, search_from=None, segmented=Fals
- root : a domain name, then the rule match will start
at searching from that domain, not from forest roots.
- segmented : the sentence is segmented or not.
- api_key : a key to fetch custom rules in the database.
Return:
- a list of candiate rule
- the travel path of classification tree.
Expand All @@ -162,16 +168,20 @@ def rule_match(self, sentence, best_only=False, search_from=None, segmented=Fals
else:
keyword = self.word_segment(sentence)

if search_from is None: # use for rule matching.
result_list,path = self.rb.match(keyword,threshold=0.1)
else: # use for reasoning.
result_list,path = self.rb.match(keyword,threshold=0.1,root=search_from)
if api_key is None:
if search_from is None: # use for classification (rule matching).
result_list,path = self.rb.match(keyword,threshold=0.1)
else: # use for reasoning.
result_list,path = self.rb.match(keyword,threshold=0.1,root=search_from)
else:
result_list,path = self.cusRuleBase.customMatch()

if best_only:
return [result_list[0], path]
else:
return [result_list, path]


def get_response(self, rule_id):

"""
Expand Down
1 change: 1 addition & 0 deletions Chatbot/jieba_dict/stopword.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1217,5 +1217,6 @@ $
好像
幾個
想要
覺得
直到
Loading

0 comments on commit a5a6ac3

Please sign in to comment.