forked from zake7749/Chatbot
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
18 changed files
with
1,459 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,103 @@ | ||
# chatbot-backend | ||
# Chatbot | ||
|
||
Chatbot 可以將對話向量化,基於與規則庫間的主題相似度匹配,來依照使用者可能的需求提供答覆。 | ||
|
||
## 匹配示例 | ||
|
||
更多的樣例可以參照 `example/output.txt` | ||
|
||
Case# 明天早上叫我起床。 | ||
------------------ | ||
0.4521 鬧鐘 起床 | ||
0.3904 天氣 早上 | ||
0.3067 住宿 起床 | ||
0.1747 病症 起床 | ||
0.1580 購買 早上 | ||
0.1270 股票 早上 | ||
0.1096 觀光 早上 | ||
|
||
Case# 明天上海會不會下雨? | ||
------------------ | ||
0.5665 天氣 下雨 | ||
0.3918 鬧鐘 下雨 | ||
0.1807 病症 下雨 | ||
0.1362 住宿 下雨 | ||
0.0000 股票 | ||
0.0000 觀光 | ||
0.0000 購買 | ||
|
||
## 環境需求 | ||
|
||
* 安裝 python3 開發環境 | ||
* 安裝 [gensim – Topic Modelling in Python](https://github.com/RaRe-Technologies/gensim) | ||
* 安裝 [jieba 结巴中文分词 ](https://github.com/fxsjy/jieba) | ||
* 準備已訓練好的中文詞向量,並依其所在路徑調整 `Console` 類別的初始化參數 `model_path`: | ||
``` | ||
import console | ||
c = console.Console(model_path='your_model') | ||
``` | ||
|
||
## 使用方式 | ||
|
||
### 聊天機器人 | ||
|
||
演示可見 `python3 chatbot.py` | ||
|
||
### 計算匹配度 | ||
|
||
``` | ||
import console | ||
c = console.Console(model_path='your_model') | ||
speech = input('Input a sentence:') | ||
res,path = c.rule_match(speech) #取得已照相似度排序的規則 | ||
c.write_output(speech,res,path) | ||
``` | ||
|
||
## 規則格式 | ||
|
||
規則採用 JSON 格式,樣板規則放置於 `\RuleMatcher\rule` 中,其結構如下: | ||
|
||
``` | ||
{ | ||
"domain": 代表這個規則的抽象概念, | ||
"response": [ | ||
對應到該規則後, | ||
機器人所會給予的回覆, | ||
機器人會隨機抽取一條 response | ||
], | ||
"concepts": [ | ||
該規則的可能表示方式 | ||
], | ||
"children": [該規則的子規則,如購買 -> 購買飲料,購買衣服......] | ||
} | ||
``` | ||
|
||
### Example | ||
|
||
``` | ||
{ | ||
"domain": "購買", | ||
"response": [ | ||
"正在將您導向購物模組" | ||
], | ||
"concepts": [ | ||
"購買","購物","訂購" | ||
], | ||
"children": [ | ||
"購買生活用品", | ||
"購買家電", | ||
"購買食物", | ||
"購買飲料", | ||
"購買鞋子", | ||
"購買衣服", | ||
"購買電腦產品" | ||
] | ||
}, | ||
``` | ||
|
||
## 開發日誌 | ||
|
||
## TODO | ||
* 追加規則案例 | ||
* 實作平台 adapter | ||
|
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,7 +56,7 @@ | |
"domain": "購買", | ||
"response": [], | ||
"concepts": [ | ||
"購買" | ||
"購買","購物","訂購" | ||
], | ||
"children": [ | ||
"購買生活用品", | ||
|
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
import console

# Build the rule matcher with its default constructor arguments.
matcher = console.Console()

# Read one sentence from the user.
sentence = input('Input a sentence:')

# rule_match returns the matched rules together with a second value that is
# passed straight through to write_output.
# NOTE(review): the README describes the first value as rules sorted by
# similarity; the meaning of the second value is not visible here - confirm.
matched_rules, match_path = matcher.rule_match(sentence)
matcher.write_output(sentence, matched_rules, match_path)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#coding=utf-8 | ||
#author: Justin Yang | ||
|
||
import requests | ||
from bs4 import BeautifulSoup | ||
import lxml | ||
import time | ||
import sys | ||
import os | ||
|
||
def main():
    """Build the department -> diseases mapping and write it to disk.

    The mapping is read from the files under ``data`` by getDCPair and
    written to ``result/dcpair`` by writeDDPair2file.
    """
    # The symptom set is only consumed by the alternative pipeline
    # (matchDiseaseWithSymptoms over "rawpage", written with
    # writeDSPairs2file to 'result/without_suffix_6'), which is currently
    # disabled. The load itself is kept so the script behaves as before.
    symptoms = loadDataWithSet("symptoms.txt")

    department_diseases = getDCPair("data")
    writeDDPair2file('result/dcpair', department_diseases)
|
||
def loadDataWithSet(path):
    """Load a text file into a set, one element per line.

    Args:
        path: Path of the text file to read (decoded as UTF-8).

    Returns:
        A set holding every line of the file with its newline characters
        stripped. Duplicate lines collapse into a single element.
    """
    # Decode explicitly as UTF-8, matching the writers in this module, so the
    # result does not depend on the platform's default encoding.
    with open(path, 'r', encoding='utf-8') as source:
        return {line.strip('\n') for line in source}
|
||
def matchDiseaseWithSymptoms(sym, dir):
    """Extract symptom -> diseases pairs from the raw pages in a directory.

    Every non-hidden file in ``dir`` is parsed as HTML. All tables are
    emptied first, then the text of each ``p > a`` link that is a member of
    ``sym`` is recorded as a symptom of the disease named by the file.

    Args:
        sym: Collection of known symptom names; only membership is tested.
        dir: Directory holding the pages. File names are expected to look
            like ``d_xxx.txt``; the part between the first ``_`` and the
            first ``.`` is used as the disease name.

    Returns:
        A dict mapping each matched symptom text to the set of disease
        names whose page linked to it.

    Raises:
        IndexError: If a page with a matching link has no ``_`` in the part
            of its file name before the first ``.``.
    """
    dic = {}

    for filename in os.listdir(dir):
        # Skip hidden files such as .DS_Store.
        if filename.startswith('.'):
            continue

        # Parse inside a context manager so the page file is closed instead
        # of being left to the garbage collector.
        with open(os.path.join(dir, filename)) as page:
            soup = BeautifulSoup(page, "lxml")

        # Empty every table so that links inside them (the symptom lookup
        # box, with URLs like
        # http://cht.a-hospital.com/w/%E6%96%9C%E8%A7%86) are not matched.
        for table in soup.find_all('table'):
            table.clear()

        for candidate in soup.select("p > a"):
            symptom = candidate.text
            if symptom in sym:
                # "d_xxx.txt" -> "d_xxx" -> "xxx"
                disease_meta = filename.split(".")[0]
                disease = disease_meta.split("_")[1]
                dic.setdefault(symptom, set()).add(disease)
    return dic
|
||
def getDCPair(dir):
    """Collect the set of diseases listed for each department.

    Every non-hidden file in ``dir`` stands for one department, named by the
    part of the file name before the first ``.``. Each line of such a file
    has the form ``symptom:disease1,disease2,...``; the diseases of all
    lines are merged into one set per department.

    Args:
        dir: Directory containing one UTF-8 text file per department.

    Returns:
        A dict mapping department name to the set of disease names found in
        its file. A file without any usable line maps to an empty set.
    """
    dic = {}
    for department in os.listdir(dir):
        # Skip hidden files such as .DS_Store.
        if department.startswith('.'):
            continue

        diseaseSet = set()
        # Read as UTF-8, the encoding the writers in this module use.
        with open(os.path.join(dir, department), encoding='utf-8') as source:
            for line in source:
                # Drop the newline and the trailing comma left by the writer.
                fields = line.strip('\n').strip(',').split(':')
                if len(fields) < 2:
                    # Blank line or no "symptom:" prefix: nothing to collect.
                    continue
                # Ignore empty names produced by lines such as "symptom:".
                diseaseSet.update(name for name in fields[1].split(',') if name)
        dic[department.split('.')[0]] = diseaseSet
    return dic
|
||
def writeDSPairs2file(filename, dic):
    """Write the symptom -> diseases mapping, one symptom per line.

    Each line has the form ``symptom:disease1,disease2,`` (every disease is
    followed by a comma). A disease is left out when it equals the symptom
    itself, is "口臭", or contains "急性" or "慢性".

    NOTE(review): the original note said acute/chronic variants are treated
    as the base disease; the code only omits them, so the base name must
    already be present in the set for that to hold - confirm.

    Args:
        filename: Path of the output file (written as UTF-8, overwritten).
        dic: Mapping of symptom name to an iterable of disease names.
    """
    def keep(name, symptom):
        # Reject the symptom itself and the hard-coded "口臭" entry.
        if name == symptom or name == "口臭":
            return False
        # Reject names marked as acute or chronic.
        return not ("急性" in name or "慢性" in name)

    with open(filename, 'w', encoding='utf-8') as out:
        for symptom, diseases in dic.items():
            entries = [name + "," for name in diseases if keep(name, symptom)]
            out.write(symptom + ":" + "".join(entries) + '\n')
|
||
def writeDDPair2file(filename, dic):
    """Write the department -> diseases mapping, one department per line.

    Each line has the form ``department:disease1,disease2,`` (every disease
    is followed by a comma).

    Args:
        filename: Path of the output file (written as UTF-8, overwritten).
        dic: Mapping of department name to an iterable of disease names.
    """
    with open(filename, 'w', encoding='utf-8') as out:
        for department, diseases in dic.items():
            entries = "".join(name + "," for name in diseases)
            out.write(department + ":" + entries + '\n')
|
||
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Oops, something went wrong.