Skip to content

Commit

Permalink
removed unused files.
Browse files Browse the repository at this point in the history
  • Loading branch information
zake7749 committed Aug 21, 2016
1 parent 455b76b commit 70aae39
Show file tree
Hide file tree
Showing 18 changed files with 1,459 additions and 2 deletions.
104 changes: 103 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,103 @@
# chatbot-backend
# Chatbot

Chatbot 可以將對話向量化,基於與規則庫間的主題相似度匹配,來依照使用者可能的需求提供答覆。

## 匹配示例

更多的樣例可以參照 `example/output.txt`

Case# 明天早上叫我起床。
------------------
0.4521 鬧鐘 起床
0.3904 天氣 早上
0.3067 住宿 起床
0.1747 病症 起床
0.1580 購買 早上
0.1270 股票 早上
0.1096 觀光 早上

Case# 明天上海會不會下雨?
------------------
0.5665 天氣 下雨
0.3918 鬧鐘 下雨
0.1807 病症 下雨
0.1362 住宿 下雨
0.0000 股票
0.0000 觀光
0.0000 購買

## 環境需求

* 安裝 python3 開發環境
* 安裝 [gensim – Topic Modelling in Python](https://github.com/RaRe-Technologies/gensim)
* 安裝 [jieba 结巴中文分词](https://github.com/fxsjy/jieba)
* 準備已訓練好的中文詞向量,並依其所在路徑調整 `Console` 類別的初始化參數 `model_path`:
```
import console
c = console.Console(model_path='your_model')
```

## 使用方式

### 聊天機器人

執行 `python3 chatbot.py` 即可觀看演示。

### 計算匹配度

```
import console
c = console.Console(model_path='your_model')
speech = input('Input a sentence:')
res,path = c.rule_match(speech) #取得已照相似度排序的規則
c.write_output(speech,res,path)
```

## 規則格式

規則採用 JSON 格式,樣板規則放置於 `RuleMatcher/rule/` 中,格式如下:

```
{
"domain": 代表這個規則的抽象概念,
"response": [
對應到該規則後,
機器人所會給予的回覆,
機器人會隨機抽取一條 response
],
"concepts": [
該規則的可能表示方式
],
"children": [該規則的子規則,如購買 -> 購買飲料,購買衣服......]
}
```

### Example

```
{
"domain": "購買",
"response": [
"正在將您導向購物模組"
],
"concepts": [
"購買","購物","訂購"
],
"children": [
"購買生活用品",
"購買家電",
"購買食物",
"購買飲料",
"購買鞋子",
"購買衣服",
"購買電腦產品"
]
},
```

## 開發日誌

## TODO
* 追加規則案例
* 實作平台 adapter

Binary file modified RuleMatcher/__pycache__/__init__.cpython-35.pyc
Binary file not shown.
Binary file modified RuleMatcher/__pycache__/rulebase.cpython-35.pyc
Binary file not shown.
2 changes: 1 addition & 1 deletion RuleMatcher/rule/rule.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
"domain": "購買",
"response": [],
"concepts": [
"購買"
"購買","購物","訂購"
],
"children": [
"購買生活用品",
Expand Down
Binary file modified __pycache__/console.cpython-35.pyc
Binary file not shown.
5 changes: 5 additions & 0 deletions demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Demo script: match one sentence typed by the user against the rule base
# and hand the outcome to the console's own output writer.
import console

matcher = console.Console()
sentence = input('Input a sentence:')
# rule_match yields two values (per the README: the rules ordered by
# similarity, plus a path value); both are forwarded untouched.
ranked_rules, match_path = matcher.rule_match(sentence)
matcher.write_output(sentence, ranked_rules, match_path)
Empty file added medicine/__init__.py
Empty file.
99 changes: 99 additions & 0 deletions medicine/combineData.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#coding=utf-8
#author: Justin Yang

import requests
from bs4 import BeautifulSoup
import lxml
import time
import sys
import os

def main():
    """Build the department-to-diseases table from ``data`` and save it.

    Reads every file under ``data`` through ``getDCPair`` and writes the
    resulting mapping to ``result/dcpair`` via ``writeDDPair2file``.
    """
    # The symptom set is only consumed by the disabled pipeline below; it is
    # still loaded so that a missing ``symptoms.txt`` keeps failing early.
    symptoms = loadDataWithSet("symptoms.txt")

    # Disabled alternative pipeline (symptom -> diseases from raw pages):
    #   result = matchDiseaseWithSymptoms(symptoms, "rawpage")
    #   writeDSPairs2file('result/without_suffix_6', result)

    department_table = getDCPair("data")
    writeDDPair2file('result/dcpair', department_table)

def loadDataWithSet(path):
    """Load a newline-separated text file into a set of its lines.

    Args:
        path: Location of the text file; one element per line.

    Returns:
        A set holding every distinct non-empty line with its trailing
        newline removed. Other whitespace is left untouched.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 (the encoding this module writes its result
    # files in) rather than relying on the platform default.
    with open(path, 'r', encoding='utf-8') as source:
        stripped = (line.strip('\n') for line in source)
        # Blank lines are dropped: an empty-string element would match any
        # link with empty text when the set is used for membership tests.
        return {element for element in stripped if element}

def matchDiseaseWithSymptoms(sym, dir):
    """Extract symptom -> diseases pairs from the raw pages in a directory.

    Each non-hidden file in ``dir`` is parsed as HTML. Every ``<a>`` that is
    a direct child of a ``<p>`` and whose text is a known symptom links that
    symptom to the disease named by the file.

    Args:
        sym: Collection of known symptom names (membership is tested with
            ``in``).
        dir: Directory holding the raw pages. File names are expected to
            look like ``d_xxx.txt``, where ``xxx`` is the disease name.

    Returns:
        A dict mapping each matched symptom to the set of diseases whose
        page mentions it.

    Raises:
        IndexError: If a page contains a matching symptom but its file name
            has no ``_`` separator.
    """
    dic = {}

    # List the directory exactly as given so that absolute paths work too
    # (prefixing "./" would silently turn them into relative ones).
    for filename in os.listdir(dir):
        # Skip hidden entries such as .DS_Store.
        if filename.startswith('.'):
            continue

        # NOTE(review): the page is decoded with the platform default
        # encoding, as before - confirm the encoding of the raw pages.
        # The context manager closes the handle once parsing is done.
        with open(os.path.join(dir, filename)) as page:
            soup = BeautifulSoup(page, "lxml")

        # Empty every <table> first so links inside tables (e.g. the
        # symptom-lookup topic box, with URLs like
        # http://cht.a-hospital.com/w/%E6%96%9C%E8%A7%86) are not picked up.
        for table in soup.find_all('table'):
            table.clear()

        matched = [link.text for link in soup.select("p > a")
                   if link.text in sym]
        if not matched:
            continue

        # File names follow d_xxx.txt; the disease is the part after the
        # first underscore of the stem. Computed once per file.
        disease = filename.split(".")[0].split("_")[1]
        for symptom in matched:
            dic.setdefault(symptom, set()).add(disease)
    return dic

def getDCPair(dir):
    """Collect, per department file, the set of diseases it lists.

    Every non-hidden file in ``dir`` stands for one department. Each line
    is expected to look like ``symptom:disease1,disease2,...``; the diseases
    of all lines are merged into one set per file.

    Args:
        dir: Directory containing one data file per department.

    Returns:
        A dict mapping the department name (the file name up to its first
        ``.``) to the set of disease names found in that file.

    Raises:
        OSError: If the directory or one of its files cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    dic = {}
    # List the directory exactly as given so absolute paths work as well.
    for department in os.listdir(dir):
        # Skip hidden entries such as .DS_Store.
        if department.startswith('.'):
            continue

        diseaseSet = set()
        # The result files of this module are written as UTF-8, so read
        # them back the same way instead of using the platform default.
        with open(os.path.join(dir, department), encoding='utf-8') as records:
            for line in records:
                fields = line.strip('\n').strip(',').split(':')
                # Blank lines or lines without a colon carry no diseases.
                if len(fields) < 2:
                    continue
                # Drop empty names, which appear for lines like "symptom:".
                diseaseSet.update(
                    name for name in fields[1].split(',') if name)
        dic[department.split('.')[0]] = diseaseSet
    return dic

def writeDSPairs2file(filename, dic):
    """Write the symptom -> diseases mapping, one symptom per line.

    Each line has the form ``symptom:disease1,disease2,`` (every disease is
    followed by a comma). A disease is left out when it equals the symptom
    itself, equals "口臭", or contains "急性" or "慢性". The original author's
    note says acute/chronic variants are meant to count as the plain
    disease; this function only omits them, it does not rename them.

    Args:
        filename: Output path; the file is overwritten and encoded as UTF-8.
        dic: Mapping from symptom name to an iterable of disease names.
    """
    skipped_markers = ("急性", "慢性")
    with open(filename, 'w', encoding='utf-8') as res:
        for symptom, diseaseSet in dic.items():
            kept = []
            for disease in diseaseSet:
                if disease == symptom or disease == "口臭":
                    continue
                if any(marker in disease for marker in skipped_markers):
                    continue
                kept.append(disease + ",")
            res.write(symptom + ":" + "".join(kept) + '\n')

def writeDDPair2file(filename, dic):
    """Write the department -> diseases mapping, one department per line.

    Each line has the form ``department:disease1,disease2,`` where every
    disease is followed by a comma, including the last one.

    Args:
        filename: Output path; the file is overwritten and encoded as UTF-8.
        dic: Mapping from department name to an iterable of disease names.
    """
    with open(filename, 'w', encoding='utf-8') as output:
        for department in dic:
            entries = "".join(disease + "," for disease in dic[department])
            output.write(department + ":" + entries + '\n')

# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__=="__main__":
    main()
Loading

0 comments on commit 70aae39

Please sign in to comment.