-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
12 changed files
with
358 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
version: "2.0" | ||
nlu: | ||
- intent: want_to_buy_finance | ||
examples: | | ||
- 我想买理财 | ||
- 我想买一款理财产品 | ||
- 有什么样的理财产品可以购买 | ||
- 推荐一款理财产品 | ||
- 我想买[5万块](amount-of-money)的[鑫利贷](finance_product) | ||
- 我要买[五千块](amount-of-money)的[天天赢理财](finance_product) | ||
- 帮我买个理财产品吧 | ||
- 给我推荐几款理财产品 | ||
- 有没有理财产品卖 | ||
- 我想买一份理财 | ||
- 我想买10元的[鑫利](finance_product)理财 | ||
- 我要买理财 | ||
- 买理财 | ||
- 还有什么理财产品? | ||
- intent: want_to_other_recommand | ||
examples: | | ||
- 请帮我买一份[基金](finance_product) | ||
- 我想看一下股票型[基金](finance_type) | ||
- 有[股票](finance_type)吗 | ||
- 有[基金](finance_type)吗 | ||
- 还有其他的吗 | ||
- 我想看看其他的 | ||
- 有其他推荐吗 | ||
- 有其他产品吗 | ||
- 有理财吗 | ||
- 有别的理财产品吗 | ||
- 有保险吗 | ||
- 有柜台债吗 | ||
- 有大额存单吗 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,25 @@ | ||
version: "2.0" | ||
|
||
rules: | ||
#### | ||
- rule: Activate purchase finace when no other form is active | ||
condition: | ||
# this condition allows stories to handle form switching | ||
- active_loop: null | ||
steps: | ||
- intent: want_to_buy_finance | ||
- action: recommand_finance_product | ||
- action: purchase_finance_form | ||
- active_loop: purchase_finance_form | ||
|
||
- rule: Submit purchase_finance_form while not switched from previous form | ||
condition: | ||
- active_loop: purchase_finance_form | ||
- slot_was_set: | ||
- previous_form_name: null | ||
steps: | ||
- action: purchase_finance_form | ||
- active_loop: null | ||
- slot_was_set: | ||
- requested_slot: null | ||
- action: buy_financial_products | ||
#rules: | ||
# #### | ||
# - rule: Activate purchase finance when no other form is active | ||
# condition: | ||
# # this condition allows stories to handle form switching | ||
# - active_loop: null | ||
# steps: | ||
# - intent: want_to_buy_finance | ||
# - action: recommand_finance_product | ||
# - action: purchase_finance_form | ||
# - active_loop: purchase_finance_form | ||
# | ||
# - rule: Submit purchase_finance_form while not switched from previous form | ||
# condition: | ||
# - active_loop: purchase_finance_form | ||
# - slot_was_set: | ||
# - previous_form_name: null | ||
# steps: | ||
# - action: purchase_finance_form | ||
# - active_loop: null | ||
# - slot_was_set: | ||
# - requested_slot: null | ||
# - action: buy_financial_products |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
@author:XuMing(xuming624@qq.com) | ||
@description: 文本语义相似度计算和文本匹配搜索 | ||
""" | ||
import sys | ||
|
||
sys.path.append('..') | ||
from similarities import Similarity | ||
|
||
# Demo script: exercises the three entry points of ``Similarity`` used below
# (pairwise score, list-vs-list score matrix, corpus search).

# 1. Compute cosine similarity between two sentences.
sentences = ['如何更换花呗绑定银行卡',
             '花呗更改绑定银行卡']
corpus = [
    '花呗更改绑定银行卡',
    '我什么时候开通了花呗',
    '俄罗斯警告乌克兰反对欧盟协议',
    '暴风雨掩埋了东北部;新泽西16英寸的降雪',
    '中央情报局局长访问以色列叙利亚会谈',
    '人在巴基斯坦基地的炸弹袭击中丧生',
]
model = Similarity(model_name_or_path="shibing624/text2vec-base-chinese")
print(model)
similarity_score = model.similarity(sentences[0], sentences[1])
print(f"{sentences[0]} vs {sentences[1]}, score: {float(similarity_score):.4f}")

print('-' * 50 + '\n')
# 2. Compute similarity between two lists.
similarity_scores = model.similarity(sentences, corpus)
# Convert to a NumPy array once; the original re-ran ``.numpy()`` for every
# printed cell.  Rows are indexed by sentence and columns by corpus entry,
# which is how the original ``[i][j]`` lookup addressed the result.
score_matrix = similarity_scores.numpy()
print(score_matrix)
for sentence, row in zip(sentences, score_matrix):
    for candidate, score in zip(corpus, row):
        print(f"{sentence} vs {candidate}, score: {score:.4f}")

print('-' * 50 + '\n')
# 3. Semantic search: index the corpus, then fetch the top matches per query.
model.add_corpus(corpus)
res = model.most_similar(queries=sentences, topn=3)
print(res)
# ``res`` is iterated as {query index: {corpus id: score}}.
for q_id, c in res.items():
    print('query:', sentences[q_id])
    print("search top 3:")
    for corpus_id, s in c.items():
        print(f'\t{model.corpus[corpus_id]}: {s:.4f}')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
import numpy as np | ||
import tensorflow as tf | ||
import tensorflow_hub as hub | ||
import tensorflow_text | ||
import spacy | ||
|
||
from flask import Flask, request | ||
from flask_restful import Resource, Api, reqparse | ||
import logging | ||
|
||
from logging.config import dictConfig | ||
|
||
# Configure logging before the Flask app is created: the root logger emits
# INFO and above through one stream handler named 'wsgi', which writes to the
# object referenced by 'ext://flask.logging.wsgi_errors_stream'.
dictConfig(dict(
    version=1,
    formatters=dict(
        default=dict(
            format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
        ),
    ),
    handlers=dict(
        # 'class' is a Python keyword, so this entry stays a literal dict.
        wsgi={
            'class': 'logging.StreamHandler',
            'stream': 'ext://flask.logging.wsgi_errors_stream',
            'formatter': 'default',
        },
    ),
    root=dict(
        level='INFO',
        handlers=['wsgi'],
    ),
))
|
||
# Flask application plus the Flask-RESTful ``Api`` wrapper that the resource
# classes in this module are registered on.
app = Flask(__name__)
api = Api(app)


@app.route('/')
def testing():
    """Return a fixed string for requests to the root URL."""
    return 'testing app'
|
||
|
||
# Both models are loaded once at import time and reused by every request.
app.logger.info('LOADING SPACY MODEL')
nlp = spacy.load('en_core_web_sm')

app.logger.info('LOADING USEM')
module_url = 'https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3'
model = hub.load(module_url)


def embed(input):
    """Run the loaded TF-Hub module on ``input`` and return its output.

    NOTE(review): this is the "-qa" variant of the encoder; confirm that the
    loaded object is directly callable rather than only exposing named
    signatures.
    """
    return model(input)


app.logger.info('READY APP')
|
||
|
||
|
||
# Request parser declaring a single 'sentences' argument.
# NOTE(review): the resources visible in this file read the body through
# ``request.get_json`` instead of this parser -- confirm it is still needed.
parser_answer = reqparse.RequestParser()
parser_answer.add_argument('sentences')
|
||
def sentences_similarity(sentences, mode='usem'):
    """Score every unordered pair of ``sentences``.

    Pairs are visited in the order (0, 1), (0, 2), ..., (1, 2), ... and each
    score is logged and appended to the result as a plain ``float``.

    Args:
        sentences: sequence of strings to compare with each other.
        mode: ``'usem'`` scores pairs with the inner product of the vectors
            returned by ``embed``; ``'spacy'`` uses ``Doc.similarity`` on the
            documents produced by ``nlp``.

    Returns:
        ``{'error': False, 'similarity': [...]}`` on success, or
        ``{'error': True, 'message': 'UNKNOWN_MODE'}`` when ``mode`` is not
        one of the two supported values (previously this case silently
        returned an empty success result).
    """
    similarities = []

    if mode == 'usem':
        embeding = embed(sentences)
        # corr[i][j] is the inner product of sentence i and sentence j; only
        # the strict upper triangle is read so each pair appears once.
        corr = np.inner(embeding, embeding)
        for n, row in enumerate(corr):
            for q in row[n + 1:]:
                app.logger.info('corr {}'.format(float(q)))
                similarities.append(float(q))
    elif mode == 'spacy':
        # Parse each sentence once up front; the original re-ran ``nlp`` on
        # both members of every pair inside the nested loop.
        docs = [nlp(s) for s in sentences]
        for n, token_s in enumerate(docs):
            for token_q in docs[n + 1:]:
                simil = token_s.similarity(token_q)
                app.logger.info('simil {}'.format(float(simil)))
                similarities.append(float(simil))
    else:
        return {'error': True, 'message': 'UNKNOWN_MODE'}

    return {'error': False, 'similarity': similarities}
|
||
def _similarity_post(mode):
    """Shared POST handler for the similarity resources.

    Reads a JSON body of the form ``{"sentences": [...]}`` and returns the
    result of ``sentences_similarity`` for the given ``mode``.

    Returns:
        The dict returned by ``sentences_similarity`` on success,
        ``{'error': True, 'message': 'NOT_ENOUGH_SENTENCES'}`` for an empty
        list, or ``{'error': True, 'message': 'ERROR_PARSER'}`` when the body
        has no usable ``sentences`` list or scoring raises.
    """
    app.logger.info('SIMILARITY {}'.format('POST CALLED'))

    # Kept outside the ``try`` as in the original, so a failure inside
    # ``get_json`` propagates exactly as it did before.
    json_sentences = request.get_json(force=True)

    try:
        sentences = json_sentences['sentences']

        # The original tested ``len(...) < 1 and isinstance(..., list)``,
        # which let non-list payloads (e.g. a bare string) reach the model.
        if not isinstance(sentences, list):
            app.logger.info('SIMILARITY ERROR {} {}'.format(
                'POST PARSER', "'sentences' is not a list"))
            return {'error': True, 'message': 'ERROR_PARSER'}
        if not sentences:
            return {'error': True, 'message': 'NOT_ENOUGH_SENTENCES'}

        # Success and error results are both returned as-is.
        return sentences_similarity(sentences, mode=mode)

    except Exception as e:
        # Boundary handler: log with traceback and answer with the generic
        # error payload instead of letting the request fail.
        app.logger.exception('SIMILARITY ERROR {} {}'.format('POST PARSER', e))
        return {'error': True, 'message': 'ERROR_PARSER'}


class SimilarityTF(Resource):
    """Pairwise sentence similarity using the ``'usem'`` mode."""

    def get(self):
        return {"error": True, "message": 'not_implemented'}

    def post(self):
        return _similarity_post('usem')


class SimilaritySP(Resource):
    """Pairwise sentence similarity using the ``'spacy'`` mode."""

    def get(self):
        return {"error": True, "message": 'not_implemented'}

    def post(self):
        return _similarity_post('spacy')
|
||
|
||
'''
Example requests (port 5000 applies when this module is run directly):
curl -d '{"sentences": ["hello world", "hello world"]}' -H 'Content-Type: application/json' -X POST localhost:5000/get_similarity_tf/
curl -d '{"sentences": ["hello world", "hello world"]}' -H 'Content-Type: application/json' -X POST localhost:5000/get_similarity_sp/
'''
api.add_resource(SimilarityTF, '/get_similarity_tf/')
api.add_resource(SimilaritySP, '/get_similarity_sp/')

if __name__ == '__main__':
    # NOTE(review): debug=True together with host='0.0.0.0' exposes the
    # interactive debugger on every network interface; use this only for
    # local development and disable debug for any shared deployment.
    app.run(host='0.0.0.0', port=5000, debug=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import datetime | ||
|
||
import loguru | ||
import numpy as np | ||
import spacy | ||
from loguru import logger | ||
|
||
# Load the spaCy pipeline once at import time and log how long it took.
logger.info('spacy model启动加载')
time1 = datetime.datetime.now()
nlp = spacy.load('zh_core_web_md')
time2 = datetime.datetime.now()
# The message ends with "秒" (seconds), so report the elapsed time as a number
# of seconds rather than the default H:MM:SS rendering of a timedelta.
logger.info(f'spacy model加载完成,耗时{(time2 - time1).total_seconds():.2f}秒')
|
||
|
||
def sentences_similarity(sentences, corpus, topk=3, min_simil=0):
    """Return the ``topk`` corpus entries most similar to any of ``sentences``.

    Every sentence is compared with every corpus entry using spaCy's
    ``Doc.similarity``; the scores of all sentences are pooled into one
    ranking, so a corpus entry can appear once per sentence.

    Args:
        sentences: iterable of query strings.
        corpus: iterable of candidate strings.
        topk: maximum number of results to return.
        min_simil: results scoring below this value are dropped.

    Returns:
        A list of ``{'q': corpus_text, 'simil': score}`` dicts, sorted by
        score in descending order and truncated to ``topk`` items.
    """
    # Parse the corpus once; the original re-parsed every corpus entry (and
    # the query itself) for each sentence/entry pair.
    corpus_docs = [(q, nlp(q)) for q in corpus]

    similarities = []
    for s in sentences:
        token_s = nlp(s)
        for q, token_q in corpus_docs:
            simil = float(token_s.similarity(token_q))
            if simil >= min_simil:
                similarities.append({'q': q, 'simil': simil})

    # list.sort is stable, like sorted(), so ties keep their comparison order.
    similarities.sort(key=lambda x: x['simil'], reverse=True)
    top = similarities[:topk]
    logger.info(top)
    return top
|
||
|
||
if __name__ == '__main__':
    # Demo run: rank a small fixed corpus against a single query sentence.
    corpus = [
        '花呗更改绑定银行卡',
        '我什么时候开通了花呗',
        '俄罗斯警告乌克兰反对欧盟协议',
        '暴风雨掩埋了东北部;新泽西16英寸的降雪',
        '中央情报局局长访问以色列叙利亚会谈',
        '人在巴基斯坦基地的炸弹袭击中丧生',
    ]
    sentences = ['如何更换花呗绑定银行卡']
    sentences_similarity(sentences, corpus)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.\venv\Scripts\activate | ||
rasa train |