Commit
Add semantic similarity plus a batch of other changes, bundled into one push
wyt1234 committed Apr 27, 2022
1 parent 06e1c1e commit 1a4e560
Showing 12 changed files with 358 additions and 40 deletions.
27 changes: 27 additions & 0 deletions actions/actions_finance.py
@@ -28,6 +28,33 @@

from actions.custom_forms import CustomFormValidationAction

import datetime

import numpy as np
import spacy
from loguru import logger

# spaCy semantic similarity: load the Chinese model once at module import
time1 = datetime.datetime.now()
nlp = spacy.load('zh_core_web_md')
time2 = datetime.datetime.now()
logger.info(f'spaCy model loaded in {(time2 - time1).total_seconds():.1f}s')


def sentences_similarity(sentences, corpus, topk=3, min_simil=0):
    """Return the topk corpus entries most similar to the given sentences."""
    similarities = []
    for s in sentences:
        token_s = nlp(s)  # parse each input sentence only once
        for q in corpus:
            token_q = nlp(q)
            simil = token_s.similarity(token_q)
            similarities.append({'q': q, 'simil': float(simil)})
    similarities = [x for x in similarities if x['simil'] >= min_simil]
    similarities = sorted(similarities, key=lambda x: x['simil'], reverse=True)
    logger.info(similarities[:topk])
    return similarities[:topk]


##### Purchase financial products ##########
class ActionBuyFinancialProducts(Action):
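
A minimal sketch (not part of this commit) of how the sentences_similarity helper above could back a product-recommendation action in the Rasa action server; the action name, product corpus, and similarity threshold are hypothetical placeholders.

# Hypothetical usage sketch; imports may already exist in actions_finance.py.
from rasa_sdk import Action, Tracker
from rasa_sdk.executor import CollectingDispatcher


class ActionRecommendByDescription(Action):  # hypothetical action name
    def name(self) -> str:
        return "action_recommend_by_description"

    def run(self, dispatcher: CollectingDispatcher, tracker: Tracker, domain):
        user_text = tracker.latest_message.get("text", "")
        # Hypothetical product descriptions; in practice these could come
        # from the finance table added in profile_db.py below.
        corpus = ["日鑫系列天天盈B款,活期理财", "鑫利达,一年期定期产品"]
        matches = sentences_similarity([user_text], corpus, topk=1, min_simil=0.3)
        if matches:
            dispatcher.utter_message(text=f"为您推荐:{matches[0]['q']}")
        else:
            dispatcher.utter_message(text="暂时没有匹配的理财产品")
        return []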
10 changes: 10 additions & 0 deletions actions/profile_db.py
@@ -46,6 +46,16 @@
Base = declarative_base()



class Finance(Base):
    """Financial products table."""
    __tablename__ = "finance"
    id = Column(Integer, primary_key=True)
    name = Column(String(255))
    type = Column(String(255))
    rate = Column(String(255))
    minimum_amount = Column(String(255))
    description = Column(String(255))

class Account(Base):
"""Accounts table.
`session_id` is only meaningful for accounts generated by conversation sessions,
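
Not in this commit, but a minimal sketch of how the new finance table could be read with SQLAlchemy; the engine URL and the query below are placeholder assumptions, not the project's actual database setup.

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine("sqlite:///profile.db")  # placeholder database URL
Session = sessionmaker(bind=engine)
session = Session()

# e.g. list every product tagged as a fund, with its rate and minimum amount
for product in session.query(Finance).filter(Finance.type == "基金").all():
    print(product.name, product.rate, product.minimum_amount)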
39 changes: 24 additions & 15 deletions data/nlu/faq.yml
@@ -35,19 +35,28 @@ nlu:
    - tell me about the different parts of rasa
    - what are your features ?
    - what are the features does rasa have?
- intent: want_to_buy_finance
- intent: finance_detail
  examples: |
    - 我想买理财
    - 我想买一款理财产品
    - 有什么样的理财产品可以购买
    - 推荐一款理财产品
    - 我想买[5万块](amount-of-money)的[鑫利贷](finance_product)
    - 我要买[五千块](amount-of-money)的[天天赢理财](finance_product)
    - 帮我买个理财产品吧
    - 请帮我买一份[基金](finance_product)
    - 给我推荐几款理财产品
    - 有没有理财产品卖
    - 我想买一份理财
    - 我想买10元的[鑫利](finance_product)理财
    - 我要买理财
    - 买理财
    - 能不能介绍一下这款理财产品
    - 能仔细说说吗
    - 这款产品的收益率是多少
    - 这款产品的起购金额是多少
- intent: want_to_detail
  examples: |
    - 能给我详细介绍下吗
    - 介绍一下这款产品
    - 详细情况说一下
    - 有这款产品的其他介绍吗
    - 了解一下这款产品
    - 讲一讲这款产品
    - 我想了解这款产品的具体情况
    - 具体说说
- intent: want_to_purchase
  examples: |
    - 好的我想下单
    - 就买它了
    - 行帮我下单吧
    - 帮我买[1万](amount-of-money)块钱
    - 这款产品请帮我买[2万](amount-of-money)块钱
    - 好的下单吧
    - 可以的直接买吧
7 changes: 6 additions & 1 deletion data/nlu/general.yml
@@ -195,4 +195,9 @@ nlu:
    - [杭州](location)
    - 1
    - 1000
    - 鑫利达
    - [鑫利达](product)
    - [2022年二十六期个人大额存单](product)
    - [华安安康A](product)
    - [日鑫系列天天盈B款](product)
    - [国家开发银行2022年第二期金融债券](product)
    - [利安永吉终身寿险](product)
33 changes: 33 additions & 0 deletions data/nlu/nlu_fina.yml
@@ -0,0 +1,33 @@
version: "2.0"
nlu:
- intent: want_to_buy_finance
  examples: |
    - 我想买理财
    - 我想买一款理财产品
    - 有什么样的理财产品可以购买
    - 推荐一款理财产品
    - 我想买[5万块](amount-of-money)的[鑫利贷](finance_product)
    - 我要买[五千块](amount-of-money)的[天天赢理财](finance_product)
    - 帮我买个理财产品吧
    - 给我推荐几款理财产品
    - 有没有理财产品卖
    - 我想买一份理财
    - 我想买10元的[鑫利](finance_product)理财
    - 我要买理财
    - 买理财
    - 还有什么理财产品?
- intent: want_to_other_recommand
  examples: |
    - 请帮我买一份[基金](finance_product)
    - 我想看一下股票型[基金](finance_type)
    - 有[股票](finance_type)吗
    - 有[基金](finance_type)吗
    - 还有其他的吗
    - 我想看看其他的
    - 有其他推荐吗
    - 有其他产品吗
    - 有理财吗
    - 有别的理财产品吗
    - 有保险吗
    - 有柜台债吗
    - 有大额存单吗
46 changes: 23 additions & 23 deletions data/rules/rules_purchase.yml
@@ -1,25 +1,25 @@
version: "2.0"

rules:
  ####
  - rule: Activate purchase finance when no other form is active
    condition:
      # this condition allows stories to handle form switching
      - active_loop: null
    steps:
      - intent: want_to_buy_finance
      - action: recommand_finance_product
      - action: purchase_finance_form
      - active_loop: purchase_finance_form

  - rule: Submit purchase_finance_form while not switched from previous form
    condition:
      - active_loop: purchase_finance_form
      - slot_was_set:
          - previous_form_name: null
    steps:
      - action: purchase_finance_form
      - active_loop: null
      - slot_was_set:
          - requested_slot: null
      - action: buy_financial_products
#rules:
#  ####
#  - rule: Activate purchase finance when no other form is active
#    condition:
#      # this condition allows stories to handle form switching
#      - active_loop: null
#    steps:
#      - intent: want_to_buy_finance
#      - action: recommand_finance_product
#      - action: purchase_finance_form
#      - active_loop: purchase_finance_form
#
#  - rule: Submit purchase_finance_form while not switched from previous form
#    condition:
#      - active_loop: purchase_finance_form
#      - slot_was_set:
#          - previous_form_name: null
#    steps:
#      - action: purchase_finance_form
#      - active_loop: null
#      - slot_was_set:
#          - requested_slot: null
#      - action: buy_financial_products
5 changes: 5 additions & 0 deletions domain.yml
@@ -35,6 +35,10 @@ intents:
- thank
- faq
- want_to_buy_finance
- finance_detail
- want_to_detail
- want_to_other_recommand
- want_to_purchase
entities:
- amount-of-money
- credit_card
@@ -51,6 +55,7 @@ entities:
- song_title
- product
- finance_product
- finance_type
slots:
  AA_CONTINUE_FORM:
    type: any
7 changes: 6 additions & 1 deletion endpoints.yml
@@ -11,8 +11,13 @@ version: "2.0"
# Server which runs your custom actions.
# https://rasa.com/docs/rasa/core/actions/#custom-actions/



action_endpoint:
  url: "http://localhost:5056/webhook"
  # url: "http://localhost:5056/webhook"    #### WSL cannot reach localhost
  # url: "http://172.18.160.1:5056/webhook" #### cat /etc/resolv.conf|grep nameserver|awk '{print $2}'
  url: "http://192.168.0.109:5056/webhook"  #### or use the host's external IP directly
  # url: "http://192.168.8.137:5056/webhook" #### or use the host's external IP directly

# Tracker store which is used to store the conversations.
# By default the conversations are stored in memory.
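
As a side note (not part of the commit), the nameserver trick mentioned in the comment above can be scripted; this is a hedged sketch that assumes the standard WSL2 /etc/resolv.conf layout, where the nameserver entry is the Windows host's IP.

# Hedged helper: read the Windows host IP that WSL2 writes into resolv.conf.
def wsl_host_ip(resolv_conf="/etc/resolv.conf"):
    with open(resolv_conf) as f:
        for line in f:
            if line.startswith("nameserver"):
                return line.split()[1]
    return None


if __name__ == "__main__":
    print(wsl_host_ip())  # e.g. 172.18.160.1, usable as the action_endpoint host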
44 changes: 44 additions & 0 deletions tests/base_demo.py
@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
"""
@author: XuMing(xuming624@qq.com)
@description: text semantic similarity computation and text-matching search
"""
import sys

sys.path.append('..')
from similarities import Similarity

# 1.Compute cosine similarity between two sentences.
sentences = ['如何更换花呗绑定银行卡',
             '花呗更改绑定银行卡']
corpus = [
    '花呗更改绑定银行卡',
    '我什么时候开通了花呗',
    '俄罗斯警告乌克兰反对欧盟协议',
    '暴风雨掩埋了东北部;新泽西16英寸的降雪',
    '中央情报局局长访问以色列叙利亚会谈',
    '人在巴基斯坦基地的炸弹袭击中丧生',
]
model = Similarity(model_name_or_path="shibing624/text2vec-base-chinese")
print(model)
similarity_score = model.similarity(sentences[0], sentences[1])
print(f"{sentences[0]} vs {sentences[1]}, score: {float(similarity_score):.4f}")

print('-' * 50 + '\n')
# 2. Compute similarity between two lists
similarity_scores = model.similarity(sentences, corpus)
print(similarity_scores.numpy())
for i in range(len(sentences)):
    for j in range(len(corpus)):
        print(f"{sentences[i]} vs {corpus[j]}, score: {similarity_scores.numpy()[i][j]:.4f}")

print('-' * 50 + '\n')
# 3.Semantic Search
model.add_corpus(corpus)
res = model.most_similar(queries=sentences, topn=3)
print(res)
for q_id, c in res.items():
    print('query:', sentences[q_id])
    print("search top 3:")
    for corpus_id, s in c.items():
        print(f'\t{model.corpus[corpus_id]}: {s:.4f}')
138 changes: 138 additions & 0 deletions tests/spacy_similarities_app.py
@@ -0,0 +1,138 @@
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text
import spacy

from flask import Flask, request
from flask_restful import Resource, Api, reqparse
import logging

from logging.config import dictConfig

dictConfig({
    'version': 1,
    'formatters': {'default': {
        'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
    }},
    'handlers': {'wsgi': {
        'class': 'logging.StreamHandler',
        'stream': 'ext://flask.logging.wsgi_errors_stream',
        'formatter': 'default'
    }},
    'root': {
        'level': 'INFO',
        'handlers': ['wsgi']
    }
})

app = Flask(__name__)
api = Api(app)

@app.route('/')
def testing():
    return 'testing app'


app.logger.info('LOADING SPACY MODEL')
nlp = spacy.load('en_core_web_sm')
app.logger.info('LOADING USEM')
module_url = 'https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3'

model = hub.load(module_url)
def embed(texts):
    # thin wrapper around the TF Hub encoder
    return model(texts)

app.logger.info('READY APP')



parser_answer = reqparse.RequestParser()
parser_answer.add_argument('sentences')

def sentences_similarity(sentences, mode='usem'):
    """Pairwise similarity of the input sentences, via USE embeddings or spaCy."""
    similarities = []

    if mode == 'usem':
        embedding = embed(sentences)
        corr = np.inner(embedding, embedding)
        for n, s in enumerate(corr):
            for q in s[n+1:]:
                app.logger.info('corr {}'.format(float(q)))
                similarities.append(float(q))
    if mode == 'spacy':
        for n, s in enumerate(sentences):
            token_s = nlp(s)  # parse each sentence only once
            for q in sentences[n+1:]:
                token_q = nlp(q)
                simil = token_s.similarity(token_q)
                app.logger.info('simil {}'.format(float(simil)))
                similarities.append(float(simil))

    return {'error': False, 'similarity': similarities}

class SimilarityTF(Resource):
    def get(self):
        return {"error": True, "message": 'not_implemented'}

    def post(self):
        app.logger.info('SIMILARITY {}'.format('POST CALLED'))
        json_sentences = request.get_json(force=True)

        try:
            sentences = json_sentences['sentences']
            # reject anything that is not a non-empty list of sentences
            if not isinstance(sentences, list) or len(sentences) < 1:
                return {'error': True, 'message': 'NOT_ENOUGH_SENTENCES'}
            result = sentences_similarity(sentences)

            if not result['error']:
                return {'error': False, 'similarity': result['similarity']}
            else:
                return result

        except Exception as e:
            app.logger.info('SIMILARITY ERROR {} {}'.format('POST PARSER', e))
            return {'error': True, 'message': 'ERROR_PARSER'}

class SimilaritySP(Resource):
    def get(self):
        return {"error": True, "message": 'not_implemented'}

    def post(self):
        app.logger.info('SIMILARITY {}'.format('POST CALLED'))
        json_sentences = request.get_json(force=True)

        try:
            sentences = json_sentences['sentences']
            # reject anything that is not a non-empty list of sentences
            if not isinstance(sentences, list) or len(sentences) < 1:
                return {'error': True, 'message': 'NOT_ENOUGH_SENTENCES'}
            result = sentences_similarity(sentences, mode='spacy')

            if not result['error']:
                return {'error': False, 'similarity': result['similarity']}
            else:
                return result

        except Exception as e:
            app.logger.info('SIMILARITY ERROR {} {}'.format('POST PARSER', e))
            return {'error': True, 'message': 'ERROR_PARSER'}


'''
curl -d '{"sentences": ["hello world", "hello world"]}' -H 'Content-Type: application/json' -X POST localhost:5000/get_similarity_tf/
'''
api.add_resource(SimilarityTF, '/get_similarity_tf/')
api.add_resource(SimilaritySP, '/get_similarity_sp/')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)
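
A small usage sketch, assumed rather than taken from the commit: once this Flask app is running on port 5000 (and the TF Hub and spaCy models have finished loading), both resource paths registered above can be exercised from Python with requests.

import requests

payload = {"sentences": ["hello world", "hello there"]}
for path in ("/get_similarity_tf/", "/get_similarity_sp/"):
    resp = requests.post(f"http://localhost:5000{path}", json=payload)
    print(path, resp.json())  # e.g. {'error': False, 'similarity': [0.9...]}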
40 changes: 40 additions & 0 deletions tests/spacy_similarities_search.py
@@ -0,0 +1,40 @@
import datetime

import spacy
from loguru import logger

logger.info('loading spaCy model')
time1 = datetime.datetime.now()
nlp = spacy.load('zh_core_web_md')
time2 = datetime.datetime.now()
logger.info(f'spaCy model loaded in {(time2 - time1).total_seconds():.1f}s')


def sentences_similarity(sentences, corpus, topk=3, min_simil=0):
    """Return the topk corpus entries most similar to the given sentences."""
    similarities = []
    for s in sentences:
        token_s = nlp(s)  # parse each input sentence only once
        for q in corpus:
            token_q = nlp(q)
            simil = token_s.similarity(token_q)
            similarities.append({'q': q, 'simil': float(simil)})
    similarities = [x for x in similarities if x['simil'] >= min_simil]
    similarities = sorted(similarities, key=lambda x: x['simil'], reverse=True)
    logger.info(similarities[:topk])
    return similarities[:topk]


if __name__ == '__main__':
    sentences = ['如何更换花呗绑定银行卡']
    corpus = [
        '花呗更改绑定银行卡',
        '我什么时候开通了花呗',
        '俄罗斯警告乌克兰反对欧盟协议',
        '暴风雨掩埋了东北部;新泽西16英寸的降雪',
        '中央情报局局长访问以色列叙利亚会谈',
        '人在巴基斯坦基地的炸弹袭击中丧生',
    ]
    sentences_similarity(sentences, corpus)
2 changes: 2 additions & 0 deletions windows_train.cmd
@@ -0,0 +1,2 @@
.\venv\Scripts\activate
rasa train
