增加语义相似度等一大堆打包push

wyt1234 · Apr 27, 2022 · 1a4e560 · 1a4e560
1 parent 06e1c1e
commit 1a4e560
Showing 12 changed files with 358 additions and 40 deletions.
diff --git a/actions/actions_finance.py b/actions/actions_finance.py
@@ -28,6 +28,33 @@
 
 from actions.custom_forms import CustomFormValidationAction
 
+import datetime
+
+import numpy as np
+import spacy
+from loguru import logger
+
+# spaCy semantic similarity: load the Chinese pipeline once at import time and
+# keep it in the module-level `nlp` name.
+time1 = datetime.datetime.now()
+nlp = spacy.load('zh_core_web_md')
+time2 = datetime.datetime.now()
+# `time2 - time1` is a timedelta, which renders as "H:MM:SS.ffffff"; convert it
+# to a plain number of seconds so the value matches the "秒" (seconds) unit.
+logger.info(f'spacy model加载完成，耗时{(time2 - time1).total_seconds():.2f}秒')
+
+
+def sentences_similarity(sentences, corpus, topk=3, min_simil=0):
+    """Rank corpus entries by spaCy similarity to the given sentences.
+
+    Every sentence is compared with every corpus entry via ``Doc.similarity``
+    on documents produced by the module-level ``nlp`` pipeline. The scores of
+    all (sentence, entry) pairs are pooled into a single list, so with several
+    input sentences the same corpus entry can appear more than once.
+
+    Args:
+        sentences: Iterable of query strings.
+        corpus: Iterable of candidate strings to compare against.
+        topk: Maximum number of results to return.
+        min_simil: Pairs scoring below this value are discarded.
+
+    Returns:
+        A list of at most ``topk`` dicts of the form
+        ``{'q': <corpus entry>, 'simil': <float score>}``, sorted by
+        descending score.
+    """
+    # Parse every corpus entry once up front; the pipeline call is the
+    # expensive step and its result does not depend on the query sentence.
+    corpus_docs = [(q, nlp(q)) for q in corpus]
+    similarities = []
+    for s in sentences:
+        token_s = nlp(s)  # parsed once per sentence, not once per pair
+        for q, token_q in corpus_docs:
+            simil = float(token_s.similarity(token_q))
+            if simil >= min_simil:
+                similarities.append({'q': q, 'simil': simil})
+    # list.sort is stable, so ties keep their insertion order.
+    similarities.sort(key=lambda x: x['simil'], reverse=True)
+    top = similarities[:topk]
+    logger.info(top)
+    return top
+
 
 #####购买理财产品##########
 class ActionBuyFinancialProducts(Action):

diff --git a/actions/profile_db.py b/actions/profile_db.py
@@ -46,6 +46,16 @@
 Base = declarative_base()
 
 
+
+class Finance(Base):
+    """ORM model mapped to the ``finance`` table.
+
+    Apart from the integer primary key, every column is declared as
+    ``String(255)``, so ``rate`` and ``minimum_amount`` are stored as text
+    rather than as numeric values.
+    """
+    __tablename__ = "finance"
+    id = Column(Integer, primary_key=True)  # integer primary key
+    name = Column(String(255))  # NOTE(review): presumably the product name - confirm
+    type = Column(String(255))  # NOTE(review): presumably the product category - confirm
+    rate = Column(String(255))  # stored as text, not as a number
+    minimum_amount = Column(String(255))  # stored as text, not as a number
+    description = Column(String(255))  # free text, limited to 255 characters
+
 class Account(Base):
     """Accounts table.
     `session_id` is only meaningful for accounts generated by conversation sessions,

diff --git a/data/nlu/faq.yml b/data/nlu/faq.yml
@@ -35,19 +35,28 @@ nlu:
     - tell me about the different parts of rasa
     - what are your features ?
     - what are the features does rasa have?
-- intent: want_to_buy_finance
+- intent: finance_detail
   examples: |
-    - 我想买理财
-    - 我想买一款理财产品
-    - 有什么样的理财产品可以购买
-    - 推荐一款理财产品
-    - 我想买[5万块](amount-of-money)的[鑫利贷](finance_product)
-    - 我要买[五千块](amount-of-money)的[天天赢理财](finance_product)
-    - 帮我买个理财产品吧
-    - 请帮我买一份[基金](finance_product)
-    - 给我推荐几款理财产品
-    - 有没有理财产品卖
-    - 我想买一份理财
-    - 我想买10元的[鑫利](finance_product)理财
-    - 我要买理财
-    - 买理财
+    - 能不能介绍一下这款理财产品
+    - 能仔细说说吗
+    - 这款产品的收益率是多少
+    - 这款产品的起购金额是多少
+- intent: want_to_detail
+  examples: |
+    - 能给我详细介绍下吗
+    - 介绍一下这款产品
+    - 详细情况说一下
+    - 有这款产品的其他介绍吗
+    - 了解一下这款产品
+    - 讲一讲这款产品
+    - 我想了解这款产品的具体情况
+    - 具体说说
+- intent: want_to_purchase
+  examples: |
+    - 好的我想下单
+    - 就买它了
+    - 行帮我下单吧
+    - 帮我买[1万](amount-of-money)块钱
+    - 这款产品请帮我买[2万](amount-of-money)块钱
+    - 好的下单吧
+    - 可以的直接买吧
diff --git a/data/nlu/general.yml b/data/nlu/general.yml
@@ -195,4 +195,9 @@ nlu:
     - [杭州](location)
     - 1
     - 1000
-    - 鑫利达
+    - [鑫利达](product)
+    - [2022年二十六期个人大额存单](product)
+    - [华安安康A](product)
+    - [日鑫系列天天盈B款](product)
+    - [国家开发银行2022年第二期金融债券](product)
+    - [利安永吉终身寿险](product)
diff --git a/data/nlu/nlu_fina.yml b/data/nlu/nlu_fina.yml
@@ -0,0 +1,33 @@
+version: "2.0"
+nlu:
+- intent: want_to_buy_finance
+  examples: |
+    - 我想买理财
+    - 我想买一款理财产品
+    - 有什么样的理财产品可以购买
+    - 推荐一款理财产品
+    - 我想买[5万块](amount-of-money)的[鑫利贷](finance_product)
+    - 我要买[五千块](amount-of-money)的[天天赢理财](finance_product)
+    - 帮我买个理财产品吧
+    - 给我推荐几款理财产品
+    - 有没有理财产品卖
+    - 我想买一份理财
+    - 我想买10元的[鑫利](finance_product)理财
+    - 我要买理财
+    - 买理财
+    - 还有什么理财产品？
+- intent: want_to_other_recommand
+  examples: |
+    - 请帮我买一份[基金](finance_product)
+    - 我想看一下股票型[基金](finance_type)
+    - 有[股票](finance_type)吗
+    - 有[基金](finance_type)吗
+    - 还有其他的吗
+    - 我想看看其他的
+    - 有其他推荐吗
+    - 有其他产品吗
+    - 有理财吗
+    - 有别的理财产品吗
+    - 有保险吗
+    - 有柜台债吗
+    - 有大额存单吗
diff --git a/data/rules/rules_purchase.yml b/data/rules/rules_purchase.yml
@@ -1,25 +1,25 @@
 version: "2.0"
 
-rules:
-  ####
-  - rule: Activate purchase finace when no other form is active
-    condition:
-      # this condition allows stories to handle form switching
-      - active_loop: null
-    steps:
-      - intent: want_to_buy_finance
-      - action: recommand_finance_product
-      - action: purchase_finance_form
-      - active_loop: purchase_finance_form
-
-  - rule: Submit purchase_finance_form while not switched from previous form
-    condition:
-      - active_loop: purchase_finance_form
-      - slot_was_set:
-          - previous_form_name: null
-    steps:
-      - action: purchase_finance_form
-      - active_loop: null
-      - slot_was_set:
-          - requested_slot: null
-      - action: buy_financial_products
+#rules:
+#  ####
+#  - rule: Activate purchase finace when no other form is active
+#    condition:
+#      # this condition allows stories to handle form switching
+#      - active_loop: null
+#    steps:
+#      - intent: want_to_buy_finance
+#      - action: recommand_finance_product
+#      - action: purchase_finance_form
+#      - active_loop: purchase_finance_form
+#
+#  - rule: Submit purchase_finance_form while not switched from previous form
+#    condition:
+#      - active_loop: purchase_finance_form
+#      - slot_was_set:
+#          - previous_form_name: null
+#    steps:
+#      - action: purchase_finance_form
+#      - active_loop: null
+#      - slot_was_set:
+#          - requested_slot: null
+#      - action: buy_financial_products
diff --git a/domain.yml b/domain.yml
@@ -35,6 +35,10 @@ intents:
 - thank
 - faq
 - want_to_buy_finance
+- finance_detail
+- want_to_detail
+- want_to_other_recommand
+- want_to_purchase
 entities:
 - amount-of-money
 - credit_card
@@ -51,6 +55,7 @@ entities:
 - song_title
 - product
 - finance_product
+- finance_type
 slots:
   AA_CONTINUE_FORM:
     type: any

diff --git a/endpoints.yml b/endpoints.yml
@@ -11,8 +11,13 @@ version: "2.0"
 # Server which runs your custom actions.
 # https://rasa.com/docs/rasa/core/actions/#custom-actions/
 
+
+
 action_endpoint:
- url: "http://localhost:5056/webhook"
+# url: "http://localhost:5056/webhook"        #### wsl无法访问到localhost
+# url: "http://172.18.160.1:5056/webhook"      #### cat /etc/resolv.conf|grep nameserver|awk '{print $2}'
+ url: "http://192.168.0.109:5056/webhook"    #### 或直接用对外ip
+# url: "http://192.168.8.137:5056/webhook"    #### 或直接用对外ip
 
 # Tracker store which is used to store the conversations.
 # By default the conversations are stored in memory.

diff --git a/tests/base_demo.py b/tests/base_demo.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+"""
+@author:XuMing(xuming624@qq.com)
+@description: 文本语义相似度计算和文本匹配搜索
+"""
+import sys
+
+sys.path.append('..')
+from similarities import Similarity
+
+# 1. Compute cosine similarity between two sentences.
+sentences = ['如何更换花呗绑定银行卡',
+             '花呗更改绑定银行卡']
+corpus = [
+    '花呗更改绑定银行卡',
+    '我什么时候开通了花呗',
+    '俄罗斯警告乌克兰反对欧盟协议',
+    '暴风雨掩埋了东北部；新泽西16英寸的降雪',
+    '中央情报局局长访问以色列叙利亚会谈',
+    '人在巴基斯坦基地的炸弹袭击中丧生',
+]
+model = Similarity(model_name_or_path="shibing624/text2vec-base-chinese")
+print(model)
+similarity_score = model.similarity(sentences[0], sentences[1])
+print(f"{sentences[0]} vs {sentences[1]}, score: {float(similarity_score):.4f}")
+
+print('-' * 50 + '\n')
+# 2. Compute the similarity between two lists (one row per sentence, one
+#    column per corpus entry).
+similarity_scores = model.similarity(sentences, corpus)
+# Convert to a NumPy array once instead of on every iteration of the nested
+# loop below.
+score_matrix = similarity_scores.numpy()
+print(score_matrix)
+for sentence, row in zip(sentences, score_matrix):
+    for candidate, score in zip(corpus, row):
+        print(f"{sentence} vs {candidate}, score: {score:.4f}")
+
+print('-' * 50 + '\n')
+# 3. Semantic search: index the corpus, then fetch the top 3 matches for each
+#    query sentence.
+model.add_corpus(corpus)
+res = model.most_similar(queries=sentences, topn=3)
+print(res)
+for q_id, c in res.items():
+    print('query:', sentences[q_id])
+    print("search top 3:")
+    for corpus_id, s in c.items():
+        print(f'\t{model.corpus[corpus_id]}: {s:.4f}')
diff --git a/tests/spacy_similarities_app.py b/tests/spacy_similarities_app.py
@@ -0,0 +1,138 @@
+import numpy as np
+import tensorflow as tf
+import tensorflow_hub as hub
+import tensorflow_text
+import spacy
+
+from flask import Flask, request
+from flask_restful import Resource, Api, reqparse
+import logging
+
+from logging.config import dictConfig
+
+# Configure logging before the Flask application object is created below.
+# The root logger is set to INFO and writes through a StreamHandler bound to
+# Flask's WSGI error stream, using the 'default' formatter.
+dictConfig({
+    'version': 1,
+    'formatters': {'default': {
+        'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
+    }},
+    'handlers': {'wsgi': {
+        'class': 'logging.StreamHandler',
+        'stream': 'ext://flask.logging.wsgi_errors_stream',
+        'formatter': 'default'
+    }},
+    'root': {
+        'level': 'INFO',
+        'handlers': ['wsgi']
+    }
+})
+
+# Flask application and the flask_restful API wrapper that the resource
+# classes in this module are registered on.
+app = Flask(__name__)
+api = Api(app)
+
+@app.route('/')
+def testing():
+    """Return a fixed plain-text marker for requests to the root URL."""
+    return 'testing app'
+
+
+# Both models are loaded at import time, so importing this module is slow and
+# the TF Hub download needs network access.
+app.logger.info('LOADING SPACY MODEL')
+# NOTE(review): `en_core_web_sm` is a small English pipeline. spaCy's docs say
+# the small packages ship without word vectors, so `Doc.similarity` scores
+# from it may be of limited use - confirm this is the intended model.
+nlp = spacy.load('en_core_web_sm')
+app.logger.info('LOADING USEM')
+module_url = 'https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3'
+
+# NOTE(review): the multilingual-qa module is documented with separate
+# question/response encoder signatures - confirm that calling the loaded
+# object directly, as embed() does, is supported.
+model = hub.load(module_url)
+def embed(input):
+    """Pass `input` to the loaded TF Hub module and return its output."""
+    return model(input)
+
+app.logger.info('READY APP')
+
+
+
+# Request parser declaring a single 'sentences' argument. It is not referenced
+# anywhere else in the visible part of this file.
+parser_answer = reqparse.RequestParser()
+parser_answer.add_argument('sentences')
+
+def sentences_similarity(sentences, mode='usem'):
+    """Compute a similarity score for every pair of sentences.
+
+    Scores are produced for each pair ``(i, j)`` with ``i < j``, ordered by
+    ``i`` and then by ``j``. Each score is also written to the app log.
+
+    Args:
+        sentences: Sequence of sentences to compare with each other.
+        mode: ``'usem'`` scores pairs with the inner product of the vectors
+            returned by ``embed``; ``'spacy'`` scores them with
+            ``Doc.similarity`` from the module-level ``nlp`` pipeline. Any
+            other value produces an empty score list.
+
+    Returns:
+        ``{'error': False, 'similarity': [<float>, ...]}``.
+    """
+    similarities = []
+
+    if mode == 'usem':
+        embeding = embed(sentences)
+        corr = np.inner(embeding, embeding)
+        for n, s in enumerate(corr):
+            # Only the entries right of the diagonal: each pair once, no
+            # self-comparison.
+            for q in s[n + 1:]:
+                app.logger.info('corr {}'.format(float(q)))
+                similarities.append(float(q))
+    elif mode == 'spacy':
+        # Run the pipeline once per sentence instead of re-parsing both
+        # sides of every pair.
+        docs = [nlp(s) for s in sentences]
+        for n, token_s in enumerate(docs):
+            for token_q in docs[n + 1:]:
+                simil = float(token_s.similarity(token_q))
+                app.logger.info('simil {}'.format(simil))
+                similarities.append(simil)
+
+    return {'error': False, 'similarity': similarities}
+
+class SimilarityTF(Resource):
+    """REST resource scoring sentence pairs with the default ``'usem'`` mode.
+
+    POST expects a JSON body of the form ``{"sentences": [<str>, <str>, ...]}``
+    and answers with ``{'error': False, 'similarity': [...]}`` on success or
+    ``{'error': True, 'message': <code>}`` on failure.
+    """
+
+    def get(self):
+        """GET is not supported; always report ``not_implemented``."""
+        return {"error": True, "message": 'not_implemented'}
+
+    def post(self):
+        """Score every pair of the posted sentences."""
+        app.logger.info('SIMILARITY {}'.format('POST CALLED'))
+
+        # force=True parses the body as JSON regardless of the Content-Type.
+        json_sentences = request.get_json(force=True)
+
+        try:
+            sentences = json_sentences['sentences']
+
+            # Pairwise scoring needs a list with at least two entries. The
+            # type is checked first so a non-list value never reaches len()
+            # or the scoring code.
+            if not isinstance(sentences, list) or len(sentences) < 2:
+                return {'error': True, 'message': 'NOT_ENOUGH_SENTENCES'}
+            result = sentences_similarity(sentences)
+
+            if not result['error']:
+                return {'error': False, 'similarity': result['similarity']}
+            return result
+
+        except Exception as e:
+            # Request boundary: record the traceback and return a generic
+            # error payload instead of letting the exception propagate.
+            app.logger.exception('SIMILARITY ERROR {} {}'.format('POST PARSER', e))
+            return {'error': True, 'message': 'ERROR_PARSER'}
+
+class SimilaritySP(Resource):
+    """REST resource scoring sentence pairs with the ``'spacy'`` mode.
+
+    POST expects a JSON body of the form ``{"sentences": [<str>, <str>, ...]}``
+    and answers with ``{'error': False, 'similarity': [...]}`` on success or
+    ``{'error': True, 'message': <code>}`` on failure.
+    """
+
+    def get(self):
+        """GET is not supported; always report ``not_implemented``."""
+        return {"error": True, "message": 'not_implemented'}
+
+    def post(self):
+        """Score every pair of the posted sentences."""
+        app.logger.info('SIMILARITY {}'.format('POST CALLED'))
+
+        # force=True parses the body as JSON regardless of the Content-Type.
+        json_sentences = request.get_json(force=True)
+
+        try:
+            sentences = json_sentences['sentences']
+
+            # Pairwise scoring needs a list with at least two entries. The
+            # type is checked first so a non-list value (for example a bare
+            # string, which would be compared character by character) never
+            # reaches the scoring code.
+            if not isinstance(sentences, list) or len(sentences) < 2:
+                return {'error': True, 'message': 'NOT_ENOUGH_SENTENCES'}
+            result = sentences_similarity(sentences, mode='spacy')
+
+            if not result['error']:
+                return {'error': False, 'similarity': result['similarity']}
+            return result
+
+        except Exception as e:
+            # Request boundary: record the traceback and return a generic
+            # error payload instead of letting the exception propagate.
+            app.logger.exception('SIMILARITY ERROR {} {}'.format('POST PARSER', e))
+            return {'error': True, 'message': 'ERROR_PARSER'}
+
+
+# Example requests against the development server started below (port 5000):
+#   curl -d '{"sentences": ["hello world", "hello world"]}' -H 'Content-Type: application/json' -X POST localhost:5000/get_similarity_tf/
+#   curl -d '{"sentences": ["hello world", "hello world"]}' -H 'Content-Type: application/json' -X POST localhost:5000/get_similarity_sp/
+api.add_resource(SimilarityTF, '/get_similarity_tf/')
+api.add_resource(SimilaritySP, '/get_similarity_sp/')
+
+if __name__ == '__main__':
+    # NOTE(review): debug=True enables the interactive Werkzeug debugger, and
+    # host='0.0.0.0' makes the server reachable from other machines. Keep this
+    # combination to trusted networks only.
+    app.run(host='0.0.0.0', port=5000, debug=True)
diff --git a/tests/spacy_similarities_search.py b/tests/spacy_similarities_search.py
@@ -0,0 +1,40 @@
+import datetime
+
+import loguru
+import numpy as np
+import spacy
+from loguru import logger
+
+# Load the Chinese spaCy pipeline once at import time and log how long it took.
+logger.info('spacy model启动加载')
+time1 = datetime.datetime.now()
+nlp = spacy.load('zh_core_web_md')
+time2 = datetime.datetime.now()
+# `time2 - time1` is a timedelta, which renders as "H:MM:SS.ffffff"; convert it
+# to a plain number of seconds so the value matches the "秒" (seconds) unit.
+logger.info(f'spacy model加载完成，耗时{(time2 - time1).total_seconds():.2f}秒')
+
+
+def sentences_similarity(sentences, corpus, topk=3, min_simil=0):
+    """Rank corpus entries by spaCy similarity to the given sentences.
+
+    Every sentence is compared with every corpus entry via ``Doc.similarity``
+    on documents produced by the module-level ``nlp`` pipeline. The scores of
+    all (sentence, entry) pairs are pooled into a single list, so with several
+    input sentences the same corpus entry can appear more than once.
+
+    Args:
+        sentences: Iterable of query strings.
+        corpus: Iterable of candidate strings to compare against.
+        topk: Maximum number of results to return.
+        min_simil: Pairs scoring below this value are discarded.
+
+    Returns:
+        A list of at most ``topk`` dicts of the form
+        ``{'q': <corpus entry>, 'simil': <float score>}``, sorted by
+        descending score.
+    """
+    # Parse every corpus entry once up front; the pipeline call is the
+    # expensive step and its result does not depend on the query sentence.
+    corpus_docs = [(q, nlp(q)) for q in corpus]
+    similarities = []
+    for s in sentences:
+        token_s = nlp(s)  # parsed once per sentence, not once per pair
+        for q, token_q in corpus_docs:
+            simil = float(token_s.similarity(token_q))
+            if simil >= min_simil:
+                similarities.append({'q': q, 'simil': simil})
+    # list.sort is stable, so ties keep their insertion order.
+    similarities.sort(key=lambda x: x['simil'], reverse=True)
+    top = similarities[:topk]
+    logger.info(top)
+    return top
+
+
+if __name__ == '__main__':
+    # Manual demo: rank a handful of candidate sentences against one query.
+    # The ranked result is written to the log by sentences_similarity itself.
+    corpus = [
+        '花呗更改绑定银行卡',
+        '我什么时候开通了花呗',
+        '俄罗斯警告乌克兰反对欧盟协议',
+        '暴风雨掩埋了东北部；新泽西16英寸的降雪',
+        '中央情报局局长访问以色列叙利亚会谈',
+        '人在巴基斯坦基地的炸弹袭击中丧生',
+    ]
+    sentences = ['如何更换花呗绑定银行卡']
+    sentences_similarity(sentences=sentences, corpus=corpus)
diff --git a/windows_train.cmd b/windows_train.cmd
@@ -0,0 +1,2 @@
+rem Activate the project's virtual environment, then train the Rasa model.
+rem `call` is required here: running one batch script from another without
+rem it hands control over for good, so `rasa train` would never be reached.
+call .\venv\Scripts\activate
+rasa train