Multimodal support
+ Fixed several bugs: message-box line-break and whitespace issues, speech-recognition optimization;
+ The Easter-egg feature is now official: Fay conversation runs in parallel with ChatGPT;
+ Added YOLOv8 pose recognition;
+ Added the VisualGLM-6B multimodal, standalone, offline large language model.
xszyou committed May 27, 2023
1 parent 65884af commit ae1d2ae
Showing 16 changed files with 310 additions and 48 deletions.
42 changes: 28 additions & 14 deletions README.md
@@ -10,6 +10,8 @@ The Fay digital-human assistant edition is an important branch of the Fay open-source project, focused on building intelligent

## **Recommended integrations**

Integrate VisualGLM: Bilibili video

Add free local speech recognition to Fay (DAMO Academy funasr): https://www.bilibili.com/video/BV1qs4y1g74e/?share_source=copy_web&vd_source=64cd9062f5046acba398177b62bea9ad

A large language model on a consumer-grade PC (Rasa dialogue management in front of ChatGLM-6B): https://m.bilibili.com/video/BV1D14y1f7pr
@@ -39,7 +41,7 @@ UE5 project: https://github.com/xszyou/fay-ue5

The controller communicates with UE over WebSocket (a minimal client sketch follows the screenshot below).

![](images/cs.png)
![](images/UE.png)
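
A minimal sketch of a WebSocket client for this controller-to-UE channel. The address, port, and message payload below are illustrative assumptions, not the documented protocol of the controller or the UE project:

```python
# Minimal WebSocket client sketch for the controller <-> UE channel.
# The URL and the payload shape are assumptions for illustration only.
import asyncio
import json

import websockets  # pip install websockets


async def main():
    # Assumed address/port; use whatever the controller actually listens on.
    async with websockets.connect("ws://127.0.0.1:10002") as ws:
        await ws.send(json.dumps({"Topic": "hello", "Data": {}}))  # assumed payload
        print(await ws.recv())


asyncio.run(main())
```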

Download the project: [https://pan.baidu.com/s/1RBo2Pie6A5yTrCf1cn_Tuw?pwd=ck99](https://pan.baidu.com/s/1RBo2Pie6A5yTrCf1cn_Tuw?pwd=ck99)

@@ -92,8 +94,11 @@ UE5 project: https://github.com/xszyou/fay-ue5
│   ├── ms_tts_sdk.py # Microsoft text-to-speech
│   ├── xf_aiui.py # iFLYTEK AIUI human-computer interaction (NLP)
│   ├── chatgpt.py # GPT-3.5 integration
│   ├── yuan_1_0.py # Inspur Yuan 1.0 large-model integration
│   ├── nlp_rasa.py # Rasa dialogue management in front of ChatGLM-6B (strongly recommended)
│   ├── nlp_gpt.py # integration with chat.openai.com (no API key needed)
│   ├── yuan_1_0.py # Inspur Yuan 1.0 large-model integration
│   ├── nlp_rasa.py # Rasa dialogue management in front of ChatGLM-6B (strongly recommended)
│   ├── nlp_VisualGLM.py # integration with the multimodal LLM VisualGLM-6B
│   ├── yolov8.py # YOLOv8 pose recognition
│   └── xf_ltp.py # iFLYTEK sentiment analysis
├── bin # executable files
├── core # digital-human core
@@ -109,28 +114,36 @@ UE5 project: https://github.com/xszyou/fay-ue5
│   └── window.py # window module
├── scheduler
│   └── thread_manager.py # scheduling/thread manager
── utils # utility modules
── utils # utility modules
├── config_util.py
├── storer.py
└── util.py
└── test # full of surprises
```


## **III. Changelog**

**2023.05.27:**

+ Fixed several bugs: message-box line-break and whitespace issues, speech-recognition optimization;
+ The Easter-egg feature is now official: Fay conversation runs in parallel with ChatGPT;
+ Added YOLOv8 pose recognition;
+ Added the VisualGLM-6B multimodal, standalone, offline large language model.

**2023.05.12:**

+ Split the Fay digital-human assistant edition out as the main branch (the sales edition moved to the [`fay-sales-edition`](https://github.com/TheRamU/Fay/tree/fay-sales-edition) branch);
+ Added a text chat window for the Fay assistant (text synchronized with speech);
+ Added local saving of conversation history;
+ Upgraded the ChatGLM-6B application logic; long-text and voice replies are shared;
+ Upgraded the ChatGLM-6B application logic; long-text replies are separated from voice replies.


## **IV. Installation**


### **Environment**
- Python 3.8, 3.9, 3.10
- Python 3.9, 3.10
- Windows, macOS, Linux

### **Install dependencies**
@@ -155,15 +168,16 @@ python main.py

| Code module | Description | Link |
| ------------------------- | -------------------------- | ------------------------------------------------------------ |
| ./ai_module/ali_nls.py | Real-time speech recognition (free for 3 months; choose one of the two ASR options) | https://ai.aliyun.com/nls/trans |
| ./ai_module/funasr.py | DAMO Academy open-source free local ASR (choose one of the two ASR options) | fay/test/funasr/README.MD |
| ./ai_module/ms_tts_sdk.py | Microsoft emotional text-to-speech (optional) | https://azure.microsoft.com/zh-cn/services/cognitive-services/text-to-speech/ |
| ./ai_module/ali_nls.py | Real-time speech recognition (optional; free for 3 months; choose one of the two ASR options) | https://ai.aliyun.com/nls/trans |
| ./ai_module/funasr.py | DAMO Academy open-source free local ASR (optional; choose one of the two ASR options) | fay/test/funasr/README.MD |
| ./ai_module/ms_tts_sdk.py | Microsoft emotional text-to-speech (optional; the free edge-tts is used if not configured) | https://azure.microsoft.com/zh-cn/services/cognitive-services/text-to-speech/ |
| ./ai_module/xf_ltp.py | iFLYTEK sentiment analysis | https://www.xfyun.cn/service/emotion-analysis |
| ./utils/ngrok_util.py | ngrok.cc public-network tunneling (optional) | http://ngrok.cc |
| ./ai_module/yuan_1_0.py | Inspur Yuan large model (choose 1 of 4 NLP options) | https://air.inspur.com/ |
| ./ai_module/chatgpt.py | ChatGPT (choose 1 of 4 NLP options) | ******* |
| ./ai_module/xf_aiui.py | iFLYTEK natural language processing (choose 1 of 4 NLP options) | https://aiui.xfyun.cn/solution/webapi |
| ./ai_module/nlp_rasa.py | Rasa dialogue management in front of ChatGLM-6B (choose 1 of 4 NLP options) | https://m.bilibili.com/video/BV1D14y1f7pr |
| ./ai_module/yuan_1_0.py | Inspur Yuan large model (choose 1 of the NLP options) | https://air.inspur.com/ |
| ./ai_module/chatgpt.py | ChatGPT (choose 1 of the NLP options) | ******* |
| ./ai_module/xf_aiui.py | iFLYTEK natural language processing (choose 1 of the NLP options) | https://aiui.xfyun.cn/solution/webapi |
| ./ai_module/nlp_rasa.py | Rasa dialogue management in front of ChatGLM-6B (choose 1 of the NLP options) | https://m.bilibili.com/video/BV1D14y1f7pr |
| ./ai_module/nlp_VisualGLM.py | Integration with the VisualGLM-6B multimodal, standalone, offline LLM (choose 1 of the NLP options) | Bilibili video |
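
The active NLP backend is selected by `key_chat_module` in the configuration and dispatched in `core/fay_core.py`. A minimal sketch of that dispatch, showing only the two branches visible in this commit (everything else about configuration loading is an assumption):

```python
# Dispatch sketch mirroring core/fay_core.py in this commit; only the
# 'rasa' and 'VisualGLM' branches are taken from the diff below.
from utils import config_util as cfg
from ai_module import nlp_rasa
from ai_module import nlp_VisualGLM as VisualGLM


def answer(msg: str) -> str:
    if cfg.key_chat_module == 'rasa':
        return nlp_rasa.question(msg)[0]['text']
    elif cfg.key_chat_module == "VisualGLM":
        return VisualGLM.question(msg)
    raise RuntimeError('No NLP module is configured!')
```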



@@ -228,7 +242,7 @@
Business inquiries: QQ 467665317. We offer development consulting, custom digital-human models, and implementation of teaching resources for universities.
http://yafrm.com/forum.php?mod=viewthread&tid=302

Follow the official WeChat account to get the latest QR code for the WeChat technical discussion group (**please star this repository first**)
Follow the official WeChat account (fay数字人) to get the latest QR code for the WeChat technical discussion group (**please star this repository first**)

![](images/gzh.jpg)

37 changes: 37 additions & 0 deletions ai_module/nlp_VisualGLM.py
@@ -0,0 +1,37 @@
"""
这是对于清华智谱VisualGLM-6B的代码,在使用前请先安装并启动好VisualGLM-6B.
https://github.com/THUDM/VisualGLM-6B
"""
import json
import requests
import uuid
import os
import cv2
from ai_module import yolov8

# Initialize an empty history list
communication_history = []

def question(cont):
if not yolov8.new_instance().get_status():
return "请先启动“Fay Eyes”"
content = {
"text":cont,
"history":communication_history}
img = yolov8.new_instance().get_img()
if yolov8.new_instance().get_status() and img is not None:
filename = str(uuid.uuid4()) + ".jpg"
current_working_directory = os.getcwd()
filepath = os.path.join(current_working_directory, "data", filename)
cv2.imwrite(filepath, img)
content["image"] = filepath
url = "http://127.0.0.1:8080"
print(content)
req = json.dumps(content)
headers = {'content-type': 'application/json'}
r = requests.post(url, headers=headers, data=req)

# Save this conversation to history
communication_history.append([cont, r.text])

return r.text + "\n(相片:" + filepath + ")"
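
A minimal usage sketch for the module above, assuming the VisualGLM-6B API server is already listening on http://127.0.0.1:8080 (as the hard-coded `url` expects) and that a webcam is available for "Fay Eyes":

```python
# Usage sketch (assumptions: VisualGLM-6B API server running on 127.0.0.1:8080,
# a webcam attached, and a data/ directory present in the working directory).
import time

from ai_module import yolov8, nlp_VisualGLM

yolov8.new_instance().start()   # start "Fay Eyes" so get_img() can return a frame
time.sleep(2)                   # give the capture thread a moment to grab a frame
print(nlp_VisualGLM.question("描述一下你看到的画面"))
```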
13 changes: 11 additions & 2 deletions ai_module/nlp_gpt.py
@@ -1,13 +1,17 @@
from revChatGPT.V1 import Chatbot
from core.content_db import Content_Db
from utils import config_util as cfg
import time

count = 0
def question(cont):
    global count
    try:
        chatbot = Chatbot(config={
            "access_token": cfg.key_gpt_access_token,
            "paid": False,
            "collect_analytics": True,
            "model": "gpt-4",
            "conversation_id": cfg.key_gpt_conversation_id
            }, conversation_id=cfg.key_gpt_conversation_id,
            parent_id=None)
@@ -16,6 +20,11 @@ def question(cont):
        response = ""
        for data in chatbot.ask(prompt):
            response = data["message"]
        count = 0
        return response
    except:
        return 'gpt当前繁忙,请稍后重试'
    except Exception as e:
        count += 1
        if count < 3:
            time.sleep(15)
            return question(cont)
        return 'gpt当前繁忙,请稍后重试' + str(e)
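
A minimal usage sketch for this module, assuming `key_gpt_access_token` (and optionally `key_gpt_conversation_id`) are filled in the Fay configuration read by `utils/config_util.py`:

```python
# Usage sketch (assumes the ChatGPT access token is configured; the
# load_config() call is an assumption about config_util's loader).
from utils import config_util as cfg
from ai_module import nlp_gpt

cfg.load_config()
print(nlp_gpt.question("你好,请介绍一下你自己"))
```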
146 changes: 146 additions & 0 deletions ai_module/yolov8.py
@@ -0,0 +1,146 @@
from ultralytics import YOLO
from scipy.spatial import procrustes
import numpy as np
import cv2
import time
from scheduler.thread_manager import MyThread

__fei_eyes = None
class FeiEyes:

    def __init__(self):

        """
        COCO keypoint indices:
        nose (0)
        left eye (1), right eye (2)
        left ear (3), right ear (4)
        left shoulder (5), right shoulder (6)
        left elbow (7), right elbow (8)
        left wrist (9), right wrist (10)
        left hip (11), right hip (12)
        left knee (13), right knee (14)
        left ankle (15), right ankle (16)
        """
        self.POSE_PAIRS = [
            (3, 5), (5, 6),  # upper body (ear-shoulder, shoulder-shoulder)
            (5, 7), (6, 8), (7, 9), (8, 10),  # arms (shoulder-elbow, elbow-wrist)
            (11, 12), (11, 13), (12, 14), (13, 15)  # hips and legs
        ]
        self.my_face = np.array([[154.4565, 193.7006],
                                 [181.8575, 164.8366],
                                 [117.1820, 164.3602],
                                 [213.5605, 193.0460],
                                 [ 62.7056, 193.5217]])
        self.is_running = False
        self.img = None

    def is_sitting(self, keypoints):
        left_hip, right_hip = keypoints[11][:2], keypoints[12][:2]
        left_knee, right_knee = keypoints[13][:2], keypoints[14][:2]
        left_ankle, right_ankle = keypoints[15][:2], keypoints[16][:2]

        # average y position of hips and knees
        hip_knee_y = (left_hip[1] + right_hip[1] + left_knee[1] + right_knee[1]) / 4

        # average y position of knees and ankles
        knee_ankle_y = (left_knee[1] + right_knee[1] + left_ankle[1] + right_ankle[1]) / 4

        # if the hip/knee average sits above the knee/ankle average, treat the person as sitting
        return hip_knee_y < knee_ankle_y

    def is_standing(self, keypoints):
        head = keypoints[0][:2]
        left_ankle, right_ankle = keypoints[15][:2], keypoints[16][:2]
        # the head is positioned high and the feet touch the ground
        if head[1] > left_ankle[1] and head[1] > right_ankle[1]:
            return True
        else:
            return False

    def get_counts(self):
        if not self.is_running:
            return 0, 0, 0
        return self.person_count, self.stand_count, self.sit_count

    def get_status(self):
        return self.is_running

    def get_img(self):
        if self.is_running:
            return self.img
        else:
            return None

    def start(self):
        cap = cv2.VideoCapture(0)
        if cap.isOpened():
            self.is_running = True
            MyThread(target=self.run, args=[cap]).start()

    def stop(self):
        self.is_running = False

    def run(self, cap):
        model = YOLO("yolov8n-pose.pt")
        while self.is_running:
            time.sleep(0.033)
            ret, frame = cap.read()
            if not ret:
                break
            self.img = frame
            operated_frame = frame.copy()
            results = model.predict(operated_frame, verbose=False)
            person_count = 0
            sit_count = 0
            stand_count = 0
            for res in results:  # loop over results
                for box, cls in zip(res.boxes.xyxy, res.boxes.cls):  # loop over detections
                    x1, y1, x2, y2 = box
                    cv2.rectangle(operated_frame, (int(x1.item()), int(y1.item())), (int(x2.item()), int(y2.item())), (0, 255, 0), 2)
                    cv2.putText(operated_frame, f"{res.names[int(cls.item())]}", (int(x1.item()), int(y1.item()) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                    if res.keypoints is not None and res.keypoints.size(0) > 0:  # check if keypoints exist
                        keypoints = res.keypoints[0]

                        # TODO: face-similarity comparison, still to be improved
                        keypoints_np = keypoints[0:5].cpu().numpy()
                        mtx1, mtx2, disparity = procrustes(keypoints_np[:, :2], self.my_face)
                        # total number of people
                        person_count += 1
                        # number of people sitting
                        if self.is_sitting(keypoints):
                            sit_count += 1
                        # number of people standing
                        elif self.is_standing(keypoints):
                            stand_count += 1

                        for keypoint in keypoints:  # loop over keypoints
                            x, y, conf = keypoint
                            if conf > 0.5:  # draw keypoints with confidence greater than 0.5
                                cv2.circle(operated_frame, (int(x.item()), int(y.item())), 3, (0, 0, 255), -1)

                        # Draw lines connecting keypoints
                        for pair in self.POSE_PAIRS:
                            pt1, pt2 = keypoints[pair[0]][:2], keypoints[pair[1]][:2]
                            conf1, conf2 = keypoints[pair[0]][2], keypoints[pair[1]][2]
                            if conf1 > 0.5 and conf2 > 0.5:
                                # cv2.line(operated_frame, (int(pt1[0].item()), int(pt1[1].item())), (int(pt2[0].item()), int(pt2[1].item())), (255, 255, 0), 2)
                                pass
            self.person_count = person_count
            self.sit_count = sit_count
            self.stand_count = stand_count
            cv2.imshow("YOLO v8 Fay Eyes", operated_frame)
            cv2.waitKey(1)

        cap.release()
        cv2.destroyAllWindows()


def new_instance():
    global __fei_eyes
    if __fei_eyes is None:
        __fei_eyes = FeiEyes()
    return __fei_eyes
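
A short usage sketch for the singleton above ("Fay Eyes"), assuming a webcam is available as device 0 and the `yolov8n-pose.pt` weights can be fetched by ultralytics:

```python
# Usage sketch: start "Fay Eyes", poll the pose counts, then stop.
import time

from ai_module import yolov8

eyes = yolov8.new_instance()
eyes.start()              # opens camera 0 and starts the YOLOv8 pose loop in a thread
time.sleep(5)
print(eyes.get_counts())  # -> (person_count, stand_count, sit_count)
eyes.stop()
```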




29 changes: 22 additions & 7 deletions core/fay_core.py
@@ -29,11 +29,13 @@
from datetime import datetime
from ai_module import nlp_rasa
from ai_module import nlp_gpt
from ai_module import yolov8
from ai_module import nlp_VisualGLM as VisualGLM

# Text message handling
def send_for_answer(msg,sendto):
    contentdb = Content_Db()
    contentdb.add_content('member','send',msg)
    contentdb.add_content('member','send', msg)
    text = ''
    textlist = []
    try:
@@ -53,7 +55,8 @@ def send_for_answer(msg,sendto):
        elif cfg.key_chat_module == 'rasa':
            textlist = nlp_rasa.question(msg)
            text = textlist[0]['text']

        elif cfg.key_chat_module == "VisualGLM":
            text = VisualGLM.question(msg)

        else:
            raise RuntimeError('讯飞key、yuan key、chatgpt key都没有配置!')
@@ -289,12 +292,22 @@ def __auto_speak(self):
            # self.__isExecute = True #!!!!

            if index == 1:
                fay_eyes = yolov8.new_instance()
                if fay_eyes.get_status():  # YOLO ("Fay Eyes") is running
                    person_count, stand_count, sit_count = fay_eyes.get_counts()
                    if person_count != 1:  # interact only when exactly one person is present
                        wsa_server.get_web_instance().add_cmd({"panelMsg": "不是有且只有一个人,不互动"})
                        continue

                answer = self.__get_answer(interact.interleaver, self.q_msg)
                if (self.muting):  # a mute command is currently executing
                    wsa_server.get_web_instance().add_cmd({"panelMsg": "静音指令正在执行,不互动"})
                    continue

                contentdb = Content_Db()
                contentdb.add_content('member','speak',self.q_msg)
                wsa_server.get_web_instance().add_cmd({"panelReply": {"type":"member","content":self.q_msg}})
                answer = self.__get_answer(interact.interleaver, self.q_msg)
                if self.muting:
                    continue

                text = ''
                textlist = []
                if answer is None:
@@ -312,6 +325,9 @@ def __auto_speak(self):
                    elif cfg.key_chat_module == 'rasa':
                        textlist = nlp_rasa.question(self.q_msg)
                        text = textlist[0]['text']
                    elif cfg.key_chat_module == "VisualGLM":
                        text = VisualGLM.question(self.q_msg)

                    else:
                        raise RuntimeError('讯飞key、yuan key、chatgpt key都没有配置!')
                    util.log(1, '自然语言处理完成. 耗时: {} ms'.format(math.floor((time.time() - tm) * 1000)))
@@ -593,11 +609,10 @@ def __send_audio(self, file_url, say_type):


            wsa_server.get_web_instance().add_cmd({"panelMsg": self.a_msg})

            time.sleep(audio_length + 0.5)
            wsa_server.get_web_instance().add_cmd({"panelMsg": ""})
            if config_util.config["interact"]["playSound"]:
                util.log(1, '结束播放!')
            time.sleep(audio_length + 0.5)
            self.speaking = False
        except Exception as e:
            print(e)