Skip to content

Commit

Permalink
Merge pull request #385 from wanggang1987/google_voice
Browse files Browse the repository at this point in the history
Voice support
  • Loading branch information
zhayujie authored Mar 8, 2023
2 parents 51f0b89 + d02508d commit 99b4700
Show file tree
Hide file tree
Showing 12 changed files with 254 additions and 13 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ venv*
config.json
QR.png
nohup.out
tmp
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ cd chatgpt-on-wechat/
```bash
pip3 install itchat-uos==1.5.0.dev0
pip3 install --upgrade openai

如果使用百度的语音识别,需要安装百度的pythonSDK
pip3 install baidu-aip chardet
如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak
pip3 install SpeechRecognition
--在MacOS中安装ffmpeg,brew install ffmpeg espeak
--在Windows中安装ffmpeg,下载ffmpeg.exe
--在Linux中安装ffmpeg,apt-get install ffmpeg espeak
```
注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。

Expand Down Expand Up @@ -112,7 +120,11 @@ cp config-template.json config.json
+ 默认只要被人 @ 就会触发机器人自动回复;另外群聊天中只要检测到以 "@bot" 开头的内容,同样会自动回复(方便自己触发),这对应配置项 `group_chat_prefix`
+ 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay))

**3.其他配置**
**3.语音识别**
+ 配置`speech_recognition=true`开启语音识别,默认使用openai的whisper模型
+ 配置`voice_reply_voice=true`语音回复语音,但是需要配置对应语音合成平台的key,由于itchat协议的限制,只能发送语音mp3文件。使用wechaty则回复的是微信语音。

**4.其他配置**

+ `proxy`:由于目前 `openai` 接口国内无法访问,需配置代理客户端的地址,详情参考 [#351](https://github.com/zhayujie/chatgpt-on-wechat/issues/351)
+ 对于图像生成,在满足个人或群组触发条件外,还需要额外的关键词前缀来触发,对应配置 `image_create_prefix `
Expand Down
7 changes: 7 additions & 0 deletions bridge/bridge.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from bot import bot_factory
from voice import voice_factory


class Bridge(object):
Expand All @@ -7,3 +8,9 @@ def __init__(self):

def fetch_reply_content(self, query, context):
return bot_factory.create_bot("chatGPT").reply(query, context)

def fetch_voice_to_text(self, voiceFile):
return voice_factory.create_voice("openai").voiceToText(voiceFile)

def fetch_text_to_voice(self, text):
return voice_factory.create_voice("baidu").textToVoice(text)
8 changes: 7 additions & 1 deletion channel/channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def startup(self):
"""
raise NotImplementedError

def handle(self, msg):
def handle_text(self, msg):
"""
process received msg
:param msg: message object
Expand All @@ -29,3 +29,9 @@ def send(self, msg, receiver):

def build_reply_content(self, query, context=None):
return Bridge().fetch_reply_content(query, context)

def build_voice_to_text(self, voice_file):
return Bridge().fetch_voice_to_text(voice_file)

def build_text_to_voice(self, text):
return Bridge().fetch_text_to_voice(text)
62 changes: 51 additions & 11 deletions channel/wechat/wechat_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
"""
wechat channel
"""

import itchat
import json
from itchat.content import *
from channel.channel import Channel
from concurrent.futures import ThreadPoolExecutor
from common.log import logger
from common.tmp_dir import TmpDir
from config import conf
import requests
import io
Expand All @@ -18,7 +20,7 @@

@itchat.msg_register(TEXT)
def handler_single_msg(msg):
WechatChannel().handle(msg)
WechatChannel().handle_text(msg)
return None


Expand All @@ -28,6 +30,12 @@ def handler_group_msg(msg):
return None


@itchat.msg_register(VOICE)
def handler_single_voice(msg):
WechatChannel().handle_voice(msg)
return None


class WechatChannel(Channel):
def __init__(self):
pass
Expand All @@ -39,12 +47,27 @@ def startup(self):
# start message listener
itchat.run()

def handle(self, msg):
logger.debug("[WX]receive msg: " + json.dumps(msg, ensure_ascii=False))
def handle_voice(self, msg):
if conf().get('speech_recognition') != True :
return
logger.debug("[WX]receive voice msg: " + msg['FileName'])
thread_pool.submit(self._do_handle_voice, msg)

def _do_handle_voice(self, msg):
fileName = TmpDir().path() + msg['FileName']
msg.download(fileName)
content = super().build_voice_to_text(fileName)
self._handle_single_msg(msg, content, conf().get('voice_reply_voice'))

def handle_text(self, msg):
logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False))
content = msg['Text']
self._handle_single_msg(msg, content, False)

def _handle_single_msg(self, msg, content, reply_voice=False):
from_user_id = msg['FromUserName']
to_user_id = msg['ToUserName'] # 接收人id
other_user_id = msg['User']['UserName'] # 对手方id
content = msg['Text']
match_prefix = self.check_prefix(content, conf().get('single_chat_prefix'))
if "」\n- - - - - - - - - - - - - - -" in content:
logger.debug("[WX]reference query skipped")
Expand All @@ -60,9 +83,10 @@ def handle(self, msg):
if img_match_prefix:
content = content.split(img_match_prefix, 1)[1].strip()
thread_pool.submit(self._do_send_img, content, from_user_id)
else:
thread_pool.submit(self._do_send, content, from_user_id)

elif reply_voice:
thread_pool.submit(self._do_send_voice, content, from_user_id)
else :
thread_pool.submit(self._do_send_text, content, from_user_id)
elif to_user_id == other_user_id and match_prefix:
# 自己给好友发送消息
str_list = content.split(match_prefix, 1)
Expand All @@ -72,8 +96,10 @@ def handle(self, msg):
if img_match_prefix:
content = content.split(img_match_prefix, 1)[1].strip()
thread_pool.submit(self._do_send_img, content, to_user_id)
elif reply_voice:
thread_pool.submit(self._do_send_voice, content, to_user_id)
else:
thread_pool.submit(self._do_send, content, to_user_id)
thread_pool.submit(self._do_send_text, content, to_user_id)


def handle_group(self, msg):
Expand Down Expand Up @@ -105,10 +131,24 @@ def handle_group(self, msg):
thread_pool.submit(self._do_send_group, content, msg)

def send(self, msg, receiver):
logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver))
itchat.send(msg, toUserName=receiver)
logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver))

def _do_send(self, query, reply_user_id):
def _do_send_voice(self, query, reply_user_id):
try:
if not query:
return
context = dict()
context['from_user_id'] = reply_user_id
reply_text = super().build_reply_content(query, context)
if reply_text:
replyFile = super().build_text_to_voice(reply_text)
itchat.send_file(replyFile, toUserName=reply_user_id)
logger.info('[WX] sendFile={}, receiver={}'.format(replyFile, reply_user_id))
except Exception as e:
logger.exception(e)

def _do_send_text(self, query, reply_user_id):
try:
if not query:
return
Expand Down Expand Up @@ -138,8 +178,8 @@ def _do_send_img(self, query, reply_user_id):
image_storage.seek(0)

# 图片发送
logger.info('[WX] sendImage, receiver={}'.format(reply_user_id))
itchat.send_image(image_storage, reply_user_id)
logger.info('[WX] sendImage, receiver={}'.format(reply_user_id))
except Exception as e:
logger.exception(e)

Expand Down
20 changes: 20 additions & 0 deletions common/tmp_dir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

import os
import pathlib
from config import conf


class TmpDir(object):
"""A temporary directory that is deleted when the object is destroyed.
"""

tmpFilePath = pathlib.Path('./tmp/')

def __init__(self):
pathExists = os.path.exists(self.tmpFilePath)
if not pathExists and conf().get('speech_recognition') == True:
os.makedirs(self.tmpFilePath)

def path(self):
return str(self.tmpFilePath) + '/'

5 changes: 5 additions & 0 deletions config-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],
"image_create_prefix": ["", "", ""],
"conversation_max_tokens": 1000,
"speech_recognition": false,
"voice_reply_voice": false,
"baidu_app_id": "YOUR BAIDU APP ID",
"baidu_api_key": "YOUR BAIDU API KEY",
"baidu_secret_key": "YOUR BAIDU SERVICE KEY",
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。",
"expires_in_seconds": 3600
}
36 changes: 36 additions & 0 deletions voice/baidu/baidu_voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

"""
baidu voice service
"""
import time
from aip import AipSpeech
from common.log import logger
from common.tmp_dir import TmpDir
from voice.voice import Voice
from config import conf

class BaiduVoice(Voice):
APP_ID = conf().get('baidu_app_id')
API_KEY = conf().get('baidu_api_key')
SECRET_KEY = conf().get('baidu_secret_key')
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

def __init__(self):
pass

def voiceToText(self, voice_file):
pass

def textToVoice(self, text):
result = self.client.synthesis(text, 'zh', 1, {
'spd': 5, 'pit': 5, 'vol': 5, 'per': 111
})
if not isinstance(result, dict):
fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
with open(fileName, 'wb') as f:
f.write(result)
logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
return fileName
else:
logger.error('[Baidu] textToVoice error={}'.format(result))
return None
51 changes: 51 additions & 0 deletions voice/google/google_voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@

"""
google voice service
"""

import pathlib
import subprocess
import time
import speech_recognition
import pyttsx3
from common.log import logger
from common.tmp_dir import TmpDir
from voice.voice import Voice


class GoogleVoice(Voice):
recognizer = speech_recognition.Recognizer()
engine = pyttsx3.init()

def __init__(self):
# 语速
self.engine.setProperty('rate', 125)
# 音量
self.engine.setProperty('volume', 1.0)
# 0为男声,1为女声
voices = self.engine.getProperty('voices')
self.engine.setProperty('voice', voices[1].id)

def voiceToText(self, voice_file):
new_file = voice_file.replace('.mp3', '.wav')
subprocess.call('ffmpeg -i ' + voice_file +
' -acodec pcm_s16le -ac 1 -ar 16000 ' + new_file, shell=True)
with speech_recognition.AudioFile(new_file) as source:
audio = self.recognizer.record(source)
try:
text = self.recognizer.recognize_google(audio, language='zh-CN')
logger.info(
'[Google] voiceToText text={} voice file name={}'.format(text, voice_file))
return text
except speech_recognition.UnknownValueError:
return "抱歉,我听不懂。"
except speech_recognition.RequestError as e:
return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e)

def textToVoice(self, text):
textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
self.engine.save_to_file(text, textFile)
self.engine.runAndWait()
logger.info(
'[Google] textToVoice text={} voice file name={}'.format(text, textFile))
return textFile
27 changes: 27 additions & 0 deletions voice/openai/openai_voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@

"""
google voice service
"""
import json
import openai
from config import conf
from common.log import logger
from voice.voice import Voice


class OpenaiVoice(Voice):
def __init__(self):
openai.api_key = conf().get('open_ai_api_key')

def voiceToText(self, voice_file):
logger.debug(
'[Openai] voice file name={}'.format(voice_file))
file = open(voice_file, "rb")
reply = openai.Audio.transcribe("whisper-1", file)
text = reply["text"]
logger.info(
'[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
return text

def textToVoice(self, text):
pass
16 changes: 16 additions & 0 deletions voice/voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
Voice service abstract class
"""

class Voice(object):
def voiceToText(self, voice_file):
"""
Send voice to voice service and get text
"""
raise NotImplementedError

def textToVoice(self, text):
"""
Send text to voice service and get voice
"""
raise NotImplementedError
20 changes: 20 additions & 0 deletions voice/voice_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
voice factory
"""

def create_voice(voice_type):
"""
create a voice instance
:param voice_type: voice type code
:return: voice instance
"""
if voice_type == 'baidu':
from voice.baidu.baidu_voice import BaiduVoice
return BaiduVoice()
elif voice_type == 'google':
from voice.google.google_voice import GoogleVoice
return GoogleVoice()
elif voice_type == 'openai':
from voice.openai.openai_voice import OpenaiVoice
return OpenaiVoice()
raise RuntimeError

0 comments on commit 99b4700

Please sign in to comment.