diff --git a/bot/chatgpt/chat_gpt_session.py b/bot/chatgpt/chat_gpt_session.py index d39d76955..f7ff12fd8 100644 --- a/bot/chatgpt/chat_gpt_session.py +++ b/bot/chatgpt/chat_gpt_session.py @@ -67,7 +67,7 @@ def num_tokens_from_messages(messages, model): elif model in ["gpt-4-0314", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-35-turbo-16k", "gpt-4-turbo-preview", "gpt-4-1106-preview", const.GPT4_TURBO_PREVIEW, const.GPT4_VISION_PREVIEW, const.GPT4_TURBO_01_25, - const.GPT_4o, const.LINKAI_4o, const.LINKAI_4_TURBO]: + const.GPT_4o, const.GPT_4o_MINI, const.LINKAI_4o, const.LINKAI_4_TURBO]: return num_tokens_from_messages(messages, model="gpt-4") elif model.startswith("claude-3"): return num_tokens_from_messages(messages, model="gpt-3.5-turbo") diff --git a/bot/linkai/link_ai_bot.py b/bot/linkai/link_ai_bot.py index 3fe813164..95c514dae 100644 --- a/bot/linkai/link_ai_bot.py +++ b/bot/linkai/link_ai_bot.py @@ -399,6 +399,7 @@ def _send_image(self, channel, context, image_urls): return max_send_num = conf().get("max_media_send_count") send_interval = conf().get("media_send_interval") + file_type = (".pdf", ".doc", ".docx", ".csv", ".xls", ".xlsx", ".txt", ".rtf", ".ppt", ".pptx") try: i = 0 for url in image_urls: @@ -407,7 +408,7 @@ def _send_image(self, channel, context, image_urls): i += 1 if url.endswith(".mp4"): reply_type = ReplyType.VIDEO_URL - elif url.endswith(".pdf") or url.endswith(".doc") or url.endswith(".docx") or url.endswith(".csv"): + elif url.endswith(file_type): reply_type = ReplyType.FILE url = _download_file(url) if not url: diff --git a/common/const.py b/common/const.py index 45e72e708..68d3795cd 100644 --- a/common/const.py +++ b/common/const.py @@ -32,6 +32,7 @@ GPT4_VISION_PREVIEW = "gpt-4-vision-preview" GPT4 = "gpt-4" +GPT_4o_MINI = "gpt-4o-mini" GPT4_32k = "gpt-4-32k" GPT4_06_13 = "gpt-4-0613" GPT4_32k_06_13 = "gpt-4-32k-0613" @@ -57,7 +58,7 @@ MODEL_LIST = [ GPT35, 
GPT35_0125, GPT35_1106, "gpt-3.5-turbo-16k", - GPT_4o, GPT4_TURBO, GPT4_TURBO_PREVIEW, GPT4_TURBO_01_25, GPT4_TURBO_11_06, GPT4, GPT4_32k, GPT4_06_13, GPT4_32k_06_13, + GPT_4o, GPT_4o_MINI, GPT4_TURBO, GPT4_TURBO_PREVIEW, GPT4_TURBO_01_25, GPT4_TURBO_11_06, GPT4, GPT4_32k, GPT4_06_13, GPT4_32k_06_13, WEN_XIN, WEN_XIN_4, XUNFEI, ZHIPU_AI, MOONSHOT, MiniMax, GEMINI, GEMINI_PRO, GEMINI_15_flash, GEMINI_15_PRO, diff --git a/config.py b/config.py index cd778f0c0..bc87fa4d0 100644 --- a/config.py +++ b/config.py @@ -17,7 +17,7 @@ "open_ai_api_base": "https://api.openai.com/v1", "proxy": "", # openai使用的代理 # chatgpt模型, 当use_azure_chatgpt为true时,其名称为Azure上model deployment名称 - "model": "gpt-3.5-turbo", # 可选择: gpt-4o, gpt-4-turbo, claude-3-sonnet, wenxin, moonshot, qwen-turbo, xunfei, glm-4, minimax, gemini等模型,全部可选模型详见common/const.py文件 + "model": "gpt-3.5-turbo", # 可选择: gpt-4o, gpt-4o-mini, gpt-4-turbo, claude-3-sonnet, wenxin, moonshot, qwen-turbo, xunfei, glm-4, minimax, gemini等模型,全部可选模型详见common/const.py文件 "bot_type": "", # 可选配置,使用兼容openai格式的三方服务时候,需填"chatGPT"。bot具体名称详见common/const.py文件列出的bot_type,如不填根据model名称判断, "use_azure_chatgpt": False, # 是否使用azure的chatgpt "azure_deployment_id": "", # azure 模型部署名称 diff --git a/plugins/source.json b/plugins/source.json index c5c99d068..3e97bddc5 100644 --- a/plugins/source.json +++ b/plugins/source.json @@ -22,7 +22,7 @@ }, "pictureChange": { "url": "https://github.com/Yanyutin753/pictureChange.git", - "desc": "利用stable-diffusion和百度Ai进行图生图或者画图的插件" + "desc": "1. 支持百度AI和Stable Diffusion WebUI进行图像处理,提供多种模型选择,支持图生图、文生图自定义模板。2. 支持Suno音乐AI可将图像和文字转为音乐。3. 支持自定义模型进行文件、图片总结功能。4. 
def speech_to_text_aliyun(url, audioContent, appkey, token):
    """
    Recognize the speech in PCM audio data with Aliyun's RESTful ASR service.

    The audio is POSTed as ``application/octet-stream`` and described to the
    service as 16 kHz ``pcm``; punctuation prediction and inverse text
    normalization are requested, voice detection is not.

    Args:
        url (str): Endpoint URL of the Aliyun speech recognition service.
        audioContent (bytes): PCM audio data to recognize.
        appkey (str): Aliyun appkey.
        token (str): Authentication token, sent in the ``X-NLS-Token`` header.

    Returns:
        str | None: The recognized text on success. ``None`` when the input
        is empty, the request fails, the response is not valid JSON, the
        service reports a non-success status, or the result is empty.
    """
    # Imported locally so the function does not depend on module-level imports.
    import http.client
    from urllib.parse import urlencode

    if not audioContent:
        logger.error("语音识别失败,音频数据为空")
        return None

    # Build the query string with proper URL-encoding instead of manual
    # concatenation, so an appkey with reserved characters cannot corrupt it.
    query = urlencode({
        'appkey': appkey,
        'format': 'pcm',
        'sample_rate': 16000,
        'enable_punctuation_prediction': 'true',
        'enable_inverse_text_normalization': 'true',
    })
    request_url = url + '?' + query

    # NOTE(review): the connection always targets this host, while the full
    # `url` is sent as the request target — confirm whether the host should
    # instead be derived from `url` (e.g. for other regions).
    host = 'nls-gateway-cn-shanghai.aliyuncs.com'

    headers = {
        'X-NLS-Token': token,
        'Content-type': 'application/octet-stream',
        'Content-Length': str(len(audioContent)),
    }

    conn = http.client.HTTPSConnection(host)
    try:
        conn.request(method='POST', url=request_url, body=audioContent, headers=headers)
        raw_body = conn.getresponse().read()
    except (OSError, http.client.HTTPException) as e:
        # Honor the "None on failure" contract instead of leaking the error.
        logger.error(f"语音识别失败,请求异常: {e}")
        return None
    finally:
        # Always release the socket, whatever happened above.
        conn.close()

    try:
        payload = json.loads(raw_body)
    except ValueError:
        logger.error(f"语音识别失败,收到非JSON格式的数据: {raw_body}")
        return None

    # A missing "status" key or a non-object payload is treated as a failure
    # rather than raising KeyError/TypeError.
    status = payload.get('status') if isinstance(payload, dict) else None
    if status != 20000000:
        logger.error(f"语音识别失败,状态码: {status}")
        return None

    result = payload.get('result')
    if not result:
        # Successful status but nothing recognized.
        return None

    logger.info(f"阿里云语音识别到了:{result}")
    return result
speech_to_text_aliyun(self.api_url_voice_to_text, pcm, self.app_key, token_id) + if text: + logger.info("[Ali] VoicetoText = {}".format(text)) + reply = Reply(ReplyType.TEXT, text) + else: + reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败") + return reply + def get_valid_token(self): """ 获取有效的阿里云token。 diff --git a/voice/ali/config.json.template b/voice/ali/config.json.template index 6a4aaa9a5..563c57f0e 100644 --- a/voice/ali/config.json.template +++ b/voice/ali/config.json.template @@ -1,5 +1,6 @@ { - "api_url": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/tts", + "api_url_text_to_voice": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/tts", + "api_url_voice_to_text": "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/asr", "app_key": "", "access_key_id": "", "access_key_secret": ""