From e42ede512b99325be29507193010f9b568d015cf Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Wed, 20 Mar 2024 17:22:23 +0800 Subject: [PATCH] Update Claude3 api request and fix some bugs (#1641) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update version to 3.74 * Add support for Yi Model API (#1635) * 更新以支持零一万物模型 * 删除newbing * 修改config --------- Co-authored-by: binary-husky * Update claude requrest to http type * Update for endpoint * Add support for other tpyes of pictures * Update pip packages * Fix console_slience issue while error handling * revert version changes --------- Co-authored-by: binary-husky --- config.py | 8 + request_llms/bridge_all.py | 64 +++++--- request_llms/bridge_claude.py | 221 +++++++++++++++---------- request_llms/bridge_yimodel.py | 283 +++++++++++++++++++++++++++++++++ 4 files changed, 476 insertions(+), 100 deletions(-) create mode 100644 request_llms/bridge_yimodel.py diff --git a/config.py b/config.py index 1bdb29955f..75e0b90bb4 100644 --- a/config.py +++ b/config.py @@ -47,6 +47,7 @@ # "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125" # "claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2", # "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama", +# "yi-34b-chat-0205", "yi-34b-chat-200k" # ] # --- --- --- --- # 此外,为了更灵活地接入one-api多模型管理界面,您还可以在接入one-api时, @@ -212,6 +213,10 @@ MOONSHOT_API_KEY = "" +# 零一万物(Yi Model) API KEY +YIMODEL_API_KEY = "" + + # Mathpix 拥有执行PDF的OCR功能,但是需要注册账号 MATHPIX_APPID = "" MATHPIX_APPKEY = "" @@ -313,6 +318,9 @@ ├── "glm-4", "glm-3-turbo", "zhipuai" 智谱AI大模型 │ └── ZHIPUAI_API_KEY │ +├── "yi-34b-chat-0205", "yi-34b-chat-200k" 等零一万物(Yi Model)大模型 +│ └── YIMODEL_API_KEY +│ ├── "qwen-turbo" 等通义千问大模型 │ └── DASHSCOPE_API_KEY │ diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 947074759a..31749f179d 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -62,7 +62,8 @@ def decode(self, *args, **kwargs): api2d_endpoint = "https://openai.api2d.net/v1/chat/completions" newbing_endpoint = "wss://sydney.bing.com/sydney/ChatHub" gemini_endpoint = "https://generativelanguage.googleapis.com/v1beta/models" -claude_endpoint = "https://api.anthropic.com" +claude_endpoint = "https://api.anthropic.com/v1/messages" +yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions" if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/' azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15' @@ -80,6 +81,7 @@ def decode(self, *args, **kwargs): if newbing_endpoint in API_URL_REDIRECT: newbing_endpoint = API_URL_REDIRECT[newbing_endpoint] if gemini_endpoint in API_URL_REDIRECT: gemini_endpoint = API_URL_REDIRECT[gemini_endpoint] if claude_endpoint in API_URL_REDIRECT: claude_endpoint = API_URL_REDIRECT[claude_endpoint] +if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint] # 获取tokenizer tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo") @@ -316,6 +318,7 @@ def decode(self, *args, **kwargs): "moonshot-v1-8k": { "fn_with_ui": moonshot_ui, "fn_without_ui": moonshot_no_ui, + "can_multi_thread": True, "endpoint": None, "max_token": 1024 * 8, "tokenizer": tokenizer_gpt35, @@ -324,6 +327,7 @@ def decode(self, *args, **kwargs): "moonshot-v1-32k": { "fn_with_ui": moonshot_ui, "fn_without_ui": moonshot_no_ui, + "can_multi_thread": True, "endpoint": None, "max_token": 1024 * 32, 
"tokenizer": tokenizer_gpt35, @@ -332,6 +336,7 @@ def decode(self, *args, **kwargs): "moonshot-v1-128k": { "fn_with_ui": moonshot_ui, "fn_without_ui": moonshot_no_ui, + "can_multi_thread": True, "endpoint": None, "max_token": 1024 * 128, "tokenizer": tokenizer_gpt35, @@ -473,22 +478,6 @@ def decode(self, *args, **kwargs): "token_cnt": get_token_num_gpt35, } }) -if "newbing-free" in AVAIL_LLM_MODELS: - try: - from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui - from .bridge_newbingfree import predict as newbingfree_ui - model_info.update({ - "newbing-free": { - "fn_with_ui": newbingfree_ui, - "fn_without_ui": newbingfree_noui, - "endpoint": newbing_endpoint, - "max_token": 4096, - "tokenizer": tokenizer_gpt35, - "token_cnt": get_token_num_gpt35, - } - }) - except: - print(trimmed_format_exc()) if "newbing" in AVAIL_LLM_MODELS: # same with newbing-free try: from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui @@ -521,6 +510,7 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) +# -=-=-=-=-=-=- 上海AI-LAB书生大模型 -=-=-=-=-=-=- if "internlm" in AVAIL_LLM_MODELS: try: from .bridge_internlm import predict_no_ui_long_connection as internlm_noui @@ -553,6 +543,7 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) +# -=-=-=-=-=-=- 通义-本地模型 -=-=-=-=-=-=- if "qwen-local" in AVAIL_LLM_MODELS: try: from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui @@ -570,6 +561,7 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) +# -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=- if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai try: from .bridge_qwen import predict_no_ui_long_connection as qwen_noui @@ -605,7 +597,35 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) -if "spark" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型 +# -=-=-=-=-=-=- 零一万物模型 -=-=-=-=-=-=- +if "yi-34b-chat-0205" in AVAIL_LLM_MODELS or "yi-34b-chat-200k" in AVAIL_LLM_MODELS: # zhipuai + try: + from .bridge_yimodel import predict_no_ui_long_connection as yimodel_noui + from .bridge_yimodel import predict as yimodel_ui + model_info.update({ + "yi-34b-chat-0205": { + "fn_with_ui": yimodel_ui, + "fn_without_ui": yimodel_noui, + "can_multi_thread": False, # 目前来说,默认情况下并发量极低,因此禁用 + "endpoint": yimodel_endpoint, + "max_token": 4000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "yi-34b-chat-200k": { + "fn_with_ui": yimodel_ui, + "fn_without_ui": yimodel_noui, + "can_multi_thread": False, # 目前来说,默认情况下并发量极低,因此禁用 + "endpoint": yimodel_endpoint, + "max_token": 200000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + }) + except: + print(trimmed_format_exc()) +# -=-=-=-=-=-=- 讯飞星火认知大模型 -=-=-=-=-=-=- +if "spark" in AVAIL_LLM_MODELS: try: from .bridge_spark import predict_no_ui_long_connection as spark_noui from .bridge_spark import predict as spark_ui @@ -681,6 +701,7 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) +# -=-=-=-=-=-=- 智谱 -=-=-=-=-=-=- if "zhipuai" in AVAIL_LLM_MODELS: # zhipuai 是glm-4的别名,向后兼容配置 try: model_info.update({ @@ -695,6 +716,7 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) +# -=-=-=-=-=-=- 幻方-深度求索大模型 -=-=-=-=-=-=- if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder try: from .bridge_deepseekcoder import predict_no_ui_long_connection as deepseekcoder_noui @@ -711,6 +733,8 @@ def decode(self, *args, 
**kwargs): }) except: print(trimmed_format_exc()) + + # -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=- for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]: # 为了更灵活地接入one-api多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"] @@ -735,8 +759,8 @@ def decode(self, *args, **kwargs): }) -# <-- 用于定义和切换多个azure模型 --> -AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY") +# -=-=-=-=-=-=- azure模型对齐支持 -=-=-=-=-=-=- +AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY") # <-- 用于定义和切换多个azure模型 --> if len(AZURE_CFG_ARRAY) > 0: for azure_model_name, azure_cfg_dict in AZURE_CFG_ARRAY.items(): # 可能会覆盖之前的配置,但这是意料之中的 diff --git a/request_llms/bridge_claude.py b/request_llms/bridge_claude.py index 50c0329aae..6eb0a89b21 100644 --- a/request_llms/bridge_claude.py +++ b/request_llms/bridge_claude.py @@ -9,12 +9,13 @@ 具备多线程调用能力的函数 2. predict_no_ui_long_connection:支持多线程 """ - +import logging import os import time import traceback from toolbox import get_conf, update_ui, trimmed_format_exc, encode_image, every_image_file_in_path - +import json +import requests picture_system_prompt = "\n当回复图像时,必须说明正在回复哪张图像。所有图像仅在最后一个问题中提供,即使它们在历史记录中被提及。请使用'这是第X张图像:'的格式来指明您正在描述的是哪张图像。" Claude_3_Models = ["claude-3-sonnet-20240229", "claude-3-opus-20240229"] @@ -38,6 +39,34 @@ def get_full_error(chunk, stream_response): break return chunk +def decode_chunk(chunk): + # 提前读取一些信息(用于判断异常) + chunk_decoded = chunk.decode() + chunkjson = None + is_last_chunk = False + need_to_pass = False + if chunk_decoded.startswith('data:'): + try: + chunkjson = json.loads(chunk_decoded[6:]) + except: + need_to_pass = True + pass + elif chunk_decoded.startswith('event:'): + try: + event_type = chunk_decoded.split(':')[1].strip() + if event_type == 'content_block_stop' or event_type == 'message_stop': + is_last_chunk = True + elif event_type == 'content_block_start' or event_type == 'message_start': + need_to_pass = True + pass + except: + need_to_pass = True + pass + else: + need_to_pass = True + pass + return need_to_pass, chunkjson, is_last_chunk + def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False): """ @@ -53,53 +82,60 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window = None: 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗 """ - from anthropic import Anthropic watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可 - if inputs == "": inputs = "空空如也的输入栏" - message = generate_payload(inputs, llm_kwargs, history, stream=True, image_paths=None) - retry = 0 if len(ANTHROPIC_API_KEY) == 0: raise RuntimeError("没有设置ANTHROPIC_API_KEY选项") + if inputs == "": inputs = "空空如也的输入栏" + headers, message = generate_payload(inputs, llm_kwargs, history, sys_prompt, image_paths=None) + retry = 0 + while True: try: # make a POST request to the API endpoint, stream=False from .bridge_all import model_info - anthropic = Anthropic(api_key=ANTHROPIC_API_KEY, base_url=model_info[llm_kwargs['llm_model']]['endpoint']) - # endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] - # with ProxyNetworkActivate() - stream = anthropic.messages.create( - messages=message, - max_tokens=4096, # The maximum number of tokens to generate before stopping. 
- model=llm_kwargs['llm_model'], - stream=True, - temperature = llm_kwargs['temperature'], - system=sys_prompt - ) - break - except Exception as e: + endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] + response = requests.post(endpoint, headers=headers, json=message, + proxies=proxies, stream=True, timeout=TIMEOUT_SECONDS);break + except requests.exceptions.ReadTimeout as e: retry += 1 traceback.print_exc() if retry > MAX_RETRY: raise TimeoutError if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') + stream_response = response.iter_lines() result = '' - try: - for completion in stream: - if completion.type == "message_start" or completion.type == "content_block_start": - continue - elif completion.type == "message_stop" or completion.type == "content_block_stop" or completion.type == "message_delta": - break - result += completion.delta.text - if not console_slience: print(completion.delta.text, end='') - if observe_window is not None: - # 观测窗,把已经获取的数据显示出去 - if len(observe_window) >= 1: observe_window[0] += completion.delta.text - # 看门狗,如果超过期限没有喂狗,则终止 - if len(observe_window) >= 2: - if (time.time()-observe_window[1]) > watch_dog_patience: - raise RuntimeError("用户取消了程序。") - except Exception as e: - traceback.print_exc() + while True: + try: chunk = next(stream_response) + except StopIteration: + break + except requests.exceptions.ConnectionError: + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + need_to_pass, chunkjson, is_last_chunk = decode_chunk(chunk) + if chunk: + try: + if need_to_pass: + pass + elif is_last_chunk: + logging.info(f'[response] {result}') + break + else: + if chunkjson and chunkjson['type'] == 'content_block_delta': + result += chunkjson['delta']['text'] + print(chunkjson['delta']['text'], end='') + if observe_window is not None: + # 观测窗,把已经获取的数据显示出去 + if len(observe_window) >= 1: + observe_window[0] += chunkjson['delta']['text'] + # 看门狗,如果超过期限没有喂狗,则终止 + if len(observe_window) >= 2: + if (time.time()-observe_window[1]) > watch_dog_patience: + raise RuntimeError("用户取消了程序。") + except Exception as e: + chunk = get_full_error(chunk, stream_response) + chunk_decoded = chunk.decode() + error_msg = chunk_decoded + print(error_msg) + raise RuntimeError("Json解析不合常规") return result @@ -119,7 +155,6 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp additional_fn代表点击的哪个按钮,按钮见functional.py """ if inputs == "": inputs = "空空如也的输入栏" - from anthropic import Anthropic if len(ANTHROPIC_API_KEY) == 0: chatbot.append((inputs, "没有设置ANTHROPIC_API_KEY")) yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 @@ -145,7 +180,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 try: - message = generate_payload(inputs, llm_kwargs, history, stream, image_paths) + headers, message = generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths) except RuntimeError as e: chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。") yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # 刷新界面 @@ -158,46 +193,61 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp try: # make a POST request to the API endpoint, stream=True from .bridge_all import model_info - anthropic = Anthropic(api_key=ANTHROPIC_API_KEY, base_url=model_info[llm_kwargs['llm_model']]['endpoint']) - # endpoint = 
model_info[llm_kwargs['llm_model']]['endpoint'] - # with ProxyNetworkActivate() - stream = anthropic.messages.create( - messages=message, - max_tokens=4096, # The maximum number of tokens to generate before stopping. - model=llm_kwargs['llm_model'], - stream=True, - temperature = llm_kwargs['temperature'], - system=system_prompt - ) - break - except: + endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] + response = requests.post(endpoint, headers=headers, json=message, + proxies=proxies, stream=True, timeout=TIMEOUT_SECONDS);break + except requests.exceptions.ReadTimeout as e: retry += 1 - chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg)) - retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else "" - yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面 + traceback.print_exc() if retry > MAX_RETRY: raise TimeoutError - + if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') + stream_response = response.iter_lines() gpt_replying_buffer = "" - for completion in stream: - if completion.type == "message_start" or completion.type == "content_block_start": - continue - elif completion.type == "message_stop" or completion.type == "content_block_stop" or completion.type == "message_delta": + while True: + try: chunk = next(stream_response) + except StopIteration: break - try: - gpt_replying_buffer = gpt_replying_buffer + completion.delta.text - history[-1] = gpt_replying_buffer - chatbot[-1] = (history[-2], history[-1]) - yield from update_ui(chatbot=chatbot, history=history, msg='正常') # 刷新界面 - - except Exception as e: - from toolbox import regular_txt_to_markdown - tb_str = '```\n' + trimmed_format_exc() + '```' - chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str}") - yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + tb_str) # 刷新界面 - return - -def generate_payload(inputs, llm_kwargs, history, stream, image_paths): + except requests.exceptions.ConnectionError: + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + need_to_pass, chunkjson, is_last_chunk = decode_chunk(chunk) + if chunk: + try: + if need_to_pass: + pass + elif is_last_chunk: + logging.info(f'[response] {gpt_replying_buffer}') + break + else: + if chunkjson and chunkjson['type'] == 'content_block_delta': + gpt_replying_buffer += chunkjson['delta']['text'] + history[-1] = gpt_replying_buffer + chatbot[-1] = (history[-2], history[-1]) + yield from update_ui(chatbot=chatbot, history=history, msg='正常') # 刷新界面 + + except Exception as e: + chunk = get_full_error(chunk, stream_response) + chunk_decoded = chunk.decode() + error_msg = chunk_decoded + print(error_msg) + raise RuntimeError("Json解析不合常规") + +def multiple_picture_types(image_paths): + """ + 根据图片类型返回image/jpeg, image/png, image/gif, image/webp,无法判断则返回image/jpeg + """ + for image_path in image_paths: + if image_path.endswith('.jpeg') or image_path.endswith('.jpg'): + return 'image/jpeg' + elif image_path.endswith('.png'): + return 'image/png' + elif image_path.endswith('.gif'): + return 'image/gif' + elif image_path.endswith('.webp'): + return 'image/webp' + return 'image/jpeg' + +def generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths): """ 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 """ @@ -223,19 +273,16 @@ def generate_payload(inputs, llm_kwargs, history, stream, image_paths): messages[-1]['content'][0]['text'] = what_gpt_answer['content'][0]['text'] if any([llm_kwargs['llm_model'] == model for model in Claude_3_Models]) and image_paths: - base64_images = [] - for image_path in 
image_paths: - base64_images.append(encode_image(image_path)) what_i_ask_now = {} what_i_ask_now["role"] = "user" what_i_ask_now["content"] = [] - for base64_image in base64_images: + for image_path in image_paths: what_i_ask_now["content"].append({ "type": "image", "source": { "type": "base64", - "media_type": "image/jpeg", - "data": base64_image, + "media_type": multiple_picture_types(image_paths), + "data": encode_image(image_path), } }) what_i_ask_now["content"].append({"type": "text", "text": inputs}) @@ -244,4 +291,18 @@ def generate_payload(inputs, llm_kwargs, history, stream, image_paths): what_i_ask_now["role"] = "user" what_i_ask_now["content"] = [{"type": "text", "text": inputs}] messages.append(what_i_ask_now) - return messages \ No newline at end of file + # 开始整理headers与message + headers = { + 'x-api-key': ANTHROPIC_API_KEY, + 'anthropic-version': '2023-06-01', + 'content-type': 'application/json' + } + payload = { + 'model': llm_kwargs['llm_model'], + 'max_tokens': 4096, + 'messages': messages, + 'temperature': llm_kwargs['temperature'], + 'stream': True, + 'system': system_prompt + } + return headers, payload \ No newline at end of file diff --git a/request_llms/bridge_yimodel.py b/request_llms/bridge_yimodel.py new file mode 100644 index 0000000000..6d65a56d82 --- /dev/null +++ b/request_llms/bridge_yimodel.py @@ -0,0 +1,283 @@ +# 借鉴自同目录下的bridge_chatgpt.py + +""" + 该文件中主要包含三个函数 + + 不具备多线程能力的函数: + 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程 + + 具备多线程调用能力的函数 + 2. predict_no_ui_long_connection:支持多线程 +""" + +import json +import time +import gradio as gr +import logging +import traceback +import requests +import importlib +import random + +# config_private.py放自己的秘密如API和代理网址 +# 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件 +from toolbox import get_conf, update_ui, trimmed_format_exc, is_the_upload_folder, read_one_api_model_name +proxies, TIMEOUT_SECONDS, MAX_RETRY, YIMODEL_API_KEY = \ + get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'YIMODEL_API_KEY') + +timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' 
+ \ + '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。' + +def get_full_error(chunk, stream_response): + """ + 获取完整的从Openai返回的报错 + """ + while True: + try: + chunk += next(stream_response) + except: + break + return chunk + +def decode_chunk(chunk): + # 提前读取一些信息(用于判断异常) + chunk_decoded = chunk.decode() + chunkjson = None + is_last_chunk = False + try: + chunkjson = json.loads(chunk_decoded[6:]) + is_last_chunk = chunkjson.get("lastOne", False) + except: + pass + return chunk_decoded, chunkjson, is_last_chunk + +def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False): + """ + 发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。 + inputs: + 是本次问询的输入 + sys_prompt: + 系统静默prompt + llm_kwargs: + chatGPT的内部调优参数 + history: + 是之前的对话列表 + observe_window = None: + 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗 + """ + watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可 + if inputs == "": inputs = "空空如也的输入栏" + headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True) + retry = 0 + while True: + try: + # make a POST request to the API endpoint, stream=False + from .bridge_all import model_info + endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] + response = requests.post(endpoint, headers=headers, proxies=proxies, + json=payload, stream=True, timeout=TIMEOUT_SECONDS); break + except requests.exceptions.ReadTimeout as e: + retry += 1 + traceback.print_exc() + if retry > MAX_RETRY: raise TimeoutError + if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') + + stream_response = response.iter_lines() + result = '' + is_head_of_the_stream = True + while True: + try: chunk = next(stream_response) + except StopIteration: + break + except requests.exceptions.ConnectionError: + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + chunk_decoded, chunkjson, is_last_chunk = decode_chunk(chunk) + if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r'"role":"assistant"' in chunk_decoded): + # 数据流的第一帧不携带content + is_head_of_the_stream = False; continue + if chunk: + try: + if is_last_chunk: + # 判定为数据流的结束,gpt_replying_buffer也写完了 + logging.info(f'[response] {result}') + break + result += chunkjson['choices'][0]["delta"]["content"] + if not console_slience: print(chunkjson['choices'][0]["delta"]["content"], end='') + if observe_window is not None: + # 观测窗,把已经获取的数据显示出去 + if len(observe_window) >= 1: + observe_window[0] += chunkjson['choices'][0]["delta"]["content"] + # 看门狗,如果超过期限没有喂狗,则终止 + if len(observe_window) >= 2: + if (time.time()-observe_window[1]) > watch_dog_patience: + raise RuntimeError("用户取消了程序。") + except Exception as e: + chunk = get_full_error(chunk, stream_response) + chunk_decoded = chunk.decode() + error_msg = chunk_decoded + print(error_msg) + raise RuntimeError("Json解析不合常规") + return result + + +def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): + """ + 发送至chatGPT,流式获取输出。 + 用于基础的对话功能。 + inputs 是本次问询的输入 + top_p, temperature是chatGPT的内部调优参数 + history 是之前的对话列表(注意无论是inputs还是history,内容太长了都会触发token数量溢出的错误) + chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容 + additional_fn代表点击的哪个按钮,按钮见functional.py + """ + if len(YIMODEL_API_KEY) == 0: + raise RuntimeError("没有设置YIMODEL_API_KEY选项") + if inputs == "": inputs = "空空如也的输入栏" + user_input = inputs + if additional_fn is not None: + from core_functional import handle_core_functionality + inputs, history = 
handle_core_functionality(additional_fn, inputs, history, chatbot) + + raw_input = inputs + logging.info(f'[raw_input] {raw_input}') + chatbot.append((inputs, "")) + yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 + + # check mis-behavior + if is_the_upload_folder(user_input): + chatbot[-1] = (inputs, f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。") + yield from update_ui(chatbot=chatbot, history=history, msg="正常") # 刷新界面 + time.sleep(2) + + headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream) + + from .bridge_all import model_info + endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] + + history.append(inputs); history.append("") + + retry = 0 + while True: + try: + # make a POST request to the API endpoint, stream=True + response = requests.post(endpoint, headers=headers, proxies=proxies, + json=payload, stream=True, timeout=TIMEOUT_SECONDS);break + except: + retry += 1 + chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg)) + retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else "" + yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面 + if retry > MAX_RETRY: raise TimeoutError + + gpt_replying_buffer = "" + + is_head_of_the_stream = True + if stream: + stream_response = response.iter_lines() + while True: + try: + chunk = next(stream_response) + except StopIteration: + break + except requests.exceptions.ConnectionError: + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + + # 提前读取一些信息 (用于判断异常) + chunk_decoded, chunkjson, is_last_chunk = decode_chunk(chunk) + + if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r'"role":"assistant"' in chunk_decoded): + # 数据流的第一帧不携带content + is_head_of_the_stream = False; continue + + if chunk: + try: + if is_last_chunk: + # 判定为数据流的结束,gpt_replying_buffer也写完了 + logging.info(f'[response] {gpt_replying_buffer}') + break + # 处理数据流的主体 + status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}" + gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"] + # 如果这里抛出异常,一般是文本过长,详情见get_full_error的输出 + history[-1] = gpt_replying_buffer + chatbot[-1] = (history[-2], history[-1]) + yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面 + except Exception as e: + yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面 + chunk = get_full_error(chunk, stream_response) + chunk_decoded = chunk.decode() + error_msg = chunk_decoded + chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg) + yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面 + print(error_msg) + return + +def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg): + from .bridge_all import model_info + if "bad_request" in error_msg: + chatbot[-1] = (chatbot[-1][0], "[Local Message] 已经超过了模型的最大上下文或是模型格式错误,请尝试削减单次输入的文本量。") + elif "authentication_error" in error_msg: + chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. 
请确保API key有效。") + elif "not_found" in error_msg: + chatbot[-1] = (chatbot[-1][0], f"[Local Message] {llm_kwargs['llm_model']} 无效,请确保使用小写的模型名称。") + elif "rate_limit" in error_msg: + chatbot[-1] = (chatbot[-1][0], "[Local Message] 遇到了控制请求速率限制,请一分钟后重试。") + elif "system_busy" in error_msg: + chatbot[-1] = (chatbot[-1][0], "[Local Message] 系统繁忙,请一分钟后重试。") + else: + from toolbox import regular_txt_to_markdown + tb_str = '```\n' + trimmed_format_exc() + '```' + chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}") + return chatbot, history + +def generate_payload(inputs, llm_kwargs, history, system_prompt, stream): + """ + 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 + """ + api_key = f"Bearer {YIMODEL_API_KEY}" + + headers = { + "Content-Type": "application/json", + "Authorization": api_key + } + + conversation_cnt = len(history) // 2 + + messages = [{"role": "system", "content": system_prompt}] + if conversation_cnt: + for index in range(0, 2*conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = history[index] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = history[index+1] + if what_i_have_asked["content"] != "": + if what_gpt_answer["content"] == "": continue + if what_gpt_answer["content"] == timeout_bot_msg: continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]['content'] = what_gpt_answer['content'] + + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = inputs + messages.append(what_i_ask_now) + model = llm_kwargs['llm_model'] + if llm_kwargs['llm_model'].startswith('one-api-'): + model = llm_kwargs['llm_model'][len('one-api-'):] + model, _ = read_one_api_model_name(model) + tokens = 600 if llm_kwargs['llm_model'] == 'yi-34b-chat-0205' else 4096 #yi-34b-chat-0205只有4k上下文... + payload = { + "model": model, + "messages": messages, + "temperature": llm_kwargs['temperature'], # 1.0, + "stream": stream, + "max_tokens": tokens + } + try: + print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........") + except: + print('输入中可能存在乱码。') + return headers,payload \ No newline at end of file
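
The new backends are switched on from config.py (or config_private.py). A minimal, hypothetical fragment, assuming the repository's existing AVAIL_LLM_MODELS and ANTHROPIC_API_KEY options together with the YIMODEL_API_KEY added by this patch; the key values below are placeholders only:

# Illustrative config_private.py fragment -- key values are placeholders.
AVAIL_LLM_MODELS = [
    "gpt-3.5-turbo-16k",
    "claude-3-opus-20240229", "claude-3-sonnet-20240229",   # Claude 3 via the new HTTP bridge
    "yi-34b-chat-0205", "yi-34b-chat-200k",                 # Yi models added by this patch
]
ANTHROPIC_API_KEY = "sk-ant-xxxx"   # placeholder
YIMODEL_API_KEY = "xxxx"            # placeholder, issued by lingyiwanwu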
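
For reference, a minimal, self-contained sketch of the streaming pattern the rewritten bridge_claude.py now uses: a plain requests POST to the Anthropic Messages endpoint and line-by-line parsing of the SSE stream. The endpoint, header names, payload keys and event types follow the patch; the function name, prompt and timeout are illustrative assumptions, and the real bridge additionally passes temperature, system and the configured proxies.

import json
import requests

def demo_claude_stream(api_key, prompt, model="claude-3-sonnet-20240229"):
    # Same headers as generate_payload() builds in bridge_claude.py
    headers = {
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    payload = {
        "model": model,
        "max_tokens": 4096,
        "messages": [{"role": "user", "content": [{"type": "text", "text": prompt}]}],
        "stream": True,
    }
    response = requests.post("https://api.anthropic.com/v1/messages",
                             headers=headers, json=payload, stream=True, timeout=60)
    result = ""
    for raw in response.iter_lines():
        line = raw.decode()
        if line.startswith("event:"):
            event = line.split(":", 1)[1].strip()
            if event in ("content_block_stop", "message_stop"):
                break                      # end of the reply
            continue                       # message_start / content_block_start carry no text
        if line.startswith("data:"):
            chunk = json.loads(line[6:])   # strip the "data: " prefix, as decode_chunk() does
            if chunk.get("type") == "content_block_delta":
                result += chunk["delta"]["text"]
    return result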
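
Images for the Claude 3 models are sent as base64 "image" content blocks whose media_type is now derived from the file extension instead of being hard-coded to image/jpeg. A sketch of that block construction; the helper name below is illustrative, while the patch itself uses encode_image() from toolbox together with multiple_picture_types():

import base64

def image_block(image_path):
    # media_type inferred from the extension, falling back to image/jpeg
    ext = image_path.lower().rsplit(".", 1)[-1]
    media_type = {"jpg": "image/jpeg", "jpeg": "image/jpeg", "png": "image/png",
                  "gif": "image/gif", "webp": "image/webp"}.get(ext, "image/jpeg")
    with open(image_path, "rb") as f:
        data = base64.b64encode(f.read()).decode("utf-8")
    return {"type": "image",
            "source": {"type": "base64", "media_type": media_type, "data": data}}

Note that multiple_picture_types() in the patch is called with the whole image_paths list, so when images of different formats are uploaded in one request they all inherit the media_type of the first recognized file.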
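
The new bridge_yimodel.py talks to an OpenAI-compatible chat/completions endpoint with Bearer authentication and detects the end of the stream through a "lastOne" field rather than a [DONE] sentinel. A minimal sketch of that exchange, with the function name and prompt as illustrative assumptions; the real bridge also routes through the configured proxies and rebuilds the message list from history:

import json
import requests

def demo_yi_stream(api_key, prompt, model="yi-34b-chat-0205"):
    headers = {"Content-Type": "application/json",
               "Authorization": f"Bearer {api_key}"}
    payload = {
        "model": model,
        "messages": [{"role": "system", "content": "You are a helpful assistant."},
                     {"role": "user", "content": prompt}],
        "temperature": 1.0,
        "stream": True,
        "max_tokens": 600,   # the patch caps yi-34b-chat-0205 at 600 output tokens, 4096 otherwise
    }
    response = requests.post("https://api.lingyiwanwu.com/v1/chat/completions",
                             headers=headers, json=payload, stream=True, timeout=60)
    result = ""
    for raw in response.iter_lines():
        if not raw:
            continue
        try:
            chunk = json.loads(raw.decode()[6:])   # strip the "data: " prefix
        except json.JSONDecodeError:
            continue                               # keep-alives / non-JSON frames
        if chunk.get("lastOne", False):
            break                                  # end-of-stream marker checked by decode_chunk()
        delta = chunk.get("choices", [{}])[0].get("delta", {})
        result += delta.get("content", "")         # the first frame only carries the assistant role
    return result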