From e42ede512b99325be29507193010f9b568d015cf Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Wed, 20 Mar 2024 17:22:23 +0800 Subject: [PATCH] Update Claude3 api request and fix some bugs (#1641) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update version to 3.74 * Add support for Yi Model API (#1635) * 更新以支持零一万物模型 * 删除newbing * 修改config --------- Co-authored-by: binary-husky * Update claude requrest to http type * Update for endpoint * Add support for other tpyes of pictures * Update pip packages * Fix console_slience issue while error handling * revert version changes --------- Co-authored-by: binary-husky --- config.py | 8 + request_llms/bridge_all.py | 64 +++++--- request_llms/bridge_claude.py | 221 +++++++++++++++---------- request_llms/bridge_yimodel.py | 283 +++++++++++++++++++++++++++++++++ 4 files changed, 476 insertions(+), 100 deletions(-) create mode 100644 request_llms/bridge_yimodel.py diff --git a/config.py b/config.py index 1bdb29955f..75e0b90bb4 100644 --- a/config.py +++ b/config.py @@ -47,6 +47,7 @@ # "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125" # "claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2", # "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama", +# "yi-34b-chat-0205", "yi-34b-chat-200k" # ] # --- --- --- --- # 此外,为了更灵活地接入one-api多模型管理界面,您还可以在接入one-api时, @@ -212,6 +213,10 @@ MOONSHOT_API_KEY = "" +# 零一万物(Yi Model) API KEY +YIMODEL_API_KEY = "" + + # Mathpix 拥有执行PDF的OCR功能,但是需要注册账号 MATHPIX_APPID = "" MATHPIX_APPKEY = "" @@ -313,6 +318,9 @@ ├── "glm-4", "glm-3-turbo", "zhipuai" 智谱AI大模型 │ └── ZHIPUAI_API_KEY │ +├── "yi-34b-chat-0205", "yi-34b-chat-200k" 等零一万物(Yi Model)大模型 +│ └── YIMODEL_API_KEY +│ ├── "qwen-turbo" 等通义千问大模型 │ └── DASHSCOPE_API_KEY │ diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 947074759a..31749f179d 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -62,7 +62,8 @@ def decode(self, *args, **kwargs): api2d_endpoint = "https://openai.api2d.net/v1/chat/completions" newbing_endpoint = "wss://sydney.bing.com/sydney/ChatHub" gemini_endpoint = "https://generativelanguage.googleapis.com/v1beta/models" -claude_endpoint = "https://api.anthropic.com" +claude_endpoint = "https://api.anthropic.com/v1/messages" +yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions" if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/' azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15' @@ -80,6 +81,7 @@ def decode(self, *args, **kwargs): if newbing_endpoint in API_URL_REDIRECT: newbing_endpoint = API_URL_REDIRECT[newbing_endpoint] if gemini_endpoint in API_URL_REDIRECT: gemini_endpoint = API_URL_REDIRECT[gemini_endpoint] if claude_endpoint in API_URL_REDIRECT: claude_endpoint = API_URL_REDIRECT[claude_endpoint] +if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint] # 获取tokenizer tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo") @@ -316,6 +318,7 @@ def decode(self, *args, **kwargs): "moonshot-v1-8k": { "fn_with_ui": moonshot_ui, "fn_without_ui": moonshot_no_ui, + "can_multi_thread": True, "endpoint": None, "max_token": 1024 * 8, "tokenizer": tokenizer_gpt35, @@ -324,6 +327,7 @@ def decode(self, *args, **kwargs): "moonshot-v1-32k": { "fn_with_ui": moonshot_ui, "fn_without_ui": moonshot_no_ui, + "can_multi_thread": True, "endpoint": None, "max_token": 1024 * 32, 
"tokenizer": tokenizer_gpt35, @@ -332,6 +336,7 @@ def decode(self, *args, **kwargs): "moonshot-v1-128k": { "fn_with_ui": moonshot_ui, "fn_without_ui": moonshot_no_ui, + "can_multi_thread": True, "endpoint": None, "max_token": 1024 * 128, "tokenizer": tokenizer_gpt35, @@ -473,22 +478,6 @@ def decode(self, *args, **kwargs): "token_cnt": get_token_num_gpt35, } }) -if "newbing-free" in AVAIL_LLM_MODELS: - try: - from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui - from .bridge_newbingfree import predict as newbingfree_ui - model_info.update({ - "newbing-free": { - "fn_with_ui": newbingfree_ui, - "fn_without_ui": newbingfree_noui, - "endpoint": newbing_endpoint, - "max_token": 4096, - "tokenizer": tokenizer_gpt35, - "token_cnt": get_token_num_gpt35, - } - }) - except: - print(trimmed_format_exc()) if "newbing" in AVAIL_LLM_MODELS: # same with newbing-free try: from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui @@ -521,6 +510,7 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) +# -=-=-=-=-=-=- 上海AI-LAB书生大模型 -=-=-=-=-=-=- if "internlm" in AVAIL_LLM_MODELS: try: from .bridge_internlm import predict_no_ui_long_connection as internlm_noui @@ -553,6 +543,7 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) +# -=-=-=-=-=-=- 通义-本地模型 -=-=-=-=-=-=- if "qwen-local" in AVAIL_LLM_MODELS: try: from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui @@ -570,6 +561,7 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) +# -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=- if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai try: from .bridge_qwen import predict_no_ui_long_connection as qwen_noui @@ -605,7 +597,35 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) -if "spark" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型 +# -=-=-=-=-=-=- 零一万物模型 -=-=-=-=-=-=- +if "yi-34b-chat-0205" in AVAIL_LLM_MODELS or "yi-34b-chat-200k" in AVAIL_LLM_MODELS: # zhipuai + try: + from .bridge_yimodel import predict_no_ui_long_connection as yimodel_noui + from .bridge_yimodel import predict as yimodel_ui + model_info.update({ + "yi-34b-chat-0205": { + "fn_with_ui": yimodel_ui, + "fn_without_ui": yimodel_noui, + "can_multi_thread": False, # 目前来说,默认情况下并发量极低,因此禁用 + "endpoint": yimodel_endpoint, + "max_token": 4000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "yi-34b-chat-200k": { + "fn_with_ui": yimodel_ui, + "fn_without_ui": yimodel_noui, + "can_multi_thread": False, # 目前来说,默认情况下并发量极低,因此禁用 + "endpoint": yimodel_endpoint, + "max_token": 200000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + }) + except: + print(trimmed_format_exc()) +# -=-=-=-=-=-=- 讯飞星火认知大模型 -=-=-=-=-=-=- +if "spark" in AVAIL_LLM_MODELS: try: from .bridge_spark import predict_no_ui_long_connection as spark_noui from .bridge_spark import predict as spark_ui @@ -681,6 +701,7 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) +# -=-=-=-=-=-=- 智谱 -=-=-=-=-=-=- if "zhipuai" in AVAIL_LLM_MODELS: # zhipuai 是glm-4的别名,向后兼容配置 try: model_info.update({ @@ -695,6 +716,7 @@ def decode(self, *args, **kwargs): }) except: print(trimmed_format_exc()) +# -=-=-=-=-=-=- 幻方-深度求索大模型 -=-=-=-=-=-=- if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder try: from .bridge_deepseekcoder import predict_no_ui_long_connection as deepseekcoder_noui @@ -711,6 +733,8 @@ def decode(self, *args, 
**kwargs): }) except: print(trimmed_format_exc()) + + # -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=- for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]: # 为了更灵活地接入one-api多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"] @@ -735,8 +759,8 @@ def decode(self, *args, **kwargs): }) -# <-- 用于定义和切换多个azure模型 --> -AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY") +# -=-=-=-=-=-=- azure模型对齐支持 -=-=-=-=-=-=- +AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY") # <-- 用于定义和切换多个azure模型 --> if len(AZURE_CFG_ARRAY) > 0: for azure_model_name, azure_cfg_dict in AZURE_CFG_ARRAY.items(): # 可能会覆盖之前的配置,但这是意料之中的 diff --git a/request_llms/bridge_claude.py b/request_llms/bridge_claude.py index 50c0329aae..6eb0a89b21 100644 --- a/request_llms/bridge_claude.py +++ b/request_llms/bridge_claude.py @@ -9,12 +9,13 @@ 具备多线程调用能力的函数 2. predict_no_ui_long_connection:支持多线程 """ - +import logging import os import time import traceback from toolbox import get_conf, update_ui, trimmed_format_exc, encode_image, every_image_file_in_path - +import json +import requests picture_system_prompt = "\n当回复图像时,必须说明正在回复哪张图像。所有图像仅在最后一个问题中提供,即使它们在历史记录中被提及。请使用'这是第X张图像:'的格式来指明您正在描述的是哪张图像。" Claude_3_Models = ["claude-3-sonnet-20240229", "claude-3-opus-20240229"] @@ -38,6 +39,34 @@ def get_full_error(chunk, stream_response): break return chunk +def decode_chunk(chunk): + # 提前读取一些信息(用于判断异常) + chunk_decoded = chunk.decode() + chunkjson = None + is_last_chunk = False + need_to_pass = False + if chunk_decoded.startswith('data:'): + try: + chunkjson = json.loads(chunk_decoded[6:]) + except: + need_to_pass = True + pass + elif chunk_decoded.startswith('event:'): + try: + event_type = chunk_decoded.split(':')[1].strip() + if event_type == 'content_block_stop' or event_type == 'message_stop': + is_last_chunk = True + elif event_type == 'content_block_start' or event_type == 'message_start': + need_to_pass = True + pass + except: + need_to_pass = True + pass + else: + need_to_pass = True + pass + return need_to_pass, chunkjson, is_last_chunk + def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False): """ @@ -53,53 +82,60 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window = None: 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗 """ - from anthropic import Anthropic watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可 - if inputs == "": inputs = "空空如也的输入栏" - message = generate_payload(inputs, llm_kwargs, history, stream=True, image_paths=None) - retry = 0 if len(ANTHROPIC_API_KEY) == 0: raise RuntimeError("没有设置ANTHROPIC_API_KEY选项") + if inputs == "": inputs = "空空如也的输入栏" + headers, message = generate_payload(inputs, llm_kwargs, history, sys_prompt, image_paths=None) + retry = 0 + while True: try: # make a POST request to the API endpoint, stream=False from .bridge_all import model_info - anthropic = Anthropic(api_key=ANTHROPIC_API_KEY, base_url=model_info[llm_kwargs['llm_model']]['endpoint']) - # endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] - # with ProxyNetworkActivate() - stream = anthropic.messages.create( - messages=message, - max_tokens=4096, # The maximum number of tokens to generate before stopping. 
- model=llm_kwargs['llm_model'], - stream=True, - temperature = llm_kwargs['temperature'], - system=sys_prompt - ) - break - except Exception as e: + endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] + response = requests.post(endpoint, headers=headers, json=message, + proxies=proxies, stream=True, timeout=TIMEOUT_SECONDS);break + except requests.exceptions.ReadTimeout as e: retry += 1 traceback.print_exc() if retry > MAX_RETRY: raise TimeoutError if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') + stream_response = response.iter_lines() result = '' - try: - for completion in stream: - if completion.type == "message_start" or completion.type == "content_block_start": - continue - elif completion.type == "message_stop" or completion.type == "content_block_stop" or completion.type == "message_delta": - break - result += completion.delta.text - if not console_slience: print(completion.delta.text, end='') - if observe_window is not None: - # 观测窗,把已经获取的数据显示出去 - if len(observe_window) >= 1: observe_window[0] += completion.delta.text - # 看门狗,如果超过期限没有喂狗,则终止 - if len(observe_window) >= 2: - if (time.time()-observe_window[1]) > watch_dog_patience: - raise RuntimeError("用户取消了程序。") - except Exception as e: - traceback.print_exc() + while True: + try: chunk = next(stream_response) + except StopIteration: + break + except requests.exceptions.ConnectionError: + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + need_to_pass, chunkjson, is_last_chunk = decode_chunk(chunk) + if chunk: + try: + if need_to_pass: + pass + elif is_last_chunk: + logging.info(f'[response] {result}') + break + else: + if chunkjson and chunkjson['type'] == 'content_block_delta': + result += chunkjson['delta']['text'] + print(chunkjson['delta']['text'], end='') + if observe_window is not None: + # 观测窗,把已经获取的数据显示出去 + if len(observe_window) >= 1: + observe_window[0] += chunkjson['delta']['text'] + # 看门狗,如果超过期限没有喂狗,则终止 + if len(observe_window) >= 2: + if (time.time()-observe_window[1]) > watch_dog_patience: + raise RuntimeError("用户取消了程序。") + except Exception as e: + chunk = get_full_error(chunk, stream_response) + chunk_decoded = chunk.decode() + error_msg = chunk_decoded + print(error_msg) + raise RuntimeError("Json解析不合常规") return result @@ -119,7 +155,6 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp additional_fn代表点击的哪个按钮,按钮见functional.py """ if inputs == "": inputs = "空空如也的输入栏" - from anthropic import Anthropic if len(ANTHROPIC_API_KEY) == 0: chatbot.append((inputs, "没有设置ANTHROPIC_API_KEY")) yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 @@ -145,7 +180,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 try: - message = generate_payload(inputs, llm_kwargs, history, stream, image_paths) + headers, message = generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths) except RuntimeError as e: chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。") yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # 刷新界面 @@ -158,46 +193,61 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp try: # make a POST request to the API endpoint, stream=True from .bridge_all import model_info - anthropic = Anthropic(api_key=ANTHROPIC_API_KEY, base_url=model_info[llm_kwargs['llm_model']]['endpoint']) - # endpoint = 
model_info[llm_kwargs['llm_model']]['endpoint'] - # with ProxyNetworkActivate() - stream = anthropic.messages.create( - messages=message, - max_tokens=4096, # The maximum number of tokens to generate before stopping. - model=llm_kwargs['llm_model'], - stream=True, - temperature = llm_kwargs['temperature'], - system=system_prompt - ) - break - except: + endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] + response = requests.post(endpoint, headers=headers, json=message, + proxies=proxies, stream=True, timeout=TIMEOUT_SECONDS);break + except requests.exceptions.ReadTimeout as e: retry += 1 - chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg)) - retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else "" - yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面 + traceback.print_exc() if retry > MAX_RETRY: raise TimeoutError - + if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') + stream_response = response.iter_lines() gpt_replying_buffer = "" - for completion in stream: - if completion.type == "message_start" or completion.type == "content_block_start": - continue - elif completion.type == "message_stop" or completion.type == "content_block_stop" or completion.type == "message_delta": + while True: + try: chunk = next(stream_response) + except StopIteration: break - try: - gpt_replying_buffer = gpt_replying_buffer + completion.delta.text - history[-1] = gpt_replying_buffer - chatbot[-1] = (history[-2], history[-1]) - yield from update_ui(chatbot=chatbot, history=history, msg='正常') # 刷新界面 - - except Exception as e: - from toolbox import regular_txt_to_markdown - tb_str = '```\n' + trimmed_format_exc() + '```' - chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str}") - yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + tb_str) # 刷新界面 - return - -def generate_payload(inputs, llm_kwargs, history, stream, image_paths): + except requests.exceptions.ConnectionError: + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + need_to_pass, chunkjson, is_last_chunk = decode_chunk(chunk) + if chunk: + try: + if need_to_pass: + pass + elif is_last_chunk: + logging.info(f'[response] {gpt_replying_buffer}') + break + else: + if chunkjson and chunkjson['type'] == 'content_block_delta': + gpt_replying_buffer += chunkjson['delta']['text'] + history[-1] = gpt_replying_buffer + chatbot[-1] = (history[-2], history[-1]) + yield from update_ui(chatbot=chatbot, history=history, msg='正常') # 刷新界面 + + except Exception as e: + chunk = get_full_error(chunk, stream_response) + chunk_decoded = chunk.decode() + error_msg = chunk_decoded + print(error_msg) + raise RuntimeError("Json解析不合常规") + +def multiple_picture_types(image_paths): + """ + 根据图片类型返回image/jpeg, image/png, image/gif, image/webp,无法判断则返回image/jpeg + """ + for image_path in image_paths: + if image_path.endswith('.jpeg') or image_path.endswith('.jpg'): + return 'image/jpeg' + elif image_path.endswith('.png'): + return 'image/png' + elif image_path.endswith('.gif'): + return 'image/gif' + elif image_path.endswith('.webp'): + return 'image/webp' + return 'image/jpeg' + +def generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths): """ 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 """ @@ -223,19 +273,16 @@ def generate_payload(inputs, llm_kwargs, history, stream, image_paths): messages[-1]['content'][0]['text'] = what_gpt_answer['content'][0]['text'] if any([llm_kwargs['llm_model'] == model for model in Claude_3_Models]) and image_paths: - base64_images = [] - for image_path in 
image_paths: - base64_images.append(encode_image(image_path)) what_i_ask_now = {} what_i_ask_now["role"] = "user" what_i_ask_now["content"] = [] - for base64_image in base64_images: + for image_path in image_paths: what_i_ask_now["content"].append({ "type": "image", "source": { "type": "base64", - "media_type": "image/jpeg", - "data": base64_image, + "media_type": multiple_picture_types(image_paths), + "data": encode_image(image_path), } }) what_i_ask_now["content"].append({"type": "text", "text": inputs}) @@ -244,4 +291,18 @@ def generate_payload(inputs, llm_kwargs, history, stream, image_paths): what_i_ask_now["role"] = "user" what_i_ask_now["content"] = [{"type": "text", "text": inputs}] messages.append(what_i_ask_now) - return messages \ No newline at end of file + # 开始整理headers与message + headers = { + 'x-api-key': ANTHROPIC_API_KEY, + 'anthropic-version': '2023-06-01', + 'content-type': 'application/json' + } + payload = { + 'model': llm_kwargs['llm_model'], + 'max_tokens': 4096, + 'messages': messages, + 'temperature': llm_kwargs['temperature'], + 'stream': True, + 'system': system_prompt + } + return headers, payload \ No newline at end of file diff --git a/request_llms/bridge_yimodel.py b/request_llms/bridge_yimodel.py new file mode 100644 index 0000000000..6d65a56d82 --- /dev/null +++ b/request_llms/bridge_yimodel.py @@ -0,0 +1,283 @@ +# 借鉴自同目录下的bridge_chatgpt.py + +""" + 该文件中主要包含三个函数 + + 不具备多线程能力的函数: + 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程 + + 具备多线程调用能力的函数 + 2. predict_no_ui_long_connection:支持多线程 +""" + +import json +import time +import gradio as gr +import logging +import traceback +import requests +import importlib +import random + +# config_private.py放自己的秘密如API和代理网址 +# 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件 +from toolbox import get_conf, update_ui, trimmed_format_exc, is_the_upload_folder, read_one_api_model_name +proxies, TIMEOUT_SECONDS, MAX_RETRY, YIMODEL_API_KEY = \ + get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'YIMODEL_API_KEY') + +timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' 
+ \ + '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。' + +def get_full_error(chunk, stream_response): + """ + 获取完整的从Openai返回的报错 + """ + while True: + try: + chunk += next(stream_response) + except: + break + return chunk + +def decode_chunk(chunk): + # 提前读取一些信息(用于判断异常) + chunk_decoded = chunk.decode() + chunkjson = None + is_last_chunk = False + try: + chunkjson = json.loads(chunk_decoded[6:]) + is_last_chunk = chunkjson.get("lastOne", False) + except: + pass + return chunk_decoded, chunkjson, is_last_chunk + +def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False): + """ + 发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。 + inputs: + 是本次问询的输入 + sys_prompt: + 系统静默prompt + llm_kwargs: + chatGPT的内部调优参数 + history: + 是之前的对话列表 + observe_window = None: + 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗 + """ + watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可 + if inputs == "": inputs = "空空如也的输入栏" + headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True) + retry = 0 + while True: + try: + # make a POST request to the API endpoint, stream=False + from .bridge_all import model_info + endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] + response = requests.post(endpoint, headers=headers, proxies=proxies, + json=payload, stream=True, timeout=TIMEOUT_SECONDS); break + except requests.exceptions.ReadTimeout as e: + retry += 1 + traceback.print_exc() + if retry > MAX_RETRY: raise TimeoutError + if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') + + stream_response = response.iter_lines() + result = '' + is_head_of_the_stream = True + while True: + try: chunk = next(stream_response) + except StopIteration: + break + except requests.exceptions.ConnectionError: + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + chunk_decoded, chunkjson, is_last_chunk = decode_chunk(chunk) + if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r'"role":"assistant"' in chunk_decoded): + # 数据流的第一帧不携带content + is_head_of_the_stream = False; continue + if chunk: + try: + if is_last_chunk: + # 判定为数据流的结束,gpt_replying_buffer也写完了 + logging.info(f'[response] {result}') + break + result += chunkjson['choices'][0]["delta"]["content"] + if not console_slience: print(chunkjson['choices'][0]["delta"]["content"], end='') + if observe_window is not None: + # 观测窗,把已经获取的数据显示出去 + if len(observe_window) >= 1: + observe_window[0] += chunkjson['choices'][0]["delta"]["content"] + # 看门狗,如果超过期限没有喂狗,则终止 + if len(observe_window) >= 2: + if (time.time()-observe_window[1]) > watch_dog_patience: + raise RuntimeError("用户取消了程序。") + except Exception as e: + chunk = get_full_error(chunk, stream_response) + chunk_decoded = chunk.decode() + error_msg = chunk_decoded + print(error_msg) + raise RuntimeError("Json解析不合常规") + return result + + +def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): + """ + 发送至chatGPT,流式获取输出。 + 用于基础的对话功能。 + inputs 是本次问询的输入 + top_p, temperature是chatGPT的内部调优参数 + history 是之前的对话列表(注意无论是inputs还是history,内容太长了都会触发token数量溢出的错误) + chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容 + additional_fn代表点击的哪个按钮,按钮见functional.py + """ + if len(YIMODEL_API_KEY) == 0: + raise RuntimeError("没有设置YIMODEL_API_KEY选项") + if inputs == "": inputs = "空空如也的输入栏" + user_input = inputs + if additional_fn is not None: + from core_functional import handle_core_functionality + inputs, history = 
handle_core_functionality(additional_fn, inputs, history, chatbot) + + raw_input = inputs + logging.info(f'[raw_input] {raw_input}') + chatbot.append((inputs, "")) + yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 + + # check mis-behavior + if is_the_upload_folder(user_input): + chatbot[-1] = (inputs, f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。") + yield from update_ui(chatbot=chatbot, history=history, msg="正常") # 刷新界面 + time.sleep(2) + + headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream) + + from .bridge_all import model_info + endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] + + history.append(inputs); history.append("") + + retry = 0 + while True: + try: + # make a POST request to the API endpoint, stream=True + response = requests.post(endpoint, headers=headers, proxies=proxies, + json=payload, stream=True, timeout=TIMEOUT_SECONDS);break + except: + retry += 1 + chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg)) + retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else "" + yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面 + if retry > MAX_RETRY: raise TimeoutError + + gpt_replying_buffer = "" + + is_head_of_the_stream = True + if stream: + stream_response = response.iter_lines() + while True: + try: + chunk = next(stream_response) + except StopIteration: + break + except requests.exceptions.ConnectionError: + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + + # 提前读取一些信息 (用于判断异常) + chunk_decoded, chunkjson, is_last_chunk = decode_chunk(chunk) + + if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r'"role":"assistant"' in chunk_decoded): + # 数据流的第一帧不携带content + is_head_of_the_stream = False; continue + + if chunk: + try: + if is_last_chunk: + # 判定为数据流的结束,gpt_replying_buffer也写完了 + logging.info(f'[response] {gpt_replying_buffer}') + break + # 处理数据流的主体 + status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}" + gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"] + # 如果这里抛出异常,一般是文本过长,详情见get_full_error的输出 + history[-1] = gpt_replying_buffer + chatbot[-1] = (history[-2], history[-1]) + yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面 + except Exception as e: + yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面 + chunk = get_full_error(chunk, stream_response) + chunk_decoded = chunk.decode() + error_msg = chunk_decoded + chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg) + yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面 + print(error_msg) + return + +def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg): + from .bridge_all import model_info + if "bad_request" in error_msg: + chatbot[-1] = (chatbot[-1][0], "[Local Message] 已经超过了模型的最大上下文或是模型格式错误,请尝试削减单次输入的文本量。") + elif "authentication_error" in error_msg: + chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. 
请确保API key有效。") + elif "not_found" in error_msg: + chatbot[-1] = (chatbot[-1][0], f"[Local Message] {llm_kwargs['llm_model']} 无效,请确保使用小写的模型名称。") + elif "rate_limit" in error_msg: + chatbot[-1] = (chatbot[-1][0], "[Local Message] 遇到了控制请求速率限制,请一分钟后重试。") + elif "system_busy" in error_msg: + chatbot[-1] = (chatbot[-1][0], "[Local Message] 系统繁忙,请一分钟后重试。") + else: + from toolbox import regular_txt_to_markdown + tb_str = '```\n' + trimmed_format_exc() + '```' + chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}") + return chatbot, history + +def generate_payload(inputs, llm_kwargs, history, system_prompt, stream): + """ + 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 + """ + api_key = f"Bearer {YIMODEL_API_KEY}" + + headers = { + "Content-Type": "application/json", + "Authorization": api_key + } + + conversation_cnt = len(history) // 2 + + messages = [{"role": "system", "content": system_prompt}] + if conversation_cnt: + for index in range(0, 2*conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = history[index] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = history[index+1] + if what_i_have_asked["content"] != "": + if what_gpt_answer["content"] == "": continue + if what_gpt_answer["content"] == timeout_bot_msg: continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]['content'] = what_gpt_answer['content'] + + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = inputs + messages.append(what_i_ask_now) + model = llm_kwargs['llm_model'] + if llm_kwargs['llm_model'].startswith('one-api-'): + model = llm_kwargs['llm_model'][len('one-api-'):] + model, _ = read_one_api_model_name(model) + tokens = 600 if llm_kwargs['llm_model'] == 'yi-34b-chat-0205' else 4096 #yi-34b-chat-0205只有4k上下文... + payload = { + "model": model, + "messages": messages, + "temperature": llm_kwargs['temperature'], # 1.0, + "stream": stream, + "max_tokens": tokens + } + try: + print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........") + except: + print('输入中可能存在乱码。') + return headers,payload \ No newline at end of file
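
The new backends are switched on from config.py (or config_private.py). A minimal, hypothetical fragment, assuming the repository's existing AVAIL_LLM_MODELS and ANTHROPIC_API_KEY options together with the YIMODEL_API_KEY added by this patch; the key values below are placeholders only:

# Illustrative config_private.py fragment -- key values are placeholders.
AVAIL_LLM_MODELS = [
    "gpt-3.5-turbo-16k",
    "claude-3-opus-20240229", "claude-3-sonnet-20240229",   # Claude 3 via the new HTTP bridge
    "yi-34b-chat-0205", "yi-34b-chat-200k",                 # Yi models added by this patch
]
ANTHROPIC_API_KEY = "sk-ant-xxxx"   # placeholder
YIMODEL_API_KEY = "xxxx"            # placeholder, issued by lingyiwanwu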
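
For reference, a minimal, self-contained sketch of the streaming pattern the rewritten bridge_claude.py now uses: a plain requests POST to the Anthropic Messages endpoint and line-by-line parsing of the SSE stream. The endpoint, header names, payload keys and event types follow the patch; the function name, prompt and timeout are illustrative assumptions, and the real bridge additionally passes temperature, system and the configured proxies.

import json
import requests

def demo_claude_stream(api_key, prompt, model="claude-3-sonnet-20240229"):
    # Same headers as generate_payload() builds in bridge_claude.py
    headers = {
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    payload = {
        "model": model,
        "max_tokens": 4096,
        "messages": [{"role": "user", "content": [{"type": "text", "text": prompt}]}],
        "stream": True,
    }
    response = requests.post("https://api.anthropic.com/v1/messages",
                             headers=headers, json=payload, stream=True, timeout=60)
    result = ""
    for raw in response.iter_lines():
        line = raw.decode()
        if line.startswith("event:"):
            event = line.split(":", 1)[1].strip()
            if event in ("content_block_stop", "message_stop"):
                break                      # end of the reply
            continue                       # message_start / content_block_start carry no text
        if line.startswith("data:"):
            chunk = json.loads(line[6:])   # strip the "data: " prefix, as decode_chunk() does
            if chunk.get("type") == "content_block_delta":
                result += chunk["delta"]["text"]
    return result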
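
Images for the Claude 3 models are sent as base64 "image" content blocks whose media_type is now derived from the file extension instead of being hard-coded to image/jpeg. A sketch of that block construction; the helper name below is illustrative, while the patch itself uses encode_image() from toolbox together with multiple_picture_types():

import base64

def image_block(image_path):
    # media_type inferred from the extension, falling back to image/jpeg
    ext = image_path.lower().rsplit(".", 1)[-1]
    media_type = {"jpg": "image/jpeg", "jpeg": "image/jpeg", "png": "image/png",
                  "gif": "image/gif", "webp": "image/webp"}.get(ext, "image/jpeg")
    with open(image_path, "rb") as f:
        data = base64.b64encode(f.read()).decode("utf-8")
    return {"type": "image",
            "source": {"type": "base64", "media_type": media_type, "data": data}}

Note that multiple_picture_types() in the patch is called with the whole image_paths list, so when images of different formats are uploaded in one request they all inherit the media_type of the first recognized file.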
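
The new bridge_yimodel.py talks to an OpenAI-compatible chat/completions endpoint with Bearer authentication and detects the end of the stream through a "lastOne" field rather than a [DONE] sentinel. A minimal sketch of that exchange, with the function name and prompt as illustrative assumptions; the real bridge also routes through the configured proxies and rebuilds the message list from history:

import json
import requests

def demo_yi_stream(api_key, prompt, model="yi-34b-chat-0205"):
    headers = {"Content-Type": "application/json",
               "Authorization": f"Bearer {api_key}"}
    payload = {
        "model": model,
        "messages": [{"role": "system", "content": "You are a helpful assistant."},
                     {"role": "user", "content": prompt}],
        "temperature": 1.0,
        "stream": True,
        "max_tokens": 600,   # the patch caps yi-34b-chat-0205 at 600 output tokens, 4096 otherwise
    }
    response = requests.post("https://api.lingyiwanwu.com/v1/chat/completions",
                             headers=headers, json=payload, stream=True, timeout=60)
    result = ""
    for raw in response.iter_lines():
        if not raw:
            continue
        try:
            chunk = json.loads(raw.decode()[6:])   # strip the "data: " prefix
        except json.JSONDecodeError:
            continue                               # keep-alives / non-JSON frames
        if chunk.get("lastOne", False):
            break                                  # end-of-stream marker checked by decode_chunk()
        delta = chunk.get("choices", [{}])[0].get("delta", {})
        result += delta.get("content", "")         # the first frame only carries the assistant role
    return result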