Dev #164

Merged · 13 commits merged on Nov 10, 2023

137 changes: 51 additions & 86 deletions README.md
@@ -20,7 +20,7 @@

## 📢 News

+ 【2023.11.08】 The `dev` branch now supports `openai=1.1.0`
+ 【2023.11.09】 The `dev` branch now supports `openai=1.2.0`


+ 【2023.11.03】 Added `function call` support for the `chatglm3` and `qwen` models, in both streaming and non-streaming modes; see the [tool usage example](https://github.com/xusenlinzy/api-for-open-llm/tree/master/examples/chatglm3/tool_using.py). The web `demo` has been integrated into [streamlit-demo](./streamlit-demo). A minimal request sketch follows below.
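As a rough illustration of what such a request can look like through the new v1 client (not part of this diff; the `base_url`, the `chatglm3` model name, and the `get_weather` schema below are placeholders for your own deployment):

```python
from openai import OpenAI

# Placeholder address and model name; point these at your own deployment.
client = OpenAI(api_key="EMPTY", base_url="http://192.168.20.59:7891/v1/")

# A made-up function schema, purely for illustration.
functions = [
    {
        "name": "get_weather",
        "description": "Query the weather for a given city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }
]

response = client.chat.completions.create(
    model="chatglm3",
    messages=[{"role": "user", "content": "北京今天天气怎么样?"}],
    functions=functions,
)
print(response.choices[0].message)
```

The same request shape also works with `stream=True` for the streaming mode mentioned above.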
@@ -148,30 +148,45 @@ streamlit run streamlit_app.py

![img.png](images/demo.png)

### [openai](https://github.com/openai/openai-python)
### [openai v1.1.0](https://github.com/openai/openai-python)

<details>
<summary>👉 Chat Completions</summary>

```python
import openai
from openai import OpenAI

openai.api_base = "http://192.168.0.xx:80/v1"

# Enter any non-empty API key to pass the client library's check.
openai.api_key = "xxx"
client = OpenAI(
    api_key="EMPTY",
    base_url="http://192.168.20.59:7891/v1/",
)

# Enter any non-empty model name to pass the client library's check.
completion = openai.ChatCompletion.create(
model="chatglm-6b",
# Chat completion API
chat_completion = client.chat.completions.create(
messages=[
{"role": "user", "content": "你好"},
{
"role": "user",
"content": "你好",
}
],
stream=False,
model="gpt-3.5-turbo",
)

print(completion.choices[0].message.content)
# 你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。
print(chat_completion)
# 你好👋!我是人工智能助手 ChatGLM3-6B,很高兴见到你,欢迎问我任何问题。


# stream = client.chat.completions.create(
#     messages=[
#         {
#             "role": "user",
#             "content": "感冒了怎么办",
#         }
#     ],
#     model="gpt-3.5-turbo",
#     stream=True,
# )
# for part in stream:
#     print(part.choices[0].delta.content or "", end="", flush=True)
```

</details>
@@ -180,17 +195,20 @@ print(completion.choices[0].message.content)
<summary>👉 Completions</summary>

```python
import openai

openai.api_base = "http://192.168.0.xx:80/v1"
from openai import OpenAI

# Enter any non-empty API key to pass the client library's check.
openai.api_key = "xxx"
client = OpenAI(
    api_key="EMPTY",
    base_url="http://192.168.20.59:7891/v1/",
)

# Enter any non-empty model name to pass the client library's check.
completion = openai.Completion.create(prompt="你好", model="chatglm-6b")

print(completion.choices[0].text)
# Completion API
completion = client.completions.create(
    model="gpt-3.5-turbo",
    prompt="你好",
)
print(completion)
# 你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。
```

@@ -200,82 +218,29 @@ print(completion.choices[0].text)
<summary>👉 Embeddings</summary>

```python
import openai
from openai import OpenAI

openai.api_base = "http://192.168.0.xx:80/v1"
client = OpenAI(
    api_key="EMPTY",
    base_url="http://192.168.20.59:7891/v1/",
)

# Enter any non-empty API key to pass the client library's check.
openai.api_key = "xxx"

# compute the embedding of the text
embedding = openai.Embedding.create(
    input="什么是chatgpt?",
    model="text2vec-large-chinese"
embedding = client.embeddings.create(
    input="你好",
    model="text-embedding-ada-002"
)
print(embedding)

print(embedding['data'][0]['embedding'])
```

</details>

### [langchain](https://github.com/hwchase17/langchain)

<details>
<summary>👉 Chat Completions</summary>

```python
import os

os.environ["OPENAI_API_BASE"] = "http://192.168.0.xx:80/v1"
os.environ["OPENAI_API_KEY"] = "xxx"

from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

chat = ChatOpenAI()
print(chat([HumanMessage(content="你好")]))
# content='你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。' additional_kwargs={}
```
</details>

<details>
<summary>👉 Completions</summary>

```python
import os

os.environ["OPENAI_API_BASE"] = "http://192.168.0.xx:80/v1"
os.environ["OPENAI_API_KEY"] = "xxx"

from langchain.llms import OpenAI

llm = OpenAI()
print(llm("你好"))
# 你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。
```

</details>

<details>
<summary>👉 Embeddings</summary>

```python
import os

os.environ["OPENAI_API_BASE"] = "http://192.168.0.xx:80/v1"
os.environ["OPENAI_API_KEY"] = "xxx"

from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()
query_result = embeddings.embed_query("什么是chatgpt?")
print(query_result)
```
</details>

### Compatible projects

**By modifying the `OPENAI_API_BASE` environment variable above, most `chatgpt` applications and front-end/back-end projects can plug in seamlessly!**
**By modifying the `OPENAI_API_BASE` environment variable, most `chatgpt` applications and front-end/back-end projects can plug in seamlessly (see the sketch after the project list below)!**

+ [ChatGPT-Next-Web: One-Click to deploy well-designed ChatGPT web UI on Vercel](https://github.com/Yidadaa/ChatGPT-Next-Web)

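In practice this usually comes down to the same two environment variables used in the examples above; a minimal sketch (the address is a placeholder for your own server):

```python
import os

# Placeholder address; most ChatGPT front-ends and SDKs read these variables at startup.
os.environ["OPENAI_API_BASE"] = "http://192.168.20.59:7891/v1"
os.environ["OPENAI_API_KEY"] = "EMPTY"
```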
12 changes: 4 additions & 8 deletions api/apapter/template.py
@@ -1,7 +1,7 @@
from functools import lru_cache
from typing import List, Optional, Union, Dict
from typing import List, Optional, Dict

from api.utils.protocol import ChatMessage
from openai.types.chat import ChatCompletionMessageParam


@lru_cache
@@ -32,14 +32,14 @@ def match(self, name) -> bool:

    def apply_chat_template(
        self,
        conversation: List[Union[Dict[str, str], ChatMessage]],
        conversation: List[ChatCompletionMessageParam],
        add_generation_prompt: bool = True,
    ) -> str:
        """
        Converts a Conversation object or a list of dictionaries with `"role"` and `"content"` keys to a prompt.

        Args:
            conversation (List[Union[Dict[str, str], ChatMessage]]): A Conversation object or list of dicts
            conversation (List[ChatCompletionMessageParam]): A Conversation object or list of dicts
                with "role" and "content" keys, representing the chat history so far.
            add_generation_prompt (bool, *optional*): Whether to end the prompt with the token(s) that indicate
                the start of an assistant message. This is useful when you want to generate a response from the model.
@@ -49,10 +49,6 @@ def apply_chat_template(
        Returns:
            `str`: A prompt, which is ready to pass to the tokenizer.
        """

        if isinstance(conversation[0], ChatMessage):
            conversation = [c.dict(exclude_none=True) for c in conversation]

        # Compilation function uses a cache to avoid recompiling the same template
        compiled_template = _compile_jinja_template(self.template)

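To illustrate what this change means for callers, a rough usage sketch of `apply_chat_template` with plain dict messages (the `get_prompt_adapter` helper name and the message contents are assumptions for the example, not taken from this diff):

```python
# Assumed helper name, for illustration only; any object exposing apply_chat_template works the same way.
template = get_prompt_adapter("chatglm3")

# Messages are now plain dicts in the shape of openai.types.chat.ChatCompletionMessageParam,
# so no ChatMessage objects (and no .dict() conversion) are needed anymore.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "你好"},
]

prompt = template.apply_chat_template(messages, add_generation_prompt=True)
print(prompt)
```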
13 changes: 7 additions & 6 deletions api/generation/baichuan.py
@@ -1,14 +1,15 @@
from typing import List

from transformers import PreTrainedTokenizer
from openai.types.chat import ChatCompletionMessageParam

from api.generation.utils import parse_messages
from api.utils.protocol import Role, ChatMessage
from api.utils.protocol import Role
from transformers import PreTrainedTokenizer


def build_baichuan_chat_input(
    tokenizer: PreTrainedTokenizer,
    messages: List[ChatMessage],
    messages: List[ChatCompletionMessageParam],
    context_len: int = 4096,
    max_new_tokens: int = 256
) -> List[int]:
@@ -22,11 +23,11 @@ def build_baichuan_chat_input(
    for r in rounds[::-1]:
        round_tokens = []
        for message in r:
            if message.role == Role.USER:
            if message["role"] == Role.USER:
                round_tokens.append(195)
            else:
                round_tokens.append(196)
            round_tokens.extend(tokenizer.encode(message.content))
            round_tokens.extend(tokenizer.encode(message["content"]))

        if len(history_tokens) == 0 or len(history_tokens) + len(round_tokens) <= max_history_tokens:
            history_tokens = round_tokens + history_tokens  # concat left
@@ -35,7 +36,7 @@ def build_baichuan_chat_input(
        break

    input_tokens = system_tokens + history_tokens
    if messages[-1].role != Role.ASSISTANT:
    if messages[-1]["role"] != Role.ASSISTANT:
        input_tokens.append(196)

    return input_tokens[-max_input_tokens:]  # truncate left
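For a rough sense of the new call shape, a sketch assuming a loaded Baichuan tokenizer (the model path and message content are illustrative only):

```python
from transformers import AutoTokenizer

from api.generation.baichuan import build_baichuan_chat_input

# Assumed model path, for illustration only.
tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True)

# Messages are now plain dicts; user turns are prefixed with token 195, assistant turns with 196,
# and older rounds are truncated from the left so the prompt fits within context_len - max_new_tokens.
messages = [{"role": "user", "content": "你好"}]
input_ids = build_baichuan_chat_input(tokenizer, messages, context_len=4096, max_new_tokens=256)
```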
15 changes: 8 additions & 7 deletions api/generation/chatglm.py
@@ -5,10 +5,11 @@

import torch
from loguru import logger
from openai.types.chat import ChatCompletionMessageParam
from transformers.generation.logits_process import LogitsProcessor

from api.generation.utils import apply_stopping_strings
from api.utils.protocol import Role, ChatMessage
from api.utils.protocol import Role


class InvalidScoreLogitsProcessor(LogitsProcessor):
@@ -77,7 +78,7 @@ def generate_stream_chatglm(
    context_len=2048,
    stream_interval=2,
):
    prompt = params["prompt"]
    prompt = params["messages"]
    temperature = float(params.get("temperature", 1.0))
    repetition_penalty = float(params.get("repetition_penalty", 1.0))
    top_p = float(params.get("top_p", 1.0))
@@ -147,8 +148,8 @@ def generate_stream_chatglm_v3(
    context_len=2048,
    stream_interval=2,
):
    prompt: List[ChatMessage] = params["prompt"]
    functions = params["functions"]
    prompt: List[ChatCompletionMessageParam] = params["prompt"]
    functions = params.get("functions", None)
    temperature = float(params.get("temperature", 1.0))
    repetition_penalty = float(params.get("repetition_penalty", 1.0))
    top_p = float(params.get("top_p", 1.0))
@@ -225,7 +226,7 @@ def generate_stream_chatglm_v3(
    torch.cuda.empty_cache()


def process_chatglm_messages(messages: List[ChatMessage], functions: Union[dict, List[dict]] = None) -> List[dict]:
def process_chatglm_messages(messages: List[ChatCompletionMessageParam], functions: Union[dict, List[dict]] = None) -> List[dict]:
    _messages = messages
    messages = []

@@ -239,10 +240,10 @@ def process_chatglm_messages(messages: List[ChatMessage], functions: Union[dict,
        )

    for m in _messages:
        role, content, func_call = m.role, m.content, m.function_call
        role, content = m["role"], m["content"]
        func_call = m.get("function_call", None)
        if role == Role.FUNCTION:
            messages.append({"role": "observation", "content": content})

        elif role == Role.ASSISTANT and func_call is not None:
            for response in content.split("<|assistant|>"):
                metadata, sub_content = response.split("\n", maxsplit=1)
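And a rough sketch of the dict-based flow through `process_chatglm_messages`, where a prior tool result arrives with role `function` and is mapped to ChatGLM3's `observation` role (the function schema and contents are made up for the example):

```python
from api.generation.chatglm import process_chatglm_messages

# Made-up function schema and tool output, purely for illustration.
functions = [
    {
        "name": "get_weather",
        "description": "Query the weather for a given city",
        "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
    }
]

messages = [
    {"role": "user", "content": "北京今天天气怎么样?"},
    {"role": "function", "content": '{"temperature": "20°C"}'},
]

processed = process_chatglm_messages(messages, functions=functions)
print(processed)
```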