
Commit 995a1a1

xusenlin committed: tiny fix
1 parent 7389089 commit 995a1a1

File tree

2 files changed: +48 −37 lines changed


api/adapter/template.py

Lines changed: 45 additions & 37 deletions
@@ -9,7 +9,7 @@


 @lru_cache
-def _compile_jinja_template(chat_template):
+def _compile_jinja_template(chat_template: str):
     try:
         from jinja2.exceptions import TemplateError
         from jinja2.sandbox import ImmutableSandboxedEnvironment
@@ -64,8 +64,15 @@ def apply_chat_template(
         return rendered

     @property
-    def template(self):
-        raise NotImplementedError
+    def template(self) -> str:
+        return (
+            "{% for message in messages %}"
+            "{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}"
+            "{% endfor %}"
+            "{% if add_generation_prompt %}"
+            "{{ '<|im_start|>assistant\\n' }}"
+            "{% endif %}"
+        )

     def postprocess_messages(
         self,
@@ -80,8 +87,8 @@ def parse_assistant_response(
         output: str,
         functions: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
-    ) -> Tuple[str, Union[str, Dict[str, Any]]]:
-        raise NotImplementedError
+    ) -> Tuple[str, Optional[Union[str, Dict[str, Any]]]]:
+        return output, None


 # A global registry for all prompt adapters
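Note: with these two hunks, BaseTemplate.template no longer raises NotImplementedError but falls back to the standard ChatML layout, and parse_assistant_response defaults to returning the raw output with no function call. A minimal sketch of what the new default template renders, assuming jinja2 is installed (the sandboxed environment mirrors the one _compile_jinja_template imports):

from jinja2.sandbox import ImmutableSandboxedEnvironment

# The exact default template string added to BaseTemplate.template above.
chat_template = (
    "{% for message in messages %}"
    "{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|im_start|>assistant\\n' }}"
    "{% endif %}"
)

rendered = ImmutableSandboxedEnvironment().from_string(chat_template).render(
    messages=[{"role": "user", "content": "Hello!"}],
    add_generation_prompt=True,
)
print(rendered)
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant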
@@ -119,7 +126,7 @@ class QwenTemplate(BaseTemplate):
     function_call_available = True

     @property
-    def template(self):
+    def template(self) -> str:
        """ This template formats inputs in the standard ChatML format. See
        https://github.com/openai/openai-python/blob/main/chatml.md
        """
@@ -138,7 +145,7 @@ def parse_assistant_response(
         output: str,
         functions: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
-    ) -> Tuple[str, Union[str, Dict[str, Any]]]:
+    ) -> Tuple[str, Optional[Union[str, Dict[str, Any]]]]:
         func_name, func_args = "", ""
         i = output.rfind("\nAction:")
         j = output.rfind("\nAction Input:")
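For orientation, a hypothetical sketch (not the project's full parser) of the ReAct-style output those rfind calls slice up; the remainder of the method lies outside this hunk:

# Hypothetical model output in the Qwen ReAct format this parser targets.
output = (
    "Thought: I should call a tool.\n"
    "Action: get_weather\n"
    "Action Input: {\"city\": \"Paris\"}"
)

i = output.rfind("\nAction:")        # locate the tool-name line
j = output.rfind("\nAction Input:")  # locate the arguments line

func_name = output[i + len("\nAction:"):j].strip()       # "get_weather"
func_args = output[j + len("\nAction Input:"):].strip()  # '{"city": "Paris"}'
print(func_name, func_args)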
@@ -177,6 +184,7 @@ def parse_assistant_response(


 class Llama2Template(BaseTemplate):
+
     name = "llama2"
     system_prompt = "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe." \
                     "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content." \
@@ -189,7 +197,7 @@ class Llama2Template(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """
        LLaMA uses [INST] and [/INST] to indicate user messages, and <<SYS>> and <</SYS>> to indicate system messages.
        Assistant messages do not have special tokens, because LLaMA chat models are generally trained with strict
@@ -257,7 +265,7 @@ def match(self, name) -> bool:
         return name == "chatglm"

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        [Round 0]
@@ -292,7 +300,7 @@ def match(self, name) -> bool:
         return name == "chatglm2"

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        [Round 1]
@@ -337,7 +345,7 @@ def match(self, name) -> bool:
         return name == "chatglm3"

     @property
-    def template(self):
+    def template(self) -> str:
        """
        The reference for this chat template is [this code
        snippet](https://huggingface.co/THUDM/chatglm3-6b/blob/main/modeling_chatglm.py)
@@ -410,7 +418,7 @@ def parse_assistant_response(
         output: str,
         functions: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
-    ) -> Tuple[str, Union[str, Dict[str, Any]]]:
+    ) -> Tuple[str, Optional[Union[str, Dict[str, Any]]]]:
         content = ""
         for response in output.split("<|assistant|>"):
             if "\n" in response:
@@ -471,7 +479,7 @@ class MossTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <|Human|>: {Prompt}<eoh>
@@ -501,7 +509,7 @@ class PhoenixTemplate(BaseTemplate):
     allow_models = ["phoenix"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        Human: <s>{Prompt}</s>Assistant: <s>{Answer}</s>
@@ -536,7 +544,7 @@ class AlpacaTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        ### Instruction:
@@ -573,7 +581,7 @@ class FireflyTemplate(BaseTemplate):
     allow_models = ["firefly"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <s>{Prompt}</s>{Answer}</s>{Prompt}</s>
@@ -597,7 +605,7 @@ class FireflyForQwenTemplate(BaseTemplate):
     allow_models = ["firefly-qwen"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <|endoftext|>{Prompt}<|endoftext|>{Answer}<|endoftext|>{Prompt}<|endoftext|>
@@ -620,7 +628,7 @@ class BelleTemplate(BaseTemplate):
     allow_models = ["belle"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        Human: {Prompt}
@@ -658,7 +666,7 @@ class OpenBuddyTemplate(BaseTemplate):
     """

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        User: {Prompt}
@@ -692,7 +700,7 @@ class InternLMTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <s><|User|>:{Prompt}<eoh>
@@ -721,7 +729,7 @@ class BaiChuanTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <reserved_102>{Prompt}<reserved_103>{Answer}<reserved_102>{Prompt}<reserved_103>
@@ -747,7 +755,7 @@ class BaiChuan2Template(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <reserved_106>{Prompt}<reserved_107>{Answer}<reserved_106>{Prompt}<reserved_107>
@@ -773,7 +781,7 @@ class StarChatTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <|user|>
@@ -809,7 +817,7 @@ class AquilaChatTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        Human: {Prompt}###
@@ -850,7 +858,7 @@ class OctopackTemplate(BaseTemplate):
     allow_models = ["starcoder-self-instruct"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        Question:{Prompt}
@@ -878,7 +886,7 @@ class XverseTemplate(BaseTemplate):
     allow_models = ["xverse"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        Human: {Prompt}
@@ -905,7 +913,7 @@ class VicunaTemplate(BaseTemplate):
     allow_models = ["vicuna", "xwin"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        USER: {Prompt} ASSISTANT: {Answer}</s>USER: {Prompt} ASSISTANT:
@@ -933,7 +941,7 @@ class XuanYuanTemplate(BaseTemplate):
     allow_models = ["xuanyuan"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        Human: {Prompt} Assistant: {Answer}</s>Human: {Prompt} Assistant:
@@ -964,7 +972,7 @@ class PhindTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{% if messages[0]['role'] == 'system' %}"
             "{{ messages[0]['content'] }}"
@@ -1001,7 +1009,7 @@ def match(self, name) -> bool:
         return name == "deepseek-coder"

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{% if messages[0]['role'] == 'system' %}"
             "{{ messages[0]['content'] }}"
@@ -1028,7 +1036,7 @@ class DeepseekTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{{ '<|begin▁of▁sentence|>' }}"
             "{% for message in messages %}"
@@ -1052,7 +1060,7 @@ class BlueLMTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{% for message in messages %}"
             "{% if message['role'] == 'system' %}"
@@ -1072,7 +1080,7 @@ class ZephyrTemplate(BaseTemplate):
     allow_models = ["zephyr"]

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{% for message in messages %}"
             "{% if message['role'] == 'system' %}"
@@ -1100,7 +1108,7 @@ class HuatuoTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{% if messages[0]['role'] == 'system' %}"
             "{{ messages[0]['content'] }}"
@@ -1129,7 +1137,7 @@ class OrionStarTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{{ '<|startoftext|>' }}"
             "{% for message in messages %}"
@@ -1153,7 +1161,7 @@ class YiAITemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{% for message in messages %}"
             "{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}"
@@ -1203,6 +1211,6 @@ def template(self):
         {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
         {"role": "user", "content": "I'd like to show off how chat templating works!"},
     ]
-    template = get_prompt_adapter(prompt_name="deepseek")
+    template = get_prompt_adapter(prompt_name="yi")
     messages = template.postprocess_messages(chat)
     print(template.apply_chat_template(messages))

api/vllm_routes/completion.py

Lines changed: 3 additions & 0 deletions
@@ -64,6 +64,9 @@ async def create_completion(
     request.max_tokens = request.max_tokens or 128
     request, stop_token_ids = await handle_request(request, engine.prompt_adapter.stop, chat=False)

+    if isinstance(request.prompt, list):
+        request.prompt = request.prompt[0]
+
     params = request.model_dump()
     params.update(dict(stop_token_ids=stop_token_ids, prompt_or_messages=request.prompt))
     logger.debug(f"==== request ====\n{params}")
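The added guard covers OpenAI-style /v1/completions payloads in which prompt arrives as a list of strings; this route serves one prompt at a time, so only the first element is kept. A minimal sketch of the behavior, with a hypothetical payload:

# Hypothetical payload: the OpenAI completions API accepts `prompt`
# as either a string or a list of strings.
prompt = ["Tell me a joke.", "This second prompt is dropped."]

# The guard added above: collapse a list prompt to its first element.
if isinstance(prompt, list):
    prompt = prompt[0]

print(prompt)  # -> "Tell me a joke."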
