
Commit 995a1a1

xusenlin committed: tiny fix
1 parent 7389089 commit 995a1a1

File tree

2 files changed: +48 −37 lines changed


api/adapter/template.py

Lines changed: 45 additions & 37 deletions
@@ -9,7 +9,7 @@


 @lru_cache
-def _compile_jinja_template(chat_template):
+def _compile_jinja_template(chat_template: str):
     try:
         from jinja2.exceptions import TemplateError
         from jinja2.sandbox import ImmutableSandboxedEnvironment
@@ -64,8 +64,15 @@ def apply_chat_template(
         return rendered

     @property
-    def template(self):
-        raise NotImplementedError
+    def template(self) -> str:
+        return (
+            "{% for message in messages %}"
+            "{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}"
+            "{% endfor %}"
+            "{% if add_generation_prompt %}"
+            "{{ '<|im_start|>assistant\\n' }}"
+            "{% endif %}"
+        )

     def postprocess_messages(
         self,
@@ -80,8 +87,8 @@ def parse_assistant_response(
         output: str,
         functions: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
-    ) -> Tuple[str, Union[str, Dict[str, Any]]]:
-        raise NotImplementedError
+    ) -> Tuple[str, Optional[Union[str, Dict[str, Any]]]]:
+        return output, None


 # A global registry for all prompt adapters
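Note: with these two hunks, BaseTemplate.template no longer raises NotImplementedError but falls back to the standard ChatML layout, and parse_assistant_response defaults to returning the raw output with no function call. A minimal sketch of what the new default template renders, assuming jinja2 is installed (the sandboxed environment mirrors the one _compile_jinja_template imports):

from jinja2.sandbox import ImmutableSandboxedEnvironment

# The exact default template string added to BaseTemplate.template above.
chat_template = (
    "{% for message in messages %}"
    "{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|im_start|>assistant\\n' }}"
    "{% endif %}"
)

rendered = ImmutableSandboxedEnvironment().from_string(chat_template).render(
    messages=[{"role": "user", "content": "Hello!"}],
    add_generation_prompt=True,
)
print(rendered)
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant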
@@ -119,7 +126,7 @@ class QwenTemplate(BaseTemplate):
     function_call_available = True

     @property
-    def template(self):
+    def template(self) -> str:
        """ This template formats inputs in the standard ChatML format. See
        https://github.com/openai/openai-python/blob/main/chatml.md
        """
@@ -138,7 +145,7 @@ def parse_assistant_response(
         output: str,
         functions: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
-    ) -> Tuple[str, Union[str, Dict[str, Any]]]:
+    ) -> Tuple[str, Optional[Union[str, Dict[str, Any]]]]:
         func_name, func_args = "", ""
         i = output.rfind("\nAction:")
         j = output.rfind("\nAction Input:")
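For orientation, a hypothetical sketch (not the project's full parser) of the ReAct-style output those rfind calls slice up; the remainder of the method lies outside this hunk:

# Hypothetical model output in the Qwen ReAct format this parser targets.
output = (
    "Thought: I should call a tool.\n"
    "Action: get_weather\n"
    "Action Input: {\"city\": \"Paris\"}"
)

i = output.rfind("\nAction:")        # locate the tool-name line
j = output.rfind("\nAction Input:")  # locate the arguments line

func_name = output[i + len("\nAction:"):j].strip()       # "get_weather"
func_args = output[j + len("\nAction Input:"):].strip()  # '{"city": "Paris"}'
print(func_name, func_args)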
@@ -177,6 +184,7 @@ def parse_assistant_response(


 class Llama2Template(BaseTemplate):
+
     name = "llama2"
     system_prompt = "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe." \
                     "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content." \
@@ -189,7 +197,7 @@ class Llama2Template(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """
        LLaMA uses [INST] and [/INST] to indicate user messages, and <<SYS>> and <</SYS>> to indicate system messages.
        Assistant messages do not have special tokens, because LLaMA chat models are generally trained with strict
@@ -257,7 +265,7 @@ def match(self, name) -> bool:
         return name == "chatglm"

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        [Round 0]
@@ -292,7 +300,7 @@ def match(self, name) -> bool:
         return name == "chatglm2"

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        [Round 1]
@@ -337,7 +345,7 @@ def match(self, name) -> bool:
         return name == "chatglm3"

     @property
-    def template(self):
+    def template(self) -> str:
        """
        The reference for this chat template is [this code
        snippet](https://huggingface.co/THUDM/chatglm3-6b/blob/main/modeling_chatglm.py)
@@ -410,7 +418,7 @@ def parse_assistant_response(
         output: str,
         functions: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
-    ) -> Tuple[str, Union[str, Dict[str, Any]]]:
+    ) -> Tuple[str, Optional[Union[str, Dict[str, Any]]]]:
         content = ""
         for response in output.split("<|assistant|>"):
             if "\n" in response:
@@ -471,7 +479,7 @@ class MossTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <|Human|>: {Prompt}<eoh>
@@ -501,7 +509,7 @@ class PhoenixTemplate(BaseTemplate):
     allow_models = ["phoenix"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        Human: <s>{Prompt}</s>Assistant: <s>{Answer}</s>
@@ -536,7 +544,7 @@ class AlpacaTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        ### Instruction:
@@ -573,7 +581,7 @@ class FireflyTemplate(BaseTemplate):
     allow_models = ["firefly"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <s>{Prompt}</s>{Answer}</s>{Prompt}</s>
@@ -597,7 +605,7 @@ class FireflyForQwenTemplate(BaseTemplate):
     allow_models = ["firefly-qwen"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <|endoftext|>{Prompt}<|endoftext|>{Answer}<|endoftext|>{Prompt}<|endoftext|>
@@ -620,7 +628,7 @@ class BelleTemplate(BaseTemplate):
     allow_models = ["belle"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        Human: {Prompt}
@@ -658,7 +666,7 @@ class OpenBuddyTemplate(BaseTemplate):
     """

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        User: {Prompt}
@@ -692,7 +700,7 @@ class InternLMTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <s><|User|>:{Prompt}<eoh>
@@ -721,7 +729,7 @@ class BaiChuanTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <reserved_102>{Prompt}<reserved_103>{Answer}<reserved_102>{Prompt}<reserved_103>
@@ -747,7 +755,7 @@ class BaiChuan2Template(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <reserved_106>{Prompt}<reserved_107>{Answer}<reserved_106>{Prompt}<reserved_107>
@@ -773,7 +781,7 @@ class StarChatTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        <|user|>
@@ -809,7 +817,7 @@ class AquilaChatTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        Human: {Prompt}###
@@ -850,7 +858,7 @@ class OctopackTemplate(BaseTemplate):
     allow_models = ["starcoder-self-instruct"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        Question:{Prompt}
@@ -878,7 +886,7 @@ class XverseTemplate(BaseTemplate):
     allow_models = ["xverse"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        Human: {Prompt}
@@ -905,7 +913,7 @@ class VicunaTemplate(BaseTemplate):
     allow_models = ["vicuna", "xwin"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        USER: {Prompt} ASSISTANT: {Answer}</s>USER: {Prompt} ASSISTANT:
@@ -933,7 +941,7 @@ class XuanYuanTemplate(BaseTemplate):
     allow_models = ["xuanyuan"]

     @property
-    def template(self):
+    def template(self) -> str:
        """ The output should look something like:

        Human: {Prompt} Assistant: {Answer}</s>Human: {Prompt} Assistant:
@@ -964,7 +972,7 @@ class PhindTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{% if messages[0]['role'] == 'system' %}"
             "{{ messages[0]['content'] }}"
@@ -1001,7 +1009,7 @@ def match(self, name) -> bool:
         return name == "deepseek-coder"

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{% if messages[0]['role'] == 'system' %}"
             "{{ messages[0]['content'] }}"
@@ -1028,7 +1036,7 @@ class DeepseekTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{{ '<|begin▁of▁sentence|>' }}"
             "{% for message in messages %}"
@@ -1052,7 +1060,7 @@ class BlueLMTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{% for message in messages %}"
             "{% if message['role'] == 'system' %}"
@@ -1072,7 +1080,7 @@ class ZephyrTemplate(BaseTemplate):
     allow_models = ["zephyr"]

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{% for message in messages %}"
             "{% if message['role'] == 'system' %}"
@@ -1100,7 +1108,7 @@ class HuatuoTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{% if messages[0]['role'] == 'system' %}"
             "{{ messages[0]['content'] }}"
@@ -1129,7 +1137,7 @@ class OrionStarTemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{{ '<|startoftext|>' }}"
             "{% for message in messages %}"
@@ -1153,7 +1161,7 @@ class YiAITemplate(BaseTemplate):
     }

     @property
-    def template(self):
+    def template(self) -> str:
         return (
             "{% for message in messages %}"
             "{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}"
@@ -1203,6 +1211,6 @@ def template(self):
         {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
         {"role": "user", "content": "I'd like to show off how chat templating works!"},
     ]
-    template = get_prompt_adapter(prompt_name="deepseek")
+    template = get_prompt_adapter(prompt_name="yi")
     messages = template.postprocess_messages(chat)
     print(template.apply_chat_template(messages))

api/vllm_routes/completion.py

Lines changed: 3 additions & 0 deletions
@@ -64,6 +64,9 @@ async def create_completion(
     request.max_tokens = request.max_tokens or 128
     request, stop_token_ids = await handle_request(request, engine.prompt_adapter.stop, chat=False)

+    if isinstance(request.prompt, list):
+        request.prompt = request.prompt[0]
+
     params = request.model_dump()
     params.update(dict(stop_token_ids=stop_token_ids, prompt_or_messages=request.prompt))
     logger.debug(f"==== request ====\n{params}")
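The added guard covers OpenAI-style /v1/completions payloads in which prompt arrives as a list of strings; this route serves one prompt at a time, so only the first element is kept. A minimal sketch of the behavior, with a hypothetical payload:

# Hypothetical payload: the OpenAI completions API accepts `prompt`
# as either a string or a list of strings.
prompt = ["Tell me a joke.", "This second prompt is dropped."]

# The guard added above: collapse a list prompt to its first element.
if isinstance(prompt, list):
    prompt = prompt[0]

print(prompt)  # -> "Tell me a joke."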
