Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add missing bos_token to example templates #11432

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
3 changes: 3 additions & 0 deletions .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ steps:
source_file_dependencies:
- vllm/entrypoints
- examples/
- tests/examples/
commands:
- pip install tensorizer # for tensorizer test
- python3 offline_inference.py
Expand All @@ -202,6 +203,8 @@ steps:
- python3 offline_inference_embedding.py
- python3 offline_inference_scoring.py
- python3 offline_profile.py --model facebook/opt-125m run_num_steps --num-steps 2
- cd /vllm-workspace/tests
- pytest -v -s examples

- label: Prefix Caching Test # 9min
mirror_hardwares: [amd]
Expand Down
1 change: 1 addition & 0 deletions examples/template_alpaca.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token -}}
{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}

{% for message in messages %}
Expand Down
1 change: 1 addition & 0 deletions examples/template_baichuan.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token -}}
{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}

{%- for message in messages -%}
Expand Down
1 change: 1 addition & 0 deletions examples/template_blip2.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token }}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
{{- 'Question: ' + message['content'] + ' ' -}}
Expand Down
1 change: 1 addition & 0 deletions examples/template_chatglm.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- set counter = namespace(index=0) -%}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
Expand Down
1 change: 1 addition & 0 deletions examples/template_chatglm2.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- set counter = namespace(index=1) -%}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
Expand Down
1 change: 1 addition & 0 deletions examples/template_chatml.jinja
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
{#-
  ChatML-style chat template.
  Emits bos_token first, then every message as
  '<|im_start|>' + role + newline + content.
  '<|im_end|>' + newline follows each message except the last one; it also
  follows the last one when add_generation_prompt is set.
  Finally, when add_generation_prompt is set and the last message is not from
  the assistant, an '<|im_start|>assistant' header is appended.
  The dash-delimited comment markers strip surrounding whitespace so this
  comment adds nothing to the rendered prompt.
-#}
{{ bos_token -}}
{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}
1 change: 1 addition & 0 deletions examples/template_dse_qwen2_vl.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token -}}
{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}{% raw %}<|im_start|>system
You are a helpful assistant.<|im_end|>
{% endraw %}{% endif %}<|im_start|>{{ message['role'] }}{% raw %}
Expand Down
1 change: 1 addition & 0 deletions examples/template_falcon.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
{{- 'User: ' + message['content'] -}}
Expand Down
1 change: 1 addition & 0 deletions examples/template_falcon_180b.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{{- 'System: ' + message['content'] -}}
Expand Down
1 change: 1 addition & 0 deletions examples/template_inkbot.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token -}}
<#meta#>
- Date: {{ (messages|selectattr('role', 'equalto', 'meta-current_date')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'meta-current_date')|list) else '' }}
- Task: {{ (messages|selectattr('role', 'equalto', 'meta-task_name')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'meta-task_name')|list) else '' }}
Expand Down
1 change: 1 addition & 0 deletions examples/template_vlm2vec.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- if messages | length > 1 -%}
{{ raise_exception('Embedding models should only embed one message at a time') }}
{%- endif -%}
Expand Down
1 change: 1 addition & 0 deletions examples/tool_chat_template_granite.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token }}
{%- if tools %}
{{- '<|start_of_role|>available_tools<|end_of_role|>
' }}
Expand Down
1 change: 1 addition & 0 deletions examples/tool_chat_template_granite_20b_fc.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token }}
{%- macro json_to_python_type(json_spec) %}
{%- set basic_type_map = {
"string": "str",
Expand Down
Empty file added tests/examples/__init__.py
Empty file.
48 changes: 48 additions & 0 deletions tests/examples/test_jinja.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from pathlib import Path

import pytest
import transformers

# Directory with the example chat templates: "examples", three levels above
# this test file.
_EXAMPLES_DIR = Path(__file__).parent.parent.parent / "examples"

# One pytest parameter per ``*.jinja`` file in that directory. Paths are
# sorted so the collection order is deterministic, and the file stem is used
# as the readable test id.
jinja_paths = [
    pytest.param(path, id=path.stem)
    for path in sorted(_EXAMPLES_DIR.glob("*.jinja"))
]


@pytest.mark.parametrize("path", jinja_paths)
@pytest.mark.parametrize("num_messages", [1, 3])
def test_bos(path: Path, num_messages: int) -> None:
    """Check that a chat template renders exactly one BOS token, at the start.

    Args:
        path: Location of the Jinja chat template under test.
        num_messages: How many leading turns of the fixed sample conversation
            are fed to the template.
    """
    template_source = path.read_text(encoding="utf-8")

    # A matching tokenizer could be guessed from the file name, but no such
    # mapping is maintained. An arbitrary marker stands in for BOS instead; it
    # does not have to match the string used by the real tokenizer.
    bos_token = "=BOS="
    tokenizer = transformers.PreTrainedTokenizerBase(
        chat_template=template_source,
        bos_token=bos_token,
        eos_token="=EOS=",
    )

    turns = (("user", "1"), ("assistant", "2"), ("user", "3"))
    conversation = [
        {"role": role, "content": content}
        for role, content in turns[:num_messages]
    ]

    # Error message that marks a template as single-message only.
    single_message_only = (
        "Embedding models should only embed one message at a time")

    try:
        prompt: str = tokenizer.apply_chat_template(
            conversation=conversation, tokenize=False)
    except Exception as e:
        # Any failure other than the single-message restriction is a real
        # error and must propagate unchanged.
        if str(e) != single_message_only:
            raise
        pytest.skip(reason=str(e))

    assert prompt.startswith(bos_token)
    assert prompt.count(bos_token) == 1
Loading