Skip to content

Commit 15c694b

Browse files
ahartel authored and choprahetarth committed
[Test]: Hermes tool parser stream output error in Qwen3 case (vllm-project#25203)
Signed-off-by: Andreas Hartel <andreas.hartel@aleph-alpha.com>
1 parent bf41ca3 commit 15c694b

File tree

1 file changed

+203
-6
lines changed

1 file changed

+203
-6
lines changed

tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py

Lines changed: 203 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,11 @@
55

66
import pytest
77

8+
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
9+
from vllm.entrypoints.openai.tool_parsers.hermes_tool_parser import (
10+
Hermes2ProToolParser)
11+
from vllm.transformers_utils.tokenizer import AnyTokenizer
12+
813
from ....utils import RemoteOpenAIServer
914

1015
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
@@ -37,7 +42,7 @@
3742
},
3843
"unit": {
3944
"type": "string",
40-
"enum": ["celsius", "fahrenheit"]
45+
"enum": ["celsius", "fahrenheit"],
4146
},
4247
},
4348
"required": ["location"],
@@ -75,7 +80,7 @@
7580
"user",
7681
"content":
7782
"Hi! Do you have any detailed information about the product id "
78-
"7355608 and inserted true?"
83+
"7355608 and inserted true?",
7984
}]
8085

8186

@@ -144,8 +149,8 @@ async def test_streaming_tool_call():
144149
if tool_chunk.function.name:
145150
tool_call_chunks[index]["name"] += tool_chunk.function.name
146151
if tool_chunk.function.arguments:
147-
tool_call_chunks[index][
148-
"arguments"] += tool_chunk.function.arguments
152+
tool_call_chunks[index]["arguments"] += (
153+
tool_chunk.function.arguments)
149154

150155
assert len(tool_call_chunks) == 1
151156
reconstructed_tool_call = tool_call_chunks[0]
@@ -234,8 +239,8 @@ async def test_streaming_product_tool_call():
234239
if tool_chunk.function.name:
235240
tool_call_chunks[index]["name"] += tool_chunk.function.name
236241
if tool_chunk.function.arguments:
237-
tool_call_chunks[index][
238-
"arguments"] += tool_chunk.function.arguments
242+
tool_call_chunks[index]["arguments"] += (
243+
tool_chunk.function.arguments)
239244

240245
assert len(tool_call_chunks) == 1
241246
reconstructed_tool_call = tool_call_chunks[0]
@@ -258,3 +263,195 @@ async def test_streaming_product_tool_call():
258263
print("\n[Streaming Product Test Passed]")
259264
print(f"Reconstructed Tool Call: {reconstructed_tool_call['name']}")
260265
print(f"Reconstructed Arguments: {arguments}")
266+
267+
268+
@pytest.fixture
def qwen_tokenizer() -> AnyTokenizer:
    """Return the tokenizer loaded for ``Qwen/Qwen3-32B``.

    ``get_tokenizer`` is imported inside the fixture body, so the import
    only runs when a test actually requests this fixture.
    """
    from vllm.transformers_utils.tokenizer import get_tokenizer

    tokenizer = get_tokenizer("Qwen/Qwen3-32B")
    return tokenizer
274+
275+
@pytest.fixture
def hermes_parser(qwen_tokenizer: AnyTokenizer) -> Hermes2ProToolParser:
    """Return a ``Hermes2ProToolParser`` built on the Qwen tokenizer fixture."""
    parser = Hermes2ProToolParser(qwen_tokenizer)
    return parser
278+
279+
280+
@pytest.fixture
def any_chat_request() -> ChatCompletionRequest:
    """Return a minimal chat completion request to hand to the parser.

    The request carries a fixed seed, the Qwen model name and an empty
    message list.
    """
    request = ChatCompletionRequest(
        model="Qwen/Qwen3-32B",
        messages=[],
        seed=42,
    )
    return request
287+
288+
289+
def test_hermes_parser_streaming_just_forward_text(
    qwen_tokenizer: AnyTokenizer,
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """Stream plain text through the parser one token at a time.

    Every call must yield a delta that carries no tool calls, and the
    concatenated delta contents must reproduce the input text exactly.
    """
    text = "This is some prior text that has nothing to do with tool calling."

    streamed_so_far = ""
    deltas = []
    for token_id in qwen_tokenizer.encode(text):
        # Decode each token on its own to simulate one streaming step.
        piece = qwen_tokenizer.decode([token_id])
        grown = streamed_so_far + piece
        deltas.append(
            hermes_parser.extract_tool_calls_streaming(
                previous_text=streamed_so_far,
                current_text=grown,
                delta_text=piece,
                # Token ids are passed as empty lists throughout this test.
                previous_token_ids=[],
                current_token_ids=[],
                delta_token_ids=[],
                request=any_chat_request,
            ))
        streamed_so_far = grown

    # All parser calls are made before any assertion is evaluated.
    for message in deltas:
        assert message is not None
        assert not message.tool_calls

    print(deltas)
    assert "".join(message.content for message in deltas) == text
321+
322+
323+
def test_hermes_parser_streaming_failure_case_bug_19056(
    qwen_tokenizer: AnyTokenizer,
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """Stream a ``final_answer`` tool call token by token.

    The first non-``None`` delta must carry the function name, and the
    argument fragments of all deltas must join to ``{"trigger": true}``.
    The test name refers to bug 19056.
    """
    text = ("<tool_call>\n"
            '{"name": "final_answer", "arguments": {"trigger": true}}\n'
            "</tool_call>")

    streamed_so_far = ""
    deltas = []
    for token_id in qwen_tokenizer.encode(text):
        # Decode each token on its own to simulate one streaming step.
        piece = qwen_tokenizer.decode([token_id])
        grown = streamed_so_far + piece
        message = hermes_parser.extract_tool_calls_streaming(
            previous_text=streamed_so_far,
            current_text=grown,
            delta_text=piece,
            previous_token_ids=[],
            current_token_ids=[],
            delta_token_ids=[],
            request=any_chat_request,
        )
        streamed_so_far = grown
        # Only non-None deltas are collected for the assertions below.
        if message is None:
            continue
        deltas.append(message)

    assert deltas[0].tool_calls[0].function.name == "final_answer"

    argument_fragments = [
        message.tool_calls[0].function.arguments or "" for message in deltas
    ]
    assert "".join(argument_fragments) == '{"trigger": true}'
354+
355+
356+
def test_hermes_parser_streaming(
    qwen_tokenizer: AnyTokenizer,
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """Stream a ``get_current_temperature`` tool call token by token.

    The first non-``None`` delta must carry the function name, and the
    argument fragments of all deltas must join to the expected JSON text.
    """
    # Adjacent literals are joined with nothing in between, so the payload
    # is a single line with no whitespace added at the fragment boundaries.
    text = (
        '<tool_call>'
        '{"name": "get_current_temperature",'
        '"arguments": {"location":'
        '"San Francisco, California, United States", "unit": "celsius"}}'
        '</tool_call>'
    )

    streamed_so_far = ""
    deltas = []
    for token_id in qwen_tokenizer.encode(text):
        # Decode each token on its own to simulate one streaming step.
        piece = qwen_tokenizer.decode([token_id])
        grown = streamed_so_far + piece
        message = hermes_parser.extract_tool_calls_streaming(
            previous_text=streamed_so_far,
            current_text=grown,
            delta_text=piece,
            previous_token_ids=[],
            current_token_ids=[],
            delta_token_ids=[],
            request=any_chat_request,
        )
        streamed_so_far = grown
        # Only non-None deltas are collected for the assertions below.
        if message is None:
            continue
        deltas.append(message)

    print(deltas)
    first_call = deltas[0].tool_calls[0]
    assert first_call.function.name == "get_current_temperature"

    argument_fragments = [
        message.tool_calls[0].function.arguments or "" for message in deltas
    ]
    expected_arguments = (
        '{"location":"San Francisco, California, United States", '
        '"unit": "celsius"}')
    assert "".join(argument_fragments) == expected_arguments
393+
394+
395+
def test_hermes_parser_non_streaming_no_tool_call(
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """Plain text without tool-call tags must not be reported as a call."""
    plain_output = "This is not a tool call."

    extracted = hermes_parser.extract_tool_calls(
        model_output=plain_output,
        request=any_chat_request,
    )

    assert extracted is not None
    assert not extracted.tools_called
407+
408+
409+
def test_hermes_parser_non_streaming_tool_call_between_tags(
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """A call wrapped in opening and closing tags must be extracted."""
    tagged_output = (
        "<tool_call>\n"
        '{"name": "final_answer", "arguments": {"trigger": true}}\n'
        "</tool_call>")

    extracted = hermes_parser.extract_tool_calls(
        model_output=tagged_output,
        request=any_chat_request,
    )

    assert extracted is not None
    assert extracted.tools_called
    called_function = extracted.tool_calls[0].function
    assert called_function.name == "final_answer"
    assert called_function.arguments == '{"trigger": true}'
425+
426+
427+
def test_hermes_parser_non_streaming_tool_call_until_eos(
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """A call with no closing tag (output ends after the JSON) is extracted."""
    unterminated_output = (
        "<tool_call>\n"
        '{"name": "final_answer", "arguments": {"trigger": true}}')

    extracted = hermes_parser.extract_tool_calls(
        model_output=unterminated_output,
        request=any_chat_request,
    )

    assert extracted is not None
    assert extracted.tools_called
    called_function = extracted.tool_calls[0].function
    assert called_function.name == "final_answer"
    assert called_function.arguments == '{"trigger": true}'
442+
443+
444+
def test_hermes_parser_non_streaming_tool_call_invalid_json(
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """Malformed JSON after the opening tag must not count as a tool call."""
    # Missing closing brace to trigger exception
    malformed_output = (
        "<tool_call>\n"
        '{"name": "final_answer", "arguments": {"trigger": true}')

    extracted = hermes_parser.extract_tool_calls(
        model_output=malformed_output,
        request=any_chat_request,
    )

    assert extracted is not None
    assert not extracted.tools_called

0 commit comments

Comments
 (0)