55
66import  pytest 
77
8+ from  vllm .entrypoints .openai .protocol  import  ChatCompletionRequest 
9+ from  vllm .entrypoints .openai .tool_parsers .hermes_tool_parser  import  (
10+     Hermes2ProToolParser )
11+ from  vllm .transformers_utils .tokenizer  import  AnyTokenizer 
12+ 
813from  ....utils  import  RemoteOpenAIServer 
914
1015MODEL_NAME  =  "meta-llama/Llama-3.2-1B-Instruct" 
3742                },
3843                "unit" : {
3944                    "type" : "string" ,
40-                     "enum" : ["celsius" , "fahrenheit" ]
45+                     "enum" : ["celsius" , "fahrenheit" ], 
4146                },
4247            },
4348            "required" : ["location" ],
7580    "user" ,
7681    "content" :
7782    "Hi! Do you have any detailed information about the product id " 
78-     "7355608 and inserted true?" 
83+     "7355608 and inserted true?" , 
7984}]
8085
8186
@@ -144,8 +149,8 @@ async def test_streaming_tool_call():
144149                if  tool_chunk .function .name :
145150                    tool_call_chunks [index ]["name" ] +=  tool_chunk .function .name 
146151                if  tool_chunk .function .arguments :
147-                     tool_call_chunks [index ][
148-                         "arguments" ]  +=   tool_chunk .function .arguments 
152+                     tool_call_chunks [index ]["arguments" ]  +=  ( 
153+                         tool_chunk .function .arguments ) 
149154
150155        assert  len (tool_call_chunks ) ==  1 
151156        reconstructed_tool_call  =  tool_call_chunks [0 ]
@@ -234,8 +239,8 @@ async def test_streaming_product_tool_call():
234239                if  tool_chunk .function .name :
235240                    tool_call_chunks [index ]["name" ] +=  tool_chunk .function .name 
236241                if  tool_chunk .function .arguments :
237-                     tool_call_chunks [index ][
238-                         "arguments" ]  +=   tool_chunk .function .arguments 
242+                     tool_call_chunks [index ]["arguments" ]  +=  ( 
243+                         tool_chunk .function .arguments ) 
239244
240245        assert  len (tool_call_chunks ) ==  1 
241246        reconstructed_tool_call  =  tool_call_chunks [0 ]
@@ -258,3 +263,195 @@ async def test_streaming_product_tool_call():
258263        print ("\n [Streaming Product Test Passed]" )
259264        print (f"Reconstructed Tool Call: { reconstructed_tool_call ['name' ]}  )
260265        print (f"Reconstructed Arguments: { arguments }  )
266+ 
267+ 
@pytest.fixture
def qwen_tokenizer() -> AnyTokenizer:
    """Tokenizer for the Qwen3-32B model that the Hermes parser tests use."""
    # Imported lazily so collecting the module does not pull in tokenizer deps.
    from vllm.transformers_utils.tokenizer import get_tokenizer

    model_name = "Qwen/Qwen3-32B"
    return get_tokenizer(model_name)
273+ 
274+ 
@pytest.fixture
def hermes_parser(qwen_tokenizer: AnyTokenizer) -> Hermes2ProToolParser:
    """Hermes 2 Pro tool-call parser bound to the Qwen tokenizer fixture."""
    parser = Hermes2ProToolParser(qwen_tokenizer)
    return parser
278+ 
279+ 
@pytest.fixture
def any_chat_request() -> ChatCompletionRequest:
    """Minimal deterministic chat request; the parser only needs a request object."""
    request = ChatCompletionRequest(
        seed=42,
        model="Qwen/Qwen3-32B",
        messages=[],
    )
    return request
287+ 
288+ 
def test_hermes_parser_streaming_just_forward_text(
    qwen_tokenizer: AnyTokenizer,
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """Plain text with no tool-call markup must be forwarded verbatim.

    Feeds the text token by token through the streaming parser and checks
    that no tool calls are emitted and the concatenated content round-trips.
    """
    text = (
        """This is some prior text that has nothing to do with tool calling."""
    )
    seen = ""
    delta_messages = []
    for token_id in qwen_tokenizer.encode(text):
        piece = qwen_tokenizer.decode([token_id])
        delta_messages.append(
            hermes_parser.extract_tool_calls_streaming(
                previous_text=seen,
                current_text=seen + piece,
                delta_text=piece,
                previous_token_ids=[],
                current_token_ids=[],
                delta_token_ids=[],
                request=any_chat_request,
            ))
        seen += piece

    for delta in delta_messages:
        assert delta is not None
        assert not delta.tool_calls

    print(delta_messages)
    assert "".join([delta.content for delta in delta_messages]) == text
321+ 
322+ 
def test_hermes_parser_streaming_failure_case_bug_19056(
    qwen_tokenizer: AnyTokenizer,
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """Regression test for vllm-project/vllm#19056.

    Streams a <tool_call>-wrapped JSON payload token by token and checks
    that the parser reconstructs the function name and arguments exactly.
    """
    text = """<tool_call>
{"name": "final_answer", "arguments": {"trigger": true}}
</tool_call>"""
    tokens = qwen_tokenizer.encode(text)
    previous_text = ""
    delta_messages = []
    for token in tokens:
        # Use a distinct name for the decoded piece: the original rebound
        # `text` here, silently shadowing the prompt under test.
        delta_text = qwen_tokenizer.decode([token])
        current_text = previous_text + delta_text
        delta = hermes_parser.extract_tool_calls_streaming(
            previous_text=previous_text,
            current_text=current_text,
            delta_text=delta_text,
            previous_token_ids=[],
            current_token_ids=[],
            delta_token_ids=[],
            request=any_chat_request,
        )
        previous_text = current_text
        if delta is not None:
            delta_messages.append(delta)

    assert delta_messages[0].tool_calls[0].function.name == "final_answer"
    tool_call_args = "".join(delta.tool_calls[0].function.arguments or ""
                             for delta in delta_messages)
    assert tool_call_args == '{"trigger": true}'
354+ 
355+ 
def test_hermes_parser_streaming(
    qwen_tokenizer: AnyTokenizer,
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """Streaming parse of a weather tool call split across many tokens.

    The prompt literal was garbled in the source (dangling line
    continuations); it is reconstructed here so the embedded arguments
    object matches the expected argument string asserted below.
    """
    text = ('<tool_call>'
            '{"name": "get_current_temperature", "arguments": '
            '{"location":"San Francisco, California, United States", '
            '"unit": "celsius"}}'
            '</tool_call>')

    tokens = qwen_tokenizer.encode(text)
    previous_text = ""
    delta_messages = []
    for token in tokens:
        # Keep the prompt in `text` intact; name the decoded piece separately.
        delta_text = qwen_tokenizer.decode([token])
        current_text = previous_text + delta_text
        delta = hermes_parser.extract_tool_calls_streaming(
            previous_text=previous_text,
            current_text=current_text,
            delta_text=delta_text,
            previous_token_ids=[],
            current_token_ids=[],
            delta_token_ids=[],
            request=any_chat_request,
        )
        previous_text = current_text
        if delta is not None:
            delta_messages.append(delta)
    print(delta_messages)
    assert (delta_messages[0].tool_calls[0].function.name ==
            "get_current_temperature")
    tool_call_args = "".join(delta.tool_calls[0].function.arguments or ""
                             for delta in delta_messages)
    assert tool_call_args == (
        '{"location":"San Francisco, California, United States", '
        '"unit": "celsius"}')
393+ 
394+ 
def test_hermes_parser_non_streaming_no_tool_call(
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """Output without <tool_call> tags must not be reported as a tool call."""
    plain_output = """This is not a tool call."""
    result = hermes_parser.extract_tool_calls(
        model_output=plain_output,
        request=any_chat_request,
    )

    assert result is not None
    assert not result.tools_called
407+ 
408+ 
def test_hermes_parser_non_streaming_tool_call_between_tags(
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """A complete <tool_call>...</tool_call> payload is parsed name + args."""
    model_output = """<tool_call>
{"name": "final_answer", "arguments": {"trigger": true}}
</tool_call>"""
    result = hermes_parser.extract_tool_calls(
        model_output=model_output,
        request=any_chat_request,
    )

    assert result is not None
    assert result.tools_called
    first_call = result.tool_calls[0].function
    assert first_call.name == "final_answer"
    assert first_call.arguments == '{"trigger": true}'
425+ 
426+ 
def test_hermes_parser_non_streaming_tool_call_until_eos(
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """A tool call whose closing tag is cut off by EOS must still parse."""
    model_output = """<tool_call>
{"name": "final_answer", "arguments": {"trigger": true}}"""
    result = hermes_parser.extract_tool_calls(
        model_output=model_output,
        request=any_chat_request,
    )

    assert result is not None
    assert result.tools_called
    first_call = result.tool_calls[0].function
    assert first_call.name == "final_answer"
    assert first_call.arguments == '{"trigger": true}'
442+ 
443+ 
def test_hermes_parser_non_streaming_tool_call_invalid_json(
    hermes_parser: Hermes2ProToolParser,
    any_chat_request: ChatCompletionRequest,
) -> None:
    """Malformed JSON inside the tags must degrade to "no tool called"."""
    # The payload drops a closing brace on purpose to force a parse error.
    model_output = """<tool_call>
{"name": "final_answer", "arguments": {"trigger": true}"""
    result = hermes_parser.extract_tool_calls(
        model_output=model_output,
        request=any_chat_request,
    )

    assert result is not None
    assert not result.tools_called
0 commit comments