Closed
Description
Your current environment
vllm serve ... --enable-auto-tool-choice --tool-call-parser hermes --enable-reasoning --reasoning-parser qwen3
🐛 Describe the bug
The Qwen3 model supports switching between "think" and non-"think" modes. Therefore, if the output is produced in non-"think" mode and is streamed, the is_reasoning_end
function in Qwen3ReasoningParser
may return incorrect results, causing the function call parser not to be executed, so no function call result is output.
Here is an example:
The function call will fail if we set stream=True and add the "disable thinking" argument to the request:
from openai import OpenAI

# Endpoint and credentials for the vLLM OpenAI-compatible server.
openai_api_base = "API_ENDPOINT"
openai_api_key = "API_KEY"

# The server exposes its API under the /v1 prefix.
client = OpenAI(
    base_url=openai_api_base + "/v1",
    api_key=openai_api_key,
)
class bcolors:
    # ANSI terminal escape codes used to colorize the demo's console output
    # (blue for reasoning content, green for regular content).
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    # ENDC resets all attributes back to the terminal default.
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
def _function_tool(name, description, properties, required):
    # Build one OpenAI-style "function" tool definition.
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


# Property schemas shared by both weather tools.
_LOCATION_PROP = {
    "type": "string",
    "description": 'The location to get the temperature for, in the format "City, State, Country".',
}
_UNIT_PROP = {
    "type": "string",
    "enum": ["celsius", "fahrenheit"],
    "description": 'The unit to return the temperature in. Defaults to "celsius".',
}

# Two demo tools: current temperature, and temperature on a given date.
tools = [
    _function_tool(
        "get_current_temperature",
        "Get current temperature at a location.",
        {"location": dict(_LOCATION_PROP), "unit": dict(_UNIT_PROP)},
        ["location"],
    ),
    _function_tool(
        "get_temperature_date",
        "Get temperature at a location and date.",
        {
            "location": dict(_LOCATION_PROP),
            "date": {
                "type": "string",
                "description": 'The date to get the temperature for, in the format "Year-Month-Day".',
            },
            "unit": dict(_UNIT_PROP),
        },
        ["location", "date"],
    ),
]
# Request a streamed completion with thinking explicitly disabled via
# chat_template_kwargs, which is the combination that triggers the bug.
messages = [
    {
        "role": "system",
        "content": "You are a helpful assistant.\n\nCurrent Date: 2024-09-30",
    },
    {
        "role": "user",
        "content": "What's the temperature in San Francisco now? How about tomorrow?",
    },
]
model_id = client.models.list().data[0].id
tool_calls_stream = client.chat.completions.create(
    model=model_id,
    messages=messages,
    tools=tools,
    tool_choice="auto",
    stream=True,
    extra_body={"chat_template_kwargs": {"enable_thinking": False}},
)
# Drain the stream, buffering every chunk for the tool-call pass below while
# echoing reasoning content (blue) and regular content (green) as it arrives.
chunks = []
for chunk in tool_calls_stream:
    chunks.append(chunk)
    delta = chunk.choices[0].delta
    if hasattr(delta, "reasoning_content"):
        if delta.reasoning_content:
            print(bcolors.OKBLUE + delta.reasoning_content, end="", flush=True)
    elif hasattr(delta, "content"):
        if delta.content:
            print(bcolors.OKGREEN + delta.content, end="", flush=True)
print(bcolors.ENDC + "\n### end of reasoning content and content. ###\n")
# Second pass over the buffered chunks: reassemble each streamed tool call's
# argument fragments, printing ids/names as they appear.
arguments = []
tool_call_idx = -1
for chunk in chunks:
    tool_calls = chunk.choices[0].delta.tool_calls
    if not tool_calls:
        continue
    tool_call = tool_calls[0]
    if tool_call.index != tool_call_idx:
        # A new tool call began; flush the previous call's arguments, if any.
        if tool_call_idx >= 0:
            print(f"streamed tool call arguments: {arguments[tool_call_idx]}")
        tool_call_idx = tool_call.index
        arguments.append("")
    if tool_call.id:
        print(f"streamed tool call id: {tool_call.id} ")
    if tool_call.function:
        if tool_call.function.name:
            print(f"streamed tool call name: {tool_call.function.name}")
        if tool_call.function.arguments:
            arguments[tool_call_idx] += tool_call.function.arguments
# Flush the final tool call, which the loop above never reaches.
if len(arguments):
    print(f"streamed tool call arguments: {arguments[-1]}")
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.