Skip to content

[Bugfix]: Fix the incompatibility issue with tool_choice 'required' when Thinking is enabled #19075

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ...utils import RemoteOpenAIServer

# any model with a chat template should work here
MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
MODEL_NAME = "Qwen/Qwen3-0.6B"


@pytest.fixture(scope="module")
Expand Down
18 changes: 15 additions & 3 deletions vllm/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,10 +319,13 @@ def _filter_delta_text(delta_text: str,
def extract_tool_call_required_streaming(
self,
previous_text: str,
current_text: str,
current_text: Optional[str],
delta_text: str,
function_name_returned: bool,
) -> tuple[Optional[DeltaMessage], bool]:
if current_text is None or current_text == "":
# if the current text is None or empty, there is nothing to parse yet
return None, function_name_returned
try:
obj = partial_json_parser.loads(current_text)
except partial_json_parser.core.exceptions.MalformedJSON:
Expand Down Expand Up @@ -649,10 +652,18 @@ async def chat_completion_stream_generator(
current_text = previous_text + delta_text
fn_name_returned = function_name_returned[i]

if self.reasoning_parser:
_, content = \
reasoning_parser.extract_reasoning_content(
current_text,
request
)
else:
content = current_text
delta_message, function_name_returned[i] = (
self.extract_tool_call_required_streaming(
previous_text=previous_text,
current_text=current_text,
current_text=content,
delta_text=delta_text,
function_name_returned=fn_name_returned))

Expand Down Expand Up @@ -980,8 +991,9 @@ async def chat_completion_full_generator(

# the fields of FunctionDefinition are a superset of the
# tool call outputs and can be used for parsing
assert content is not None
tool_calls = TypeAdapter(
list[FunctionDefinition]).validate_json(output.text)
list[FunctionDefinition]).validate_json(content)
message = ChatMessage(
role=role,
content="",
Expand Down