Skip to content

[Bugfix][Frontend] Eliminate regex based check in reasoning full generator #14821

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions tests/reasoning/test_deepseekr1_reasoning_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,40 @@ def deepseek_r1_qwen_tokenizer():
"content": "This is the rest",
"is_reasoning_end": True,
}
# Fixture cases for DeepSeek-R1 reasoning extraction. Each mapping pairs a raw
# model output string with the (reasoning_content, content) split the parser
# is expected to produce, plus whether the end-of-reasoning token was seen.
THINK_NO_END = dict(
    output="<think>This is a reasoning section",
    reasoning_content="This is a reasoning section",
    content=None,
    is_reasoning_end=False,
)
EMPTY = dict(
    output="",
    reasoning_content="",
    content=None,
    is_reasoning_end=False,
)
EMPTY_STREAMING = dict(
    output="",
    reasoning_content=None,
    content=None,
    is_reasoning_end=False,
)
NEW_LINE = dict(
    output="\n<think>This is a reasoning section</think>\nThis is the rest",
    reasoning_content="This is a reasoning section",
    content="\nThis is the rest",
    is_reasoning_end=True,
)
# In streaming mode a leading newline before <think> cannot be classified:
# both <think>...</think> and bare </think>... outputs must be supported, so
# text arriving ahead of <think> is indistinguishable from reasoning content.
# The expected reasoning_content therefore keeps the leading "\n".
NEW_LINE_STREAMING = dict(
    output="\n<think>This is a reasoning section</think>\nThis is the rest",
    reasoning_content="\nThis is a reasoning section",
    content="\nThis is the rest",
    is_reasoning_end=True,
)

TEST_CASES = [
pytest.param(
Expand Down Expand Up @@ -182,6 +216,36 @@ def deepseek_r1_qwen_tokenizer():
SHORTEST_REASONING_WITH_THINK,
id="shortest_with_think_streaming",
),
pytest.param(
False,
THINK_NO_END,
id="think_no_end",
),
pytest.param(
True,
THINK_NO_END,
id="think_no_end_streaming",
),
pytest.param(
False,
EMPTY,
id="empty",
),
pytest.param(
True,
EMPTY_STREAMING,
id="empty_streaming",
),
pytest.param(
False,
NEW_LINE,
id="new_line",
),
pytest.param(
True,
NEW_LINE_STREAMING,
id="new_line_streaming",
),
]


Expand Down
43 changes: 25 additions & 18 deletions vllm/reasoning/deepseek_r1_reasoning_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: Apache-2.0

import re
from collections.abc import Sequence
from typing import Optional, Union

Expand Down Expand Up @@ -32,9 +31,6 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
def __init__(self, tokenizer: PreTrainedTokenizerBase):
super().__init__(tokenizer)

self.reasoning_regex = re.compile(
rf"{self.start_token}(.*?){self.end_token}", re.DOTALL)

if not self.model_tokenizer:
raise ValueError(
"The model tokenizer must be passed to the ReasoningParser "
Expand Down Expand Up @@ -143,23 +139,34 @@ def extract_reasoning_content_streaming(
def extract_reasoning_content(
self, model_output: str, request: ChatCompletionRequest
) -> tuple[Optional[str], Optional[str]]:
"""
Extract reasoning content from the model output.

For text <think>abc</think>xyz:
- 'abc' goes to reasoning_content
- 'xyz' goes to content

Returns:
tuple[Optional[str], Optional[str]]: reasoning content and content
"""

# Check if the start token is present in the model output, remove it
# if it is present.
model_output_parts = model_output.partition(self.start_token)
model_output = model_output_parts[2] if model_output_parts[
1] else model_output_parts[0]

# DeepSeek R1 doesn't generate <think> now.
# Thus we assume the reasoning content is always at the start.
# Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
if self.end_token not in model_output:
return model_output, None
else:
# Add a start token if it's missing to keep compatibility.
if self.start_token not in model_output:
model_output = f"{self.start_token}{model_output}"
# Use a regex to find the reasoning content
reasoning_content = self.reasoning_regex.findall(model_output)[0]

end_index = len(
f"{self.start_token}{reasoning_content}{self.end_token}")
final_output = model_output[end_index:]

if len(final_output) == 0:
return reasoning_content, None

return reasoning_content, final_output
reasoning_content, _, content = model_output.partition(
self.end_token)
# If the end token is not found, return the model output as is.
# It should not happen since we already checked for the presence
# of the end token.
# If generation stops right after end-of-think, return null content
final_content = content or None
return reasoning_content, final_content