Skip to content

[V0][V1][Core] Add outlines integration for V1, and update V0 integration. #15975

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 43 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
c0b6518
Squashed commit of the following:
unaidedelf8777 May 7, 2025
d2ea0dd
Add rollback and validate_tokens methods. Final touches.
unaidedelf8777 May 7, 2025
d428821
bump outlines version
unaidedelf8777 May 8, 2025
40f4e47
explain to pre-commit that its wrong
unaidedelf8777 May 8, 2025
729b5ed
pin newest outlines version
unaidedelf8777 May 12, 2025
4912c73
fix interface to be compliant with #16577
unaidedelf8777 May 12, 2025
db403cb
make pre-commit happy
unaidedelf8777 May 13, 2025
3f671e0
artifact from a prior merge
unaidedelf8777 May 13, 2025
b586e87
fix failing test
unaidedelf8777 May 14, 2025
8c38298
make pre-commit happy again
unaidedelf8777 May 14, 2025
615d4b0
fix test_guided_json_object
unaidedelf8777 May 17, 2025
cedbcbf
Fix test_guided_processors to be compatible with new integration
unaidedelf8777 May 17, 2025
b267170
pre-commit
unaidedelf8777 May 17, 2025
ce2c72e
fix
unaidedelf8777 May 17, 2025
7fd9777
fix tests
unaidedelf8777 May 18, 2025
1860bec
fix tool-use-tests build_regex_from_schema import path
unaidedelf8777 May 19, 2025
d7f9c1f
format and add clone to resolve conflicts
unaidedelf8777 May 24, 2025
44770f3
add fallback for outlines grammars
unaidedelf8777 May 29, 2025
d6ef242
fix spec config attr-error
unaidedelf8777 Jun 1, 2025
792a9c7
fix
unaidedelf8777 Jun 2, 2025
d4063e4
fix
unaidedelf8777 Jun 2, 2025
186eede
fix failing entrypoint test
unaidedelf8777 Jun 2, 2025
cb6e9f0
fix pre-commit
unaidedelf8777 Jun 2, 2025
c8548c4
pre-commit
unaidedelf8777 Jun 2, 2025
731cf7b
Merge branch 'main' into update-outlines-integration
unaidedelf8777 Jun 3, 2025
e0b1eea
re-run ci
unaidedelf8777 Jun 3, 2025
1587dd8
update tests
unaidedelf8777 Jun 7, 2025
75c7a46
pre-commit
unaidedelf8777 Jun 7, 2025
b07d315
fix failing test
unaidedelf8777 Jun 9, 2025
8ad4aa3
pre-commit fix
unaidedelf8777 Jun 9, 2025
8dc48d5
fix
unaidedelf8777 Jun 9, 2025
981b322
fix
unaidedelf8777 Jun 9, 2025
d4ab074
fix
unaidedelf8777 Jun 11, 2025
b9861c3
fix
unaidedelf8777 Jun 18, 2025
79bb600
pre-commit
unaidedelf8777 Jun 18, 2025
96b649b
pre-commit
unaidedelf8777 Jun 19, 2025
0254844
disable grammar and structured_tag tests for outlines
unaidedelf8777 Jun 20, 2025
f839448
fix final test
unaidedelf8777 Jun 20, 2025
f431efe
pre-commit
unaidedelf8777 Jun 20, 2025
96d2489
fix re_replacement_seq regex for mistral
unaidedelf8777 Jun 21, 2025
c0be222
regex
unaidedelf8777 Jun 21, 2025
54c6075
make replacement_seq regex catch larger.
unaidedelf8777 Jun 22, 2025
b363dda
fix mistral handling
unaidedelf8777 Jun 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion requirements/common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ prometheus-fastapi-instrumentator >= 7.0.0
tiktoken >= 0.6.0 # Required for DBRX tokenizer
lm-format-enforcer >= 0.10.11, < 0.11
llguidance >= 0.7.11, < 0.8.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64"
outlines == 0.1.11
outlines_core == 0.2.10
# required for outlines backend disk cache
diskcache == 5.6.3
lark == 1.2.2
xgrammar == 0.1.19; platform_machine == "x86_64" or platform_machine == "aarch64"
typing_extensions >= 4.10
Expand Down
33 changes: 20 additions & 13 deletions tests/entrypoints/llm/test_guided_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,18 @@
from vllm.sampling_params import GuidedDecodingParams, SamplingParams

MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
GUIDED_DECODING_BACKENDS = [

# Separate the backends that support grammars from the ones
# that only support regex-based constraints in tests.
GRAMMAR_DECODING_BACKENDS = [
# (backend, disable_any_whitespace),
("outlines", False),
("lm-format-enforcer", False),
("xgrammar", True),
("guidance", True),
]

ALL_DECODING_BACKENDS = ([("outlines", False)] + GRAMMAR_DECODING_BACKENDS)


@pytest.fixture(scope="module")
def llm():
Expand All @@ -38,7 +42,7 @@ def llm():

@pytest.mark.skip_global_cleanup
@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
GUIDED_DECODING_BACKENDS)
ALL_DECODING_BACKENDS)
def test_guided_regex(sample_regex, llm, guided_decoding_backend: str,
disable_any_whitespace: bool):
sampling_params = SamplingParams(
Expand All @@ -48,6 +52,7 @@ def test_guided_regex(sample_regex, llm, guided_decoding_backend: str,
regex=sample_regex,
backend=guided_decoding_backend,
disable_any_whitespace=disable_any_whitespace))

outputs = llm.generate(prompts=[
f"Give an example IPv4 address with this regex: {sample_regex}"
] * 2,
Expand All @@ -68,7 +73,7 @@ def test_guided_regex(sample_regex, llm, guided_decoding_backend: str,

@pytest.mark.skip_global_cleanup
@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
GUIDED_DECODING_BACKENDS)
ALL_DECODING_BACKENDS)
def test_guided_json_completion(sample_json_schema, llm,
guided_decoding_backend: str,
disable_any_whitespace: bool):
Expand Down Expand Up @@ -102,7 +107,7 @@ def test_guided_json_completion(sample_json_schema, llm,

@pytest.mark.skip_global_cleanup
@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
GUIDED_DECODING_BACKENDS)
ALL_DECODING_BACKENDS)
def test_guided_complex_json_completion(sample_complex_json_schema, llm,
guided_decoding_backend: str,
disable_any_whitespace: bool):
Expand Down Expand Up @@ -137,7 +142,7 @@ def test_guided_complex_json_completion(sample_complex_json_schema, llm,

@pytest.mark.skip_global_cleanup
@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
GUIDED_DECODING_BACKENDS)
ALL_DECODING_BACKENDS)
def test_guided_definition_json_completion(sample_definition_json_schema, llm,
guided_decoding_backend: str,
disable_any_whitespace: bool):
Expand Down Expand Up @@ -172,7 +177,7 @@ def test_guided_definition_json_completion(sample_definition_json_schema, llm,

@pytest.mark.skip_global_cleanup
@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
GUIDED_DECODING_BACKENDS)
ALL_DECODING_BACKENDS)
def test_guided_enum_json_completion(sample_enum_json_schema, llm,
guided_decoding_backend: str,
disable_any_whitespace: bool):
Expand Down Expand Up @@ -217,7 +222,7 @@ def test_guided_enum_json_completion(sample_enum_json_schema, llm,

@pytest.mark.skip_global_cleanup
@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
GUIDED_DECODING_BACKENDS)
ALL_DECODING_BACKENDS)
def test_guided_choice_completion(sample_guided_choice, llm,
guided_decoding_backend: str,
disable_any_whitespace: bool):
Expand Down Expand Up @@ -247,7 +252,7 @@ def test_guided_choice_completion(sample_guided_choice, llm,

@pytest.mark.skip_global_cleanup
@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
GUIDED_DECODING_BACKENDS)
GRAMMAR_DECODING_BACKENDS)
def test_guided_grammar(sample_sql_statements, llm,
guided_decoding_backend: str,
disable_any_whitespace: bool):
Expand Down Expand Up @@ -343,7 +348,7 @@ def test_disable_guided_decoding_fallback(sample_regex, llm):

@pytest.mark.skip_global_cleanup
@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
GUIDED_DECODING_BACKENDS)
GRAMMAR_DECODING_BACKENDS)
def test_guided_json_object(llm, guided_decoding_backend: str,
disable_any_whitespace: bool):
sampling_params = SamplingParams(
Expand Down Expand Up @@ -376,7 +381,9 @@ def test_guided_json_object(llm, guided_decoding_backend: str,

# Parse to verify it is valid JSON
parsed_json = json.loads(generated_text)
assert isinstance(parsed_json, dict)
# A list is not what was intended, but it is still valid
# JSON.
assert isinstance(parsed_json, (dict, list))


class CarType(str, Enum):
Expand All @@ -394,7 +401,7 @@ class CarDescription(BaseModel):

@pytest.mark.skip_global_cleanup
@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
GUIDED_DECODING_BACKENDS)
ALL_DECODING_BACKENDS)
def test_guided_json_completion_with_enum(llm, guided_decoding_backend: str,
disable_any_whitespace: bool):
json_schema = CarDescription.model_json_schema()
Expand Down Expand Up @@ -426,7 +433,7 @@ def test_guided_json_completion_with_enum(llm, guided_decoding_backend: str,

@pytest.mark.skip_global_cleanup
@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
GUIDED_DECODING_BACKENDS)
ALL_DECODING_BACKENDS)
def test_guided_number_range_json_completion(llm, guided_decoding_backend: str,
disable_any_whitespace: bool):
sample_output_schema = {
Expand Down
30 changes: 9 additions & 21 deletions tests/model_executor/test_guided_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,15 @@ def test_guided_logits_processors(zephyr_7B_tokenzer, sample_regex,
whitespace_pattern=None,
reasoner=None)

token_ids = zephyr_7B_tokenzer.encode(
f"Give an example IPv4 address with this regex: {sample_regex}")
tensor = torch.rand(32000)
original_tensor = torch.clone(tensor)
regex_LP(token_ids, tensor)
tensor = regex_LP([], tensor)
assert tensor.shape == original_tensor.shape
assert not torch.allclose(tensor, original_tensor)

token_ids = zephyr_7B_tokenzer.encode(
f"Give an employee profile that fits this schema: {sample_json_schema}"
)
tensor = torch.rand(32000)
original_tensor = torch.clone(tensor)
json_LP(token_ids, tensor)
tensor = json_LP([], tensor)
assert tensor.shape == original_tensor.shape
assert not torch.allclose(tensor, original_tensor)

Expand All @@ -80,8 +75,6 @@ async def test_guided_logits_processor_black_box(backend: str, is_local: bool,
seed=0,
dtype="bfloat16",
)
token_ids = zephyr_7B_tokenzer.encode(
f"Give an example IPv4 address with this regex: {sample_regex}")
regex_request = GuidedDecodingParams(regex=sample_regex, backend=backend)

regex_lp = get_local_guided_decoding_logits_processor(
Expand All @@ -91,21 +84,19 @@ async def test_guided_logits_processor_black_box(backend: str, is_local: bool,
assert regex_lp is not None
tensor = torch.rand(32000)
original_tensor = torch.clone(tensor)
tensor = regex_lp(token_ids, tensor)
# allowed tokens at state 0
tensor = regex_lp([], tensor)
assert tensor.shape == original_tensor.shape
assert not torch.allclose(tensor, original_tensor)

token_ids = zephyr_7B_tokenzer.encode(
f"Give an employee profile that fits this schema: {sample_json_schema}"
)
json_request = GuidedDecodingParams(json=sample_json_schema,
backend=backend)
json_lp = await get_guided_decoding_logits_processor(
json_request, zephyr_7B_tokenzer, config)
assert json_lp is not None
tensor = torch.rand(32000)
original_tensor = torch.clone(tensor)
tensor = json_lp(token_ids, tensor)
tensor = json_lp([], tensor)
assert tensor.shape == original_tensor.shape
assert not torch.allclose(tensor, original_tensor)

Expand All @@ -129,7 +120,6 @@ async def test_guided_logits_processor_with_reasoning(
dtype="bfloat16",
)
token_ids = deepseek_r1_qwen_tokenizer.encode(
f"Give an example IPv4 address with this regex: {sample_regex}."
"<think>here is the thinking process")
regex_request = GuidedDecodingParams(regex=sample_regex, backend=backend)

Expand All @@ -140,14 +130,13 @@ async def test_guided_logits_processor_with_reasoning(
regex_request, deepseek_r1_qwen_tokenizer, config,
reasoning_backend)
assert regex_lp is not None
tensor = torch.rand(32000)
tensor = torch.rand(151664)
original_tensor = torch.clone(tensor)
tensor = regex_lp(token_ids, tensor)
assert tensor.shape == original_tensor.shape
assert torch.allclose(tensor, original_tensor)

token_ids = deepseek_r1_qwen_tokenizer.encode(
f"Give an employee profile that fits this schema: {sample_json_schema}."
"<think>here is the thinking process")
json_request = GuidedDecodingParams(json=sample_json_schema,
backend=backend)
Expand All @@ -157,16 +146,15 @@ async def test_guided_logits_processor_with_reasoning(
await get_guided_decoding_logits_processor(
json_request, deepseek_r1_qwen_tokenizer, config, reasoning_backend)
assert json_lp is not None
tensor = torch.rand(32000)
tensor = torch.rand(151664)
original_tensor = torch.clone(tensor)
tensor = json_lp(token_ids, tensor)
assert tensor.shape == original_tensor.shape
assert torch.allclose(tensor, original_tensor)

# Thinking is over, so the tensor should change.
token_ids = deepseek_r1_qwen_tokenizer.encode(
f"Give an employee profile that fits this schema: {sample_json_schema}."
"<think>here is the thinking process</think> Then")
"<think>here is the thinking process</think>")
json_request = GuidedDecodingParams(json=sample_json_schema,
backend=backend)
json_lp = get_local_guided_decoding_logits_processor(
Expand All @@ -175,7 +163,7 @@ async def test_guided_logits_processor_with_reasoning(
await get_guided_decoding_logits_processor(
json_request, deepseek_r1_qwen_tokenizer, config, reasoning_backend)
assert json_lp is not None
tensor = torch.rand(32000)
tensor = torch.rand(151664)
original_tensor = torch.clone(tensor)
tensor = json_lp(token_ids, tensor)
assert tensor.shape == original_tensor.shape
Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_tool_choice_required.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def _compile_and_check(tools: list[ChatCompletionToolsParam], sample_output,
assert isinstance(schema, dict)

# Use the same build_regex_from_schema that JSONLogitsProcessor uses to create its Guide.
from outlines_core.fsm.json_schema import build_regex_from_schema
from outlines_core.json_schema import build_regex_from_schema
regex = build_regex_from_schema(json.dumps(schema))
compiled = re.compile(regex)
matches = compiled.fullmatch(json.dumps(sample_output)) is not None
Expand Down
Loading