vllm-project · unaidedelf8777 · May 7, 2025 · May 7, 2025 · May 8, 2025 · May 8, 2025
diff --git a/requirements/common.txt b/requirements/common.txt
@@ -21,7 +21,9 @@ prometheus-fastapi-instrumentator >= 7.0.0
 tiktoken >= 0.6.0  # Required for DBRX tokenizer
 lm-format-enforcer >= 0.10.11, < 0.11
 llguidance >= 0.7.11, < 0.8.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64"
-outlines == 0.1.11
+outlines_core == 0.2.10
+# required for outlines backend disk cache
+diskcache == 5.6.3
 lark == 1.2.2
 xgrammar == 0.1.19; platform_machine == "x86_64" or platform_machine == "aarch64"
 typing_extensions >= 4.10

@@ -15,14 +15,18 @@
 from vllm.sampling_params import GuidedDecodingParams, SamplingParams
 
 MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
-GUIDED_DECODING_BACKENDS = [
+
+# Separate backends which support grammars vs ones
+# which only support regex based constraints in tests.
+GRAMMAR_DECODING_BACKENDS = [
     # (backend, disable_any_whitespace),
-    ("outlines", False),
     ("lm-format-enforcer", False),
     ("xgrammar", True),
     ("guidance", True),
 ]
 
+ALL_DECODING_BACKENDS = ([("outlines", False)] + GRAMMAR_DECODING_BACKENDS)
+
 
 @pytest.fixture(scope="module")
 def llm():
@@ -38,7 +42,7 @@ def llm():
 
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         GUIDED_DECODING_BACKENDS)
+                         ALL_DECODING_BACKENDS)
 def test_guided_regex(sample_regex, llm, guided_decoding_backend: str,
                       disable_any_whitespace: bool):
     sampling_params = SamplingParams(
@@ -48,6 +52,7 @@ def test_guided_regex(sample_regex, llm, guided_decoding_backend: str,
             regex=sample_regex,
             backend=guided_decoding_backend,
             disable_any_whitespace=disable_any_whitespace))
+
     outputs = llm.generate(prompts=[
         f"Give an example IPv4 address with this regex: {sample_regex}"
     ] * 2,
@@ -68,7 +73,7 @@ def test_guided_regex(sample_regex, llm, guided_decoding_backend: str,
 
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         GUIDED_DECODING_BACKENDS)
+                         ALL_DECODING_BACKENDS)
 def test_guided_json_completion(sample_json_schema, llm,
                                 guided_decoding_backend: str,
                                 disable_any_whitespace: bool):
@@ -102,7 +107,7 @@ def test_guided_json_completion(sample_json_schema, llm,
 
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         GUIDED_DECODING_BACKENDS)
+                         ALL_DECODING_BACKENDS)
 def test_guided_complex_json_completion(sample_complex_json_schema, llm,
                                         guided_decoding_backend: str,
                                         disable_any_whitespace: bool):
@@ -137,7 +142,7 @@ def test_guided_complex_json_completion(sample_complex_json_schema, llm,
 
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         GUIDED_DECODING_BACKENDS)
+                         ALL_DECODING_BACKENDS)
 def test_guided_definition_json_completion(sample_definition_json_schema, llm,
                                            guided_decoding_backend: str,
                                            disable_any_whitespace: bool):
@@ -172,7 +177,7 @@ def test_guided_definition_json_completion(sample_definition_json_schema, llm,
 
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         GUIDED_DECODING_BACKENDS)
+                         ALL_DECODING_BACKENDS)
 def test_guided_enum_json_completion(sample_enum_json_schema, llm,
                                      guided_decoding_backend: str,
                                      disable_any_whitespace: bool):
@@ -217,7 +222,7 @@ def test_guided_enum_json_completion(sample_enum_json_schema, llm,
 
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         GUIDED_DECODING_BACKENDS)
+                         ALL_DECODING_BACKENDS)
 def test_guided_choice_completion(sample_guided_choice, llm,
                                   guided_decoding_backend: str,
                                   disable_any_whitespace: bool):
@@ -247,7 +252,7 @@ def test_guided_choice_completion(sample_guided_choice, llm,
 
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         GUIDED_DECODING_BACKENDS)
+                         GRAMMAR_DECODING_BACKENDS)
 def test_guided_grammar(sample_sql_statements, llm,
                         guided_decoding_backend: str,
                         disable_any_whitespace: bool):
@@ -343,7 +348,7 @@ def test_disable_guided_decoding_fallback(sample_regex, llm):
 
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         GUIDED_DECODING_BACKENDS)
+                         GRAMMAR_DECODING_BACKENDS)
 def test_guided_json_object(llm, guided_decoding_backend: str,
                             disable_any_whitespace: bool):
     sampling_params = SamplingParams(
@@ -376,7 +381,9 @@ def test_guided_json_object(llm, guided_decoding_backend: str,
 
             # Parse to verify it is valid JSON
             parsed_json = json.loads(generated_text)
-            assert isinstance(parsed_json, dict)
+            # A list is not what was intended, but is still valid
+            # json.
+            assert isinstance(parsed_json, (dict, list))
 
 
 class CarType(str, Enum):
@@ -394,7 +401,7 @@ class CarDescription(BaseModel):
 
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         GUIDED_DECODING_BACKENDS)
+                         ALL_DECODING_BACKENDS)
 def test_guided_json_completion_with_enum(llm, guided_decoding_backend: str,
                                           disable_any_whitespace: bool):
     json_schema = CarDescription.model_json_schema()
@@ -426,7 +433,7 @@ def test_guided_json_completion_with_enum(llm, guided_decoding_backend: str,
 
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace",
-                         GUIDED_DECODING_BACKENDS)
+                         ALL_DECODING_BACKENDS)
 def test_guided_number_range_json_completion(llm, guided_decoding_backend: str,
                                              disable_any_whitespace: bool):
     sample_output_schema = {

@@ -45,20 +45,15 @@ def test_guided_logits_processors(zephyr_7B_tokenzer, sample_regex,
                                   whitespace_pattern=None,
                                   reasoner=None)
 
-    token_ids = zephyr_7B_tokenzer.encode(
-        f"Give an example IPv4 address with this regex: {sample_regex}")
     tensor = torch.rand(32000)
     original_tensor = torch.clone(tensor)
-    regex_LP(token_ids, tensor)
+    tensor = regex_LP([], tensor)
     assert tensor.shape == original_tensor.shape
     assert not torch.allclose(tensor, original_tensor)
 
-    token_ids = zephyr_7B_tokenzer.encode(
-        f"Give an employee profile that fits this schema: {sample_json_schema}"
-    )
     tensor = torch.rand(32000)
     original_tensor = torch.clone(tensor)
-    json_LP(token_ids, tensor)
+    tensor = json_LP([], tensor)
     assert tensor.shape == original_tensor.shape
     assert not torch.allclose(tensor, original_tensor)
 
@@ -80,8 +75,6 @@ async def test_guided_logits_processor_black_box(backend: str, is_local: bool,
         seed=0,
         dtype="bfloat16",
     )
-    token_ids = zephyr_7B_tokenzer.encode(
-        f"Give an example IPv4 address with this regex: {sample_regex}")
     regex_request = GuidedDecodingParams(regex=sample_regex, backend=backend)
 
     regex_lp = get_local_guided_decoding_logits_processor(
@@ -91,21 +84,19 @@ async def test_guided_logits_processor_black_box(backend: str, is_local: bool,
     assert regex_lp is not None
     tensor = torch.rand(32000)
     original_tensor = torch.clone(tensor)
-    tensor = regex_lp(token_ids, tensor)
+    # allowed tokens at state 0
+    tensor = regex_lp([], tensor)
     assert tensor.shape == original_tensor.shape
     assert not torch.allclose(tensor, original_tensor)
 
-    token_ids = zephyr_7B_tokenzer.encode(
-        f"Give an employee profile that fits this schema: {sample_json_schema}"
-    )
     json_request = GuidedDecodingParams(json=sample_json_schema,
                                         backend=backend)
     json_lp = await get_guided_decoding_logits_processor(
         json_request, zephyr_7B_tokenzer, config)
     assert json_lp is not None
     tensor = torch.rand(32000)
     original_tensor = torch.clone(tensor)
-    tensor = json_lp(token_ids, tensor)
+    tensor = json_lp([], tensor)
     assert tensor.shape == original_tensor.shape
     assert not torch.allclose(tensor, original_tensor)
 
@@ -129,7 +120,6 @@ async def test_guided_logits_processor_with_reasoning(
         dtype="bfloat16",
     )
     token_ids = deepseek_r1_qwen_tokenizer.encode(
-        f"Give an example IPv4 address with this regex: {sample_regex}."
         "<think>here is the thinking process")
     regex_request = GuidedDecodingParams(regex=sample_regex, backend=backend)
 
@@ -140,14 +130,13 @@ async def test_guided_logits_processor_with_reasoning(
                     regex_request, deepseek_r1_qwen_tokenizer, config,
                     reasoning_backend)
     assert regex_lp is not None
-    tensor = torch.rand(32000)
+    tensor = torch.rand(151664)
     original_tensor = torch.clone(tensor)
     tensor = regex_lp(token_ids, tensor)
     assert tensor.shape == original_tensor.shape
     assert torch.allclose(tensor, original_tensor)
 
     token_ids = deepseek_r1_qwen_tokenizer.encode(
-        f"Give an employee profile that fits this schema: {sample_json_schema}."
         "<think>here is the thinking process")
     json_request = GuidedDecodingParams(json=sample_json_schema,
                                         backend=backend)
@@ -157,16 +146,15 @@ async def test_guided_logits_processor_with_reasoning(
         await get_guided_decoding_logits_processor(
             json_request, deepseek_r1_qwen_tokenizer, config, reasoning_backend)
     assert json_lp is not None
-    tensor = torch.rand(32000)
+    tensor = torch.rand(151664)
     original_tensor = torch.clone(tensor)
     tensor = json_lp(token_ids, tensor)
     assert tensor.shape == original_tensor.shape
     assert torch.allclose(tensor, original_tensor)
 
     # Thinking is over, so the tensor should change.
     token_ids = deepseek_r1_qwen_tokenizer.encode(
-        f"Give an employee profile that fits this schema: {sample_json_schema}."
-        "<think>here is the thinking process</think> Then")
+        "<think>here is the thinking process</think>")
     json_request = GuidedDecodingParams(json=sample_json_schema,
                                         backend=backend)
     json_lp = get_local_guided_decoding_logits_processor(
@@ -175,7 +163,7 @@ async def test_guided_logits_processor_with_reasoning(
         await get_guided_decoding_logits_processor(
             json_request, deepseek_r1_qwen_tokenizer, config, reasoning_backend)
     assert json_lp is not None
-    tensor = torch.rand(32000)
+    tensor = torch.rand(151664)
     original_tensor = torch.clone(tensor)
     tensor = json_lp(token_ids, tensor)
     assert tensor.shape == original_tensor.shape

diff --git a/tests/tool_use/test_tool_choice_required.py b/tests/tool_use/test_tool_choice_required.py
@@ -71,7 +71,7 @@ def _compile_and_check(tools: list[ChatCompletionToolsParam], sample_output,
     assert isinstance(schema, dict)
 
     # use build_regex_from_schema used in JSONLogitsProcessor to create Guide
-    from outlines_core.fsm.json_schema import build_regex_from_schema
+    from outlines_core.json_schema import build_regex_from_schema
     regex = build_regex_from_schema(json.dumps(schema))
     compiled = re.compile(regex)
     matches = compiled.fullmatch(json.dumps(sample_output)) is not None