vllm-project · hmellor · May 22, 2025 · May 15, 2025 · May 20, 2025 · May 22, 2025
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
@@ -126,7 +126,7 @@ steps:
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
   - VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py  --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_openai_schema.py
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py  --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/
   - pytest -v -s entrypoints/test_chat_utils.py
   - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
 

@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
+from typing import Final
+
 import pytest
 import schemathesis
+from hypothesis import settings
 from schemathesis import GenerationConfig
 
 from ...utils import RemoteOpenAIServer
@@ -9,6 +12,8 @@
 
 MODEL_NAME = "HuggingFaceTB/SmolVLM-256M-Instruct"
 MAXIMUM_IMAGES = 2
+DEFAULT_TIMEOUT_SECONDS: Final[int] = 10  # per-request timeout, in seconds
+LONG_TIMEOUT_SECONDS: Final[int] = 60  # slow endpoints + Hypothesis deadline
 
 
 @pytest.fixture(scope="module")
@@ -42,8 +47,58 @@ def get_schema(server):
 schema = schemathesis.from_pytest_fixture("get_schema")
 
 
+@schemathesis.hook
+def before_generate_case(context: schemathesis.hooks.HookContext, strategy):
+    """Return ``strategy`` with known-unsupported cases filtered out."""
+    op = context.operation
+    assert op is not None
+
+    def no_file_type(case: schemathesis.models.Case):
+        """
+        This filter skips test cases for the `POST /tokenize` endpoint where the
+        HTTP request body uses `"type": "file"` in any message's content.
+        We expect these cases to fail because that type isn't implemented here
+        https://github.com/vllm-project/vllm/blob/0b34593017953051b3225b1483ce0f4670e3eb0e/vllm/entrypoints/chat_utils.py#L1038-L1095
+
+        Example test cases that are skipped:
+        curl -X POST -H 'Content-Type: application/json' \
+            -d '{"messages": [{"role": "assistant"}, {"content": [{"file": {}, "type": "file"}], "role": "user"}]}' \
+            http://localhost:8000/tokenize
+
+        curl -X POST -H 'Content-Type: application/json' \
+            -d '{"messages": [{"content": [{"file": {}, "type": "file"}], "role": "user"}]}' \
+            http://localhost:8000/tokenize
+        """  # noqa: E501
+        if (op.method.lower() == "post" and op.path == "/tokenize"
+                and hasattr(case, "body") and isinstance(case.body, dict)
+                and isinstance(case.body.get("messages"), list)):
+            for message in case.body["messages"]:
+                content = (message.get("content")
+                           if isinstance(message, dict) else None)
+                if not isinstance(content, list):
+                    continue
+                # Guard against non-dict items, which have no .get().
+                if any(isinstance(item, dict) and item.get("type") == "file"
+                       for item in content):
+                    return False
+        return True
+
+    return strategy.filter(no_file_type)
+
+
 @schema.parametrize()
 @schema.override(headers={"Content-Type": "application/json"})
+@settings(deadline=LONG_TIMEOUT_SECONDS * 1000)
 def test_openapi_stateless(case: schemathesis.Case):
+    """Call every generated API case with a per-endpoint timeout."""
+    # Operations that require a longer timeout; all others use
+    # DEFAULT_TIMEOUT_SECONDS.
+    slow_operations = {("POST", "/v1/chat/completions")}
+    operation = case.operation
+    if (operation.method.upper(), operation.path) in slow_operations:
+        timeout = LONG_TIMEOUT_SECONDS
+    else:
+        timeout = DEFAULT_TIMEOUT_SECONDS
+
     #No need to verify SSL certificate for localhost
-    case.call_and_validate(verify=False)
+    case.call_and_validate(verify=False, timeout=timeout)