Open
Description
Your current environment
...
🐛 Describe the bug
Observed on `main` at commit 51d7c6a; the same failure was also seen in #15894.
=================================== FAILURES ===================================
[2025-04-01T17:38:12Z] _ test_abort[engine_args0-Hello my name is Robert and-RequestOutputKind.DELTA] _
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x7fd1fa052e70>
[2025-04-01T17:38:12Z] output_kind = <RequestOutputKind.DELTA: 1>
[2025-04-01T17:38:12Z] engine_args = AsyncEngineArgs(model='meta-llama/Llama-3.2-1B-Instruct', served_model_name=None, tokenizer='meta-llama/Llama-3.2-1B-I...additional_config=None, enable_reasoning=None, reasoning_parser=None, use_tqdm_on_load=True, disable_log_requests=True)
[2025-04-01T17:38:12Z] prompt = 'Hello my name is Robert and'
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] @pytest.mark.parametrize(
[2025-04-01T17:38:12Z] "output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
[2025-04-01T17:38:12Z] @pytest.mark.parametrize("engine_args,prompt",
[2025-04-01T17:38:12Z] [(TEXT_ENGINE_ARGS, TEXT_PROMPT),
[2025-04-01T17:38:12Z] (VISION_ENGINE_ARGS, VISION_PROMPT)])
[2025-04-01T17:38:12Z] @pytest.mark.asyncio
[2025-04-01T17:38:12Z] async def test_abort(monkeypatch: pytest.MonkeyPatch,
[2025-04-01T17:38:12Z] output_kind: RequestOutputKind,
[2025-04-01T17:38:12Z] engine_args: AsyncEngineArgs, prompt: PromptType):
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] with monkeypatch.context() as m, ExitStack() as after:
[2025-04-01T17:38:12Z] m.setenv("VLLM_USE_V1", "1")
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] engine = AsyncLLM.from_engine_args(engine_args)
[2025-04-01T17:38:12Z] after.callback(engine.shutdown)
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] NUM_REQUESTS = 100
[2025-04-01T17:38:12Z] NUM_EXPECTED_TOKENS = 100
[2025-04-01T17:38:12Z] NUM_EXPECTED_TOKENS_LONG = 50000
[2025-04-01T17:38:12Z] REQUEST_IDS_TO_ABORT = range(1, 100, 10)
[2025-04-01T17:38:12Z] PARALLEL_SAMPLE_REQ_IDS = range(1, 100, 15)
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] request_ids = [f"request-{i}" for i in range(NUM_REQUESTS)]
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] # Create concurrent requests.
[2025-04-01T17:38:12Z] tasks: list[asyncio.Task] = []
[2025-04-01T17:38:12Z] for idx, request_id in enumerate(request_ids):
[2025-04-01T17:38:12Z] max_tokens = NUM_EXPECTED_TOKENS_LONG if (
[2025-04-01T17:38:12Z] idx in REQUEST_IDS_TO_ABORT) else NUM_EXPECTED_TOKENS
[2025-04-01T17:38:12Z] n = 3 if idx in PARALLEL_SAMPLE_REQ_IDS else 1
[2025-04-01T17:38:12Z] tasks.append(
[2025-04-01T17:38:12Z] asyncio.create_task(
[2025-04-01T17:38:12Z] generate(engine, request_id, prompt, output_kind,
[2025-04-01T17:38:12Z] max_tokens, n)))
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] # API server cancels requests when they disconnect.
[2025-04-01T17:38:12Z] for idx in REQUEST_IDS_TO_ABORT:
[2025-04-01T17:38:12Z] tasks[idx].cancel()
[2025-04-01T17:38:12Z] await asyncio.sleep(0.1)
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] # Confirm the other requests are okay.
[2025-04-01T17:38:12Z] for idx, task in enumerate(tasks):
[2025-04-01T17:38:12Z] # Confirm that it was actually canceled.
[2025-04-01T17:38:12Z] if idx in REQUEST_IDS_TO_ABORT:
[2025-04-01T17:38:12Z] with pytest.raises(asyncio.CancelledError):
[2025-04-01T17:38:12Z] await task
[2025-04-01T17:38:12Z] else:
[2025-04-01T17:38:12Z] # Otherwise, make sure the request was not impacted.
[2025-04-01T17:38:12Z] num_generated_tokens, request_id = await task
[2025-04-01T17:38:12Z] n = 3 if idx in PARALLEL_SAMPLE_REQ_IDS else 1
[2025-04-01T17:38:12Z] expected_tokens = NUM_EXPECTED_TOKENS * n
[2025-04-01T17:38:12Z] assert num_generated_tokens == expected_tokens, (
[2025-04-01T17:38:12Z] f"{request_id} generated {num_generated_tokens} but "
[2025-04-01T17:38:12Z] f"expected {expected_tokens}")
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] # Make sure all aborted requests were really aborted.
[2025-04-01T17:38:12Z] > assert not engine.output_processor.has_unfinished_requests()
[2025-04-01T17:38:12Z] E assert not True
[2025-04-01T17:38:12Z] E + where True = has_unfinished_requests()
[2025-04-01T17:38:12Z] E + where has_unfinished_requests = <vllm.v1.engine.output_processor.OutputProcessor object at 0x7fd1ef1614c0>.has_unfinished_requests
[2025-04-01T17:38:12Z] E + where <vllm.v1.engine.output_processor.OutputProcessor object at 0x7fd1ef1614c0> = <vllm.v1.engine.async_llm.AsyncLLM object at 0x7fd1ef132750>.output_processor
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] v1/engine/test_async_llm.py:178: AssertionError
[2025-04-01T17:38:12Z] =============================== warnings summary ===============================
[2025-04-01T17:38:12Z] tests/v1/engine/test_async_llm.py: 12 warnings
[2025-04-01T17:38:12Z] tests/v1/engine/test_engine_core_client.py: 1 warning
[2025-04-01T17:38:12Z] tests/v1/engine/test_llm_engine.py: 2 warnings
[2025-04-01T17:38:12Z] /usr/lib/python3.12/multiprocessing/popen_fork.py:66: DeprecationWarning: This process (pid=1700) is multi-threaded, use of fork() may lead to deadlocks in the child.
[2025-04-01T17:38:12Z] self.pid = os.fork()
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] tests/v1/engine/test_engine_core.py::test_engine_core
[2025-04-01T17:38:12Z] tests/v1/engine/test_engine_core.py::test_engine_core_advanced_sampling
[2025-04-01T17:38:12Z] tests/v1/engine/test_engine_core.py::test_engine_core_concurrent_batches
[2025-04-01T17:38:12Z] tests/v1/engine/test_engine_core_client.py::test_engine_core_client[True]
[2025-04-01T17:38:12Z] tests/v1/engine/test_engine_core_client.py::test_engine_core_client[False]
[2025-04-01T17:38:12Z] /vllm-workspace/tests/utils.py:720: DeprecationWarning: This process (pid=1700) is multi-threaded, use of fork() may lead to deadlocks in the child.
[2025-04-01T17:38:12Z] pid = os.fork()
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
[2025-04-01T17:38:12Z] =========================== short test summary info ============================
[2025-04-01T17:38:12Z] FAILED v1/engine/test_async_llm.py::test_abort[engine_args0-Hello my name is Robert and-RequestOutputKind.DELTA] - assert not True
[2025-04-01T17:38:12Z] + where True = has_unfinished_requests()
[2025-04-01T17:38:12Z] + where has_unfinished_requests = <vllm.v1.engine.output_processor.OutputProcessor object at 0x7fd1ef1614c0>.has_unfinished_requests
[2025-04-01T17:38:12Z] + where <vllm.v1.engine.output_processor.OutputProcessor object at 0x7fd1ef1614c0> = <vllm.v1.engine.async_llm.AsyncLLM object at 0x7fd1ef132750>.output_processor
[2025-04-01T17:38:12Z] ============ 1 failed, 44 passed, 20 warnings in 1059.59s (0:17:39) ============
[2025-04-01T17:38:14Z] 🚨 Error: The command exited with status 1
Before submitting a new issue...
- Make sure you have already searched for relevant issues and asked the chatbot at the bottom-right corner of the documentation page, which can answer many frequently asked questions.