Closed
Description
Your current environment
The output of `python collect_env.py`
(Full `python collect_env.py` output not captured; key environment details are listed below.)
- NVIDIA L20 GPUs × 3 nodes, 72 GPUs total
- TP=8, PP=3
- vLLM commit: 811a46bf06f872c28147f957b3a9d18d97d1c1ad
- DeepSeek-R1
🐛 Describe the bug

Serving DeepSeek-R1 with TP=8/PP=3 crashes the async engine: the MLA attention backend raises `AssertionError: prefill_metadata.context_chunk_seq_tot is not None` inside `_compute_prefill_context` during chunked-prefill execution, which kills the engine loop with `AsyncEngineDeadError`. Full traceback:
[2025-02-27 21:37:38,998] [ERROR] [MainThread] [asyncio] >>> Exception in callback functools.partial(<function _log_task_completion at 0x148b0b8bbbe0>, error_callback=<bound method AsyncLLMEngine._error_callback of <vllm.engine.async_llm
_engine.AsyncLLMEngine object at 0x148b005c3400>>)
handle: <Handle functools.partial(<function _log_task_completion at 0x148b0b8bbbe0>, error_callback=<bound method AsyncLLMEngine._error_callback of <vllm.engine.async_llm_engine.AsyncLLMEngine object at 0x148b005c3400>>)>
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 58, in _log_task_completion
return_value = task.result()
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 825, in run_engine_loop
result = task.result()
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 748, in engine_step
request_outputs = await self.engine.step_async(virtual_engine)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 353, in step_async
outputs = await self.model_executor.execute_model_async(
File "/usr/local/lib/python3.10/dist-packages/vllm/executor/ray_distributed_executor.py", line 591, in execute_model_async
return await super().execute_model_async(execute_model_req)
File "/usr/local/lib/python3.10/dist-packages/vllm/executor/executor_base.py", line 354, in execute_model_async
return await self._driver_execute_model_async(execute_model_req)
File "/usr/local/lib/python3.10/dist-packages/vllm/executor/ray_distributed_executor.py", line 633, in _driver_execute_model_async
results = await asyncio.gather(*tasks)
File "/usr/local/lib/python3.10/dist-packages/vllm/utils.py", line 1354, in _run_task_with_lock
return await task(*args, **kwargs)
File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/worker/worker_base.py", line 594, in execute_method
raise e
File "/usr/local/lib/python3.10/dist-packages/vllm/worker/worker_base.py", line 585, in execute_method
return run_method(self, method, args, kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/utils.py", line 2238, in run_method
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/worker/worker_base.py", line 420, in execute_model
output = self.model_runner.execute_model(
File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/worker/model_runner.py", line 1727, in execute_model
hidden_or_intermediate_states = model_executable(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/deepseek_v2.py", line 660, in forward
hidden_states = self.model(input_ids, positions, intermediate_tensors,
File "/usr/local/lib/python3.10/dist-packages/vllm/compilation/decorators.py", line 172, in __call__
return self.forward(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/deepseek_v2.py", line 617, in forward
hidden_states, residual = layer(positions, hidden_states, residual)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/deepseek_v2.py", line 539, in forward
hidden_states = self.self_attn(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/deepseek_v2.py", line 461, in forward
return self.mla_attn(hidden_states_or_q_c, kv_c_normed, k_pe)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/attention/layer.py", line 197, in forward
return torch.ops.vllm.unified_attention(
File "/usr/local/lib/python3.10/dist-packages/torch/_ops.py", line 1116, in __call__
return self._op(*args, **(kwargs or {}))
File "/usr/local/lib/python3.10/dist-packages/vllm/attention/layer.py", line 303, in unified_attention
return self.impl.forward(self, query, key, value, kv_cache, attn_metadata)
File "/usr/local/lib/python3.10/dist-packages/vllm/attention/backends/mla/common.py", line 1513, in forward
output[:num_prefill_tokens] = self._forward_prefill(
File "/usr/local/lib/python3.10/dist-packages/vllm/attention/backends/mla/common.py", line 1407, in _forward_prefill
context_output, context_lse = self._compute_prefill_context( \
File "/usr/local/lib/python3.10/dist-packages/vllm/attention/backends/mla/common.py", line 1270, in _compute_prefill_context
assert prefill_metadata.context_chunk_seq_tot is not None
AssertionError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "uvloop/cbhandles.pyx", line 63, in uvloop.loop.Handle._run
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 70, in _log_task_completion
raise AsyncEngineDeadError(
vllm.engine.async_llm_engine.AsyncEngineDeadError: Task finished unexpectedly. This should never happen! Please open an issue on Github. See stack trace above for the actual cause
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.