We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c79cdc4, commit c745540. Copy full SHA for c745540.
vllm/v1/worker/gpu_model_runner.py
@@ -3047,8 +3047,10 @@ def _dummy_run(
3047
BatchDescriptor(num_tokens=num_tokens,
3048
uniform_decode=uniform_decode))
3049
if cudagraph_runtime_mode is not None:
3050
- # sanity check
3051
- assert cudagraph_runtime_mode == _cg_mode, (
+ # we allow forcing NONE when the dispatcher disagrees to support
+ # warm ups for cudagraph capture
3052
+ assert cudagraph_runtime_mode == CUDAGraphMode.NONE or \
3053
+ cudagraph_runtime_mode == _cg_mode, (
3054
f"Cudagraph runtime mode mismatch at dummy_run. "
3055
f"Expected {_cg_mode}, but got {cudagraph_runtime_mode}.")
3056
else:
0 commit comments