We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b952f4d · commit 0044c40 — Copy full SHA for 0044c40
vllm/platforms/cuda.py
@@ -182,8 +182,8 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
         if vllm_config.attention_config.backend is None:
             # Default case
-            if cls.is_device_capability(100):
-                # Blackwell => Force CutlassMLA.
+            if cls.is_device_capability(100) and not use_sparse:
+                # Blackwell => Force CutlassMLA (unless sparse, i.e. DSv3.2).
                 use_cutlass_mla = True
             # Set the backend in AttentionConfig so it's used during
             # backend selection
0 commit comments