We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b952f4d · commit 0044c40 — Copy full SHA for 0044c40
vllm/platforms/cuda.py
@@ -182,8 +182,8 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
         if vllm_config.attention_config.backend is None:
             # Default case
-            if cls.is_device_capability(100):
-                # Blackwell => Force CutlassMLA.
+            if cls.is_device_capability(100) and not use_sparse:
+                # Blackwell => Force CutlassMLA (unless sparse, i.e. DSv3.2).
                 use_cutlass_mla = True
             # Set the backend in AttentionConfig so it's used during
             # backend selection
0 commit comments