Remove FORCE_OLD_CUTLASS_MLA option

MatthewBonanni · MatthewBonanni · commit fb8446493761 · 2025-09-16T12:40:39.000-04:00
Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com>
diff --git a/vllm/v1/attention/backends/mla/cutlass_mla.py b/vllm/v1/attention/backends/mla/cutlass_mla.py
@@ -109,12 +109,6 @@ def __init__(
                                       "are not implemented for "
                                       "CutlassMLAImpl")
 
-        self._use_old_cutlass_mla = False
-        force_old_cutlass = os.environ.get("FORCE_OLD_CUTLASS_MLA", None)
-        if force_old_cutlass:
-            logger.warning_once("Forcing old cutlass mla kernel")
-            self._use_old_cutlass_mla = True
-
         # TODO: Currently, num_kv_splits is limited to 16 to avoid hanging
         #       issues. In case the code hangs, use:
         #       FORCE_NUM_KV_SPLITS=1