Skip to content

Commit 2a03f93

Browse files
[Attention] Register FLASHMLA_SPARSE (#26441)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
1 parent da36461 commit 2a03f93

File tree

2 files changed

+3
-1
lines changed

2 files changed

+3
-1
lines changed

vllm/attention/backends/registry.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ class _Backend(enum.Enum):
2121
TRITON_MLA = enum.auto()
2222
CUTLASS_MLA = enum.auto()
2323
FLASHMLA = enum.auto()
24+
FLASHMLA_SPARSE = enum.auto()
2425
FLASH_ATTN_MLA = enum.auto()
2526
PALLAS = enum.auto()
2627
IPEX = enum.auto()
@@ -43,6 +44,7 @@ class _Backend(enum.Enum):
4344
_Backend.TRITON_MLA: "vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend", # noqa: E501
4445
_Backend.CUTLASS_MLA: "vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackend", # noqa: E501
4546
_Backend.FLASHMLA: "vllm.v1.attention.backends.mla.flashmla.FlashMLABackend", # noqa: E501
47+
_Backend.FLASHMLA_SPARSE: "vllm.v1.attention.backends.mla.flashmla_sparse.FlashMLASparseBackend", # noqa: E501
4648
_Backend.FLASH_ATTN_MLA: "vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend", # noqa: E501
4749
_Backend.PALLAS: "vllm.v1.attention.backends.pallas.PallasAttentionBackend", # noqa: E501
4850
_Backend.FLEX_ATTENTION: "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend", # noqa: E501

vllm/v1/attention/backends/mla/flashmla_sparse.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ class FlashMLASparseBackend(AttentionBackend):
5555

5656
@staticmethod
5757
def get_name() -> str:
58-
return "FLASHMLA_SPARSE_VLLM_V1"
58+
return "FLASHMLA_SPARSE"
5959

6060
@staticmethod
6161
def get_metadata_cls() -> type[AttentionMetadata]:

0 commit comments

Comments (0)