File tree Expand file tree Collapse file tree 2 files changed +3
-1
lines changed
v1/attention/backends/mla Expand file tree Collapse file tree 2 files changed +3
-1
lines changed Original file line number Diff line number Diff line change @@ -21,6 +21,7 @@ class _Backend(enum.Enum):
21 21   TRITON_MLA = enum.auto()
22 22   CUTLASS_MLA = enum.auto()
23 23   FLASHMLA = enum.auto()
   24 + FLASHMLA_SPARSE = enum.auto()
24 25   FLASH_ATTN_MLA = enum.auto()
25 26   PALLAS = enum.auto()
26 27   IPEX = enum.auto()
@@ -43,6 +44,7 @@ class _Backend(enum.Enum):
43 44   _Backend.TRITON_MLA: "vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend",  # noqa: E501
44 45   _Backend.CUTLASS_MLA: "vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackend",  # noqa: E501
45 46   _Backend.FLASHMLA: "vllm.v1.attention.backends.mla.flashmla.FlashMLABackend",  # noqa: E501
   47 + _Backend.FLASHMLA_SPARSE: "vllm.v1.attention.backends.mla.flashmla_sparse.FlashMLASparseBackend",  # noqa: E501
46 48   _Backend.FLASH_ATTN_MLA: "vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend",  # noqa: E501
47 49   _Backend.PALLAS: "vllm.v1.attention.backends.pallas.PallasAttentionBackend",  # noqa: E501
48 50   _Backend.FLEX_ATTENTION: "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend",  # noqa: E501
Original file line number Diff line number Diff line change @@ -55,7 +55,7 @@ class FlashMLASparseBackend(AttentionBackend):
55 55
56 56   @staticmethod
57 57   def get_name() -> str:
58    -     return "FLASHMLA_SPARSE_VLLM_V1"
   58 +     return "FLASHMLA_SPARSE"
59 59
60 60   @staticmethod
61 61   def get_metadata_cls() -> type[AttentionMetadata]:
You can’t perform that action at this time.
0 commit comments