
Commit fa88b14

yuguo68 authored and facebook-github-bot committed
fix AiterFlashAttentionImpl init (#20103)
Summary:
Signed-off-by: Yu Guo <yuguo@meta.com>

Serving llama4 fails with ```TypeError: AiterFlashAttentionImpl.__init__() got multiple values for argument 'use_irope'``` because `AiterFlashAttentionImpl.__init__()` is missing the `kv_sharing_target_layer_name` arg expected by the caller at https://github.com/vllm-project/vllm/blob/296ce95d8e72f4c6680bda539058f48dbe0f340a/vllm/attention/layer.py#L54

Test Plan: launch a llama4 server with this fix

Rollback Plan:

Differential Revision: D77340637
1 parent 296ce95 commit fa88b14

File tree

1 file changed (+4, -0 lines)


vllm/v1/attention/backends/rocm_aiter_fa.py

Lines changed: 4 additions & 0 deletions
@@ -387,11 +387,15 @@ def __init__(
         blocksparse_params: Optional[dict[str, Any]] = None,
         logits_soft_cap: Optional[float] = None,
         attn_type: AttentionType = AttentionType.DECODER,
+        kv_sharing_target_layer_name: Optional[str] = None,
         use_irope: bool = False,
     ) -> None:
         if blocksparse_params is not None:
             raise ValueError(
                 "AiterFlashAttention does not support block-sparse attention.")
+        if kv_sharing_target_layer_name is not None:
+            raise NotImplementedError(
+                "KV sharing is not supported in AiterFlashAttention.")
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)
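To illustrate the failure mode this commit fixes, here is a minimal, self-contained sketch. `BrokenImpl` and `FixedImpl` are hypothetical stand-ins, not vLLM classes, and the call pattern assumes the attention layer forwards backend arguments positionally plus `use_irope` as a keyword, as the error message suggests; it is not a copy of the real `vllm/attention/layer.py` call.

```python
from typing import Optional


class BrokenImpl:
    # Stand-in for the pre-fix signature: no kv_sharing_target_layer_name
    # parameter, so a positionally forwarded value for it lands in
    # use_irope's slot.
    def __init__(self, num_heads: int, use_irope: bool = False) -> None:
        self.num_heads = num_heads
        self.use_irope = use_irope


class FixedImpl:
    # Stand-in for the post-fix signature: the extra keyword parameter is
    # accepted, and explicitly rejected if a value is actually passed.
    def __init__(self,
                 num_heads: int,
                 kv_sharing_target_layer_name: Optional[str] = None,
                 use_irope: bool = False) -> None:
        if kv_sharing_target_layer_name is not None:
            raise NotImplementedError(
                "KV sharing is not supported in AiterFlashAttention.")
        self.num_heads = num_heads
        self.use_irope = use_irope


# Hypothetical caller: forwards (num_heads, kv_sharing_target_layer_name)
# positionally and use_irope as a keyword.
args = (8, None)

try:
    BrokenImpl(*args, use_irope=True)
except TypeError as e:
    print(e)  # ... got multiple values for argument 'use_irope'

impl = FixedImpl(*args, use_irope=True)  # works once the parameter exists
print(impl.use_irope)
```

Accepting the keyword while raising `NotImplementedError` when it is set keeps the backend constructor interface uniform with the caller without silently claiming KV-sharing support.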

0 commit comments