Commit

Adapt to new npu flash_attention api (#9735)
will-jl944 authored Jan 6, 2025
1 parent a83e8fa · commit 1f07325
Showing 1 changed file with 3 additions and 0 deletions.
paddlenlp/transformers/llama/fusion_ops.py: 3 additions & 0 deletions
@@ -206,11 +206,14 @@ def fusion_flash_attention(
             value_states,
             None,
             attention_mask,
+            [],
+            [],
             0.0,
             attention_mask is None,
             True,
             False,
             npu_is_casual,
+            False,
         )[0]
     elif get_env_device() == "gcu":
         if config.context_parallel_degree > 1:
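
For context, here is a hedged sketch of how the full npu branch plausibly reads after this commit. Only the three lines marked with + above are confirmed by the diff; the enclosing core.eager._run_custom_op("flash_attention_npu", ...) call, the branch condition, and the per-argument comments are assumptions inferred from the hunk header and the surrounding fusion_ops.py, which also supplies names such as query_states and npu_is_casual.

    # Assumed reconstruction of the npu branch in
    # paddlenlp/transformers/llama/fusion_ops.py after this commit;
    # only the lines tagged "added" below are confirmed by the diff.
    if get_env_device() == "npu":
        attn_output = core.eager._run_custom_op(
            "flash_attention_npu",
            query_states,
            key_states,
            value_states,
            None,                    # assumed: no separate bias/offset tensor
            attention_mask,
            [],                      # added: assumed actual-seq-length list for q
            [],                      # added: assumed actual-seq-length list for kv
            0.0,                     # assumed: dropout probability
            attention_mask is None,  # assumed: fast path when no mask is given
            True,
            False,
            npu_is_casual,
            False,                   # added: assumed new flag in the updated npu API
        )[0]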
