1 parent f0f78ed · commit ea19f20
lmdeploy/pytorch/kernels/ascend/paged_attention_fwd.py
@@ -33,7 +33,7 @@ def flash_context_attention(
                 q_seq_len[i:i + 1],
                 num_q_heads,
                 num_kv_heads,
-                context.attention_mask[i:i + 1],
+                attn_mask=context.attention_mask[i:i + 1],
                 attn_output=attn_output,
             )
         else:
@@ -51,7 +51,7 @@ def flash_context_attention(
                 kv_seq_len[i:i + 1],
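The change passes the attention mask by keyword instead of by position. Below is a minimal, self-contained sketch of why that matters; the function name and signature here are hypothetical stand-ins, not the actual lmdeploy/Ascend kernel API. If the callee gains or reorders a parameter ahead of the mask, a positional call silently binds the mask to the wrong name, while the keyword call keeps working.

    # Hypothetical signature: `softmax_scale` sits where the mask used to be.
    def fused_attention(q_seq_len, num_q_heads, num_kv_heads,
                        softmax_scale=1.0, attn_mask=None, attn_output=None):
        print(f'scale={softmax_scale!r}, mask={attn_mask!r}, out={attn_output!r}')

    # Toy mask standing in for context.attention_mask[i:i + 1].
    mask = [[0, 1], [1, 1]]

    # Positional call: the mask is misread as the softmax scale.
    fused_attention([2], 32, 8, mask, attn_output='out')

    # Keyword call, mirroring the commit: the mask reaches the intended parameter.
    fused_attention([2], 32, 8, attn_mask=mask, attn_output='out')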