We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 722c2a8, commit 98f93db (Copy full SHA for 98f93db)
examples/flash_decoding/example_gqa_decode.py
@@ -169,7 +169,7 @@ def flash_attn_split(
169
T.fill(scores_max, -T.infinity(accum_dtype))
170
171
loop_range = T.ceildiv((seqlen_kv // num_split), block_N)
172
- T.fill(K_shared, 0)
+
173
for k in T.Pipelined(loop_range, num_stages=num_stages):
174
T.copy(
175
K[bid, (seqlen_kv // num_split) * sid +
0 commit comments