We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents c2b4a1b + fae4f82 — commit b36d574 (full SHA: b36d574)
vllm/attention/ops/triton_flash_attention.py
@@ -415,7 +415,11 @@ def attn_fwd(
         return
 
     is_mqa = hq != hk
-    off_h_k = off_h_q % hk if is_mqa else off_h_q
+    if is_mqa:  # noqa: SIM108
+        off_h_k = off_h_q % hk
+    else:
+        off_h_k = off_h_q
+
     n_extra_tokens = 0
     if seqlen_k < BLOCK_N:
         n_extra_tokens = BLOCK_N - seqlen_k
0 commit comments