
Commit

Fix triton compilation issue (vllm-project#3984)
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Bellk17 and WoosukKwon authored Apr 12, 2024
1 parent fbb9d9e commit d04973a
Showing 1 changed file with 5 additions and 1 deletion.
vllm/attention/ops/triton_flash_attention.py: 5 additions & 1 deletion
@@ -415,7 +415,11 @@ def attn_fwd(
         return
 
     is_mqa = hq != hk
-    off_h_k = off_h_q % hk if is_mqa else off_h_q
+    if is_mqa:  # noqa: SIM108
+        off_h_k = off_h_q % hk
+    else:
+        off_h_k = off_h_q
 
     n_extra_tokens = 0
     if seqlen_k < BLOCK_N:
         n_extra_tokens = BLOCK_N - seqlen_k
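The fix replaces an inline conditional expression with an explicit if/else block; per the commit title, the one-line form is what triggered the Triton compilation issue, and the # noqa: SIM108 comment tells flake8-simplify not to suggest collapsing the branch back into a ternary. As a reading aid only (a hypothetical standalone snippet, not code from the vLLM kernel), the same MQA/GQA head-index mapping in plain Python, reusing the variable names from the diff:

# Illustrative sketch, not part of the commit: maps a query-head index to the
# key/value head it should read from, using the explicit branch introduced above.
def kv_head_index(off_h_q: int, hq: int, hk: int) -> int:
    is_mqa = hq != hk            # more query heads than KV heads -> MQA/GQA layout
    if is_mqa:
        off_h_k = off_h_q % hk   # several query heads share one KV head
    else:
        off_h_k = off_h_q        # plain MHA: heads map one-to-one
    return off_h_k

# Example: 32 query heads over 8 KV heads -> query head 19 reads KV head 3.
assert kv_head_index(19, 32, 8) == 3
assert kv_head_index(5, 16, 16) == 5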

0 comments on commit d04973a
