Skip to content

Commit

Permalink
feat: add JIT compilation support for FA3 templates (#672)
Browse files Browse the repository at this point in the history
Follow-up work to #667
  • Branch information
yzh119 authored Dec 17, 2024
1 parent d2ebd1e commit d4e8d79
Show file tree
Hide file tree
Showing 6 changed files with 610 additions and 23 deletions.
6 changes: 3 additions & 3 deletions flashinfer/jit/attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
batch_prefill_sm90_templ,
)
from .batch_prefill_templ import batch_prefill_suffix, batch_prefill_templ
from .core import load_cuda_ops
from .core import load_cuda_ops, sm90a_nvcc_flags
from .env import FLASHINFER_GEN_SRC_DIR
from .single_decode_templ import (
customizable_single_decode_templ,
Expand Down Expand Up @@ -333,7 +333,7 @@ def gen_single_prefill_sm90_module(*args):
source_paths.append(path)
write_if_different(path, source)

return load_cuda_ops(uri, source_paths)
return load_cuda_ops(uri, source_paths, extra_cuda_cflags=sm90a_nvcc_flags)


def get_batch_prefill_sources(
Expand Down Expand Up @@ -445,7 +445,7 @@ def gen_batch_prefill_sm90_module(*args):
source_paths.append(path)
write_if_different(path, source)

return load_cuda_ops(uri, source_paths)
return load_cuda_ops(uri, source_paths, extra_cuda_cflags=sm90a_nvcc_flags)


def get_customize_single_decode_sources(
Expand Down
Loading

0 comments on commit d4e8d79

Please sign in to comment.