Set warp size to 32

hyoon1 · hyoon1 · commit 6e6439b78df2 · 2025-05-07T17:24:50.000-04:00
Signed-off-by: Hosang Yoon <hosang.yoon@amd.com>
diff --git a/csrc/attention/attention_kernels.cuh b/csrc/attention/attention_kernels.cuh
@@ -36,7 +36,7 @@ typedef __hip_bfloat16 __nv_bfloat16;
 #ifndef USE_ROCM
   #define WARP_SIZE 32
 #else
-  #define WARP_SIZE warpSize
+  #define WARP_SIZE 32
 #endif
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
diff --git a/csrc/attention/paged_attention_v1.cu b/csrc/attention/paged_attention_v1.cu
@@ -22,7 +22,7 @@
 #ifndef USE_ROCM
   #define WARP_SIZE 32
 #else
-  #define WARP_SIZE warpSize
+  #define WARP_SIZE 32
 #endif
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -212,4 +212,4 @@ void paged_attention_v1(
 #undef WARP_SIZE
 #undef MAX
 #undef MIN
-#undef DIVIDE_ROUND_UP
+#undef DIVIDE_ROUND_UP
diff --git a/csrc/attention/paged_attention_v2.cu b/csrc/attention/paged_attention_v2.cu
@@ -22,7 +22,7 @@
 #ifndef USE_ROCM
   #define WARP_SIZE 32
 #else
-  #define WARP_SIZE warpSize
+  #define WARP_SIZE 32
 #endif
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -222,4 +222,4 @@ void paged_attention_v2(
 #undef WARP_SIZE
 #undef MAX
 #undef MIN
-#undef DIVIDE_ROUND_UP
+#undef DIVIDE_ROUND_UP
diff --git a/csrc/cuda_compat.h b/csrc/cuda_compat.h
@@ -7,7 +7,7 @@
 #ifndef USE_ROCM
   #define WARP_SIZE 32
 #else
-  #define WARP_SIZE warpSize
+  #define WARP_SIZE 32
 #endif
 
 #ifndef USE_ROCM