Make OSS fbgemm compatible with CUDA 12 (#1603)

jianyuh · facebook-github-bot · commit ea9b159b9449 · 2023-02-21T12:47:42.000-08:00
Summary: Pull Request resolved: #1603 Building fbgemm with CUDA 12 fails with the following errors related to the CUB namespace: ``` /usr/local/cuda-12.0/include/cub/agent/../util_math.cuh(103): error: namespace "fbgemm_gpu::cub" has no member "min" /usr/local/cuda-12.0/include/cub/agent/../util_math.cuh(104): error: namespace "fbgemm_gpu::cub" has no member "max" /usr/local/cuda-12.0/include/cub/agent/agent_radix_sort_onesweep.cuh(76): error: RegBoundScaling is not a template /usr/local/cuda-12.0/include/cub/agent/agent_radix_sort_upsweep.cuh(60): error: RegBoundScaling is not a template /usr/local/cuda-12.0/include/cub/agent/single_pass_scan_operators.cuh(325): error: namespace "fbgemm_gpu::cub" has no member "Debug" /usr/local/cuda-12.0/include/cub/agent/single_pass_scan_operators.cuh(354): error: namespace "fbgemm_gpu::cub" has no member "Debug" ``` This diff fixes the issue: it stops defining FBGEMM_CUB_USE_NAMESPACE for the fbgemm_gpu_py target, switches the CUB includes to angle-bracket form, and adds cub_namespace_prefix.cuh to the codegen dependencies. Reviewed By: q10 Differential Revision: D43423185 fbshipit-source-id: 9545835a9847aabeec4bc2a8dda8511771910fb6
diff --git a/fbgemm_gpu/CMakeLists.txt b/fbgemm_gpu/CMakeLists.txt
@@ -157,6 +157,7 @@ set(codegen_dependencies
     ${CMAKE_CODEGEN_DIR}/lookup_args.py
     ${CMAKE_CODEGEN_DIR}/split_embedding_codegen_lookup_invoker.template
     ${CMAKE_CURRENT_SOURCE_DIR}/include/fbgemm_gpu/cpu_utils.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/fbgemm_gpu/cub_namespace_prefix.cuh
     ${CMAKE_CURRENT_SOURCE_DIR}/include/fbgemm_gpu/cub_namespace_postfix.cuh
     ${CMAKE_CURRENT_SOURCE_DIR}/include/fbgemm_gpu/dispatch_macros.h
     ${CMAKE_CURRENT_SOURCE_DIR}/include/fbgemm_gpu/embedding_backward_template_helpers.cuh
@@ -409,9 +410,10 @@ else()
   set_property(TARGET fbgemm_gpu_py PROPERTY CUDA_ARCHITECTURES
                                              "${cuda_architectures}")
 
-  if(NOT FBGEMM_CPU_ONLY)
-    target_compile_definitions(fbgemm_gpu_py PRIVATE FBGEMM_CUB_USE_NAMESPACE)
-  endif()
+  # FBGEMM_CUB_USE_NAMESPACE will cause compilation errors on CUB for CUDA 12+
+  # if(NOT FBGEMM_CPU_ONLY)
+  #   target_compile_definitions(fbgemm_gpu_py PRIVATE FBGEMM_CUB_USE_NAMESPACE)
+  # endif()
 endif()
 
 set_target_properties(fbgemm_gpu_py PROPERTIES PREFIX "")
diff --git a/fbgemm_gpu/include/fbgemm_gpu/sparse_ops.cuh b/fbgemm_gpu/include/fbgemm_gpu/sparse_ops.cuh
@@ -16,6 +16,6 @@
 
 // clang-format off
 #include "./cub_namespace_prefix.cuh"
-#include "cub/block/block_reduce.cuh"
+#include <cub/block/block_reduce.cuh>
 #include "./cub_namespace_postfix.cuh"
 // clang-format on
diff --git a/fbgemm_gpu/src/jagged_tensor_ops.cu b/fbgemm_gpu/src/jagged_tensor_ops.cu
@@ -15,7 +15,7 @@
 
 // clang-format off
 #include "fbgemm_gpu/cub_namespace_prefix.cuh"
-#include "cub/device/device_scan.cuh"
+#include <cub/device/device_scan.cuh>
 #include "fbgemm_gpu/cub_namespace_postfix.cuh"
 // clang-format on
 
diff --git a/fbgemm_gpu/src/layout_transform_ops.cu b/fbgemm_gpu/src/layout_transform_ops.cu
@@ -7,7 +7,7 @@
 
 // clang-format off
 #include "fbgemm_gpu/cub_namespace_prefix.cuh"
-#include "cub/device/device_scan.cuh"
+#include <cub/device/device_scan.cuh>
 #include "fbgemm_gpu/cub_namespace_postfix.cuh"
 // clang-format on
 
diff --git a/fbgemm_gpu/src/sparse_ops.cu b/fbgemm_gpu/src/sparse_ops.cu
@@ -18,7 +18,7 @@
 
 // clang-format off
 #include "fbgemm_gpu/cub_namespace_prefix.cuh"
-#include "cub/device/device_scan.cuh"
+#include <cub/device/device_scan.cuh>
 #include "fbgemm_gpu/cub_namespace_postfix.cuh"
 // clang-format on
 
diff --git a/fbgemm_gpu/src/split_embeddings_cache_cuda.cu b/fbgemm_gpu/src/split_embeddings_cache_cuda.cu
@@ -7,9 +7,9 @@
 
 // clang-format off
 #include "fbgemm_gpu/cub_namespace_prefix.cuh"
-#include "cub/device/device_radix_sort.cuh"
-#include "cub/device/device_run_length_encode.cuh"
-#include "cub/device/device_select.cuh"
+#include <cub/device/device_radix_sort.cuh>
+#include <cub/device/device_run_length_encode.cuh>
+#include <cub/device/device_select.cuh>
 #include "fbgemm_gpu/cub_namespace_postfix.cuh"
 // clang-format on