pytorch
diff --git a/fbgemm_gpu/codegen/training/forward/embedding_forward_split_meta_template.cpp b/fbgemm_gpu/codegen/training/forward/embedding_forward_split_meta_template.cpp
+1 -1
diff --git a/fbgemm_gpu/include/fbgemm_gpu/utils/cuda_prelude.cuh b/fbgemm_gpu/include/fbgemm_gpu/utils/cuda_prelude.cuh
+1 -1
@@ -33,7 +33,7 @@
 using namespace fbgemm_gpu;
 using Tensor = at::Tensor;
 
-[[maybe_unused]] static constexpr float kINT8QparamsBytes = 8;
+[[maybe_unused]] static constexpr int32_t kINT8QparamsBytes = 8;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Kernel Definitions
 
@@ -85,7 +85,7 @@ static constexpr float kQParamEps = 1e-8f;
 will be stored at the end of each row in FP32 formats, appending a total of
 8 bytes to each row.
 */
-static constexpr float kINT8QparamsBytes = 8;
+static constexpr int32_t kINT8QparamsBytes = 8;
 
 template <typename T>
 DEVICE_INLINE T shfl_xor(