Deprecate old quantization / dequantization util functions. (#630)

caogao · facebook-github-bot · commit 1bad64bc0d18 · 2021-06-17T10:43:51.000-07:00
Summary: Pull Request resolved: #630. Deprecate FloatToFusedNBitRowwiseQuantizedSBHalf, FusedNBitRowwiseQuantizedSBHalfToFloat, FloatToFused8BitRowwiseQuantizedSBFloat, and Fused8BitRowwiseQuantizedSBFloatToFloat. Reviewed By: dskhudia. Differential Revision: D29121252. fbshipit-source-id: ea7eac6c0402f2a91a0319092ed5f3fc3ff53516
diff --git a/include/fbgemm/QuantUtils.h b/include/fbgemm/QuantUtils.h
@@ -254,23 +254,6 @@ FBGEMM_API void Requantize(
     int thread_id = 0,
     int num_threads = 1);
 
-/**
- * Convert float inputs to rowwise quantized outputs.
- * bitrate specifies the number of bits in quantized output.
- * Scale and Bias are in fp16. Each row's Scale and Bias are stored in
- * the row itself (fused) at the end.
- *
- * @param bit_rate can be 2, 4, or 8
- * TODO(T91361248): deprecate and replace with
- * FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf.
- */
-FBGEMM_API void FloatToFusedNBitRowwiseQuantizedSBHalf(
-    int bit_rate,
-    const float* input,
-    int input_rows,
-    int input_columns,
-    std::uint8_t* output);
-
 /**
  * Convert float (fp32 or fp16) inputs to rowwise quantized outputs.
  * bitrate specifies the number of bits in quantized output.
@@ -287,22 +270,6 @@ FBGEMM_API void FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf(
     int input_columns,
     std::uint8_t* output);
 
-/**
- * Convert fused rowwise quantized inputs to float.
- * bitrate specifies the number of bits in quantized input.
- * Scale and Bias are in fp16. Each row's Scale and Bias are stored in
- * the row itself (fused) at the end.
- *
- * @param bit_rate can be 2, 4, or 8
- * TODO(T91361248): deprecate and replace with FusedNBitRowwiseQuantizedSBToFloatOrHalf.
- */
-FBGEMM_API void FusedNBitRowwiseQuantizedSBHalfToFloat(
-    int bit_rate,
-    const uint8_t* input,
-    int input_rows,
-    int input_columns,
-    float* output);
-
 /**
  * Convert fused rowwise quantized inputs to float (fp32 or fp16).
  * bitrate specifies the number of bits in quantized input.
@@ -319,22 +286,6 @@ FBGEMM_API void FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf(
     int input_columns,
     OutputType* output);
 
-/**
- * Convert float inputs to rowwise quantized (8-bit) outputs.
- * Scale and Bias are in float. Each row's Scale and Bias are stored in
- * the row itself (fused) at the end.
- *
- * This version intentionally supports only 8-bit because we want to discourage
- * the usage of float scale and bias with 2 and 4 bit cases as that diminishes
- * the overall memory savings.
- * TODO(T91361248): deprecate and replace with FloatOrHalfToFused8BitRowwiseQuantizedSBFloat.
- */
-FBGEMM_API void FloatToFused8BitRowwiseQuantizedSBFloat(
-    const float* input,
-    int input_rows,
-    int input_columns,
-    std::uint8_t* output);
-
 /**
  * Convert float or half inputs to rowwise quantized (8-bit) outputs.
  * Scale and Bias are in float. Each row's Scale and Bias are stored in
@@ -351,21 +302,6 @@ FBGEMM_API void FloatOrHalfToFused8BitRowwiseQuantizedSBFloat(
     int input_columns,
     std::uint8_t* output);
 
-/**
- * Convert fused rowwise quantized (8-bit) inputs to float outputs.
- * Scale and Bias are in float. Each row's Scale and Bias are stored in
- * the row itself (fused) at the end.
- *
- * This version intentionally supports only 8-bit because
- * the corresponding quantize version only supports 8-bit.
- * TODO(T91361248): deprecate and replace with Fused8BitRowwiseQuantizedSBFloatToFloatOrHalf.
- */
-FBGEMM_API void Fused8BitRowwiseQuantizedSBFloatToFloat(
-    const uint8_t* input,
-    int input_rows,
-    int input_columns,
-    float* output);
-
 /**
  * Convert fused rowwise quantized (8-bit) inputs to float or half outputs.
  * Scale and Bias are in float. Each row's Scale and Bias are stored in
diff --git a/src/QuantUtils.cc b/src/QuantUtils.cc
@@ -609,16 +609,6 @@ void FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf(
   }
 }
 
-void FloatToFusedNBitRowwiseQuantizedSBHalf(
-    int bit_rate,
-    const float* input,
-    int input_rows,
-    int input_columns,
-    std::uint8_t* output) {
-  FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf<float>(
-      bit_rate, input, input_rows, input_columns, output);
-}
-
 template <typename InputType>
 void FloatOrHalfToFused8BitRowwiseQuantizedSBFloatRef(
     const InputType* input,
@@ -674,15 +664,6 @@ void FloatOrHalfToFused8BitRowwiseQuantizedSBFloat(
   }
 }
 
-void FloatToFused8BitRowwiseQuantizedSBFloat(
-    const float* input,
-    int input_rows,
-    int input_columns,
-    std::uint8_t* output) {
-  FloatOrHalfToFused8BitRowwiseQuantizedSBFloat<float>(
-      input, input_rows, input_columns, output);
-}
-
 template <typename OutputType>
 void FusedNBitRowwiseQuantizedSBHalfToFloatOrHalfRef(
     int bit_rate,
@@ -751,16 +732,6 @@ void FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf(
   }
 }
 
-void FusedNBitRowwiseQuantizedSBHalfToFloat(
-    int bit_rate,
-    const uint8_t* input,
-    int input_rows,
-    int input_columns,
-    float* output) {
-  FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf<float>(
-      bit_rate, input, input_rows, input_columns, output);
-}
-
 template <typename OutputType>
 void Fused8BitRowwiseQuantizedSBFloatToFloatOrHalfRef(
     const std::uint8_t* input,
@@ -802,15 +773,6 @@ void Fused8BitRowwiseQuantizedSBFloatToFloatOrHalf(
   }
 }
 
-void Fused8BitRowwiseQuantizedSBFloatToFloat(
-    const uint8_t* input,
-    int input_rows,
-    int input_columns,
-    float* output) {
-  Fused8BitRowwiseQuantizedSBFloatToFloatOrHalf<float>(
-      input, input_rows, input_columns, output);
-}
-
 #define INSTANTIATE_QuantizationFunctions(type)                                \
   template FBGEMM_API void                                                     \
   FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfRef<type>(                       \