@@ -254,23 +254,6 @@ FBGEMM_API void Requantize(
     int thread_id = 0,
     int num_threads = 1);

-/**
- * Convert float inputs to rowwise quantized outputs.
- * bitrate specifies the number of bits in quantized output.
- * Scale and Bias are in fp16. Each row's Scale and Bias are stored in
- * the row itself (fused) at the end.
- *
- * @param bit_rate can be 2, 4, or 8
- * TODO(T91361248): deprecate and replace with
- * FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf.
- */
-FBGEMM_API void FloatToFusedNBitRowwiseQuantizedSBHalf(
-    int bit_rate,
-    const float* input,
-    int input_rows,
-    int input_columns,
-    std::uint8_t* output);
-
 /**
  * Convert float (fp32 or fp16) inputs to rowwise quantized outputs.
  * bitrate specifies the number of bits in quantized output.
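For orientation: the fused layout described above keeps each quantized row self-contained, with the packed N-bit codes first and the row's fp16 scale and bias appended at the end. A minimal standalone sketch of the per-row footprint this implies is below; FusedNBitRowBytes is an illustrative helper (not an FBGEMM API), and the 2-bytes-per-half assumption comes from the comment, not from this diff.

```cpp
#include <cstdint>
#include <cstdio>

// Per-row byte footprint of the fused N-bit rowwise format described above:
// ceil(input_columns * bit_rate / 8) packed data bytes, followed by a 2-byte
// fp16 scale and a 2-byte fp16 bias at the end of the row (assumed layout).
std::int64_t FusedNBitRowBytes(int input_columns, int bit_rate) {
  const std::int64_t data_bytes =
      (static_cast<std::int64_t>(input_columns) * bit_rate + 7) / 8;
  return data_bytes + 2 * static_cast<std::int64_t>(sizeof(std::uint16_t));
}

int main() {
  // Example: a 128-column row at bit_rate = 4 packs into 64 data bytes plus
  // 4 bytes of fused scale/bias, i.e. 68 bytes (vs. 512 bytes as fp32).
  std::printf("%lld bytes\n", static_cast<long long>(FusedNBitRowBytes(128, 4)));
  return 0;
}
```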
@@ -287,22 +270,6 @@ FBGEMM_API void FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf(
     int input_columns,
     std::uint8_t* output);

-/**
- * Convert fused rowwise quantized inputs to float.
- * bitrate specifies the number of bits in quantized input.
- * Scale and Bias are in fp16. Each row's Scale and Bias are stored in
- * the row itself (fused) at the end.
- *
- * @param bit_rate can be 2, 4, or 8
- * TODO(T91361248): deprecate and replace with FusedNBitRowwiseQuantizedSBToFloatOrHalf.
- */
-FBGEMM_API void FusedNBitRowwiseQuantizedSBHalfToFloat(
-    int bit_rate,
-    const uint8_t* input,
-    int input_rows,
-    int input_columns,
-    float* output);
-
 /**
  * Convert fused rowwise quantized inputs to float (fp32 or fp16).
  * bitrate specifies the number of bits in quantized input.
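Dequantization reads the same fused layout back: find the fp16 scale and bias at the end of the row, then reconstruct each element as q * scale + bias. A rough sketch follows; HalfBitsToFloat and DequantizeOneNBit are hand-rolled illustrations rather than FBGEMM calls, and both the bit-packing order (low-order bits hold the earlier column) and the scale-before-bias ordering are assumptions the diff itself does not pin down.

```cpp
#include <cmath>
#include <cstdint>
#include <cstring>

// Hand-rolled IEEE binary16 -> float decode (illustration only; FBGEMM has
// its own fp16 handling).
float HalfBitsToFloat(std::uint16_t h) {
  const std::uint32_t sign = (h >> 15) & 0x1;
  const std::uint32_t exp = (h >> 10) & 0x1f;
  const std::uint32_t mant = h & 0x3ff;
  float value;
  if (exp == 0) {
    value = std::ldexp(static_cast<float>(mant), -24); // subnormal / zero
  } else if (exp == 31) {
    value = mant ? NAN : INFINITY; // NaN / infinity
  } else {
    value = std::ldexp(static_cast<float>(mant | 0x400),
                       static_cast<int>(exp) - 25); // normal
  }
  return sign ? -value : value;
}

// Dequantize element `col` of one fused N-bit row. Assumes bit_rate is 2, 4,
// or 8 (so codes never straddle a byte), that low-order bits hold the earlier
// column, and that the fp16 scale precedes the fp16 bias at the row's end.
float DequantizeOneNBit(
    const std::uint8_t* row,
    int input_columns,
    int bit_rate,
    int col) {
  const int elems_per_byte = 8 / bit_rate;
  const int data_bytes = (input_columns + elems_per_byte - 1) / elems_per_byte;
  std::uint16_t scale_bits = 0, bias_bits = 0;
  std::memcpy(&scale_bits, row + data_bytes, sizeof(scale_bits));
  std::memcpy(&bias_bits, row + data_bytes + sizeof(scale_bits), sizeof(bias_bits));
  const float scale = HalfBitsToFloat(scale_bits);
  const float bias = HalfBitsToFloat(bias_bits);
  const std::uint8_t packed = row[col / elems_per_byte];
  const int shift = (col % elems_per_byte) * bit_rate;
  const std::uint32_t q = (packed >> shift) & ((1u << bit_rate) - 1u);
  return static_cast<float>(q) * scale + bias;
}
```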
@@ -319,22 +286,6 @@ FBGEMM_API void FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf(
     int input_columns,
     OutputType* output);

-/**
- * Convert float inputs to rowwise quantized (8-bit) outputs.
- * Scale and Bias are in float. Each row's Scale and Bias are stored in
- * the row itself (fused) at the end.
- *
- * This version intentionally supports only 8-bit because we want to discourage
- * the usage of float scale and bias with 2 and 4 bit cases as that diminishes
- * the overall memory savings.
- * TODO(T91361248): deprecate and replace with FloatOrHalfToFused8BitRowwiseQuantizedSBFloat.
- */
-FBGEMM_API void FloatToFused8BitRowwiseQuantizedSBFloat(
-    const float* input,
-    int input_rows,
-    int input_columns,
-    std::uint8_t* output);
-
 /**
  * Convert float or half inputs to rowwise quantized (8-bit) outputs.
  * Scale and Bias are in float. Each row's Scale and Bias are stored in
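In the 8-bit variant the scale and bias are plain floats, so a row is input_columns code bytes plus 8 bytes of fused metadata. Below is a sketch of the usual min/max rowwise scheme the comment implies (scale = (max - min) / 255, bias = min); QuantizeRow8Bit is illustrative only, and the zero-range and rounding handling may differ from FBGEMM's kernels.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <vector>

// Quantize one float row into the fused 8-bit layout sketched above:
// `cols` uint8 codes followed by a float scale and a float bias.
std::vector<std::uint8_t> QuantizeRow8Bit(const float* x, int cols) {
  const float min_v = *std::min_element(x, x + cols);
  const float max_v = *std::max_element(x, x + cols);
  const float range = max_v - min_v;
  const float scale = range > 0.0f ? range / 255.0f : 1.0f;
  const float bias = min_v;

  std::vector<std::uint8_t> row(cols + 2 * sizeof(float));
  for (int i = 0; i < cols; ++i) {
    const long q = std::lround((x[i] - bias) / scale);
    row[i] = static_cast<std::uint8_t>(std::min(255L, std::max(0L, q)));
  }
  // Fuse the row's scale and bias at the end, after the codes.
  std::memcpy(row.data() + cols, &scale, sizeof(float));
  std::memcpy(row.data() + cols + sizeof(float), &bias, sizeof(float));
  return row;
}
```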
@@ -351,21 +302,6 @@ FBGEMM_API void FloatOrHalfToFused8BitRowwiseQuantizedSBFloat(
     int input_columns,
     std::uint8_t* output);

-/**
- * Convert fused rowwise quantized (8-bit) inputs to float outputs.
- * Scale and Bias are in float. Each row's Scale and Bias are stored in
- * the row itself (fused) at the end.
- *
- * This version intentionally supports only 8-bit because
- * the corresponding quantize version only supports 8-bit.
- * TODO(T91361248): deprecate and replace with Fused8BitRowwiseQuantizedSBFloatToFloatOrHalf.
- */
-FBGEMM_API void Fused8BitRowwiseQuantizedSBFloatToFloat(
-    const uint8_t* input,
-    int input_rows,
-    int input_columns,
-    float* output);
-
 /**
  * Convert fused rowwise quantized (8-bit) inputs to float or half outputs.
  * Scale and Bias are in float. Each row's Scale and Bias are stored in
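The matching 8-bit dequantization is the affine map back, with the float scale and bias read from the tail of each row. A minimal sketch, again assuming the codes-then-scale-then-bias ordering (DequantizeRow8Bit is illustrative, not an FBGEMM function):

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Invert the fused 8-bit layout: read the float scale and bias stored after
// the `cols` codes, then apply x = q * scale + bias elementwise.
std::vector<float> DequantizeRow8Bit(const std::uint8_t* row, int cols) {
  float scale = 0.0f, bias = 0.0f;
  std::memcpy(&scale, row + cols, sizeof(float));
  std::memcpy(&bias, row + cols + sizeof(float), sizeof(float));

  std::vector<float> out(cols);
  for (int i = 0; i < cols; ++i) {
    out[i] = static_cast<float>(row[i]) * scale + bias;
  }
  return out;
}
```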