Enable specifying output dtype for fp8 quantized communication

Armand Sauzay · facebook-github-bot · commit f1f413b17584 · 2025-11-20T10:16:54.000-08:00
Summary: X-link: pytorch/FBGEMM#5154. X-link: facebookresearch/FBGEMM#2154. Adds an `fp8_output_dtype` parameter to the qcomms config, allowing FP8 to be dequantized into different float formats rather than only FP32. Reviewed By: spcyppt. Differential Revision: D86890315
diff --git a/torchrec/distributed/fbgemm_qcomm_codec.py b/torchrec/distributed/fbgemm_qcomm_codec.py
@@ -69,6 +69,7 @@ class QCommsConfig:
     fp8_quantize_dim: Optional[int] = None
     fp8_quantize_dim_bwd: Optional[int] = None
     fp8_bwd_uses_143: Optional[bool] = False
+    fp8_output_dtype: Optional[SparseType] = None
     mx4_quantize_dim: Optional[int] = None
     mx4_quantize_dim_bwd: Optional[int] = None
     mx4_rounding_mode: Optional[RoundingMode] = None
@@ -137,6 +138,7 @@ def get_qcomm_codecs(qcomms_config: Optional[QCommsConfig]) -> QuantizedCommCode
                 is_fwd=True,
                 row_dim=row_dim,
                 rounding_mode=rounding_mode,
+                fp8_output_dtype=qcomms_config.fp8_output_dtype,
             ),
         )
         row_dim_bwd = None
@@ -157,6 +159,7 @@ def get_qcomm_codecs(qcomms_config: Optional[QCommsConfig]) -> QuantizedCommCode
                 # if fp8_bwd_uses_143 is False/None, bwd will use 1-5-2
                 row_dim=row_dim_bwd,
                 rounding_mode=rounding_mode,
+                fp8_output_dtype=qcomms_config.fp8_output_dtype,
             ),
         )
     return codecs