Commit 1a58087

Reduce memory usage for fp8 scaled op. (#10531)

Parent: 6c14f3a

File tree: 1 file changed (+1, -1)

comfy/quant_ops.py

Lines changed: 1 addition & 1 deletion
@@ -358,7 +358,7 @@ def quantize(cls, tensor, scale=None, dtype=torch.float8_e4m3fn):
         scale = scale.to(device=tensor.device, dtype=torch.float32)
 
         lp_amax = torch.finfo(dtype).max
-        tensor_scaled = tensor.float() / scale
+        tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
         torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
         qdata = tensor_scaled.to(dtype, memory_format=torch.contiguous_format)
 
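Why the new line uses less memory: `tensor.float()` materializes a full fp32 copy of the input before dividing, so a bf16/fp16 tensor briefly occupies twice its normal footprint during quantization. The replacement inverts only the small fp32 `scale`, casts that reciprocal down to `tensor.dtype`, and multiplies, so the large temporary stays in the input's (typically half-precision) dtype. The sketch below is not ComfyUI code; the function names, tensor size, and scalar scale are illustrative assumptions used only to contrast peak CUDA memory for the two variants.

import torch

def quantize_old(tensor, scale, dtype=torch.float8_e4m3fn):
    # Pre-#10531 path: tensor.float() allocates a full fp32 copy of the input.
    lp_amax = torch.finfo(dtype).max
    tensor_scaled = tensor.float() / scale
    torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
    return tensor_scaled.to(dtype, memory_format=torch.contiguous_format)

def quantize_new(tensor, scale, dtype=torch.float8_e4m3fn):
    # Post-#10531 path: only the scale is inverted in fp32; the product
    # (and the clamp that follows) stays in the input dtype.
    lp_amax = torch.finfo(dtype).max
    tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
    torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
    return tensor_scaled.to(dtype, memory_format=torch.contiguous_format)

if torch.cuda.is_available():
    t = torch.randn(4096, 4096, dtype=torch.bfloat16, device="cuda")  # 32 MiB input
    s = torch.tensor(0.5, dtype=torch.float32, device="cuda")  # hypothetical scale
    for fn in (quantize_old, quantize_new):
        torch.cuda.reset_peak_memory_stats()
        q = fn(t, s)
        torch.cuda.synchronize()
        peak = torch.cuda.max_memory_allocated() / 2**20
        print(f"{fn.__name__}: peak {peak:.0f} MiB")

One numerical subtlety: the old path rounded the quotient once into fp32, while the new path rounds `1.0 / scale` to the input dtype before multiplying, so intermediate values can differ by a ulp or two in the low-precision dtype; that difference is typically negligible next to the rounding of the final fp8 cast.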
