
Commit 20c77e2

use torch.float8_e8m0fnu in mx_formats
Summary:

Switches our MX code to use the new `torch.float8_e8m0fnu` dtype directly where appropriate. This makes numerical debugging of scales easier, since the decoded scale values are visible when the tensor is printed.

Test Plan:

```
pytest test/prototype/mx_formats/ -s -x
```

Reviewers:
Subscribers:
Tasks:
Tags:

ghstack-source-id: 1c0c2ba
ghstack-comment-id: 2721705057
Pull Request resolved: #1882
1 parent c376285 commit 20c77e2
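The debugging benefit described in the summary is easiest to see with a quick illustration. The sketch below is not part of the diff; it assumes a PyTorch build that includes `torch.float8_e8m0fnu` (2.7+) and shows how the same scale bits read before and after the switch:

```python
import torch

# Raw biased-exponent bytes, as MX scales were stored before this change.
scale_bits = torch.tensor([126, 127, 128, 255], dtype=torch.uint8)
print(scale_bits)  # opaque: tensor([126, 127, 128, 255], dtype=torch.uint8)

# The same bits reinterpreted as e8m0 print as decoded powers of two,
# 2^(exponent - 127), and the all-ones encoding surfaces directly as NaN.
scale_e8m0 = scale_bits.view(torch.float8_e8m0fnu)
print(scale_e8m0)  # values: 0.5, 1.0, 2.0, nan
```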

File tree: 5 files changed, +25 −13 lines


test/prototype/mx_formats/test_mx_linear.py (2 additions & 2 deletions)

```diff
@@ -25,14 +25,14 @@
 )
 from torchao.quantization.utils import compute_error
 from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_5,
+    TORCH_VERSION_AT_LEAST_2_7,
     is_sm_at_least_89,
     is_sm_at_least_100,
 )

 torch.manual_seed(2)

-if not TORCH_VERSION_AT_LEAST_2_5:
+if not TORCH_VERSION_AT_LEAST_2_7:
     pytest.skip("Unsupported PyTorch version", allow_module_level=True)
```
test/prototype/mx_formats/test_mx_mm.py (2 additions & 2 deletions)

```diff
@@ -5,9 +5,9 @@
 from torchao.ops import mx_fp4_bf16, mx_fp8_bf16
 from torchao.prototype.mx_formats.mx_tensor import DTYPE_FP4, MXTensor
 from torchao.prototype.mx_formats.utils import to_blocked
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_4, is_sm_at_least_100
+from torchao.utils import TORCH_VERSION_AT_LEAST_2_7, is_sm_at_least_100

-if not TORCH_VERSION_AT_LEAST_2_4:
+if not TORCH_VERSION_AT_LEAST_2_7:
     pytest.skip("Unsupported PyTorch version", allow_module_level=True)
```
test/prototype/mx_formats/test_mx_tensor.py (10 additions & 8 deletions)

```diff
@@ -18,21 +18,20 @@
 )
 from torchao.prototype.mx_formats.custom_cast import pack_uint4, pack_uint6
 from torchao.prototype.mx_formats.mx_tensor import (
-    E8M0_EXPONENT_NAN_VAL,
     MXTensor,
     ScaleCalculationMode,
     to_dtype,
 )
 from torchao.quantization.utils import compute_error
 from torchao.utils import (
-    TORCH_VERSION_AT_LEAST_2_4,
+    TORCH_VERSION_AT_LEAST_2_7,
     is_sm_at_least_89,
     is_sm_at_least_100,
 )

 torch.manual_seed(2)

-if not TORCH_VERSION_AT_LEAST_2_4:
+if not TORCH_VERSION_AT_LEAST_2_7:
     pytest.skip("Unsupported PyTorch version", allow_module_level=True)

@@ -118,8 +117,8 @@ def test_exponent_nan_in(elem_dtype):
     )
     block_size = 4
     tensor_mx = MXTensor.to_mx(tensor_hp, elem_dtype, block_size)
-    assert torch.all(tensor_mx._scale_e8m0[0] == E8M0_EXPONENT_NAN_VAL)
-    assert not torch.any(tensor_mx._scale_e8m0[1:] == E8M0_EXPONENT_NAN_VAL)
+    assert torch.all(torch.isnan(tensor_mx._scale_e8m0[0]))
+    assert not torch.any(torch.isnan(tensor_mx._scale_e8m0[1:]))


 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -129,8 +128,11 @@ def test_exponent_nan_out(elem_dtype, pack_fp6):
     """
     If block exponent value is NaN, the MX tensor block value is NaN
     """
-    scale_e8m0_bits = torch.tensor(
-        [E8M0_EXPONENT_NAN_VAL, 23], dtype=torch.uint8, device="cuda"
+    if pack_fp6 and elem_dtype not in (DTYPE_FP6_E2M3, DTYPE_FP6_E3M2):
+        pytest.skip("invalid configuration")
+
+    scale_e8m0 = torch.tensor(
+        [float("nan"), 1.0], dtype=torch.float8_e8m0fnu, device="cuda"
     )

     block_size = 4
@@ -156,7 +158,7 @@ def test_exponent_nan_out(elem_dtype, pack_fp6):
     block_size = 4
     use_fp4_custom_triton_dequant_kernel = False
     tensor_mx = MXTensor(
-        scale_e8m0_bits,
+        scale_e8m0,
         data_bits,
         elem_dtype,
         block_size,
```
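The rewritten assertions lean on `torch.isnan` understanding the e8m0 NaN encoding (all exponent bits set), so tests no longer need to compare raw bytes against `E8M0_EXPONENT_NAN_VAL`. A minimal standalone version of the check, assuming torch 2.7+:

```python
import torch

# e8m0 has a single special value: the all-ones exponent encodes NaN,
# and torch.isnan recognizes it natively on this dtype.
scale = torch.tensor([float("nan"), 1.0], dtype=torch.float8_e8m0fnu)
assert torch.isnan(scale[0])
assert not torch.isnan(scale[1])
```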

torchao/prototype/mx_formats/custom_cast.py (3 additions & 0 deletions)

```diff
@@ -743,6 +743,7 @@ def triton_f4_to_scaled_bf16(
     size is currently assumed to be 32.
     Output: a tensor of bfloat16 values, multiplied by the encoded scale
     """
+    s_e8m0 = s_e8m0.view(torch.uint8)
     assert TORCH_VERSION_AT_LEAST_2_4, "unsupported"
     new_shape = (*x.shape[:-1], x.shape[-1] * 2)
     output = torch.empty(*new_shape, device=x.device, dtype=torch.bfloat16)
@@ -859,6 +860,7 @@ def triton_f6_e2m3_to_scaled_bf16(
     size is currently assumed to be 32.
     Output: a tensor of bfloat16 values, multiplied by the encoded scale
     """
+    s_e8m0 = s_e8m0.view(torch.uint8)

     packed_mx_block_size = 3 * mx_block_size // 4

@@ -900,6 +902,7 @@ def triton_f6_e3m2_to_scaled_bf16(
     size is currently assumed to be 32.
     Output: a tensor of bfloat16 values, multiplied by the encoded scale
     """
+    s_e8m0 = s_e8m0.view(torch.uint8)

     packed_mx_block_size = 3 * mx_block_size // 4
```
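These triton kernels still consume raw exponent bytes, so the e8m0 scale is bitcast back to `uint8` at the kernel boundary. A small sketch of that round-trip (not from the diff; assumes torch 2.7+), relying on `Tensor.view(dtype)` being a zero-copy reinterpretation between two one-byte dtypes:

```python
import torch

s_e8m0 = torch.tensor([1.0, 2.0], dtype=torch.float8_e8m0fnu)
s_bits = s_e8m0.view(torch.uint8)  # biased exponents: tensor([127, 128])

# view() reinterprets the same storage, so no data is copied and the
# round-trip back to e8m0 is lossless.
assert s_bits.data_ptr() == s_e8m0.data_ptr()
assert s_bits.view(torch.float8_e8m0fnu).dtype == torch.float8_e8m0fnu
```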

torchao/prototype/mx_formats/mx_tensor.py (8 additions & 1 deletion)

```diff
@@ -240,10 +240,15 @@ def to_mx(
     else:
         raise AssertionError("unsupported")

+    scale_e8m0_biased = scale_e8m0_biased.view(torch.float8_e8m0fnu)
+
     return scale_e8m0_biased, data_lp


+# TODO(future PR): delete this function once casting from e8m0 to float works
+# in triton + torchinductor
 def get_fp_scale(scale_e8m0):
+    scale_e8m0 = scale_e8m0.view(torch.uint8)
     s_offset = scale_e8m0.to(torch.int16) - E8M0_EXPONENT_BIAS
     # TODO(later): it would be nice if there was a way to do the 2^x operation
     # in PyTorch without creating a tensor of twos
@@ -476,7 +481,9 @@ def __new__(
             dtype=orig_dtype,
             device=data_bits.device,
         )
-        assert scale_e8m0_bits.dtype == torch.uint8, "unsupported"
+        assert (
+            scale_e8m0_bits.dtype == torch.float8_e8m0fnu
+        ), f"scale_e8m0_bits.dtype must be `torch.float8_e8m0fnu`, got {scale_e8m0_bits.dtype}"
         assert len(scale_e8m0_bits.shape) == 1, "unsupported"
         assert data_bits.dtype in (
             torch.float8_e4m3fn,
```
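`get_fp_scale` decodes the biased exponent into a floating-point scale of `2^(exponent - 127)`. The sketch below re-derives it for illustration only: `get_fp_scale_sketch` is a hypothetical name, and it uses `torch.exp2` where the real function builds a tensor of twos (see the TODO in the diff above):

```python
import torch

E8M0_EXPONENT_BIAS = 127  # bias of the e8m0 format

def get_fp_scale_sketch(scale_e8m0: torch.Tensor) -> torch.Tensor:
    # Bitcast e8m0 back to raw exponent bytes, remove the bias, and
    # decode the scale as a power of two.
    bits = scale_e8m0.view(torch.uint8)
    s_offset = bits.to(torch.int16) - E8M0_EXPONENT_BIAS
    return torch.exp2(s_offset.to(torch.float32))

scale = torch.tensor([0.5, 1.0, 4.0], dtype=torch.float8_e8m0fnu)
print(get_fp_scale_sketch(scale))  # tensor([0.5000, 1.0000, 4.0000])
```

Once casting from e8m0 to float works in triton and torchinductor, this helper becomes unnecessary, which is exactly what the new TODO in the diff records.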
