pytorch-labs · vkuzo · Aug 4, 2023 · Aug 4, 2023
diff --git a/float8_playground/float8_linear.py b/float8_playground/float8_linear.py
@@ -113,6 +113,13 @@ def __init__(self, *args, **kwargs):
 
     def forward(self, x):
         if not isinstance(x, Float8Tensor):
+            # Duplicate the autocast logic for F.linear, so that the output
+            # of our module has the right original precision
+            if torch.is_autocast_enabled():
+                # For now, hardcode to the GPU's autocast dtype;
+                # CPU autocast support can be added later if needed.
+                x = x.to(torch.get_autocast_gpu_dtype())
+
             # TODO(future): switch to delayed scaling
             self.fp8_s_in.fill_(tensor_to_scale(x, torch.float8_e4m3fn))
             x_fp8 = Float8Tensor.to_float8(x, self.fp8_s_in, torch.float8_e4m3fn)

diff --git a/tests/test.py b/tests/test.py
@@ -60,8 +60,8 @@ def _test_linear_impl(self, x, m_ref):
         g_sqnr = compute_error(m_ref.weight.grad, m_fp8.weight.grad)
 
         # verify sqnr is reasonable
-        self.assertTrue(y_sqnr >= 22.0)
-        self.assertTrue(g_sqnr >= 22.0)
+        self.assertTrue(y_sqnr >= 18.0)
+        self.assertTrue(g_sqnr >= 18.0)
         if m_ref.bias is not None:
             torch.testing.assert_close(m_ref.bias.grad, m_fp8.bias.grad)
 
@@ -96,6 +96,24 @@ def test_linear_bias(self):
             m_ref = nn.Linear(3, 4, bias=True, device='cuda')
             self._test_linear_impl(x, m_ref)
 
+    def test_autocast(self):
+        # for now the support is very simple:
+        # 1. if autocast is off, output of Float8Linear has _orig_dtype set to torch.float
+        # 2. if autocast is on, output of Float8Linear has _orig_dtype set to torch.half
+
+        m = nn.Linear(4, 4, device='cuda')
+        m = Float8Linear.from_float(m)
+
+        # autocast off
+        x = torch.randn(4, 4, device='cuda')
+        y = m(x)
+        self.assertTrue(y._orig_dtype == torch.float)
+
+        # autocast on
+        with torch.autocast('cuda'):
+            y = m(x)
+        self.assertTrue(y._orig_dtype == torch.half)
+
 
 if __name__ == '__main__':
     unittest.main()