This repository was archived by the owner on Aug 7, 2024. It is now read-only.

Commit 791b2bd

generatedunixname89002005367269 authored and facebook-github-bot committed

Daily arc lint --take BLACK

Reviewed By: martintrojer
Differential Revision: D50790931
fbshipit-source-id: 45afa339d95f2fef1c63e71572b5d93de8c5b582

1 parent 429a313 · commit 791b2bd
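This commit is mechanical: Meta's arc lint wrapper runs the Black code formatter over the repository, and every hunk below is the same transformation: a call or signature that overflows Black's default 88-character line length is split so its arguments sit on their own lines. Outside that tooling, a roughly equivalent standalone invocation would be (a sketch, assuming Black's default settings):

    pip install black
    black float8_experimental/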

File tree: 3 files changed, +18 -6 lines changed

float8_experimental/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 # Lets define a few top level things here
-from float8_experimental.float8_tensor import Float8Tensor
 from float8_experimental.float8_linear import Float8Linear
+from float8_experimental.float8_tensor import Float8Tensor
 
 __all__ = ["Float8Tensor", "Float8Linear"]
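The only change in this file is import ordering. Both public names are still re-exported at the package root, so imports like the following should keep working (a usage sketch based on the __all__ shown above):

    from float8_experimental import Float8Linear, Float8Tensor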

float8_experimental/float8_linear.py

Lines changed: 9 additions & 3 deletions
@@ -174,7 +174,9 @@ class DelayedScalingRecipe:
 
 class Float8LinearMixin(object):
     def __init__(self, *args, **kwargs):
-        delayed_scaling_recipe = kwargs.pop("delayed_scaling_recipe", DelayedScalingRecipe())
+        delayed_scaling_recipe = kwargs.pop(
+            "delayed_scaling_recipe", DelayedScalingRecipe()
+        )
         super().__init__(*args, **kwargs)
 
         # TODO(future): have a unique recipe per buffer instead of one per
@@ -268,7 +270,9 @@ def cast_y_to_float8_in_bw(self, y):
 
     def float8_mm(self, x_fp8, w_fp8, is_amax_initialized):
         scale_fn_name = self.recipe.scale_fn_name
-        y = float8_linear.apply(x_fp8, w_fp8, is_amax_initialized, scale_fn_name, self.emulate)
+        y = float8_linear.apply(
+            x_fp8, w_fp8, is_amax_initialized, scale_fn_name, self.emulate
+        )
         return y
 
     def float8_pre_forward(self, x):
@@ -407,7 +411,9 @@ def sync_float8_amax_and_scale_history(model: torch.nn.Module) -> None:
         #
         _update_history_with_new_amax(child.fp8_amax_x, child.fp8_amax_history_x)
         _update_history_with_new_amax(child.fp8_amax_w, child.fp8_amax_history_w)
-        _update_history_with_new_amax(child.fp8_amax_dL_dY, child.fp8_amax_history_dL_dY)
+        _update_history_with_new_amax(
+            child.fp8_amax_dL_dY, child.fp8_amax_history_dL_dY
+        )
 
         #
         # 3. calculate the scales
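The third hunk wraps a call inside sync_float8_amax_and_scale_history, whose signature appears in the hunk header. For orientation, here is a minimal sketch of where such a helper sits in a delayed-scaling training loop; the model, data, and optimizer are assumptions for illustration, not part of this diff:

    import torch
    from float8_experimental.float8_linear import sync_float8_amax_and_scale_history

    # assume `model` already contains float8 linear modules and `opt` is any optimizer
    for batch, target in data:
        loss = torch.nn.functional.mse_loss(model(batch), target)
        loss.backward()
        # fold the freshest amax readings into each module's history
        # before step 3 ("calculate the scales") consumes them
        sync_float8_amax_and_scale_history(model)
        opt.step()
        opt.zero_grad()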

float8_experimental/float8_tensor.py

Lines changed: 8 additions & 2 deletions
@@ -75,8 +75,13 @@ def backward(ctx, g):
         return g, None, None, None
 
 
-def to_float8(tensor: torch.Tensor, scale: torch.Tensor, float8_dtype: torch.dtype, amax_buffer:torch.Tensor =None) -> "Float8Tensor":
-    """ Converts a higher precision tensor to float8 in a differentiable way.
+def to_float8(
+    tensor: torch.Tensor,
+    scale: torch.Tensor,
+    float8_dtype: torch.dtype,
+    amax_buffer: torch.Tensor = None,
+) -> "Float8Tensor":
+    """Converts a higher precision tensor to float8 in a differentiable way.
 
     Args:
         tensor: the tensor to convert
@@ -89,6 +94,7 @@ def to_float8(tensor: torch.Tensor, scale: torch.Tensor, float8_dtype: torch.dty
     """
     return ToFloat8ConstrFunc.apply(tensor, scale, float8_dtype, amax_buffer)
 
+
 class FromFloat8ConstrFunc(torch.autograd.Function):
     """
     A differentiable conversion from fp8
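After reformatting, the signature reads to_float8(tensor, scale, float8_dtype, amax_buffer=None) -> Float8Tensor. A minimal usage sketch; the scale heuristic is an assumption (a common per-tensor fp8 recipe), and torch.float8_e4m3fn requires a recent PyTorch build:

    import torch
    from float8_experimental.float8_tensor import to_float8

    x = torch.randn(16, 16)
    # derive a per-tensor scale from the absolute max so values fill the fp8 range
    scale = torch.finfo(torch.float8_e4m3fn).max / x.abs().max()
    x_fp8 = to_float8(x, scale, torch.float8_e4m3fn)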
