@@ -54,26 +54,26 @@ def forward(
         assert A.ndim == 2, "A must be 2D"
         assert B_t.ndim == 3, "B must be 3D"
 
-        assert A.size(-1) % 16 == 0, (
-            f"A must have a last dim divisible by 16, but got shape: {A.shape}"
-        )
-        assert B_t.size(-2) % 16 == 0 and B_t.size(-1) % 16 == 0, (
-            f"B must have last 2 dims divisible by 16, but got shape: {B_t.shape}"
-        )
+        assert (
+            A.size(-1) % 16 == 0
+        ), f"A must have a last dim divisible by 16, but got shape: {A.shape}"
+        assert (
+            B_t.size(-2) % 16 == 0 and B_t.size(-1) % 16 == 0
+        ), f"B must have last 2 dims divisible by 16, but got shape: {B_t.shape}"
 
         # Assert input tensors are in high-precision dtypes.
-        assert A.dtype == torch.float32 or A.dtype == torch.bfloat16, (
-            "A must be float32 or bfloat16"
-        )
-        assert B_t.dtype == torch.float32 or B_t.dtype == torch.bfloat16, (
-            "B must be float32 or bfloat16"
-        )
+        assert (
+            A.dtype == torch.float32 or A.dtype == torch.bfloat16
+        ), "A must be float32 or bfloat16"
+        assert (
+            B_t.dtype == torch.float32 or B_t.dtype == torch.bfloat16
+        ), "B must be float32 or bfloat16"
         assert offs.dtype == torch.int32, "offs must be int32"
 
         # Assert A and B dims are compatible for a scaled grouped GEMM.
-        assert A.size(-1) == B_t.size(-2), (
-            f"shape {A.shape} and {B_t.shape} are not compatible for _scaled_grouped_mm"
-        )
+        assert A.size(-1) == B_t.size(
+            -2
+        ), f"shape {A.shape} and {B_t.shape} are not compatible for _scaled_grouped_mm"
 
         # The left operand in the scaled grouped GEMM must be row-major due to hardware requirements.
         assert not _is_column_major(A), "A must be row-major"
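For context, a minimal sketch (not part of this diff) of inputs that would pass the assertions above, assuming `A` is the 2D row-major left operand, `B_t` the 3D stack of right operands, and `offs` the int32 group-offset tensor; the concrete sizes are illustrative only:

```python
import torch

# Hypothetical inputs satisfying the checks in forward() above (illustrative only).
A = torch.randn(128, 256, dtype=torch.bfloat16)             # 2D, last dim 256 % 16 == 0
B_t = torch.randn(4, 256, 512, dtype=torch.bfloat16)        # 3D, last two dims divisible by 16
offs = torch.tensor([32, 64, 96, 128], dtype=torch.int32)   # int32 group offsets (assumed meaning)

# A.size(-1) == B_t.size(-2) == 256, so the shapes are compatible for the grouped GEMM,
# and a freshly created A is contiguous (row-major), as the last assertion requires.
```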