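lint fix: re-wrap over-long lines and strip trailing whitespace in src/op/copy.cc; add blank lines between top-level functions in tilelang/language/builtin.py (formatting only, no functional change)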

LeiWang1999 · LeiWang1999 · commit 0f11130f220e · 2025-08-22T14:34:16.000+08:00
diff --git a/src/op/copy.cc b/src/op/copy.cc
@@ -364,7 +364,8 @@ bool Copy::CheckBulkLoad(Target target) const {
   if (!TargetHasBulkCopy(target))
     return false;
   // 2. src and dst must be global and shared
-  if (src.scope() != "global" || (dst.scope() != "shared.dyn" && dst.scope() != "shared"))
+  if (src.scope() != "global" ||
+      (dst.scope() != "shared.dyn" && dst.scope() != "shared"))
     return false;
   // 3. check shape.
   // TODO(lei): validate if we can utilize tma under this shape.
@@ -391,7 +392,8 @@ bool Copy::CheckBulkStore(Target target) const {
   if (!TargetHasBulkCopy(target))
     return false;
   // 2. src and dst must be shared.dyn and local.fragment
-  if ((src.scope() != "shared.dyn" && src.scope() != "shared") || dst.scope() != "global")
+  if ((src.scope() != "shared.dyn" && src.scope() != "shared") ||
+      dst.scope() != "global")
     return false;
   // 3. check shape.
   // TODO(lei): validate if we can utilize tma under this shape.
@@ -414,7 +416,8 @@ bool Copy::CheckBulkStore(Target target) const {
  * otherwise.
  */
 bool Copy::CheckLDSMCopy(Target target) const {
-  return TargetHasLdmatrix(target) && (src.scope() == "shared.dyn" || src.scope() == "shared") &&
+  return TargetHasLdmatrix(target) &&
+         (src.scope() == "shared.dyn" || src.scope() == "shared") &&
          dst.scope() == "local.fragment";
 }
 
@@ -883,10 +886,9 @@ Stmt Copy::LowerBulkCopy(const LowerArgs &T, arith::Analyzer *analyzer,
     ICHECK(stride != nullptr && continuous != nullptr);
     // We also need to check if the shape satisfies the following doc:
     // https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TENSOR__MEMORY.html#group__CUDA__TENSOR__MEMORY_1ga7c7d2aaac9e49294304e755e6f341d7
-    if (StructuralEqual()(
-                   shared_layout,
-                   makeQuarterBankSwizzleLayout(*stride, *continuous,
-                                                shared_tensor->dtype.bits()))) {
+    if (StructuralEqual()(shared_layout, makeQuarterBankSwizzleLayout(
+                                             *stride, *continuous,
+                                             shared_tensor->dtype.bits()))) {
       desc.swizzle = static_cast<int>(CU_TENSOR_MAP_SWIZZLE_32B);
     } else if (StructuralEqual()(
                    shared_layout,
@@ -898,18 +900,18 @@ Stmt Copy::LowerBulkCopy(const LowerArgs &T, arith::Analyzer *analyzer,
                    makeFullBankSwizzleLayout(*stride, *continuous,
                                              shared_tensor->dtype.bits()))) {
       desc.swizzle = static_cast<int>(CU_TENSOR_MAP_SWIZZLE_128B);
-    } else if (StructuralEqual()(shared_layout, makeGemmABLayoutPadded(
-      *stride, *continuous,
-      shared_tensor->dtype.bits()))) {
-      LOG(WARNING) << "Bulk copy cannot support a padded layout for src: " 
-                   << src->name << ", dst: " << dst->name 
+    } else if (StructuralEqual()(
+                   shared_layout,
+                   makeGemmABLayoutPadded(*stride, *continuous,
+                                          shared_tensor->dtype.bits()))) {
+      LOG(WARNING) << "Bulk copy cannot support a padded layout for src: "
+                   << src->name << ", dst: " << dst->name
                    << ", fallback to normal copy";
       return LowerNormalCopy(T, analyzer);
     } else {
-      LOG(WARNING)
-          << "Came across unsupported swizzle layout for src: " 
-          << src->name << ", dst: " << dst->name 
-          << ", fallback to normal copy";
+      LOG(WARNING) << "Came across unsupported swizzle layout for src: "
+                   << src->name << ", dst: " << dst->name
+                   << ", fallback to normal copy";
       return LowerNormalCopy(T, analyzer);
     }
   }
diff --git a/tilelang/language/builtin.py b/tilelang/language/builtin.py
@@ -159,11 +159,13 @@ def no_set_max_nreg():
     """
     return tir.call_intrin("handle", tir.op.Op.get("tl.no_set_max_nreg"))
 
+
 def disable_warp_group_reg_alloc():
     """Disable the warp group reg alloc.
     """
     return no_set_max_nreg()
 
+
 def mbarrier_wait_parity(mbarrier: Union[int, PrimExpr, tir.Call], parity: Union[int, Var]):
     """Wait for memory barrier parity condition.