
Commit eada1e3

PadConcat to (MaybePad)ConcatPad (EnzymeAD#639)

* initial commit
* it compiles
* used untyped constructor for pad
* bugfix + cleanup
* added test
* fix test
* add checks
* flippin bits
* keep clang-format happy
1 parent ded5e19 commit eada1e3
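At a high level, the new PadConcatToConcatPad pattern factors padding that is common to every operand of a stablehlo.concatenate out of the concatenation, so the pad is applied once to the concatenated result rather than once per operand; any leftover per-operand padding stays attached to that operand. A minimal sketch of the rewrite in StableHLO pretty-printed syntax (the values %a, %b, %zero and the small shapes are hypothetical, chosen only for illustration and not taken from the commit; the committed behavior is exercised by the lit test below):

// Before: each concatenate operand is padded separately with the same padding value.
// %a, %b : tensor<4x8xf64>, %zero : tensor<f64> (placeholder values)
%p0 = stablehlo.pad %a, %zero, low = [1, 0], high = [0, 0], interior = [0, 0] : (tensor<4x8xf64>, tensor<f64>) -> tensor<5x8xf64>
%p1 = stablehlo.pad %b, %zero, low = [1, 0], high = [0, 0], interior = [0, 0] : (tensor<4x8xf64>, tensor<f64>) -> tensor<5x8xf64>
%c = stablehlo.concatenate %p0, %p1, dim = 1 : (tensor<5x8xf64>, tensor<5x8xf64>) -> tensor<5x16xf64>

// After: concatenate the unpadded operands, then apply the shared padding once.
%cc = stablehlo.concatenate %a, %b, dim = 1 : (tensor<4x8xf64>, tensor<4x8xf64>) -> tensor<4x16xf64>
%r = stablehlo.pad %cc, %zero, low = [1, 0], high = [0, 0], interior = [0, 0] : (tensor<4x16xf64>, tensor<f64>) -> tensor<5x16xf64>

When the per-operand paddings differ, only the element-wise minimum of the low/high edge padding is hoisted and the remainder is re-applied to the individual operand, as in the @test_pad_leftover case in the new test file.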

File tree

3 files changed: +158 -4 lines changed

src/enzyme_ad/jax/Passes/EnzymeHLOOpt.cpp

Lines changed: 116 additions & 4 deletions
@@ -12440,6 +12440,119 @@ struct BroadcastInDimIsReshape final
   }
 };
 
+struct PadConcatToConcatPad
+    : public OpRewritePattern<stablehlo::ConcatenateOp> {
+  using OpRewritePattern<stablehlo::ConcatenateOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(stablehlo::ConcatenateOp concatOp,
+                                PatternRewriter &rewriter) const override {
+
+    if (concatOp.getNumOperands() <= 1) {
+      return failure();
+    }
+
+    // Check if all operands are pad ops with the same padding value
+    SmallVector<stablehlo::PadOp> padOps;
+    Value padValue;
+
+    for (Value operand : concatOp.getOperands()) {
+      auto padOp = operand.getDefiningOp<stablehlo::PadOp>();
+      if (!padOp)
+        return failure();
+
+      if (padOps.empty()) {
+        padValue = padOp.getPaddingValue();
+      } else if (padValue != padOp.getPaddingValue()) {
+        return failure(); // Different padding values not supported
+      }
+
+      padOps.push_back(padOp);
+    }
+
+    int64_t concatDim = concatOp.getDimension();
+    int64_t rank = padOps[0].getEdgePaddingLow().size();
+
+    // Compute smallest common padding for all tensors
+    SmallVector<int64_t> commonLowPadding(rank,
+                                          std::numeric_limits<int64_t>::max());
+    SmallVector<int64_t> commonHighPadding(rank,
+                                           std::numeric_limits<int64_t>::max());
+    SmallVector<int64_t> interiorPadding(rank, 0);
+
+    // Find minimum padding across all inputs (conservative common padding)
+    for (auto padOp : padOps) {
+      for (int64_t dim = 0; dim < rank; ++dim) {
+        commonLowPadding[dim] =
+            std::min(commonLowPadding[dim], padOp.getEdgePaddingLow()[dim]);
+        commonHighPadding[dim] =
+            std::min(commonHighPadding[dim], padOp.getEdgePaddingHigh()[dim]);
+      }
+    }
+
+    bool commonPad = false;
+
+    for (int64_t dim = 0; dim < rank; ++dim) {
+      if (commonLowPadding[dim] != 0 || commonHighPadding[dim] != 0) {
+        commonPad = true;
+        break;
+      }
+    }
+
+    if (!commonPad) {
+      return failure();
+    }
+
+    // Collect original operands with adjusted padding
+    SmallVector<Value> adjOperands;
+
+    for (auto padOp : padOps) {
+
+      SmallVector<int64_t> diffLowPadding(rank);
+      SmallVector<int64_t> diffHighPadding(rank);
+
+      for (int64_t dim = 0; dim < rank; ++dim) {
+        diffLowPadding[dim] =
+            padOp.getEdgePaddingLow()[dim] - commonLowPadding[dim];
+        diffHighPadding[dim] =
+            padOp.getEdgePaddingHigh()[dim] - commonHighPadding[dim];
+      }
+
+      bool needsExtraPad = false;
+      for (int64_t dim = 0; dim < rank; ++dim) {
+        if (diffLowPadding[dim] > 0 || diffHighPadding[dim] > 0) {
+          needsExtraPad = true;
+          break;
+        }
+      }
+
+      if (needsExtraPad) {
+
+        auto adjustedOp = rewriter.create<stablehlo::PadOp>(
+            padOp.getLoc(),
+            padOp.getOperand(), // we pad the input operand
+            padOp.getPaddingValue(), diffLowPadding, diffHighPadding,
+            padOp.getInteriorPaddingAttr());
+
+        adjOperands.push_back(adjustedOp);
+      } else {
+        // No extra padding needed, use original tensor
+        adjOperands.push_back(padOp.getOperand());
+      }
+    }
+
+    auto newConcatOp = rewriter.create<stablehlo::ConcatenateOp>(
+        concatOp.getLoc(), adjOperands, concatDim);
+
+    // Apply the common padding to get the final result
+    auto result = rewriter.create<stablehlo::PadOp>(
+        concatOp.getLoc(), newConcatOp, padValue, commonLowPadding,
+        commonHighPadding, interiorPadding);
+
+    rewriter.replaceOp(concatOp, result);
+    return success();
+  }
+};
+
 struct ConstPadConcatToConcat : public OpRewritePattern<stablehlo::PadOp> {
   using OpRewritePattern<stablehlo::PadOp>::OpRewritePattern;

@@ -12705,10 +12818,9 @@ struct EnzymeHLOOptPass
                  AssociativeBinaryOpReordering<stablehlo::AndOp>,
                  AssociativeBinaryOpReordering<stablehlo::OrOp>>(context);
 
-    patterns
-        .add<BinopPadToConcat<stablehlo::AddOp>,
-             BinopPadToConcat<stablehlo::MulOp>, ConcatPad, PadReduceWindow>(
-            context);
+    patterns.add<BinopPadToConcat<stablehlo::AddOp>,
+                 BinopPadToConcat<stablehlo::MulOp>, ConcatPad,
+                 PadConcatToConcatPad, PadReduceWindow>(context);
 
     if (passses & 512) {
       patterns.add<TransposeDotReorder, DotTranspose, ConvolutionTranspose,

src/enzyme_ad/jax/TransformOps/TransformOps.td

Lines changed: 4 additions & 0 deletions
@@ -1222,6 +1222,10 @@ def ConstPadConcatToConcat : EnzymeHLOPatternOp<
   let patterns = ["ConstPadConcatToConcat"];
 }
 
+def PadConcatToConcatPad : EnzymeHLOPatternOp<
+    "pad_concat_to_concat_pad"> {
+  let patterns = ["PadConcatToConcatPad"];
+}
 // TODO: better naming for parameters requires a static interface for
 // constructing them in search.

New lit test file

Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+// RUN: enzymexlamlir-opt %s --pass-pipeline="builtin.module(enzyme-hlo-generate-td{patterns=pad_concat_to_concat_pad},transform-interpreter,enzyme-hlo-remove-transform)" | FileCheck %s
+
+func.func @test_pad_leftover(%arg0 : tensor<128x2031x2032xf64>, %arg1 : tensor<1x2032x2032xf64>, %arg2: tensor<1x2032x2032xf64>) -> tensor<130x2033x2032xf64> {
+  %cst_29 = stablehlo.constant dense<0.5> : tensor<f64>
+  %p1 = stablehlo.pad %arg0, %cst_29, low = [0, 1, 0], high = [0, 1, 0], interior = [0, 0, 0] : (tensor<128x2031x2032xf64>, tensor<f64>) -> tensor<128x2033x2032xf64>
+  %p2 = stablehlo.pad %arg1, %cst_29, low = [0, 1, 0], high = [0, 0, 0], interior = [0, 0, 0] : (tensor<1x2032x2032xf64>, tensor<f64>) -> tensor<1x2033x2032xf64>
+  %p3 = stablehlo.pad %arg2, %cst_29, low = [0, 1, 0], high = [0, 0, 0], interior = [0, 0, 0] : (tensor<1x2032x2032xf64>, tensor<f64>) -> tensor<1x2033x2032xf64>
+
+  %concat = stablehlo.concatenate %p2, %p1, %p3, dim = 0 : (tensor<1x2033x2032xf64>, tensor<128x2033x2032xf64>, tensor<1x2033x2032xf64>) -> tensor<130x2033x2032xf64>
+  return %concat : tensor<130x2033x2032xf64>
+}
+
+
+// CHECK: func.func @test_pad_leftover(%arg0: tensor<128x2031x2032xf64>, %arg1: tensor<1x2032x2032xf64>, %arg2: tensor<1x2032x2032xf64>) -> tensor<130x2033x2032xf64> {
+// CHECK-NEXT: %cst = stablehlo.constant dense<5.000000e-01> : tensor<f64>
+// CHECK-NEXT: %0 = stablehlo.pad %arg0, %cst, low = [0, 0, 0], high = [0, 1, 0], interior = [0, 0, 0] : (tensor<128x2031x2032xf64>, tensor<f64>) -> tensor<128x2032x2032xf64>
+// CHECK-NEXT: %1 = stablehlo.concatenate %arg1, %0, %arg2, dim = 0 : (tensor<1x2032x2032xf64>, tensor<128x2032x2032xf64>, tensor<1x2032x2032xf64>) -> tensor<130x2032x2032xf64>
+// CHECK-NEXT: %2 = stablehlo.pad %1, %cst, low = [0, 1, 0], high = [0, 0, 0], interior = [0, 0, 0] : (tensor<130x2032x2032xf64>, tensor<f64>) -> tensor<130x2033x2032xf64>
+// CHECK-NEXT: return %2 : tensor<130x2033x2032xf64>
+// CHECK-NEXT: }
+
+func.func @test_pad_clean(%arg0 : tensor<128x2032x2032xf64>, %arg1 : tensor<1x2032x2032xf64>, %arg2: tensor<1x2032x2032xf64>) -> tensor<130x2033x2032xf64> {
+  %cst_29 = stablehlo.constant dense<0.5> : tensor<f64>
+  %p1 = stablehlo.pad %arg0, %cst_29, low = [0, 1, 0], high = [0, 0, 0], interior = [0, 0, 0] : (tensor<128x2032x2032xf64>, tensor<f64>) -> tensor<128x2033x2032xf64>
+  %p2 = stablehlo.pad %arg1, %cst_29, low = [0, 1, 0], high = [0, 0, 0], interior = [0, 0, 0] : (tensor<1x2032x2032xf64>, tensor<f64>) -> tensor<1x2033x2032xf64>
+  %p3 = stablehlo.pad %arg2, %cst_29, low = [0, 1, 0], high = [0, 0, 0], interior = [0, 0, 0] : (tensor<1x2032x2032xf64>, tensor<f64>) -> tensor<1x2033x2032xf64>
+
+  %concat = stablehlo.concatenate %p2, %p1, %p3, dim = 0 : (tensor<1x2033x2032xf64>, tensor<128x2033x2032xf64>, tensor<1x2033x2032xf64>) -> tensor<130x2033x2032xf64>
+  return %concat : tensor<130x2033x2032xf64>
+}
+
+
+// CHECK-NEXT: func.func @test_pad_clean(%arg0: tensor<128x2032x2032xf64>, %arg1: tensor<1x2032x2032xf64>, %arg2: tensor<1x2032x2032xf64>) -> tensor<130x2033x2032xf64> {
+// CHECK-NEXT: %cst = stablehlo.constant dense<5.000000e-01> : tensor<f64>
+// CHECK-NEXT: %0 = stablehlo.concatenate %arg1, %arg0, %arg2, dim = 0 : (tensor<1x2032x2032xf64>, tensor<128x2032x2032xf64>, tensor<1x2032x2032xf64>) -> tensor<130x2032x2032xf64>
+// CHECK-NEXT: %1 = stablehlo.pad %0, %cst, low = [0, 1, 0], high = [0, 0, 0], interior = [0, 0, 0] : (tensor<130x2032x2032xf64>, tensor<f64>) -> tensor<130x2033x2032xf64>
+// CHECK-NEXT: return %1 : tensor<130x2033x2032xf64>
+// CHECK-NEXT: }
