[mlir][affine] add remove-single-iteration-loop pass. #129270

linuxlonelyeagle · 2025-02-28T17:02:09Z

Add remove-single-iteration-loop pass to affine dialect. It migrated upstream from iree. The PR also improves the logic in it, and it is now able to handle more generalized situations. And added relevant tests.

llvmbot · 2025-02-28T17:02:46Z

@llvm/pr-subscribers-mlir-affine

@llvm/pr-subscribers-mlir

Author: lonely eagle (linuxlonelyeagle)

Changes

Add remove-single-iteration-loop pass to affine dialect.It migrated upstream from ireeThe PR also improves the logic in it, and it is now able to handle more generalized situations.And added relevant tests.

Full diff: https://github.com/llvm/llvm-project/pull/129270.diff

6 Files Affected:

(modified) mlir/include/mlir/Dialect/Affine/Passes.h (+5)
(modified) mlir/include/mlir/Dialect/Affine/Passes.td (+5)
(modified) mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h (+4)
(modified) mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt (+1)
(added) mlir/lib/Dialect/Affine/Transforms/RemoveSingleIterationLoop.cpp (+136)
(added) mlir/test/Dialect/Affine/remove-single-iteration-loop.mlir (+48)

diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index 96bd3c6a9a7bc..a592114db4325 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -117,6 +117,11 @@ std::unique_ptr<Pass> createAffineExpandIndexOpsPass();
 /// operations.
 std::unique_ptr<Pass> createAffineExpandIndexOpsAsAffinePass();
 
+/// Creates a pass in order to remove invalid loops or to move the IR out of the
+/// loop when the loop is only iterated once.
+std::unique_ptr<InterfacePass<FunctionOpInterface>>
+createRemoveSingleIterationLoopPass();
+
 //===----------------------------------------------------------------------===//
 // Registration
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index 728b8d25efcf2..f715ff6839fd5 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -229,6 +229,11 @@ def AffineLoopUnrollAndJam : InterfacePass<"affine-loop-unroll-jam", "FunctionOp
   ];
 }
 
+def RemoveSingleIterationLoop : InterfacePass<"remove-single-iteration-loop", "FunctionOpInterface"> {
+  let summary = "Remove distributed loop with single iteration.";
+  let constructor = "mlir::affine::createRemoveSingleIterationLoopPass()";
+}
+
 def AffinePipelineDataTransfer
     : Pass<"affine-pipeline-data-transfer", "func::FuncOp"> {
   let summary = "Pipeline non-blocking data transfers between explicitly "
diff --git a/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
index bf830a29613fd..fb61f9333fac6 100644
--- a/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
@@ -41,6 +41,10 @@ void populateAffineExpandIndexOpsPatterns(RewritePatternSet &patterns);
 /// `affine.apply` representations.
 void populateAffineExpandIndexOpsAsAffinePatterns(RewritePatternSet &patterns);
 
+/// Insert pattern to remove single iteration loop. The pattern will detect
+/// single iteration loops based on the range returned ValueBoundsOpInterface.
+void populateRemoveSingleIterationLoopPattern(RewritePatternSet &patterns);
+
 /// Helper function to rewrite `op`'s affine map and reorder its operands such
 /// that they are in increasing order of hoistability (i.e. the least hoistable)
 /// operands come first in the operand list.
diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
index c42789b01bc9f..317e28b542565 100644
--- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
@@ -16,6 +16,7 @@ add_mlir_dialect_library(MLIRAffineTransforms
   ReifyValueBounds.cpp
   SuperVectorize.cpp
   SimplifyAffineStructures.cpp
+  RemoveSingleIterationLoop.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Affine
diff --git a/mlir/lib/Dialect/Affine/Transforms/RemoveSingleIterationLoop.cpp b/mlir/lib/Dialect/Affine/Transforms/RemoveSingleIterationLoop.cpp
new file mode 100644
index 0000000000000..e8bbc2b6d8948
--- /dev/null
+++ b/mlir/lib/Dialect/Affine/Transforms/RemoveSingleIterationLoop.cpp
@@ -0,0 +1,136 @@
+//===- RemoveSingleIterationLoop.cpp --- remove single iteration loop ---*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Affine/Passes.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Transforms/Transforms.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+#include "llvm/Support/Debug.h"
+
+namespace mlir {
+namespace affine {
+#define GEN_PASS_DEF_REMOVESINGLEITERATIONLOOP
+#include "mlir/Dialect/Affine/Passes.h.inc"
+} // namespace affine
+} // namespace mlir
+
+#define DEBUG_TYPE "affine-remove-single-iteration"
+
+using namespace mlir;
+using namespace affine;
+
+/// Replaces the given op with the contents of the given single-block region,
+/// using the operands of the block terminator to replace operation results.
+static void replaceOpWithRegion(PatternRewriter &rewriter, Operation *op,
+                                Region &region, ValueRange blockArgs = {}) {
+  assert(llvm::hasSingleElement(region) && "expected single-region block");
+  Block *block = &region.front();
+  Operation *terminator = block->getTerminator();
+  ValueRange results = terminator->getOperands();
+  rewriter.inlineBlockBefore(block, op, blockArgs);
+  rewriter.replaceOp(op, results);
+  rewriter.eraseOp(terminator);
+}
+
+/// Return true if we can prove that the we always run at least the first
+/// iteration of the ForOp.
+static bool alwaysRunsFirstIteration(AffineForOp op) {
+  // Can't perform the analysis if the loops's bounds aren't index-typed.
+  if (!op.getInductionVar().getType().isIndex())
+    return false;
+  SmallVector<Value> lowerMapOperands = op.getLowerBoundOperands();
+  SmallVector<Value> upperMapOperands = op.getUpperBoundOperands();
+  ValueBoundsConstraintSet::Variable lower(op.getLowerBoundMap(),
+                                           lowerMapOperands);
+  ValueBoundsConstraintSet::Variable upper(op.getUpperBoundMap(),
+                                           upperMapOperands);
+  FailureOr<bool> isLb = ValueBoundsConstraintSet::compare(
+      lower, ValueBoundsConstraintSet::LT, upper);
+  return isLb.value_or(false);
+}
+
+/// Return true if we can prove that the we never run more than one iteration of
+/// the ForOp.
+static bool neverRunsSecondIteration(AffineForOp op) {
+  // Can't perform the analysis if the loops's bounds aren't index-typed.
+  if (!op.getInductionVar().getType().isIndex())
+    return false;
+
+  // The loop will only loop once if the inducation variable for the next time
+  // in the loop is greater than or equal to upper.
+  MLIRContext *context = op.getContext();
+  SmallVector<Value> lowerMapOperands = op.getLowerBoundOperands();
+  SmallVector<Value> upperMapOperands = op.getUpperBoundOperands();
+  SmallVector<AffineExpr> results;
+  AffineMap lowerMap = op.getLowerBoundMap();
+  for (AffineExpr expr : lowerMap.getResults()) {
+    results.push_back(expr + op.getStep().getSExtValue());
+  }
+  AffineMap nextItMap = AffineMap::get(
+      lowerMap.getNumDims(), lowerMap.getNumSymbols(), results, context);
+  ValueBoundsConstraintSet::Variable nextItVar(nextItMap, lowerMapOperands);
+  ValueBoundsConstraintSet::Variable upperVar(op.getUpperBoundMap(),
+                                              upperMapOperands);
+  FailureOr<bool> isUpperUnderNextIter = ValueBoundsConstraintSet::compare(
+      nextItVar, ValueBoundsConstraintSet::LE, upperVar);
+  return isUpperUnderNextIter.value_or(false);
+}
+
+namespace {
+
+/// Rewriting pattern that replaces single-iteration loops with their bodies.
+struct SimplifyTrivialLoops : public OpRewritePattern<AffineForOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(AffineForOp op,
+                                PatternRewriter &rewriter) const override {
+    if (!(alwaysRunsFirstIteration(op) && neverRunsSecondIteration(op))) {
+      return failure();
+    }
+
+    // The first iteration is always run and the second iteration is never run
+    // so the loop always have 1 iteration. Inline its body and remove the loop.
+    SmallVector<Value> blockArgs;
+    blockArgs.reserve(op.getInits().size() + 1);
+    rewriter.setInsertionPointToStart(op.getBody());
+    Value lower = rewriter.create<AffineApplyOp>(
+        op.getLoc(), op.getLowerBoundMap(), op.getLowerBoundOperands());
+    op.getInductionVar().replaceAllUsesWith(lower);
+    blockArgs.push_back(lower);
+    llvm::append_range(blockArgs, op.getInits());
+    replaceOpWithRegion(rewriter, op, op.getRegion(), blockArgs);
+    return success();
+  }
+};
+
+struct RemoveSingleIterationLoop
+    : public affine::impl::RemoveSingleIterationLoopBase<
+          RemoveSingleIterationLoop> {
+  void runOnOperation() override {
+    auto funcOp = getOperation();
+    RewritePatternSet patterns(funcOp.getContext());
+    populateRemoveSingleIterationLoopPattern(patterns);
+    if (failed(applyPatternsGreedily(funcOp, std::move(patterns))))
+      return signalPassFailure();
+  }
+};
+
+} // namespace
+
+void mlir::affine::populateRemoveSingleIterationLoopPattern(
+    RewritePatternSet &patterns) {
+  patterns.add<SimplifyTrivialLoops>(patterns.getContext());
+}
+
+std::unique_ptr<InterfacePass<FunctionOpInterface>>
+mlir::affine::createRemoveSingleIterationLoopPass() {
+  return std::make_unique<RemoveSingleIterationLoop>();
+}
diff --git a/mlir/test/Dialect/Affine/remove-single-iteration-loop.mlir b/mlir/test/Dialect/Affine/remove-single-iteration-loop.mlir
new file mode 100644
index 0000000000000..c8c321242f626
--- /dev/null
+++ b/mlir/test/Dialect/Affine/remove-single-iteration-loop.mlir
@@ -0,0 +1,48 @@
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(remove-single-iteration-loop))' -split-input-file | FileCheck %s
+
+// -----
+
+// CHECK-LABEL: func @loop_once(
+func.func @loop_once(%arg : index) -> index{
+  %0 = affine.for %iv = 2 to 3 step 1 iter_args(%arg1 = %arg) -> index {
+    %sum = arith.addi %arg1, %iv : index
+    affine.yield %sum : index
+  }
+  return %0 : index
+}
+// CHECK-SAME: %[[ARG:.*]]: index)
+// CHECK: %[[C2:.*]] = arith.constant 2 : index
+// CHECK: %[[SUM:.*]] = arith.addi %[[ARG]], %[[C2]] : index
+// CHECK: return %[[SUM]] : index
+
+// -----
+
+// CHECK-LABEL: func @invalid_loop(
+func.func @invalid_loop(%arg : index) -> index{
+  %0 = affine.for %iv = 4 to 3 step 1 iter_args(%arg1 = %arg) -> index {
+    %sum = arith.addi %arg1, %iv : index
+    affine.yield %sum : index
+  }
+  return %0 : index
+}
+// CHECK-SAME: %[[ARG:.*]]: index)
+// CHECK: return %[[ARG]] : index
+
+// -----
+
+// CHECK-LABEL: func @gpu_invalid_loop
+func.func @gpu_invalid_loop() {
+  %0 = arith.constant 0 :index
+  %1 = arith.constant 2 : index
+  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
+             threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
+    %threadid = gpu.thread_id x
+    affine.for %iv = %tx to 0 step 2 iter_args(%arg = %0) -> index {
+      %3 = arith.addi %arg, %0 : index
+      affine.yield %3 : index
+    }
+    gpu.terminator
+  }
+  // CHECK-NEXT: return
+  return
+}

linuxlonelyeagle · 2025-02-28T17:04:26Z

Related discussions #128113 (review)

krzysz00

Yeah, looks good to me. I'll probably ping you for this pass's SCF counterpart later. Thanks for making this happen!

ftynse

How is this different from -affine-loop-normalize with the promote-single-iter option?

linuxlonelyeagle · 2025-03-01T04:24:07Z

How is this different from -affine-loop-normalize with the promote-single-iter option?

1.promote-single-iter moves the IR in the loop out of the loop by getting tripCount, if the value of tripCount is 1.remove-single-iteration-loop Use ValueBoundInterface to move out of IR if lower + step >= upper.This implementation should be more secure.
2.promote-single-iter can't remove invalid loops, remove-single-iteration-loop can do this.You can see the test I added.
3.The feature should not be found in the MLIR documentation (it may be hidden and included in other passes), and displaying the implementation of it is more conducive to people using it.
For the above reasons, I think the pass makes sense.

linuxlonelyeagle · 2025-03-02T11:41:14Z

In my spare time, I spent some time learn affine-loop-normalize='promote-single-iter=1'.
The current situation is:
Cannot process such IR

func.func @invalid_loop(%arg : index) -> index{
  %0 = affine.for %iv = 4 to 3 step 1 iter_args(%arg1 = %arg) -> index {
    %sum = arith.addi %arg1, %iv : index
    affine.yield %sum : index
  }
  return %0 : index
}

func.func @gpu_invalid_loop() {
  %0 = arith.constant 0 :index
  %1 = arith.constant 2 : index
  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
             threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
    %threadid = gpu.thread_id x
    affine.for %iv = %tx to 0 step 2 iter_args(%arg = %0) -> index {
      %3 = arith.addi %arg, %0 : index
      affine.yield %3 : index
    }
    gpu.terminator
  }
  return
}

Can handle such IR.This IR is based on my last PR (I modified the logic of getTripCount so it can be processed now)

func.func @gpu_loop() {
  %0 = arith.constant 0 :index
  %1 = arith.constant 2 : index
  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
             threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
    %threadid = gpu.thread_id x
    affine.for %iv = %tx to 2 step 2 iter_args(%arg = %0) -> index {
      %3 = arith.addi %arg, %0 : index
      affine.yield %3 : index
    }
    gpu.terminator
  }
  return
}

Such functionality is also available in affine-loop-unroll.In fact, it can be seen that some pass features in Affine are repeated.
I think it is normal for the functions of the passes to overlap, but I think the most important thing about this pass is that it can eliminate invalid loops, and the name of this pass is very clear, which I think is the biggest benefit.

linuxlonelyeagle · 2025-03-06T01:45:41Z

@bondhugula No one seems to be replying to me, do you think the pass here makes sense?

linuxlonelyeagle · 2025-04-13T07:18:38Z

@krzysz00 @ftynse It seems that people don't relate to this PR anymore, I have a good idea, I think it's possible to implement the pattern in this pass in canonicalize.Please tell me what you think?

krzysz00 · 2025-04-15T15:26:55Z

I don't think this is a canonicalization, no, and I'd be happy to land this, though it's got a hold on it. I think the hold is "why not affine-loop-normalize" - which might be a good question, given that the pass that inspired this is on SCF

linuxlonelyeagle · 2025-04-15T16:10:41Z

I don't think this is a canonicalization, no, and I'd be happy to land this, though it's got a hold on it. I think the hold is "why not affine-loop-normalize" - which might be a good question, given that the pass that inspired this is on SCF

My English is not very good.I'm not sure I understand what you mean by the word “hold.”What does “good question” mean here? I've had time to think about it.Make my point. I think it's great when this Pass is introduced to handle situations where the SSA value has a range.
My understanding is that

-affine-loop-normalize is blocking content in this PR.
This pass was inspired from SCF.
Thank you.

joker-eph · 2025-04-15T16:38:11Z

I don't think this is a canonicalization, no

Why not?

joker-eph · 2025-04-15T16:39:11Z

I'm not sure I understand what you mean by the word “hold.”

"hold" means there is a pending discussion that needs a resolution. In this case "why a separate pass instead of extending the existing passes to handle the cases needed here"?

krzysz00 · 2025-04-15T16:44:17Z

Re "why not a canonicalization", I'm more thinking that running a value bounds analysis in -canonicalize is a bit dubious, but if we're happy with that ... yeah, inlining loops that provably run exactly once is a canonicalization

joker-eph · 2025-04-15T16:48:38Z

, I'm more thinking that running a value bounds analysis in -canonicalize is a bit dubious,

Thanks for clarifying, I don't know the cost of it, but I agree it's edgy to bring a complex analysis there.

linuxlonelyeagle · 2025-04-15T18:08:08Z

In my opinion, the key reason is still that the existing pass on affine.for relies too much on trips, i.e., the number of iterations, for implementations based on the number of iterations. And the implementation of this PR no longer relies on trip.

These are two different implementations.

But now it looks like if you want to implement this PR in -affine-loop-normalize it's not impossible, but there should be a higher performance loss because if the trip is a range, you have to make sure that both the lower bound and the upper bound are 1. Or that the lower bound is equal to 0 ( Or the lower bound is equal to 0 (just delete the loop), which would be more costly. And this feature is only one of the features of the pass.

The point of the pass in this PR is that the name is much simpler and more straightforward, and it should only require a simple analysis.

How about we remove -affine-loop-normalize that involves functionality in this PR?

add remove-single-iteration-loop pass.

f12aea1

llvmbot added mlir:affine mlir labels Feb 28, 2025

linuxlonelyeagle requested review from krzysz00, ftynse and bondhugula February 28, 2025 17:03

linuxlonelyeagle requested a review from Groverkss February 28, 2025 17:06

krzysz00 approved these changes Feb 28, 2025

View reviewed changes

ftynse requested changes Feb 28, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[mlir][affine] add remove-single-iteration-loop pass. #129270

[mlir][affine] add remove-single-iteration-loop pass. #129270

linuxlonelyeagle commented Feb 28, 2025 •

edited

Loading

llvmbot commented Feb 28, 2025 •

edited

Loading

linuxlonelyeagle commented Feb 28, 2025

krzysz00 left a comment

ftynse left a comment

linuxlonelyeagle commented Mar 1, 2025 •

edited

Loading

linuxlonelyeagle commented Mar 2, 2025

linuxlonelyeagle commented Mar 6, 2025

linuxlonelyeagle commented Apr 13, 2025

krzysz00 commented Apr 15, 2025

linuxlonelyeagle commented Apr 15, 2025

joker-eph commented Apr 15, 2025

joker-eph commented Apr 15, 2025

krzysz00 commented Apr 15, 2025

joker-eph commented Apr 15, 2025

linuxlonelyeagle commented Apr 15, 2025 •

edited

Loading

[mlir][affine] add remove-single-iteration-loop pass. #129270

Are you sure you want to change the base?

[mlir][affine] add remove-single-iteration-loop pass. #129270

Conversation

linuxlonelyeagle commented Feb 28, 2025 • edited Loading

llvmbot commented Feb 28, 2025 • edited Loading

linuxlonelyeagle commented Feb 28, 2025

krzysz00 left a comment

Choose a reason for hiding this comment

ftynse left a comment

Choose a reason for hiding this comment

linuxlonelyeagle commented Mar 1, 2025 • edited Loading

linuxlonelyeagle commented Mar 2, 2025

linuxlonelyeagle commented Mar 6, 2025

linuxlonelyeagle commented Apr 13, 2025

krzysz00 commented Apr 15, 2025

linuxlonelyeagle commented Apr 15, 2025

joker-eph commented Apr 15, 2025

joker-eph commented Apr 15, 2025

krzysz00 commented Apr 15, 2025

joker-eph commented Apr 15, 2025

linuxlonelyeagle commented Apr 15, 2025 • edited Loading

linuxlonelyeagle commented Feb 28, 2025 •

edited

Loading

llvmbot commented Feb 28, 2025 •

edited

Loading

linuxlonelyeagle commented Mar 1, 2025 •

edited

Loading

linuxlonelyeagle commented Apr 15, 2025 •

edited

Loading