Improve docs and fix nits in affine loop trip-count analysis.

linuxlonelyeagle · linuxlonelyeagle · commit 0aafe719d411 · 2025-04-09T11:47:26.000+08:00
diff --git a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
@@ -214,19 +214,21 @@ void mlir::affine::getTripCountMapAndOperands(
                             tripCountValueMap.getOperands().end());
 }
 
-/// Take the min if all trip counts are constant.
+/// Evaluates `map` on the given operands to obtain the trip count, which may
+/// be a range of values when any of the operands has a range.
+/// If `type` is BoundType::LB, returns the minimum trip count; if `type` is
+/// BoundType::UB, returns the maximum trip count.
 static std::optional<uint64_t>
 getKnownTripCountBound(AffineMap map, SmallVectorImpl<Value> &operands,
                        presburger::BoundType type) {
   std::optional<uint64_t> tripCount;
-  for (auto resultExpr : map.getResults()) {
-    AffineMap subMap =
-        AffineMap::get(map.getNumDims(), map.getNumSymbols(), resultExpr);
+  for (unsigned i = 0, e = map.getResults().size(); i < e; ++i) {
+    AffineMap subMap = map.getSubMap(i);
     ValueBoundsConstraintSet::Variable var(subMap, operands);
     auto lbBound = ValueBoundsConstraintSet::computeConstantBound(
         mlir::presburger::BoundType::LB, var);
     auto ubBound = ValueBoundsConstraintSet::computeConstantBound(
-        mlir::presburger::BoundType::UB, var, nullptr, true);
+        mlir::presburger::BoundType::UB, var, nullptr, /*closedUB*/ true);
     if (failed(lbBound) || failed(ubBound))
       return std::nullopt;
     if (type == presburger::BoundType::LB) {
@@ -238,7 +240,7 @@ getKnownTripCountBound(AffineMap map, SmallVectorImpl<Value> &operands,
     } else if (type == presburger::BoundType::UB) {
       if (tripCount.has_value())
         tripCount =
-            std::min(*tripCount, static_cast<uint64_t>(ubBound.value()));
+            std::max(*tripCount, static_cast<uint64_t>(ubBound.value()));
       else
         tripCount = ubBound.value();
     } else {
@@ -253,7 +255,7 @@ getKnownTripCountBound(AffineMap map, SmallVectorImpl<Value> &operands,
 /// getTripCount) and is able to determine constant trip count in non-trivial
 /// cases.
 std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
-  SmallVector<Value, 4> operands;
+  SmallVector<Value> operands;
   AffineMap map;
   getTripCountMapAndOperands(forOp, &map, &operands);
 
@@ -262,12 +264,12 @@ std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
   return getKnownTripCountBound(map, operands, presburger::BoundType::LB);
 }
 
-/// Returns the maximum trip count when the operand of forOp has a range. If the
-/// operand of forOp is a constant, the return value is the same as
+/// Returns the maximum trip count when the operand of forOp has a range.
+/// If the operand of forOp is a constant, the return value is the same as
 /// `getConstantTripCount`.
 std::optional<uint64_t>
 mlir::affine::getUpperBoundOnTripCount(AffineForOp forOp) {
-  SmallVector<Value, 4> operands;
+  SmallVector<Value> operands;
   AffineMap map;
   getTripCountMapAndOperands(forOp, &map, &operands);
 
diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -162,8 +162,7 @@ LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) {
   forOp.getBody()->back().erase();
   parentBlock->getOperations().splice(Block::iterator(forOp),
                                       forOp.getBody()->getOperations());
-  IRRewriter b(forOp.getContext());
-  b.eraseOp(forOp);
+  forOp.erase();
   return success();
 }
 
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
@@ -261,14 +261,14 @@ gpu.module @unroll_full {
 
 // UNROLL-FULL-LABEL: func @thread_partial_execution
 func.func @thread_partial_execution() {
-  %0 = arith.constant 0 :index
-  %1 = arith.constant 2 : index    
+  %c0 = arith.constant 0 :index
+  %c2 = arith.constant 2 : index    
   // UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
-  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
-             threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
-    affine.for %iv = %tx to 3 step 2 iter_args(%arg = %0) -> index {
-      %3 = arith.addi %arg, %0 : index
-      affine.yield %3 : index
+  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c2, %sz_by = %c2, %sz_bz = %c2)
+             threads(%tx, %ty, %tz) in (%sz_tx = %c2, %sz_ty = %c2, %sz_tz = %c2) {
+    affine.for %iv = %tx to 3 step 2 iter_args(%arg = %c0) -> index {
+      %sum = arith.addi %arg, %c0 : index
+      affine.yield %sum : index
     }
     // UNROLL-FULL: affine.for %{{.*}} = %{{.*}} to 3 step 2 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
     // UNROLL-FULL-NEXT:   %[[SUM:.*]] = arith.addi %[[ARG]], %[[C0]] : index
@@ -281,15 +281,15 @@ func.func @thread_partial_execution() {
 
 // UNROLL-FULL-LABEL: func @unroll_all_thread
 func.func @unroll_all_thread() {
-  %0 = arith.constant 0 :index
-  %1 = arith.constant 2 : index
+  %c0 = arith.constant 0 :index
+  %c2 = arith.constant 2 : index
   // UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
-  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
-             threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
+  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c2, %sz_by = %c2, %sz_bz = %c2)
+             threads(%tx, %ty, %tz) in (%sz_tx = %c2, %sz_ty = %c2, %sz_tz = %c2) {
     %threadid = gpu.thread_id x
-    %4 = affine.for %iv = %threadid to 6 step 2 iter_args(%arg = %0) -> index {
-      %3 = arith.addi %arg, %0 : index
-      affine.yield %3 : index
+    affine.for %iv = %threadid to 6 step 2 iter_args(%arg = %c0) -> index {
+      %sum = arith.addi %arg, %c0 : index
+      affine.yield %sum : index
     }
     // UNROLL-FULL: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
     // UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
@@ -301,15 +301,15 @@ func.func @unroll_all_thread() {
 
 // UNROLL-FULL-LABEL: func.func @partial_unroll_factor_4
 func.func @partial_unroll_factor_4() {
-  %0 = arith.constant 0 :index
-  %1 = arith.constant 2 : index
+  %c0 = arith.constant 0 :index
+  %c2 = arith.constant 2 : index
   // UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
-  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
-             threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
+  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c2, %sz_by = %c2, %sz_bz = %c2)
+             threads(%tx, %ty, %tz) in (%sz_tx = %c2, %sz_ty = %c2, %sz_tz = %c2) {
     %threadid = gpu.thread_id x
-    affine.for %iv = %threadid to 9 step 2 iter_args(%arg = %0) -> index {
-      %3 = arith.addi %arg, %0 : index
-      affine.yield %3 : index
+    affine.for %iv = %threadid to 9 step 2 iter_args(%arg = %c0) -> index {
+      %sum = arith.addi %arg, %c0 : index
+      affine.yield %sum : index
     }
     gpu.terminator
   }
@@ -769,15 +769,15 @@ func.func @unroll_with_iter_args_and_promotion(%arg0 : f32, %arg1 : f32) -> f32
 
 // UNROLL-BY-4-LABEL: func @gpu_launch_unroll_by_factor_4
 func.func @gpu_launch_unroll_by_factor_4() {
-  %0 = arith.constant 0 :index
-  %1 = arith.constant 2 : index
+  %c0 = arith.constant 0 :index
+  %c2 = arith.constant 2 : index
   // UNROLL-BY-4: %[[C0:.*]] = arith.constant 0 : index
-  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
-             threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
+  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c2, %sz_by = %c2, %sz_bz = %c2)
+             threads(%tx, %ty, %tz) in (%sz_tx = %c2, %sz_ty = %c2, %sz_tz = %c2) {
     %threadid = gpu.thread_id x
-    affine.for %iv = %threadid to 11 step 2 iter_args(%arg = %0) -> index {
-      %3 = arith.addi %arg, %0 : index
-      affine.yield %3 : index
+    affine.for %iv = %threadid to 11 step 2 iter_args(%arg = %c0) -> index {
+      %sum = arith.addi %arg, %c0 : index
+      affine.yield %sum : index
     }
     gpu.terminator
   }

Original file line number	Diff line number	Diff line change
`@@ -162,8 +162,7 @@ LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) {`
`162`	`162`	`forOp.getBody()->back().erase();`
`163`	`163`	`parentBlock->getOperations().splice(Block::iterator(forOp),`
`164`	`164`	`forOp.getBody()->getOperations());`
`165`		`- IRRewriter b(forOp.getContext());`
`166`		`- b.eraseOp(forOp);`
	`165`	`+ forOp.erase();`
`167`	`166`	`return success();`
`168`	`167`	`}`
`169`	`168`