Skip to content

Commit 0aafe71

Browse files
improve doc and nit.
1 parent 82e48ee commit 0aafe71

File tree

3 files changed

+41
-40
lines changed

3 files changed

+41
-40
lines changed

mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -214,19 +214,21 @@ void mlir::affine::getTripCountMapAndOperands(
214214
tripCountValueMap.getOperands().end());
215215
}
216216

217-
/// Take the min if all trip counts are constant.
217+
/// Evaluates `map` on the given `operands` to obtain the trip count, which may
218+
/// be a range rather than a single value when any operand has a range.
219+
/// If `type` is BoundType::LB, returns the minimum trip count; if `type` is
220+
/// BoundType::UB, returns the maximum trip count.
218221
static std::optional<uint64_t>
219222
getKnownTripCountBound(AffineMap map, SmallVectorImpl<Value> &operands,
220223
presburger::BoundType type) {
221224
std::optional<uint64_t> tripCount;
222-
for (auto resultExpr : map.getResults()) {
223-
AffineMap subMap =
224-
AffineMap::get(map.getNumDims(), map.getNumSymbols(), resultExpr);
225+
for (unsigned i = 0, e = map.getResults().size(); i < e; ++i) {
226+
AffineMap subMap = map.getSubMap(i);
225227
ValueBoundsConstraintSet::Variable var(subMap, operands);
226228
auto lbBound = ValueBoundsConstraintSet::computeConstantBound(
227229
mlir::presburger::BoundType::LB, var);
228230
auto ubBound = ValueBoundsConstraintSet::computeConstantBound(
229-
mlir::presburger::BoundType::UB, var, nullptr, true);
231+
mlir::presburger::BoundType::UB, var, nullptr, /*closedUB*/ true);
230232
if (failed(lbBound) || failed(ubBound))
231233
return std::nullopt;
232234
if (type == presburger::BoundType::LB) {
@@ -238,7 +240,7 @@ getKnownTripCountBound(AffineMap map, SmallVectorImpl<Value> &operands,
238240
} else if (type == presburger::BoundType::UB) {
239241
if (tripCount.has_value())
240242
tripCount =
241-
std::min(*tripCount, static_cast<uint64_t>(ubBound.value()));
243+
std::max(*tripCount, static_cast<uint64_t>(ubBound.value()));
242244
else
243245
tripCount = ubBound.value();
244246
} else {
@@ -253,7 +255,7 @@ getKnownTripCountBound(AffineMap map, SmallVectorImpl<Value> &operands,
253255
/// getTripCount) and is able to determine constant trip count in non-trivial
254256
/// cases.
255257
std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
256-
SmallVector<Value, 4> operands;
258+
SmallVector<Value> operands;
257259
AffineMap map;
258260
getTripCountMapAndOperands(forOp, &map, &operands);
259261

@@ -262,12 +264,12 @@ std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
262264
return getKnownTripCountBound(map, operands, presburger::BoundType::LB);
263265
}
264266

265-
/// Returns the maximum trip count when the operand of forOp has a range. If the
266-
/// operand of forOp is a constant, the return value is the same as
267+
/// Returns the maximum trip count when the operand of forOp has a range.
268+
/// If the operand of forOp is a constant, the return value is the same as
267269
/// `getConstantTripCount`.
268270
std::optional<uint64_t>
269271
mlir::affine::getUpperBoundOnTripCount(AffineForOp forOp) {
270-
SmallVector<Value, 4> operands;
272+
SmallVector<Value> operands;
271273
AffineMap map;
272274
getTripCountMapAndOperands(forOp, &map, &operands);
273275

mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,7 @@ LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) {
162162
forOp.getBody()->back().erase();
163163
parentBlock->getOperations().splice(Block::iterator(forOp),
164164
forOp.getBody()->getOperations());
165-
IRRewriter b(forOp.getContext());
166-
b.eraseOp(forOp);
165+
forOp.erase();
167166
return success();
168167
}
169168

mlir/test/Dialect/Affine/unroll.mlir

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -261,14 +261,14 @@ gpu.module @unroll_full {
261261

262262
// UNROLL-FULL-LABEL: func @thread_partial_execution
263263
func.func @thread_partial_execution() {
264-
%0 = arith.constant 0 :index
265-
%1 = arith.constant 2 : index
264+
%c0 = arith.constant 0 :index
265+
%c2 = arith.constant 2 : index
266266
// UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
267-
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
268-
threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
269-
affine.for %iv = %tx to 3 step 2 iter_args(%arg = %0) -> index {
270-
%3 = arith.addi %arg, %0 : index
271-
affine.yield %3 : index
267+
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c2, %sz_by = %c2, %sz_bz = %c2)
268+
threads(%tx, %ty, %tz) in (%sz_tx = %c2, %sz_ty = %c2, %sz_tz = %c2) {
269+
affine.for %iv = %tx to 3 step 2 iter_args(%arg = %c0) -> index {
270+
%sum = arith.addi %arg, %c0 : index
271+
affine.yield %sum : index
272272
}
273273
// UNROLL-FULL: affine.for %{{.*}} = %{{.*}} to 3 step 2 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
274274
// UNROLL-FULL-NEXT: %[[SUM:.*]] = arith.addi %[[ARG]], %[[C0]] : index
@@ -281,15 +281,15 @@ func.func @thread_partial_execution() {
281281

282282
// UNROLL-FULL-LABEL: func @unroll_all_thread
283283
func.func @unroll_all_thread() {
284-
%0 = arith.constant 0 :index
285-
%1 = arith.constant 2 : index
284+
%c0 = arith.constant 0 :index
285+
%c2 = arith.constant 2 : index
286286
// UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
287-
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
288-
threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
287+
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c2, %sz_by = %c2, %sz_bz = %c2)
288+
threads(%tx, %ty, %tz) in (%sz_tx = %c2, %sz_ty = %c2, %sz_tz = %c2) {
289289
%threadid = gpu.thread_id x
290-
%4 = affine.for %iv = %threadid to 6 step 2 iter_args(%arg = %0) -> index {
291-
%3 = arith.addi %arg, %0 : index
292-
affine.yield %3 : index
290+
affine.for %iv = %threadid to 6 step 2 iter_args(%arg = %c0) -> index {
291+
%sum = arith.addi %arg, %c0 : index
292+
affine.yield %sum : index
293293
}
294294
// UNROLL-FULL: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
295295
// UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
@@ -301,15 +301,15 @@ func.func @unroll_all_thread() {
301301

302302
// UNROLL-FULL-LABEL: func.func @partial_unroll_factor_4
303303
func.func @partial_unroll_factor_4() {
304-
%0 = arith.constant 0 :index
305-
%1 = arith.constant 2 : index
304+
%c0 = arith.constant 0 :index
305+
%c2 = arith.constant 2 : index
306306
// UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
307-
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
308-
threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
307+
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c2, %sz_by = %c2, %sz_bz = %c2)
308+
threads(%tx, %ty, %tz) in (%sz_tx = %c2, %sz_ty = %c2, %sz_tz = %c2) {
309309
%threadid = gpu.thread_id x
310-
affine.for %iv = %threadid to 9 step 2 iter_args(%arg = %0) -> index {
311-
%3 = arith.addi %arg, %0 : index
312-
affine.yield %3 : index
310+
affine.for %iv = %threadid to 9 step 2 iter_args(%arg = %c0) -> index {
311+
%sum = arith.addi %arg, %c0 : index
312+
affine.yield %sum : index
313313
}
314314
gpu.terminator
315315
}
@@ -769,15 +769,15 @@ func.func @unroll_with_iter_args_and_promotion(%arg0 : f32, %arg1 : f32) -> f32
769769

770770
// UNROLL-BY-4-LABEL: func @gpu_launch_unroll_by_factor_4
771771
func.func @gpu_launch_unroll_by_factor_4() {
772-
%0 = arith.constant 0 :index
773-
%1 = arith.constant 2 : index
772+
%c0 = arith.constant 0 :index
773+
%c2 = arith.constant 2 : index
774774
// UNROLL-BY-4: %[[C0:.*]] = arith.constant 0 : index
775-
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
776-
threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
775+
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c2, %sz_by = %c2, %sz_bz = %c2)
776+
threads(%tx, %ty, %tz) in (%sz_tx = %c2, %sz_ty = %c2, %sz_tz = %c2) {
777777
%threadid = gpu.thread_id x
778-
affine.for %iv = %threadid to 11 step 2 iter_args(%arg = %0) -> index {
779-
%3 = arith.addi %arg, %0 : index
780-
affine.yield %3 : index
778+
affine.for %iv = %threadid to 11 step 2 iter_args(%arg = %c0) -> index {
779+
%sum = arith.addi %arg, %c0 : index
780+
affine.yield %sum : index
781781
}
782782
gpu.terminator
783783
}

0 commit comments

Comments
 (0)