Skip to content
Open
5 changes: 5 additions & 0 deletions mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ void getTripCountMapAndOperands(AffineForOp forOp, AffineMap *map,
/// constant trip count in non-trivial cases.
std::optional<uint64_t> getConstantTripCount(AffineForOp forOp);

/// Returns the maximum trip count of `forOp` when the operands of its bounds
/// are only known to lie within a value range, or std::nullopt if no such
/// maximum can be computed. If the bound operands are constants, the result is
/// the same as that of `getConstantTripCount`.
std::optional<uint64_t> getUpperBoundOnTripCount(AffineForOp forOp);

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCount), and
/// this method is thus able to determine non-trivial divisors.
Expand Down
86 changes: 64 additions & 22 deletions mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
#include "llvm/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
Expand Down Expand Up @@ -58,8 +60,8 @@ class DirectedOpGraph {

void printEdges() {
for (auto &en : edges) {
llvm::dbgs() << *en.first << " (" << en.first << ")"
<< " has " << en.second.size() << " edges:\n";
llvm::dbgs() << *en.first << " (" << en.first << ")" << " has "
<< en.second.size() << " edges:\n";
for (auto *node : en.second) {
llvm::dbgs() << '\t' << *node->op << '\n';
}
Expand All @@ -70,7 +72,7 @@ class DirectedOpGraph {
/// A node of a directed graph between MLIR Operations to model various
/// relationships. This is meant to be used internally.
struct DGNode {
DGNode(Operation *op) : op(op) {};
DGNode(Operation *op) : op(op){};
Operation *op;

// Start and finish visit numbers are standard in DFS to implement things
Expand Down Expand Up @@ -212,32 +214,68 @@ void mlir::affine::getTripCountMapAndOperands(
tripCountValueMap.getOperands().end());
}

/// Computes a constant bound on the trip count described by applying `map` to
/// `operands`. Every result of `map` is treated as one trip count expression
/// whose value may span a range when any operand has a value range.
///
/// If `type` is BoundType::LB, returns the minimum over all results of their
/// constant lower bounds; if `type` is BoundType::UB, returns the maximum over
/// all results of their constant (closed) upper bounds. Negative bounds are
/// clamped to zero, since a trip count can never be negative and converting a
/// negative value to `uint64_t` would otherwise wrap to a huge count.
///
/// Returns std::nullopt if `type` is neither LB nor UB, if `map` has no
/// results, or if either bound of any result cannot be computed.
static std::optional<uint64_t>
getKnownTripCountBound(AffineMap map, SmallVectorImpl<Value> &operands,
                       presburger::BoundType type) {
  const bool wantLowerBound = type == presburger::BoundType::LB;
  // Only lower- and upper-bound queries are supported.
  if (!wantLowerBound && type != presburger::BoundType::UB)
    return std::nullopt;

  std::optional<uint64_t> tripCount;
  for (unsigned i = 0, e = map.getNumResults(); i < e; ++i) {
    AffineMap subMap = map.getSubMap(i);
    ValueBoundsConstraintSet::Variable var(subMap, operands);
    auto lbBound = ValueBoundsConstraintSet::computeConstantBound(
        presburger::BoundType::LB, var);
    auto ubBound = ValueBoundsConstraintSet::computeConstantBound(
        presburger::BoundType::UB, var, /*stopCondition=*/nullptr,
        /*closedUB=*/true);
    // Both bounds must be known regardless of which one was requested, so
    // that the LB and UB queries succeed or fail together for a given loop.
    if (failed(lbBound) || failed(ubBound))
      return std::nullopt;

    // Clamp before the unsigned conversion: a negative bound means the loop
    // executes zero times.
    const int64_t signedBound =
        wantLowerBound ? lbBound.value() : ubBound.value();
    const uint64_t bound =
        static_cast<uint64_t>(std::max<int64_t>(signedBound, 0));

    if (!tripCount.has_value())
      tripCount = bound;
    else if (wantLowerBound)
      tripCount = std::min(*tripCount, bound);
    else
      tripCount = std::max(*tripCount, bound);
  }
  return tripCount;
}

/// Returns the trip count of the loop if it's a constant. When the operands
/// of the loop bounds are only known to lie within a value range, returns the
/// minimum trip count over that range instead; compare the result with
/// `getUpperBoundOnTripCount` to tell whether the trip count is truly
/// constant. Returns std::nullopt if no trip count map can be built or the
/// bounds of the trip count cannot be computed. This method uses affine
/// expression analysis (in turn using getTripCount) and is able to determine
/// the trip count in non-trivial cases.
std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return std::nullopt;
  return getKnownTripCountBound(map, operands, presburger::BoundType::LB);
}

// Take the min if all trip counts are constant.
std::optional<uint64_t> tripCount;
for (auto resultExpr : map.getResults()) {
if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
if (tripCount.has_value())
tripCount =
std::min(*tripCount, static_cast<uint64_t>(constExpr.getValue()));
else
tripCount = constExpr.getValue();
} else {
return std::nullopt;
}
}
return tripCount;
/// Computes the largest trip count `forOp` can have when the operands of its
/// bounds are only known to lie within a value range. For constant bound
/// operands the result matches that of `getConstantTripCount`. Yields
/// std::nullopt when no trip count map is available or no bound can be found.
std::optional<uint64_t>
mlir::affine::getUpperBoundOnTripCount(AffineForOp forOp) {
  AffineMap tripCountMap;
  SmallVector<Value> tripCountOperands;
  getTripCountMapAndOperands(forOp, &tripCountMap, &tripCountOperands);

  // Only query the bound when a trip count map was actually produced.
  if (tripCountMap)
    return getKnownTripCountBound(tripCountMap, tripCountOperands,
                                  presburger::BoundType::UB);
  return std::nullopt;
}

/// Returns the greatest known integral divisor of the trip count. Affine
Expand All @@ -255,10 +293,14 @@ uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
// divisors.
assert(map.getNumResults() >= 1 && "expected one or more results");
std::optional<uint64_t> gcd;
for (auto resultExpr : map.getResults()) {
for (unsigned i = 0, e = map.getResults().size(); i < e; ++i) {
uint64_t thisGcd;
if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
uint64_t tripCount = constExpr.getValue();
AffineMap subMap = map.getSubMap(i);
ValueBoundsConstraintSet::Variable var(subMap, operands);
auto lbBound = ValueBoundsConstraintSet::computeConstantBound(
mlir::presburger::BoundType::LB, var);
if (!failed(lbBound)) {
uint64_t tripCount = lbBound.value();
// 0 iteration loops (greatest divisor is 2^64 - 1).
if (tripCount == 0)
thisGcd = std::numeric_limits<uint64_t>::max();
Expand All @@ -267,7 +309,7 @@ uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
thisGcd = tripCount;
} else {
// Trip count is not a known constant; return its largest known divisor.
thisGcd = resultExpr.getLargestKnownDivisor();
thisGcd = map.getResult(i).getLargestKnownDivisor();
}
if (gcd.has_value())
gcd = std::gcd(*gcd, thisGcd);
Expand Down
43 changes: 30 additions & 13 deletions mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,10 @@ static void replaceIterArgsAndYieldResults(AffineForOp forOp) {
/// Promotes the loop body of a forOp to its containing block if the forOp
/// was known to have a single iteration.
LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) {
std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
if (!tripCount || *tripCount != 1)
std::optional<uint64_t> minTripCount = getConstantTripCount(forOp);
std::optional<uint64_t> maxTripCount = getUpperBoundOnTripCount(forOp);
if (!minTripCount || *minTripCount != 1 || !maxTripCount ||
*maxTripCount != 1)
return failure();

// TODO: extend this for arbitrary affine bounds.
Expand Down Expand Up @@ -885,15 +887,23 @@ void mlir::affine::getTileableBands(
/// Unrolls this loop completely.
///
/// The unroll factor is the value returned by `getConstantTripCount`. Returns
/// failure if either that value or the upper bound on the trip count is
/// unknown. Returns success without changing the loop when the trip count is
/// zero. A loop that `promoteIfSingleIteration` accepts is promoted into its
/// parent block; otherwise the work is delegated to `loopUnrollByFactor`.
LogicalResult mlir::affine::loopUnrollFull(AffineForOp forOp) {
  std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
  std::optional<uint64_t> maxMayBeConstantTripCount =
      getUpperBoundOnTripCount(forOp);

  // Both values are required: the first one is dereferenced below, so bail
  // out if either of them is missing.
  if (!mayBeConstantTripCount.has_value() ||
      !maxMayBeConstantTripCount.has_value())
    return failure();

  uint64_t tripCount = *mayBeConstantTripCount;

  // A trip count of zero leaves nothing to unroll.
  if (tripCount == 0)
    return success();

  if (succeeded(promoteIfSingleIteration(forOp)))
    return success();

  return loopUnrollByFactor(forOp, tripCount);
}

/// Unrolls this loop by the specified factor or by the trip count (if constant)
Expand Down Expand Up @@ -1014,8 +1024,12 @@ LogicalResult mlir::affine::loopUnrollByFactor(
assert(unrollFactor > 0 && "unroll factor should be positive");

std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
std::optional<uint64_t> maxMayBeConstantTripCount =
getUpperBoundOnTripCount(forOp);
if (unrollFactor == 1) {
if (mayBeConstantTripCount == 1 && failed(promoteIfSingleIteration(forOp)))
if (mayBeConstantTripCount && *mayBeConstantTripCount == 1 &&
maxMayBeConstantTripCount && *maxMayBeConstantTripCount == 1 &&
failed(promoteIfSingleIteration(forOp)))
return failure();
return success();
}
Expand All @@ -1035,7 +1049,10 @@ LogicalResult mlir::affine::loopUnrollByFactor(
}

// Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
// If the trip count has a range, a clean up loop needs to be generated.
if ((mayBeConstantTripCount && maxMayBeConstantTripCount &&
*mayBeConstantTripCount != *maxMayBeConstantTripCount) ||
getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
// Loops where the lower bound is a max expression or the upper bound is
// a min expression and the trip count doesn't divide the unroll factor
// can't be unrolled since the lower bound of the cleanup loop in such cases
Expand Down
3 changes: 2 additions & 1 deletion mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -646,7 +646,8 @@ FailureOr<int64_t> ValueBoundsConstraintSet::computeConstantBound(
// Compute constant bound for `valueDim`.
int64_t ubAdjustment = closedUB ? 0 : 1;
if (auto bound = cstr.cstr.getConstantBound64(type, pos))
return type == BoundType::UB ? *bound + ubAdjustment : *bound;
if (bound.has_value())
return type == BoundType::UB ? *bound + ubAdjustment : *bound;
return failure();
}

Expand Down
83 changes: 80 additions & 3 deletions mlir/test/Dialect/Affine/unroll.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// UNROLL-FULL-DAG: [[$MAP4:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + 1)>
// UNROLL-FULL-DAG: [[$MAP5:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + 3)>
// UNROLL-FULL-DAG: [[$MAP6:#map[0-9]*]] = affine_map<(d0)[s0] -> (d0 + s0 + 1)>
// UNROLL-FULL-DAG: [[$MAP7:#map[0-9]*]] = affine_map<()[s0] -> (s0 + (((-s0 + 9) ceildiv 2) floordiv 4) * 8)>

// SHORT-DAG: [[$MAP0:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>

Expand All @@ -22,7 +23,8 @@
// UNROLL-BY-4-DAG: [[$MAP4:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + 3)>
// UNROLL-BY-4-DAG: [[$MAP5:#map[0-9]*]] = affine_map<(d0)[s0] -> (d0 + s0 + 1)>
// UNROLL-BY-4-DAG: [[$MAP6:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 * 16 + d1)>
// UNROLL-BY-4-DAG: [[$MAP11:#map[0-9]*]] = affine_map<(d0) -> (d0)>
// UNROLL-BY-4-DAG: [[$MAP7:#map[0-9]*]] = affine_map<(d0) -> (d0)>
// UNROLL-BY-4-DAG: [[$MAP8:#map[0-9]*]] = affine_map<()[s0] -> (s0 + (((-s0 + 11) ceildiv 2) floordiv 4) * 8)>

// UNROLL-FULL-LABEL: func @loop_nest_simplest() {
func.func @loop_nest_simplest() {
Expand Down Expand Up @@ -258,6 +260,59 @@ gpu.module @unroll_full {
}
}

// The lower bound lies in [0, 1], so with an upper bound of 3 and a step of 2
// the trip count is either 2 or 1. Full unrolling is expected to leave the
// loop as it is.
// UNROLL-FULL-LABEL: func @bound_unroll_partial
func.func @bound_unroll_partial() {
%c0 = arith.constant 0 :index
// UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
%bound = test.value_with_bounds { min = 0 : index, max = 1 : index}
affine.for %iv = %bound to 3 step 2 iter_args(%arg = %c0) -> index {
%sum = arith.addi %arg, %c0 : index
affine.yield %sum : index
}
// UNROLL-FULL: affine.for %{{.*}} = %{{.*}} to 3 step 2 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
// UNROLL-FULL-NEXT: %[[SUM:.*]] = arith.addi %[[ARG]], %[[C0]] : index
// UNROLL-FULL-NEXT: affine.yield %[[SUM]] : index
// UNROLL-FULL-NEXT: }
return
}

// The lower bound lies in [0, 1], but with an upper bound of 6 and a step of 2
// the trip count is 3 for either value. The loop is expected to be fully
// unrolled into three additions.
// UNROLL-FULL-LABEL: func @bound_unroll_all
func.func @bound_unroll_all() {
%c0 = arith.constant 0 :index
// UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
%bound = test.value_with_bounds { min = 0 : index, max = 1 : index}
affine.for %iv = %bound to 6 step 2 iter_args(%arg = %c0) -> index {
%sum = arith.addi %arg, %c0 : index
affine.yield %sum : index
}
// UNROLL-FULL: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
// UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
// UNROLL-FULL-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
return
}

// The lower bound lies in [0, 1], so with an upper bound of 9 and a step of 2
// the trip count is either 5 or 4. Four unrolled additions are expected,
// followed by a cleanup loop for the remaining iteration.
// UNROLL-FULL-LABEL: func.func @bound_partial_unroll_factor_4
func.func @bound_partial_unroll_factor_4() {
%c0 = arith.constant 0 :index
// UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-FULL: %[[Bound:.*]] = test.value_with_bounds {max = 1 : index, min = 0 : index}
%bound = test.value_with_bounds { min = 0 : index, max = 1 : index}
affine.for %iv = %bound to 9 step 2 iter_args(%arg = %c0) -> index {
%sum = arith.addi %arg, %c0 : index
affine.yield %sum : index
}
// UNROLL-FULL-NEXT: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
// UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
// UNROLL-FULL-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
// UNROLL-FULL-NEXT: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
// UNROLL-FULL-NEXT: affine.for %{{.*}} = [[$MAP7]]()[%[[Bound]]] to 9 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
// UNROLL-FULL-NEXT: %[[SUM_4:.*]] = arith.addi %[[ARG]], %[[C0]] : index
// UNROLL-FULL-NEXT: affine.yield %[[SUM_4]] : index
// UNROLL-FULL-NEXT: }
return
}


// SHORT-LABEL: func @loop_nest_outer_unroll() {
func.func @loop_nest_outer_unroll() {
// SHORT: affine.for %arg0 = 0 to 4 {
Expand Down Expand Up @@ -470,7 +525,7 @@ func.func @loop_nest_operand1() {
// UNROLL-BY-4-LABEL: func @loop_nest_operand2() {
func.func @loop_nest_operand2() {
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 step 2 {
// UNROLL-BY-4-NEXT: affine.for %arg1 = [[$MAP11]](%arg0) to #map{{[0-9]*}}(%arg0) step 4 {
// UNROLL-BY-4-NEXT: affine.for %arg1 = [[$MAP7]](%arg0) to #map{{[0-9]*}}(%arg0) step 4 {
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
Expand Down Expand Up @@ -516,7 +571,7 @@ func.func @floordiv_mod_ub(%M : index, %N : index) {
func.func @loop_nest_operand3() {
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 step 2 {
affine.for %i = 0 to 100 step 2 {
// UNROLL-BY-4: affine.for %arg1 = [[$MAP11]](%arg0) to #map{{[0-9]*}}(%arg0) step 4 {
// UNROLL-BY-4: affine.for %arg1 = [[$MAP7]](%arg0) to #map{{[0-9]*}}(%arg0) step 4 {
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
Expand Down Expand Up @@ -701,6 +756,28 @@ func.func @unroll_with_iter_args_and_promotion(%arg0 : f32, %arg1 : f32) -> f32
return %sum : f32
}

// The lower bound lies in [0, 1], so with an upper bound of 11 and a step of 2
// the trip count is either 6 or 5. Unrolling by a factor of 4 is expected to
// produce four additions followed by a cleanup loop for the remainder.
// UNROLL-BY-4-LABEL: func @bound_unroll_by_factor_4
func.func @bound_unroll_by_factor_4() {
%c0 = arith.constant 0 :index
// UNROLL-BY-4: %[[C0:.*]] = arith.constant 0 : index
%bound = test.value_with_bounds { min = 0 : index, max = 1 : index}
// UNROLL-BY-4: %[[Bound:.*]] = test.value_with_bounds {max = 1 : index, min = 0 : index}
affine.for %iv = %bound to 11 step 2 iter_args(%arg = %c0) -> index {
%sum = arith.addi %arg, %c0 : index
affine.yield %sum : index
}
// UNROLL-BY-4-NEXT: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
// UNROLL-BY-4-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
// UNROLL-BY-4-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
// UNROLL-BY-4-NEXT: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
// UNROLL-BY-4-NEXT: affine.for %[[VAL_20:.*]] = [[$MAP8]](){{\[}}%[[Bound]]] to 11 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
// UNROLL-BY-4-NEXT: %[[SUM_4:.*]] = arith.addi %[[ARG]], %[[C0]] : index
// UNROLL-BY-4-NEXT: affine.yield %[[SUM_4]] : index
// UNROLL-BY-4-NEXT: }
return
}


// UNROLL-FULL: func @unroll_zero_trip_count_case
func.func @unroll_zero_trip_count_case() {
// CHECK-NEXT: affine.for %{{.*}} = 0 to 0
Expand Down
Loading