12
12
13
13
#include " mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
14
14
15
+ #include " mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
16
+ #include " mlir/Analysis/DataFlow/IntegerRangeAnalysis.h"
15
17
#include " mlir/Analysis/SliceAnalysis.h"
16
18
#include " mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
17
19
#include " mlir/Dialect/Affine/Analysis/AffineStructures.h"
18
20
#include " mlir/Dialect/Affine/Analysis/NestedMatcher.h"
19
21
#include " mlir/Dialect/Affine/IR/AffineOps.h"
20
22
#include " mlir/Dialect/Affine/IR/AffineValueMap.h"
21
- #include " mlir/Dialect/GPU/IR/GPUDialect .h"
23
+ #include " mlir/Interfaces/FunctionInterfaces .h"
22
24
#include " llvm/Support/MathExtras.h"
23
25
24
26
#include " llvm/ADT/DenseSet.h"
31
33
32
34
using namespace mlir ;
33
35
using namespace mlir ::affine;
36
+ using namespace mlir ::dataflow;
34
37
35
38
#define DEBUG_TYPE " affine-loop-analysis"
36
39
@@ -85,48 +88,54 @@ void mlir::affine::getTripCountMapAndOperands(
85
88
tripCountValueMap.getOperands ().end ());
86
89
}
87
90
88
- // / Replace thread_id with its maximum value, if `replaceWithZero` is true,
89
- // / thread_id will be replaced by its minimum value 0.
90
- static void replaceGPUOperands (AffineForOp forOp,
91
- SmallVectorImpl<Value> &operands,
92
- SmallVectorImpl<AffineExpr> &symReplacements,
93
- unsigned numDim, bool replaceWithZero = false ) {
94
- auto launchOp = forOp->getParentOfType <gpu::LaunchOp>();
95
- if (!launchOp)
91
+ // / By running `IntegerRangeAnalysis` to get the ranges of operand, then fill
92
+ // / the `symReplacements` with range. If `replaceByMin` is set to true,
93
+ // / construct `replacement` using the smallest value.By default, the largest
94
+ // / value will be used for constructing `replacement`.
95
+ static void replaceOperandByRange (AffineForOp forOp,
96
+ SmallVectorImpl<Value> &operands,
97
+ SmallVectorImpl<AffineExpr> &symReplacements,
98
+ unsigned numDim, bool replaceByMin = false ) {
99
+ DataFlowSolver solver;
100
+ solver.load <DeadCodeAnalysis>();
101
+ solver.load <IntegerRangeAnalysis>();
102
+ if (failed (solver.initializeAndRun (
103
+ forOp->getParentOfType <FunctionOpInterface>())))
96
104
return ;
97
105
98
- // `b` is only used to create `AffineExpr`.
106
+ // `b` is used to create affineExpr
99
107
Builder b (forOp.getContext ());
100
- unsigned idx = 0 ;
101
-
102
108
for (unsigned i = numDim, e = operands.size (); i < e; ++i) {
103
109
Value operand = operands[i];
104
- if (Value blockSize = launchOp.getBlockSizeOnAxis (operand)) {
105
- operands[i] = blockSize;
106
- if (!replaceWithZero)
107
- symReplacements.push_back (b.getAffineSymbolExpr (idx++) - 1 );
108
- else
109
- symReplacements.push_back (b.getAffineConstantExpr (0 ));
110
+ auto lattice =
111
+ solver.lookupState <dataflow::IntegerValueRangeLattice>(operand);
112
+ if (!lattice) {
113
+ symReplacements.push_back (b.getAffineSymbolExpr (i - numDim));
110
114
continue ;
111
115
}
112
116
113
- Operation *defOp = operand.getDefiningOp ();
114
- if (!defOp) {
115
- ++idx;
117
+ if (lattice->getValue ().isUninitialized ()) {
118
+ symReplacements.push_back (b.getAffineSymbolExpr (i - numDim));
116
119
continue ;
117
120
}
118
121
119
- if (auto threadIdOp = mlir::dyn_cast<gpu::ThreadIdOp>(defOp)) {
120
- gpu::Dimension dimension = threadIdOp.getDimension ();
121
- operands[i] = launchOp.getBlockSizeOnAxis (dimension);
122
- if (!replaceWithZero)
123
- symReplacements.push_back (b.getAffineSymbolExpr (idx++) - 1 );
124
- else
125
- symReplacements.push_back (b.getAffineConstantExpr (0 ));
122
+ ConstantIntRanges range = lattice->getValue ().getValue ();
123
+ APInt max = range.smax ();
124
+ APInt min = range.smin ();
125
+ unsigned bitNums = max.getBitWidth ();
126
+
127
+ if (APInt::getSignedMaxValue (bitNums) == max &&
128
+ APInt::getSignedMinValue (bitNums) == min) {
129
+ symReplacements.push_back (b.getAffineSymbolExpr (i - numDim));
126
130
continue ;
127
131
}
128
- ++idx;
132
+
133
+ if (!replaceByMin)
134
+ symReplacements.push_back (b.getAffineConstantExpr (max.getZExtValue ()));
135
+ else
136
+ symReplacements.push_back (b.getAffineConstantExpr (min.getZExtValue ()));
129
137
}
138
+ return ;
130
139
}
131
140
132
141
// / Take the min if all trip counts are constant.
@@ -158,32 +167,28 @@ std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
158
167
if (!map)
159
168
return std::nullopt;
160
169
SmallVector<AffineExpr, 4 > symReplacements;
161
- replaceGPUOperands (forOp, operands, symReplacements, map.getNumDims ());
170
+ replaceOperandByRange (forOp, operands, symReplacements, map.getNumDims ());
162
171
map = map.replaceDimsAndSymbols ({}, symReplacements, map.getNumDims (),
163
172
map.getNumSymbols ());
164
- affine::AffineValueMap valueMap (map, operands);
165
- (void )valueMap.canonicalize ();
166
- map = valueMap.getAffineMap ();
167
173
return getConstantTripCountFromAffineMap (map);
168
174
}
169
175
170
- // / In some scenarios, such as GPU, the number of trip of each thread in the
171
- // / loop is inconsistent. This function returns the maximum number of trip.
176
+ // / Returns the maximum trip count when the operand of forOp has a range. If the
177
+ // / operand of forOp is a constant, the return value is the same as
178
+ // / `getConstantTripCount`.
172
179
std::optional<uint64_t >
173
- mlir::affine::getMaxConstantTripCount (AffineForOp forOp) {
180
+ mlir::affine::getUpperBoundOnTripCount (AffineForOp forOp) {
174
181
SmallVector<Value, 4 > operands;
175
182
AffineMap map;
176
183
getTripCountMapAndOperands (forOp, &map, &operands);
177
184
178
185
if (!map)
179
186
return std::nullopt;
180
187
SmallVector<AffineExpr, 4 > symReplacements;
181
- replaceGPUOperands (forOp, operands, symReplacements, map.getNumDims (), true );
188
+ replaceOperandByRange (forOp, operands, symReplacements, map.getNumDims (),
189
+ true );
182
190
map = map.replaceDimsAndSymbols ({}, symReplacements, map.getNumDims (),
183
191
map.getNumSymbols ());
184
- affine::AffineValueMap valueMap (map, operands);
185
- (void )valueMap.canonicalize ();
186
- map = valueMap.getAffineMap ();
187
192
return getConstantTripCountFromAffineMap (map);
188
193
}
189
194
@@ -198,12 +203,9 @@ uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
198
203
if (!map)
199
204
return 1 ;
200
205
SmallVector<AffineExpr, 4 > symReplacements;
201
- replaceGPUOperands (forOp, operands, symReplacements, map.getNumDims ());
206
+ replaceOperandByRange (forOp, operands, symReplacements, map.getNumDims ());
202
207
map = map.replaceDimsAndSymbols ({}, symReplacements, map.getNumDims (),
203
208
map.getNumSymbols ());
204
- affine::AffineValueMap valueMap (map, operands);
205
- (void )valueMap.canonicalize ();
206
- map = valueMap.getAffineMap ();
207
209
// The largest divisor of the trip count is the GCD of the individual largest
208
210
// divisors.
209
211
assert (map.getNumResults () >= 1 && " expected one or more results" );
0 commit comments