@@ -28,64 +28,80 @@ namespace looputils {
28
28
// / Stores info needed about the induction/iteration variable for each `do
29
29
// / concurrent` in a loop nest.
30
30
struct InductionVariableInfo {
31
+ InductionVariableInfo (fir::DoLoopOp doLoop) { populateInfo (doLoop); }
32
+
31
33
// / The operation allocating memory for iteration variable.
32
34
mlir::Operation *iterVarMemDef;
33
- };
35
+ // / the operation(s) updating the iteration variable with the current
36
+ // / iteration number.
37
+ llvm::SmallVector<mlir::Operation *, 2 > indVarUpdateOps;
34
38
35
- using LoopNestToIndVarMap =
36
- llvm::MapVector<fir::DoLoopOp, InductionVariableInfo>;
37
-
38
- // / For the \p doLoop parameter, find the operation that declares its iteration
39
- // / variable or allocates memory for it.
40
- // /
41
- // / For example, give the following loop:
42
- // / ```
43
- // / ...
44
- // / %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ...
45
- // / ...
46
- // / fir.do_loop %ind_var = %lb to %ub step %s unordered {
47
- // / %ind_var_conv = fir.convert %ind_var : (index) -> i32
48
- // / fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
49
- // / ...
50
- // / }
51
- // / ```
52
- // /
53
- // / This function returns the `hlfir.declare` op for `%i`.
54
- // /
55
- // / Note: The current implementation is dependent on how flang emits loop
56
- // / bodies; which is sufficient for the current simple test/use cases. If this
57
- // / proves to be insufficient, this should be made more generic.
58
- mlir::Operation *findLoopIterationVarMemDecl (fir::DoLoopOp doLoop) {
59
- mlir::Value result = nullptr ;
60
-
61
- // Checks if a StoreOp is updating the memref of the loop's iteration
62
- // variable.
63
- auto isStoringIV = [&](fir::StoreOp storeOp) {
64
- // Direct store into the IV memref.
65
- if (storeOp.getValue () == doLoop.getInductionVar ())
66
- return true ;
67
-
68
- // Indirect store into the IV memref.
69
- if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>(
70
- storeOp.getValue ().getDefiningOp ())) {
71
- if (convertOp.getOperand () == doLoop.getInductionVar ())
39
+ private:
40
+ // / For the \p doLoop parameter, find the following:
41
+ // /
42
+ // / 1. The operation that declares its iteration variable or allocates memory
43
+ // / for it. For example, give the following loop:
44
+ // / ```
45
+ // / ...
46
+ // / %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ...
47
+ // / ...
48
+ // / fir.do_loop %ind_var = %lb to %ub step %s unordered {
49
+ // / %ind_var_conv = fir.convert %ind_var : (index) -> i32
50
+ // / fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
51
+ // / ...
52
+ // / }
53
+ // / ```
54
+ // /
55
+ // / This function sets the `iterVarMemDef` member to the `hlfir.declare` op
56
+ // / for `%i`.
57
+ // /
58
+ // / 2. The operation(s) that update the loop's iteration variable from its
59
+ // / induction variable. For the above example, the `indVarUpdateOps` is
60
+ // / populated with the first 2 ops in the loop's body.
61
+ // /
62
+ // / Note: The current implementation is dependent on how flang emits loop
63
+ // / bodies; which is sufficient for the current simple test/use cases. If this
64
+ // / proves to be insufficient, this should be made more generic.
65
+ void populateInfo (fir::DoLoopOp doLoop) {
66
+ mlir::Value result = nullptr ;
67
+
68
+ // Checks if a StoreOp is updating the memref of the loop's iteration
69
+ // variable.
70
+ auto isStoringIV = [&](fir::StoreOp storeOp) {
71
+ // Direct store into the IV memref.
72
+ if (storeOp.getValue () == doLoop.getInductionVar ()) {
73
+ indVarUpdateOps.push_back (storeOp);
72
74
return true ;
73
- }
74
-
75
- return false ;
76
- };
75
+ }
77
76
78
- for (mlir::Operation &op : doLoop) {
79
- if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(op))
80
- if (isStoringIV (storeOp)) {
81
- result = storeOp.getMemref ();
82
- break ;
77
+ // Indirect store into the IV memref.
78
+ if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>(
79
+ storeOp.getValue ().getDefiningOp ())) {
80
+ if (convertOp.getOperand () == doLoop.getInductionVar ()) {
81
+ indVarUpdateOps.push_back (convertOp);
82
+ indVarUpdateOps.push_back (storeOp);
83
+ return true ;
84
+ }
83
85
}
86
+
87
+ return false ;
88
+ };
89
+
90
+ for (mlir::Operation &op : doLoop) {
91
+ if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(op))
92
+ if (isStoringIV (storeOp)) {
93
+ result = storeOp.getMemref ();
94
+ break ;
95
+ }
96
+ }
97
+
98
+ assert (result != nullptr && result.getDefiningOp () != nullptr );
99
+ iterVarMemDef = result.getDefiningOp ();
84
100
}
101
+ };
85
102
86
- assert (result != nullptr && result.getDefiningOp () != nullptr );
87
- return result.getDefiningOp ();
88
- }
103
+ using LoopNestToIndVarMap =
104
+ llvm::MapVector<fir::DoLoopOp, InductionVariableInfo>;
89
105
90
106
// / Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
91
107
// / there are no operations in \p outerloop's body other than:
@@ -181,10 +197,7 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
181
197
assert (currentLoop.getUnordered ());
182
198
183
199
while (true ) {
184
- loopNest.insert (
185
- {currentLoop,
186
- InductionVariableInfo{findLoopIterationVarMemDecl (currentLoop)}});
187
-
200
+ loopNest.insert ({currentLoop, InductionVariableInfo (currentLoop)});
188
201
llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
189
202
190
203
for (auto nestedLoop : currentLoop.getRegion ().getOps <fir::DoLoopOp>())
@@ -210,6 +223,96 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
210
223
211
224
return mlir::success ();
212
225
}
226
+
227
+ // / Prepares the `fir.do_loop` nest to be easily mapped to OpenMP. In
228
+ // / particular, this function would take this input IR:
229
+ // / ```
230
+ // / fir.do_loop %i_iv = %i_lb to %i_ub step %i_step unordered {
231
+ // / fir.store %i_iv to %i#1 : !fir.ref<i32>
232
+ // / %j_lb = arith.constant 1 : i32
233
+ // / %j_ub = arith.constant 10 : i32
234
+ // / %j_step = arith.constant 1 : index
235
+ // /
236
+ // / fir.do_loop %j_iv = %j_lb to %j_ub step %j_step unordered {
237
+ // / fir.store %j_iv to %j#1 : !fir.ref<i32>
238
+ // / ...
239
+ // / }
240
+ // / }
241
+ // / ```
242
+ // /
243
+ // / into the following form (using generic op form since the result is
244
+ // / technically an invalid `fir.do_loop` op:
245
+ // /
246
+ // / ```
247
+ // / "fir.do_loop"(%i_lb, %i_ub, %i_step) <{unordered}> ({
248
+ // / ^bb0(%i_iv: index):
249
+ // / %j_lb = "arith.constant"() <{value = 1 : i32}> : () -> i32
250
+ // / %j_ub = "arith.constant"() <{value = 10 : i32}> : () -> i32
251
+ // / %j_step = "arith.constant"() <{value = 1 : index}> : () -> index
252
+ // /
253
+ // / "fir.do_loop"(%j_lb, %j_ub, %j_step) <{unordered}> ({
254
+ // / ^bb0(%new_i_iv: index, %new_j_iv: index):
255
+ // / "fir.store"(%new_i_iv, %i#1) : (i32, !fir.ref<i32>) -> ()
256
+ // / "fir.store"(%new_j_iv, %j#1) : (i32, !fir.ref<i32>) -> ()
257
+ // / ...
258
+ // / })
259
+ // / ```
260
+ // /
261
+ // / What happened to the loop nest is the following:
262
+ // /
263
+ // / * the innermost loop's entry block was updated from having one operand to
264
+ // / having `n` operands where `n` is the number of loops in the nest,
265
+ // /
266
+ // / * the outer loop(s)' ops that update the IVs were sank inside the innermost
267
+ // / loop (see the `"fir.store"(%new_i_iv, %i#1)` op above),
268
+ // /
269
+ // / * the innermost loop's entry block's arguments were mapped in order from the
270
+ // / outermost to the innermost IV.
271
+ // /
272
+ // / With this IR change, we can directly inline the innermost loop's region into
273
+ // / the newly generated `omp.loop_nest` op.
274
+ // /
275
+ // / Note that this function has a pre-condition that \p loopNest consists of
276
+ // / perfectly nested loops; i.e. there are no in-between ops between 2 nested
277
+ // / loops except for the ops to setup the inner loop's LB, UB, and step. These
278
+ // / ops are handled/cloned by `genLoopNestClauseOps(..)`.
279
+ void sinkLoopIVArgs (mlir::ConversionPatternRewriter &rewriter,
280
+ looputils::LoopNestToIndVarMap &loopNest) {
281
+ if (loopNest.size () <= 1 )
282
+ return ;
283
+
284
+ fir::DoLoopOp innermostLoop = loopNest.back ().first ;
285
+ mlir::Operation &innermostFirstOp = innermostLoop.getRegion ().front ().front ();
286
+
287
+ llvm::SmallVector<mlir::Type> argTypes;
288
+ llvm::SmallVector<mlir::Location> argLocs;
289
+
290
+ for (auto &[doLoop, indVarInfo] : llvm::drop_end (loopNest)) {
291
+ // Sink the IV update ops to the innermost loop. We need to do for all loops
292
+ // except for the innermost one, hence the `drop_end` usage above.
293
+ for (mlir::Operation *op : indVarInfo.indVarUpdateOps )
294
+ op->moveBefore (&innermostFirstOp);
295
+
296
+ argTypes.push_back (doLoop.getInductionVar ().getType ());
297
+ argLocs.push_back (doLoop.getInductionVar ().getLoc ());
298
+ }
299
+
300
+ mlir::Region &innermmostRegion = innermostLoop.getRegion ();
301
+ // Extend the innermost entry block with arguments to represent the outer IVs.
302
+ innermmostRegion.addArguments (argTypes, argLocs);
303
+
304
+ unsigned idx = 1 ;
305
+ // In reverse, remap the IVs of the loop nest from the old values to the new
306
+ // ones. We do that in reverse since the first argument before this loop is
307
+ // the old IV for the innermost loop. Therefore, we want to replace it first
308
+ // before the old value (1st argument in the block) is remapped to be the IV
309
+ // of the outermost loop in the nest.
310
+ for (auto &[doLoop, _] : llvm::reverse (loopNest)) {
311
+ doLoop.getInductionVar ().replaceAllUsesWith (
312
+ innermmostRegion.getArgument (innermmostRegion.getNumArguments () - idx));
313
+ ++idx;
314
+ }
315
+ }
213
316
} // namespace looputils
214
317
215
318
class DoConcurrentConversion : public mlir ::OpConversionPattern<fir::DoLoopOp> {
@@ -236,6 +339,7 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
236
339
" Some `do concurent` loops are not perfectly-nested. "
237
340
" These will be serialized." );
238
341
342
+ looputils::sinkLoopIVArgs (rewriter, loopNest);
239
343
mlir::IRMapping mapper;
240
344
genParallelOp (doLoop.getLoc (), rewriter, loopNest, mapper);
241
345
mlir::omp::LoopNestOperands loopNestClauseOps;
0 commit comments