@@ -26,10 +26,13 @@ namespace flangomp {
2626namespace {
2727namespace looputils {
2828// / Stores info needed about the induction/iteration variable for each `do
29- // / concurrent` in a loop nest. This includes only for now :
29+ // / concurrent` in a loop nest. This includes:
3030// / * the operation allocating memory for iteration variable,
31+ // / * the operation(s) updating the iteration variable with the current
32+ // / iteration number.
3133struct InductionVariableInfo {
3234 mlir::Operation *iterVarMemDef;
35+ llvm::SetVector<mlir::Operation *> indVarUpdateOps;
3336};
3437
3538using LoopNestToIndVarMap =
@@ -102,6 +105,47 @@ mlir::Operation *findLoopIterationVarMemDecl(fir::DoLoopOp doLoop) {
102105 return result.getDefiningOp ();
103106}
104107
108+ // / Collects the op(s) responsible for updating a loop's iteration variable with
109+ // / the current iteration number. For example, for the input IR:
110+ // / ```
111+ // / %i = fir.alloca i32 {bindc_name = "i"}
112+ // / %i_decl:2 = hlfir.declare %i ...
113+ // / ...
114+ // / fir.do_loop %i_iv = %lb to %ub step %step unordered {
115+ // / %1 = fir.convert %i_iv : (index) -> i32
116+ // / fir.store %1 to %i_decl#1 : !fir.ref<i32>
117+ // / ...
118+ // / }
119+ // / ```
120+ // / this function would return the first 2 ops in the `fir.do_loop`'s region.
121+ llvm::SetVector<mlir::Operation *>
122+ extractIndVarUpdateOps (fir::DoLoopOp doLoop) {
123+ mlir::Value indVar = doLoop.getInductionVar ();
124+ llvm::SetVector<mlir::Operation *> indVarUpdateOps;
125+
126+ llvm::SmallVector<mlir::Value> toProcess;
127+ toProcess.push_back (indVar);
128+
129+ llvm::DenseSet<mlir::Value> done;
130+
131+ while (!toProcess.empty ()) {
132+ mlir::Value val = toProcess.back ();
133+ toProcess.pop_back ();
134+
135+ if (!done.insert (val).second )
136+ continue ;
137+
138+ for (mlir::Operation *user : val.getUsers ()) {
139+ indVarUpdateOps.insert (user);
140+
141+ for (mlir::Value result : user->getResults ())
142+ toProcess.push_back (result);
143+ }
144+ }
145+
146+ return std::move (indVarUpdateOps);
147+ }
148+
105149// / Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
106150// / there are no operations in \p outerloop's body other than:
107151// /
@@ -175,7 +219,9 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
175219 while (true ) {
176220 loopNest.try_emplace (
177221 currentLoop,
178- InductionVariableInfo{findLoopIterationVarMemDecl (currentLoop)});
222+ InductionVariableInfo{
223+ findLoopIterationVarMemDecl (currentLoop),
224+ std::move (looputils::extractIndVarUpdateOps (currentLoop))});
179225
180226 auto directlyNestedLoops = currentLoop.getRegion ().getOps <fir::DoLoopOp>();
181227 llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
@@ -200,6 +246,96 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
200246
201247 return mlir::success ();
202248}
249+
250+ // / Prepares the `fir.do_loop` nest to be easily mapped to OpenMP. In
251+ // / particular, this function would take this input IR:
252+ // / ```
253+ // / fir.do_loop %i_iv = %i_lb to %i_ub step %i_step unordered {
254+ // / fir.store %i_iv to %i#1 : !fir.ref<i32>
255+ // / %j_lb = arith.constant 1 : i32
256+ // / %j_ub = arith.constant 10 : i32
257+ // / %j_step = arith.constant 1 : index
258+ // /
259+ // / fir.do_loop %j_iv = %j_lb to %j_ub step %j_step unordered {
260+ // / fir.store %j_iv to %j#1 : !fir.ref<i32>
261+ // / ...
262+ // / }
263+ // / }
264+ // / ```
265+ // /
266+ // / into the following form (using generic op form since the result is
267+ // / technically an invalid `fir.do_loop` op:
268+ // /
269+ // / ```
270+ // / "fir.do_loop"(%i_lb, %i_ub, %i_step) <{unordered}> ({
271+ // / ^bb0(%i_iv: index):
272+ // / %j_lb = "arith.constant"() <{value = 1 : i32}> : () -> i32
273+ // / %j_ub = "arith.constant"() <{value = 10 : i32}> : () -> i32
274+ // / %j_step = "arith.constant"() <{value = 1 : index}> : () -> index
275+ // /
276+ // / "fir.do_loop"(%j_lb, %j_ub, %j_step) <{unordered}> ({
277+ // / ^bb0(%new_i_iv: index, %new_j_iv: index):
278+ // / "fir.store"(%new_i_iv, %i#1) : (i32, !fir.ref<i32>) -> ()
279+ // / "fir.store"(%new_j_iv, %j#1) : (i32, !fir.ref<i32>) -> ()
280+ // / ...
281+ // / })
282+ // / ```
283+ // /
284+ // / What happened to the loop nest is the following:
285+ // /
286+ // / * the innermost loop's entry block was updated from having one operand to
287+ // / having `n` operands where `n` is the number of loops in the nest,
288+ // /
289+ // / * the outer loop(s)' ops that update the IVs were sank inside the innermost
290+ // / loop (see the `"fir.store"(%new_i_iv, %i#1)` op above),
291+ // /
292+ // / * the innermost loop's entry block's arguments were mapped in order from the
293+ // / outermost to the innermost IV.
294+ // /
295+ // / With this IR change, we can directly inline the innermost loop's region into
296+ // / the newly generated `omp.loop_nest` op.
297+ // /
298+ // / Note that this function has a pre-condition that \p loopNest consists of
299+ // / perfectly nested loops; i.e. there are no in-between ops between 2 nested
300+ // / loops except for the ops to setup the inner loop's LB, UB, and step. These
301+ // / ops are handled/cloned by `genLoopNestClauseOps(..)`.
302+ void sinkLoopIVArgs (mlir::ConversionPatternRewriter &rewriter,
303+ looputils::LoopNestToIndVarMap &loopNest) {
304+ if (loopNest.size () <= 1 )
305+ return ;
306+
307+ fir::DoLoopOp innermostLoop = loopNest.back ().first ;
308+ mlir::Operation &innermostFirstOp = innermostLoop.getRegion ().front ().front ();
309+
310+ llvm::SmallVector<mlir::Type> argTypes;
311+ llvm::SmallVector<mlir::Location> argLocs;
312+
313+ for (auto &[doLoop, indVarInfo] : llvm::drop_end (loopNest)) {
314+ // Sink the IV update ops to the innermost loop. We need to do for all loops
315+ // except for the innermost one, hence the `drop_end` usage above.
316+ for (mlir::Operation *op : indVarInfo.indVarUpdateOps )
317+ op->moveBefore (&innermostFirstOp);
318+
319+ argTypes.push_back (doLoop.getInductionVar ().getType ());
320+ argLocs.push_back (doLoop.getInductionVar ().getLoc ());
321+ }
322+
323+ mlir::Region &innermmostRegion = innermostLoop.getRegion ();
324+ // Extend the innermost entry block with arguments to represent the outer IVs.
325+ innermmostRegion.addArguments (argTypes, argLocs);
326+
327+ unsigned idx = 1 ;
328+ // In reverse, remap the IVs of the loop nest from the old values to the new
329+ // ones. We do that in reverse since the first argument before this loop is
330+ // the old IV for the innermost loop. Therefore, we want to replace it first
331+ // before the old value (1st argument in the block) is remapped to be the IV
332+ // of the outermost loop in the nest.
333+ for (auto &[doLoop, _] : llvm::reverse (loopNest)) {
334+ doLoop.getInductionVar ().replaceAllUsesWith (
335+ innermmostRegion.getArgument (innermmostRegion.getNumArguments () - idx));
336+ ++idx;
337+ }
338+ }
203339} // namespace looputils
204340
205341class DoConcurrentConversion : public mlir ::OpConversionPattern<fir::DoLoopOp> {
@@ -222,6 +358,7 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
222358 " Some `do concurent` loops are not perfectly-nested. "
223359 " These will be serialzied." );
224360
361+ looputils::sinkLoopIVArgs (rewriter, loopNest);
225362 mlir::IRMapping mapper;
226363 genParallelOp (doLoop.getLoc (), rewriter, loopNest, mapper);
227364 mlir::omp::LoopNestOperands loopNestClauseOps;
0 commit comments