@@ -30,6 +30,9 @@ namespace looputils {
3030struct InductionVariableInfo {
3131 // / the operation allocating memory for iteration variable,
3232 mlir::Operation *iterVarMemDef;
33+ // / the operation(s) updating the iteration variable with the current
34+ // / iteration number.
35+ llvm::SetVector<mlir::Operation *> indVarUpdateOps;
3336};
3437
3538using LoopNestToIndVarMap =
@@ -70,6 +73,47 @@ mlir::Operation *findLoopIterationVarMemDecl(fir::DoLoopOp doLoop) {
7073 return result.getDefiningOp ();
7174}
7275
76+ // / Collects the op(s) responsible for updating a loop's iteration variable with
77+ // / the current iteration number. For example, for the input IR:
78+ // / ```
79+ // / %i = fir.alloca i32 {bindc_name = "i"}
80+ // / %i_decl:2 = hlfir.declare %i ...
81+ // / ...
82+ // / fir.do_loop %i_iv = %lb to %ub step %step unordered {
83+ // / %1 = fir.convert %i_iv : (index) -> i32
84+ // / fir.store %1 to %i_decl#1 : !fir.ref<i32>
85+ // / ...
86+ // / }
87+ // / ```
88+ // / this function would return the first 2 ops in the `fir.do_loop`'s region.
89+ llvm::SetVector<mlir::Operation *>
90+ extractIndVarUpdateOps (fir::DoLoopOp doLoop) {
91+ mlir::Value indVar = doLoop.getInductionVar ();
92+ llvm::SetVector<mlir::Operation *> indVarUpdateOps;
93+
94+ llvm::SmallVector<mlir::Value> toProcess;
95+ toProcess.push_back (indVar);
96+
97+ llvm::DenseSet<mlir::Value> done;
98+
99+ while (!toProcess.empty ()) {
100+ mlir::Value val = toProcess.back ();
101+ toProcess.pop_back ();
102+
103+ if (!done.insert (val).second )
104+ continue ;
105+
106+ for (mlir::Operation *user : val.getUsers ()) {
107+ indVarUpdateOps.insert (user);
108+
109+ for (mlir::Value result : user->getResults ())
110+ toProcess.push_back (result);
111+ }
112+ }
113+
114+ return std::move (indVarUpdateOps);
115+ }
116+
73117// / Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
74118// / there are no operations in \p outerloop's body other than:
75119// /
@@ -166,7 +210,9 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
166210 while (true ) {
167211 loopNest.insert (
168212 {currentLoop,
169- InductionVariableInfo{findLoopIterationVarMemDecl (currentLoop)}});
213+ InductionVariableInfo{
214+ findLoopIterationVarMemDecl (currentLoop),
215+ std::move (looputils::extractIndVarUpdateOps (currentLoop))}});
170216
171217 llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
172218
@@ -193,6 +239,96 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
193239
194240 return mlir::success ();
195241}
242+
243+ // / Prepares the `fir.do_loop` nest to be easily mapped to OpenMP. In
244+ // / particular, this function would take this input IR:
245+ // / ```
246+ // / fir.do_loop %i_iv = %i_lb to %i_ub step %i_step unordered {
247+ // / fir.store %i_iv to %i#1 : !fir.ref<i32>
248+ // / %j_lb = arith.constant 1 : i32
249+ // / %j_ub = arith.constant 10 : i32
250+ // / %j_step = arith.constant 1 : index
251+ // /
252+ // / fir.do_loop %j_iv = %j_lb to %j_ub step %j_step unordered {
253+ // / fir.store %j_iv to %j#1 : !fir.ref<i32>
254+ // / ...
255+ // / }
256+ // / }
257+ // / ```
258+ // /
259+ // / into the following form (using generic op form since the result is
260+ // / technically an invalid `fir.do_loop` op:
261+ // /
262+ // / ```
263+ // / "fir.do_loop"(%i_lb, %i_ub, %i_step) <{unordered}> ({
264+ // / ^bb0(%i_iv: index):
265+ // / %j_lb = "arith.constant"() <{value = 1 : i32}> : () -> i32
266+ // / %j_ub = "arith.constant"() <{value = 10 : i32}> : () -> i32
267+ // / %j_step = "arith.constant"() <{value = 1 : index}> : () -> index
268+ // /
269+ // / "fir.do_loop"(%j_lb, %j_ub, %j_step) <{unordered}> ({
270+ // / ^bb0(%new_i_iv: index, %new_j_iv: index):
271+ // / "fir.store"(%new_i_iv, %i#1) : (i32, !fir.ref<i32>) -> ()
272+ // / "fir.store"(%new_j_iv, %j#1) : (i32, !fir.ref<i32>) -> ()
273+ // / ...
274+ // / })
275+ // / ```
276+ // /
277+ // / What happened to the loop nest is the following:
278+ // /
279+ // / * the innermost loop's entry block was updated from having one operand to
280+ // / having `n` operands where `n` is the number of loops in the nest,
281+ // /
282+ // / * the outer loop(s)' ops that update the IVs were sank inside the innermost
283+ // / loop (see the `"fir.store"(%new_i_iv, %i#1)` op above),
284+ // /
285+ // / * the innermost loop's entry block's arguments were mapped in order from the
286+ // / outermost to the innermost IV.
287+ // /
288+ // / With this IR change, we can directly inline the innermost loop's region into
289+ // / the newly generated `omp.loop_nest` op.
290+ // /
291+ // / Note that this function has a pre-condition that \p loopNest consists of
292+ // / perfectly nested loops; i.e. there are no in-between ops between 2 nested
293+ // / loops except for the ops to setup the inner loop's LB, UB, and step. These
294+ // / ops are handled/cloned by `genLoopNestClauseOps(..)`.
295+ void sinkLoopIVArgs (mlir::ConversionPatternRewriter &rewriter,
296+ looputils::LoopNestToIndVarMap &loopNest) {
297+ if (loopNest.size () <= 1 )
298+ return ;
299+
300+ fir::DoLoopOp innermostLoop = loopNest.back ().first ;
301+ mlir::Operation &innermostFirstOp = innermostLoop.getRegion ().front ().front ();
302+
303+ llvm::SmallVector<mlir::Type> argTypes;
304+ llvm::SmallVector<mlir::Location> argLocs;
305+
306+ for (auto &[doLoop, indVarInfo] : llvm::drop_end (loopNest)) {
307+ // Sink the IV update ops to the innermost loop. We need to do for all loops
308+ // except for the innermost one, hence the `drop_end` usage above.
309+ for (mlir::Operation *op : indVarInfo.indVarUpdateOps )
310+ op->moveBefore (&innermostFirstOp);
311+
312+ argTypes.push_back (doLoop.getInductionVar ().getType ());
313+ argLocs.push_back (doLoop.getInductionVar ().getLoc ());
314+ }
315+
316+ mlir::Region &innermmostRegion = innermostLoop.getRegion ();
317+ // Extend the innermost entry block with arguments to represent the outer IVs.
318+ innermmostRegion.addArguments (argTypes, argLocs);
319+
320+ unsigned idx = 1 ;
321+ // In reverse, remap the IVs of the loop nest from the old values to the new
322+ // ones. We do that in reverse since the first argument before this loop is
323+ // the old IV for the innermost loop. Therefore, we want to replace it first
324+ // before the old value (1st argument in the block) is remapped to be the IV
325+ // of the outermost loop in the nest.
326+ for (auto &[doLoop, _] : llvm::reverse (loopNest)) {
327+ doLoop.getInductionVar ().replaceAllUsesWith (
328+ innermmostRegion.getArgument (innermmostRegion.getNumArguments () - idx));
329+ ++idx;
330+ }
331+ }
196332} // namespace looputils
197333
198334class DoConcurrentConversion : public mlir ::OpConversionPattern<fir::DoLoopOp> {
@@ -219,6 +355,7 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
219355 " Some `do concurent` loops are not perfectly-nested. "
220356 " These will be serialized." );
221357
358+ looputils::sinkLoopIVArgs (rewriter, loopNest);
222359 mlir::IRMapping mapper;
223360 genParallelOp (doLoop.getLoc (), rewriter, loopNest, mapper);
224361 mlir::omp::LoopNestOperands loopNestClauseOps;
0 commit comments