1111#include " flang/Optimizer/OpenMP/Utils.h"
1212#include " mlir/Analysis/SliceAnalysis.h"
1313#include " mlir/Dialect/OpenMP/OpenMPDialect.h"
14+ #include " mlir/IR/IRMapping.h"
1415#include " mlir/Transforms/DialectConversion.h"
1516#include " mlir/Transforms/RegionUtils.h"
1617
@@ -24,7 +25,82 @@ namespace flangomp {
2425
2526namespace {
2627namespace looputils {
27- using LoopNest = llvm::SetVector<fir::DoLoopOp>;
28+ // / Stores info needed about the induction/iteration variable for each `do
29+ // / concurrent` in a loop nest.
30+ struct InductionVariableInfo {
31+ // / the operation allocating memory for iteration variable,
32+ mlir::Operation *iterVarMemDef;
33+ };
34+
35+ using LoopNestToIndVarMap =
36+ llvm::MapVector<fir::DoLoopOp, InductionVariableInfo>;
37+
38+ // / Given an operation `op`, this returns true if one of `op`'s operands is
39+ // / "ultimately" the loop's induction variable. This helps in cases where the
40+ // / induction variable's use is "hidden" behind a convert/cast.
41+ // /
42+ // / For example, give the following loop:
43+ // / ```
44+ // / fir.do_loop %ind_var = %lb to %ub step %s unordered {
45+ // / %ind_var_conv = fir.convert %ind_var : (index) -> i32
46+ // / fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
47+ // / ...
48+ // / }
49+ // / ```
50+ // /
51+ // / If \p op is the `fir.store` operation, then this function will return true
52+ // / since the IV is the "ultimate" operand to the `fir.store` op through the
53+ // / `%ind_var_conv` -> `%ind_var` conversion sequence.
54+ // /
55+ // / For why this is useful, see its use in `findLoopIndVarMemDecl`.
56+ bool isIndVarUltimateOperand (mlir::Operation *op, fir::DoLoopOp doLoop) {
57+ while (op != nullptr && op->getNumOperands () > 0 ) {
58+ auto ivIt = llvm::find_if (op->getOperands (), [&](mlir::Value operand) {
59+ return operand == doLoop.getInductionVar ();
60+ });
61+
62+ if (ivIt != op->getOperands ().end ())
63+ return true ;
64+
65+ op = op->getOperand (0 ).getDefiningOp ();
66+ }
67+
68+ return false ;
69+ }
70+
71+ // / For the \p doLoop parameter, find the operation that declares its iteration
72+ // / variable or allocates memory for it.
73+ // /
74+ // / For example, give the following loop:
75+ // / ```
76+ // / ...
77+ // / %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ...
78+ // / ...
79+ // / fir.do_loop %ind_var = %lb to %ub step %s unordered {
80+ // / %ind_var_conv = fir.convert %ind_var : (index) -> i32
81+ // / fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
82+ // / ...
83+ // / }
84+ // / ```
85+ // /
86+ // / This function returns the `hlfir.declare` op for `%i`.
87+ mlir::Operation *findLoopIterationVarMemDecl (fir::DoLoopOp doLoop) {
88+ mlir::Value result = nullptr ;
89+ mlir::visitUsedValuesDefinedAbove (
90+ doLoop.getRegion (), [&](mlir::OpOperand *operand) {
91+ if (result)
92+ return ;
93+
94+ if (isIndVarUltimateOperand (operand->getOwner (), doLoop)) {
95+ assert (result == nullptr &&
96+ " loop can have only one induction variable" );
97+ result = operand->get ();
98+ }
99+ });
100+
101+ assert (result != nullptr && result.getDefiningOp () != nullptr );
102+ return result.getDefiningOp ();
103+ }
28104
29105// / Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
30106// / there are no operations in \p outerLoop's body other than:
@@ -116,11 +192,14 @@ bool isPerfectlyNested(fir::DoLoopOp outerLoop, fir::DoLoopOp innerLoop) {
116192// / fails to recognize a certain nested loop as part of the nest it just returns
117193// / the parent loops it discovered before.
118194mlir::LogicalResult collectLoopNest (fir::DoLoopOp currentLoop,
119- LoopNest &loopNest) {
195+ LoopNestToIndVarMap &loopNest) {
120196 assert (currentLoop.getUnordered ());
121197
122198 while (true ) {
123- loopNest.insert (currentLoop);
199+ loopNest.insert (
200+ {currentLoop,
201+ InductionVariableInfo{findLoopIterationVarMemDecl (currentLoop)}});
202+
124203 llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
125204
126205 for (auto nestedLoop : currentLoop.getRegion ().getOps <fir::DoLoopOp>())
@@ -152,26 +231,136 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
152231public:
153232 using mlir::OpConversionPattern<fir::DoLoopOp>::OpConversionPattern;
154233
155- DoConcurrentConversion (mlir::MLIRContext *context, bool mapToDevice)
156- : OpConversionPattern(context), mapToDevice(mapToDevice) {}
234+ DoConcurrentConversion (mlir::MLIRContext *context, bool mapToDevice,
235+ llvm::DenseSet<fir::DoLoopOp> &concurrentLoopsToSkip)
236+ : OpConversionPattern(context), mapToDevice(mapToDevice),
237+ concurrentLoopsToSkip (concurrentLoopsToSkip) {}
157238
158239 mlir::LogicalResult
159240 matchAndRewrite (fir::DoLoopOp doLoop, OpAdaptor adaptor,
160241 mlir::ConversionPatternRewriter &rewriter) const override {
161- looputils::LoopNest loopNest;
242+ looputils::LoopNestToIndVarMap loopNest;
162243 bool hasRemainingNestedLoops =
163244 failed (looputils::collectLoopNest (doLoop, loopNest));
164245 if (hasRemainingNestedLoops)
165246 mlir::emitWarning (doLoop.getLoc (),
166247 " Some `do concurent` loops are not perfectly-nested. "
167248 " These will be serialized." );
168249
169- // TODO This will be filled in with the next PRs that upstreams the rest of
170- // the ROCm implementaion.
250+ mlir::IRMapping mapper;
251+ genParallelOp (doLoop.getLoc (), rewriter, loopNest, mapper);
252+ mlir::omp::LoopNestOperands loopNestClauseOps;
253+ genLoopNestClauseOps (doLoop.getLoc (), rewriter, loopNest, mapper,
254+ loopNestClauseOps);
255+
256+ mlir::omp::LoopNestOp ompLoopNest =
257+ genWsLoopOp (rewriter, loopNest.back ().first , mapper, loopNestClauseOps,
258+ /* isComposite=*/ mapToDevice);
259+
260+ rewriter.eraseOp (doLoop);
261+
262+ // Mark `unordered` loops that are not perfectly nested to be skipped from
263+ // the legality check of the `ConversionTarget` since we are not interested
264+ // in mapping them to OpenMP.
265+ ompLoopNest->walk ([&](fir::DoLoopOp doLoop) {
266+ if (doLoop.getUnordered ()) {
267+ concurrentLoopsToSkip.insert (doLoop);
268+ }
269+ });
270+
171271 return mlir::success ();
172272 }
173273
274+ private:
275+ mlir::omp::ParallelOp genParallelOp (mlir::Location loc,
276+ mlir::ConversionPatternRewriter &rewriter,
277+ looputils::LoopNestToIndVarMap &loopNest,
278+ mlir::IRMapping &mapper) const {
279+ auto parallelOp = rewriter.create <mlir::omp::ParallelOp>(loc);
280+ rewriter.createBlock (¶llelOp.getRegion ());
281+ rewriter.setInsertionPoint (rewriter.create <mlir::omp::TerminatorOp>(loc));
282+
283+ genLoopNestIndVarAllocs (rewriter, loopNest, mapper);
284+ return parallelOp;
285+ }
286+
287+ void genLoopNestIndVarAllocs (mlir::ConversionPatternRewriter &rewriter,
288+ looputils::LoopNestToIndVarMap &loopNest,
289+ mlir::IRMapping &mapper) const {
290+
291+ for (auto &[_, indVarInfo] : loopNest)
292+ genInductionVariableAlloc (rewriter, indVarInfo.iterVarMemDef , mapper);
293+ }
294+
295+ mlir::Operation *
296+ genInductionVariableAlloc (mlir::ConversionPatternRewriter &rewriter,
297+ mlir::Operation *indVarMemDef,
298+ mlir::IRMapping &mapper) const {
299+ assert (
300+ indVarMemDef != nullptr &&
301+ " Induction variable memdef is expected to have a defining operation." );
302+
303+ llvm::SmallSetVector<mlir::Operation *, 2 > indVarDeclareAndAlloc;
304+ for (auto operand : indVarMemDef->getOperands ())
305+ indVarDeclareAndAlloc.insert (operand.getDefiningOp ());
306+ indVarDeclareAndAlloc.insert (indVarMemDef);
307+
308+ mlir::Operation *result;
309+ for (mlir::Operation *opToClone : indVarDeclareAndAlloc)
310+ result = rewriter.clone (*opToClone, mapper);
311+
312+ return result;
313+ }
314+
315+ void genLoopNestClauseOps (
316+ mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
317+ looputils::LoopNestToIndVarMap &loopNest, mlir::IRMapping &mapper,
318+ mlir::omp::LoopNestOperands &loopNestClauseOps) const {
319+ assert (loopNestClauseOps.loopLowerBounds .empty () &&
320+ " Loop nest bounds were already emitted!" );
321+
322+ auto populateBounds = [&](mlir::Value var,
323+ llvm::SmallVectorImpl<mlir::Value> &bounds) {
324+ bounds.push_back (var.getDefiningOp ()->getResult (0 ));
325+ };
326+
327+ for (auto &[doLoop, _] : loopNest) {
328+ populateBounds (doLoop.getLowerBound (), loopNestClauseOps.loopLowerBounds );
329+ populateBounds (doLoop.getUpperBound (), loopNestClauseOps.loopUpperBounds );
330+ populateBounds (doLoop.getStep (), loopNestClauseOps.loopSteps );
331+ }
332+
333+ loopNestClauseOps.loopInclusive = rewriter.getUnitAttr ();
334+ }
335+
336+ mlir::omp::LoopNestOp
337+ genWsLoopOp (mlir::ConversionPatternRewriter &rewriter, fir::DoLoopOp doLoop,
338+ mlir::IRMapping &mapper,
339+ const mlir::omp::LoopNestOperands &clauseOps,
340+ bool isComposite) const {
341+
342+ auto wsloopOp = rewriter.create <mlir::omp::WsloopOp>(doLoop.getLoc ());
343+ wsloopOp.setComposite (isComposite);
344+ rewriter.createBlock (&wsloopOp.getRegion ());
345+
346+ auto loopNestOp =
347+ rewriter.create <mlir::omp::LoopNestOp>(doLoop.getLoc (), clauseOps);
348+
349+ // Clone the loop's body inside the loop nest construct using the
350+ // mapped values.
351+ rewriter.cloneRegionBefore (doLoop.getRegion (), loopNestOp.getRegion (),
352+ loopNestOp.getRegion ().begin (), mapper);
353+
354+ mlir::Operation *terminator = loopNestOp.getRegion ().back ().getTerminator ();
355+ rewriter.setInsertionPointToEnd (&loopNestOp.getRegion ().back ());
356+ rewriter.create <mlir::omp::YieldOp>(terminator->getLoc ());
357+ rewriter.eraseOp (terminator);
358+
359+ return loopNestOp;
360+ }
361+
// Set from the pass's mapping kind (true for device mapping); forwarded as the
// `isComposite` flag when the workshare loop is generated.
174362 bool mapToDevice;
// Set owned by the creator of this pattern. `matchAndRewrite` adds the
// `unordered` loops found inside a generated loop nest; the conversion
// target's legality callback treats the loops in this set as legal.
363+ llvm::DenseSet<fir::DoLoopOp> &concurrentLoopsToSkip;
175364};
176365
177366class DoConcurrentConversionPass
@@ -200,16 +389,18 @@ class DoConcurrentConversionPass
200389 return ;
201390 }
202391
392+ llvm::DenseSet<fir::DoLoopOp> concurrentLoopsToSkip;
203393 mlir::RewritePatternSet patterns (context);
204394 patterns.insert <DoConcurrentConversion>(
205- context, mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device);
395+ context, mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device,
396+ concurrentLoopsToSkip);
206397 mlir::ConversionTarget target (*context);
207398 target.addDynamicallyLegalOp <fir::DoLoopOp>([&](fir::DoLoopOp op) {
208399 // The goal is to handle constructs that eventually get lowered to
209400 // `fir.do_loop` with the `unordered` attribute (e.g. array expressions).
210401 // Currently, this is only enabled for the `do concurrent` construct since
211402 // the pass runs early in the pipeline.
212- return !op.getUnordered ();
403+ return !op.getUnordered () || concurrentLoopsToSkip. contains (op) ;
213404 });
214405 target.markUnknownOpDynamicallyLegal (
215406 [](mlir::Operation *) { return true ; });
0 commit comments