99#include " flang/Optimizer/Dialect/FIROps.h"
1010#include " flang/Optimizer/OpenMP/Passes.h"
1111#include " flang/Optimizer/OpenMP/Utils.h"
12+ #include " mlir/Analysis/SliceAnalysis.h"
1213#include " mlir/Dialect/OpenMP/OpenMPDialect.h"
1314#include " mlir/Transforms/DialectConversion.h"
15+ #include " mlir/Transforms/RegionUtils.h"
1416
1517namespace flangomp {
1618#define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS
@@ -21,6 +23,131 @@ namespace flangomp {
2123#define DBGS () (llvm::dbgs() << " [" DEBUG_TYPE << " ]: " )
2224
2325namespace {
26+ namespace looputils {
27+ using LoopNest = llvm::SetVector<fir::DoLoopOp>;
28+
29+ // / Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
30+ // / there are no operations in \p outerloop's body other than:
31+ // /
32+ // / 1. the operations needed to assign/update \p outerLoop's induction variable.
33+ // / 2. \p innerLoop itself.
34+ // /
35+ // / \p return true if \p innerLoop is perfectly nested inside \p outerLoop
36+ // / according to the above definition.
37+ bool isPerfectlyNested (fir::DoLoopOp outerLoop, fir::DoLoopOp innerLoop) {
38+ mlir::ForwardSliceOptions forwardSliceOptions;
39+ forwardSliceOptions.inclusive = true ;
40+ // The following will be used as an example to clarify the internals of this
41+ // function:
42+ // ```
43+ // 1. fir.do_loop %i_idx = %34 to %36 step %c1 unordered {
44+ // 2. %i_idx_2 = fir.convert %i_idx : (index) -> i32
45+ // 3. fir.store %i_idx_2 to %i_iv#1 : !fir.ref<i32>
46+ //
47+ // 4. fir.do_loop %j_idx = %37 to %39 step %c1_3 unordered {
48+ // 5. %j_idx_2 = fir.convert %j_idx : (index) -> i32
49+ // 6. fir.store %j_idx_2 to %j_iv#1 : !fir.ref<i32>
50+ // ... loop nest body, possible uses %i_idx ...
51+ // }
52+ // }
53+ // ```
54+ // In this example, the `j` loop is perfectly nested inside the `i` loop and
55+ // below is how we find that.
56+
57+ // We don't care about the outer-loop's induction variable's uses within the
58+ // inner-loop, so we filter out these uses.
59+ //
60+ // This filter tells `getForwardSlice` (below) to only collect operations
61+ // which produce results defined above (i.e. outside) the inner-loop's body.
62+ //
63+ // Since `outerLoop.getInductionVar()` is a block argument (to the
64+ // outer-loop's body), the filter effectively collects uses of
65+ // `outerLoop.getInductionVar()` inside the outer-loop but outside the
66+ // inner-loop.
67+ forwardSliceOptions.filter = [&](mlir::Operation *op) {
68+ return mlir::areValuesDefinedAbove (op->getResults (), innerLoop.getRegion ());
69+ };
70+
71+ llvm::SetVector<mlir::Operation *> indVarSlice;
72+ // The forward slice of the `i` loop's IV will be the 2 ops in line 1 & 2
73+ // above. Uses of `%i_idx` inside the `j` loop are not collected because of
74+ // the filter.
75+ mlir::getForwardSlice (outerLoop.getInductionVar (), &indVarSlice,
76+ forwardSliceOptions);
77+ llvm::DenseSet<mlir::Operation *> indVarSet (indVarSlice.begin (),
78+ indVarSlice.end ());
79+
80+ llvm::DenseSet<mlir::Operation *> outerLoopBodySet;
81+ // The following walk collects ops inside `outerLoop` that are **not**:
82+ // * the outer-loop itself,
83+ // * or the inner-loop,
84+ // * or the `fir.result` op (the outer-loop's terminator).
85+ //
86+ // For the above example, this will also populate `outerLoopBodySet` with ops
87+ // in line 1 & 2 since we skip the `i` loop, the `j` loop, and the terminator.
88+ outerLoop.walk <mlir::WalkOrder::PreOrder>([&](mlir::Operation *op) {
89+ if (op == outerLoop)
90+ return mlir::WalkResult::advance ();
91+
92+ if (op == innerLoop)
93+ return mlir::WalkResult::skip ();
94+
95+ if (mlir::isa<fir::ResultOp>(op))
96+ return mlir::WalkResult::advance ();
97+
98+ outerLoopBodySet.insert (op);
99+ return mlir::WalkResult::advance ();
100+ });
101+
102+ // If `outerLoopBodySet` ends up having the same ops as `indVarSet`, then
103+ // `outerLoop` only contains ops that setup its induction variable +
104+ // `innerLoop` + the `fir.result` terminator. In other words, `innerLoop` is
105+ // perfectly nested inside `outerLoop`.
106+ bool result = (outerLoopBodySet == indVarSet);
107+ mlir::Location loc = outerLoop.getLoc ();
108+ LLVM_DEBUG (DBGS () << " Loop pair starting at location " << loc << " is"
109+ << (result ? " " : " not" ) << " perfectly nested\n " );
110+
111+ return result;
112+ }
113+
114+ // / Starting with `currentLoop` collect a perfectly nested loop nest, if any.
115+ // / This function collects as much as possible loops in the nest; it case it
116+ // / fails to recognize a certain nested loop as part of the nest it just returns
117+ // / the parent loops it discovered before.
118+ mlir::LogicalResult collectLoopNest (fir::DoLoopOp currentLoop,
119+ LoopNest &loopNest) {
120+ assert (currentLoop.getUnordered ());
121+
122+ while (true ) {
123+ loopNest.insert (currentLoop);
124+ llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
125+
126+ for (auto nestedLoop : currentLoop.getRegion ().getOps <fir::DoLoopOp>())
127+ if (nestedLoop.getUnordered ())
128+ unorderedLoops.push_back (nestedLoop);
129+
130+ if (unorderedLoops.empty ())
131+ break ;
132+
133+ // Having more than one unordered loop means that we are not dealing with a
134+ // perfect loop nest (i.e. a mulit-range `do concurrent` loop); which is the
135+ // case we are after here.
136+ if (unorderedLoops.size () > 1 )
137+ return mlir::failure ();
138+
139+ fir::DoLoopOp nestedUnorderedLoop = unorderedLoops.front ();
140+
141+ if (!isPerfectlyNested (currentLoop, nestedUnorderedLoop))
142+ return mlir::failure ();
143+
144+ currentLoop = nestedUnorderedLoop;
145+ }
146+
147+ return mlir::success ();
148+ }
149+ } // namespace looputils
150+
24151class DoConcurrentConversion : public mlir ::OpConversionPattern<fir::DoLoopOp> {
25152public:
26153 using mlir::OpConversionPattern<fir::DoLoopOp>::OpConversionPattern;
@@ -31,6 +158,14 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
31158 mlir::LogicalResult
32159 matchAndRewrite (fir::DoLoopOp doLoop, OpAdaptor adaptor,
33160 mlir::ConversionPatternRewriter &rewriter) const override {
161+ looputils::LoopNest loopNest;
162+ bool hasRemainingNestedLoops =
163+ failed (looputils::collectLoopNest (doLoop, loopNest));
164+ if (hasRemainingNestedLoops)
165+ mlir::emitWarning (doLoop.getLoc (),
166+ " Some `do concurent` loops are not perfectly-nested. "
167+ " These will be serialzied." );
168+
34169 // TODO This will be filled in with the next PRs that upstreams the rest of
35170 // the ROCm implementaion.
36171 return mlir::success ();
0 commit comments