9
9
#include " flang/Optimizer/Dialect/FIROps.h"
10
10
#include " flang/Optimizer/OpenMP/Passes.h"
11
11
#include " flang/Optimizer/OpenMP/Utils.h"
12
+ #include " mlir/Analysis/SliceAnalysis.h"
12
13
#include " mlir/Dialect/OpenMP/OpenMPDialect.h"
13
14
#include " mlir/Transforms/DialectConversion.h"
15
+ #include " mlir/Transforms/RegionUtils.h"
14
16
15
17
namespace flangomp {
16
18
#define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS
@@ -21,6 +23,131 @@ namespace flangomp {
21
23
#define DBGS () (llvm::dbgs() << " [" DEBUG_TYPE << " ]: " )
22
24
23
25
namespace {
26
+ namespace looputils {
27
+ using LoopNest = llvm::SetVector<fir::DoLoopOp>;
28
+
29
+ // / Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
30
+ // / there are no operations in \p outerloop's body other than:
31
+ // /
32
+ // / 1. the operations needed to assign/update \p outerLoop's induction variable.
33
+ // / 2. \p innerLoop itself.
34
+ // /
35
+ // / \p return true if \p innerLoop is perfectly nested inside \p outerLoop
36
+ // / according to the above definition.
37
+ bool isPerfectlyNested (fir::DoLoopOp outerLoop, fir::DoLoopOp innerLoop) {
38
+ mlir::ForwardSliceOptions forwardSliceOptions;
39
+ forwardSliceOptions.inclusive = true ;
40
+ // The following will be used as an example to clarify the internals of this
41
+ // function:
42
+ // ```
43
+ // 1. fir.do_loop %i_idx = %34 to %36 step %c1 unordered {
44
+ // 2. %i_idx_2 = fir.convert %i_idx : (index) -> i32
45
+ // 3. fir.store %i_idx_2 to %i_iv#1 : !fir.ref<i32>
46
+ //
47
+ // 4. fir.do_loop %j_idx = %37 to %39 step %c1_3 unordered {
48
+ // 5. %j_idx_2 = fir.convert %j_idx : (index) -> i32
49
+ // 6. fir.store %j_idx_2 to %j_iv#1 : !fir.ref<i32>
50
+ // ... loop nest body, possible uses %i_idx ...
51
+ // }
52
+ // }
53
+ // ```
54
+ // In this example, the `j` loop is perfectly nested inside the `i` loop and
55
+ // below is how we find that.
56
+
57
+ // We don't care about the outer-loop's induction variable's uses within the
58
+ // inner-loop, so we filter out these uses.
59
+ //
60
+ // This filter tells `getForwardSlice` (below) to only collect operations
61
+ // which produce results defined above (i.e. outside) the inner-loop's body.
62
+ //
63
+ // Since `outerLoop.getInductionVar()` is a block argument (to the
64
+ // outer-loop's body), the filter effectively collects uses of
65
+ // `outerLoop.getInductionVar()` inside the outer-loop but outside the
66
+ // inner-loop.
67
+ forwardSliceOptions.filter = [&](mlir::Operation *op) {
68
+ return mlir::areValuesDefinedAbove (op->getResults (), innerLoop.getRegion ());
69
+ };
70
+
71
+ llvm::SetVector<mlir::Operation *> indVarSlice;
72
+ // The forward slice of the `i` loop's IV will be the 2 ops in line 1 & 2
73
+ // above. Uses of `%i_idx` inside the `j` loop are not collected because of
74
+ // the filter.
75
+ mlir::getForwardSlice (outerLoop.getInductionVar (), &indVarSlice,
76
+ forwardSliceOptions);
77
+ llvm::DenseSet<mlir::Operation *> indVarSet (indVarSlice.begin (),
78
+ indVarSlice.end ());
79
+
80
+ llvm::DenseSet<mlir::Operation *> outerLoopBodySet;
81
+ // The following walk collects ops inside `outerLoop` that are **not**:
82
+ // * the outer-loop itself,
83
+ // * or the inner-loop,
84
+ // * or the `fir.result` op (the outer-loop's terminator).
85
+ //
86
+ // For the above example, this will also populate `outerLoopBodySet` with ops
87
+ // in line 1 & 2 since we skip the `i` loop, the `j` loop, and the terminator.
88
+ outerLoop.walk <mlir::WalkOrder::PreOrder>([&](mlir::Operation *op) {
89
+ if (op == outerLoop)
90
+ return mlir::WalkResult::advance ();
91
+
92
+ if (op == innerLoop)
93
+ return mlir::WalkResult::skip ();
94
+
95
+ if (mlir::isa<fir::ResultOp>(op))
96
+ return mlir::WalkResult::advance ();
97
+
98
+ outerLoopBodySet.insert (op);
99
+ return mlir::WalkResult::advance ();
100
+ });
101
+
102
+ // If `outerLoopBodySet` ends up having the same ops as `indVarSet`, then
103
+ // `outerLoop` only contains ops that setup its induction variable +
104
+ // `innerLoop` + the `fir.result` terminator. In other words, `innerLoop` is
105
+ // perfectly nested inside `outerLoop`.
106
+ bool result = (outerLoopBodySet == indVarSet);
107
+ mlir::Location loc = outerLoop.getLoc ();
108
+ LLVM_DEBUG (DBGS () << " Loop pair starting at location " << loc << " is"
109
+ << (result ? " " : " not" ) << " perfectly nested\n " );
110
+
111
+ return result;
112
+ }
113
+
114
+ // / Starting with `currentLoop` collect a perfectly nested loop nest, if any.
115
+ // / This function collects as much as possible loops in the nest; it case it
116
+ // / fails to recognize a certain nested loop as part of the nest it just returns
117
+ // / the parent loops it discovered before.
118
+ mlir::LogicalResult collectLoopNest (fir::DoLoopOp currentLoop,
119
+ LoopNest &loopNest) {
120
+ assert (currentLoop.getUnordered ());
121
+
122
+ while (true ) {
123
+ loopNest.insert (currentLoop);
124
+ llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
125
+
126
+ for (auto nestedLoop : currentLoop.getRegion ().getOps <fir::DoLoopOp>())
127
+ if (nestedLoop.getUnordered ())
128
+ unorderedLoops.push_back (nestedLoop);
129
+
130
+ if (unorderedLoops.empty ())
131
+ break ;
132
+
133
+ // Having more than one unordered loop means that we are not dealing with a
134
+ // perfect loop nest (i.e. a mulit-range `do concurrent` loop); which is the
135
+ // case we are after here.
136
+ if (unorderedLoops.size () > 1 )
137
+ return mlir::failure ();
138
+
139
+ fir::DoLoopOp nestedUnorderedLoop = unorderedLoops.front ();
140
+
141
+ if (!isPerfectlyNested (currentLoop, nestedUnorderedLoop))
142
+ return mlir::failure ();
143
+
144
+ currentLoop = nestedUnorderedLoop;
145
+ }
146
+
147
+ return mlir::success ();
148
+ }
149
+ } // namespace looputils
150
+
24
151
class DoConcurrentConversion : public mlir ::OpConversionPattern<fir::DoLoopOp> {
25
152
public:
26
153
using mlir::OpConversionPattern<fir::DoLoopOp>::OpConversionPattern;
@@ -31,6 +158,14 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
31
158
mlir::LogicalResult
32
159
matchAndRewrite (fir::DoLoopOp doLoop, OpAdaptor adaptor,
33
160
mlir::ConversionPatternRewriter &rewriter) const override {
161
+ looputils::LoopNest loopNest;
162
+ bool hasRemainingNestedLoops =
163
+ failed (looputils::collectLoopNest (doLoop, loopNest));
164
+ if (hasRemainingNestedLoops)
165
+ mlir::emitWarning (doLoop.getLoc (),
166
+ " Some `do concurent` loops are not perfectly-nested. "
167
+ " These will be serialized." );
168
+
34
169
// TODO This will be filled in with the next PRs that upstreams the rest of
35
170
// the ROCm implementation.
36
171
return mlir::success ();
0 commit comments