17
17
#include " llvm/ADT/SmallSet.h"
18
18
#include " llvm/ADT/SmallVector.h"
19
19
#include " llvm/ADT/Statistic.h"
20
+ #include " llvm/ADT/StringMap.h"
20
21
#include " llvm/ADT/StringRef.h"
21
- #include " llvm/ADT/StringSet.h"
22
22
#include " llvm/Analysis/DependenceAnalysis.h"
23
23
#include " llvm/Analysis/LoopCacheAnalysis.h"
24
24
#include " llvm/Analysis/LoopInfo.h"
@@ -80,6 +80,21 @@ enum class RuleTy {
80
80
ForVectorization,
81
81
};
82
82
83
/// Records whether the direction vector(s) behind one row of the dependency
/// matrix were negated by normalization.
///
/// Only normalized direction vectors are managed and duplicates are
/// eliminated, so a single row may stand for an as-is vector, a negated
/// vector, or both. E.g., given the two direction vectors `[< =]` and
/// `[> =]`, normalization converts the latter into the former, so only
/// `[< =]` is retained in the final result. This status is what makes it
/// possible to restore the original direction from such a row.
struct NegatedStatus {
  /// True if a vector that normalization left untouched maps to this row.
  bool Original{false};
  /// True if a vector that normalization negated maps to this row.
  bool Negated{false};

  /// Return true if there is no negative direction dependency corresponding
  /// to \p Dir once the recorded negation is taken into account.
  bool isNonNegativeDir(char Dir) const;
};
97
+
83
98
} // end anonymous namespace
84
99
85
100
// Minimum loop depth supported.
@@ -126,9 +141,10 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
126
141
}
127
142
#endif
128
143
129
- static bool populateDependencyMatrix (CharMatrix &DepMatrix, unsigned Level,
130
- Loop *L, DependenceInfo *DI,
131
- ScalarEvolution *SE,
144
+ static bool populateDependencyMatrix (CharMatrix &DepMatrix,
145
+ std::vector<NegatedStatus> &NegStatusVec,
146
+ unsigned Level, Loop *L,
147
+ DependenceInfo *DI, ScalarEvolution *SE,
132
148
OptimizationRemarkEmitter *ORE) {
133
149
using ValueVector = SmallVector<Value *, 16 >;
134
150
@@ -167,7 +183,9 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
167
183
return false ;
168
184
}
169
185
ValueVector::iterator I, IE, J, JE;
170
- StringSet<> Seen;
186
+
187
+ // Track every direction vector found so far and map each one to its row index in DepMatrix.
188
+ StringMap<unsigned > Seen;
171
189
172
190
for (I = MemInstr.begin (), IE = MemInstr.end (); I != IE; ++I) {
173
191
for (J = I, JE = MemInstr.end (); J != JE; ++J) {
@@ -182,7 +200,8 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
182
200
assert (D->isOrdered () && " Expected an output, flow or anti dep." );
183
201
// If the direction vector is negative, normalize it to
184
202
// make it non-negative.
185
- if (D->normalize (SE))
203
+ bool Normalized = D->normalize (SE);
204
+ if (Normalized)
186
205
LLVM_DEBUG (dbgs () << " Negative dependence vector normalized.\n " );
187
206
LLVM_DEBUG (StringRef DepType =
188
207
D->isFlow () ? " flow" : D->isAnti () ? " anti" : " output" ;
@@ -214,8 +233,17 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
214
233
}
215
234
216
235
// Make sure we only add unique entries to the dependency matrix.
217
- if (Seen.insert (StringRef (Dep.data (), Dep.size ())).second )
236
+ unsigned Index = DepMatrix.size ();
237
+ auto [Ite, Inserted] =
238
+ Seen.try_emplace (StringRef (Dep.data (), Dep.size ()), Index);
239
+ if (Inserted) {
218
240
DepMatrix.push_back (Dep);
241
+ NegStatusVec.push_back (NegatedStatus{});
242
+ } else
243
+ Index = Ite->second ;
244
+
245
+ NegatedStatus &Status = NegStatusVec[Index];
246
+ (Normalized ? Status.Negated : Status.Original ) = true ;
219
247
}
220
248
}
221
249
}
@@ -400,6 +428,7 @@ class LoopInterchangeProfitability {
400
428
bool isProfitable (const Loop *InnerLoop, const Loop *OuterLoop,
401
429
unsigned InnerLoopId, unsigned OuterLoopId,
402
430
CharMatrix &DepMatrix,
431
+ const std::vector<NegatedStatus> &NegStatusVec,
403
432
const DenseMap<const Loop *, unsigned > &CostMap,
404
433
std::unique_ptr<CacheCost> &CC);
405
434
@@ -409,9 +438,10 @@ class LoopInterchangeProfitability {
409
438
const DenseMap<const Loop *, unsigned > &CostMap,
410
439
std::unique_ptr<CacheCost> &CC);
411
440
std::optional<bool > isProfitablePerInstrOrderCost ();
412
- std::optional<bool > isProfitableForVectorization (unsigned InnerLoopId,
413
- unsigned OuterLoopId,
414
- CharMatrix &DepMatrix);
441
+ std::optional<bool >
442
+ isProfitableForVectorization (unsigned InnerLoopId, unsigned OuterLoopId,
443
+ CharMatrix &DepMatrix,
444
+ const std::vector<NegatedStatus> &NegStatusVec);
415
445
Loop *OuterLoop;
416
446
Loop *InnerLoop;
417
447
@@ -503,8 +533,9 @@ struct LoopInterchange {
503
533
<< " \n " );
504
534
505
535
CharMatrix DependencyMatrix;
536
+ std::vector<NegatedStatus> NegStatusVec;
506
537
Loop *OuterMostLoop = *(LoopList.begin ());
507
- if (!populateDependencyMatrix (DependencyMatrix, LoopNestDepth,
538
+ if (!populateDependencyMatrix (DependencyMatrix, NegStatusVec, LoopNestDepth,
508
539
OuterMostLoop, DI, SE, ORE)) {
509
540
LLVM_DEBUG (dbgs () << " Populating dependency matrix failed\n " );
510
541
return false ;
@@ -543,8 +574,8 @@ struct LoopInterchange {
543
574
for (unsigned j = SelecLoopId; j > 0 ; j--) {
544
575
bool ChangedPerIter = false ;
545
576
for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
546
- bool Interchanged =
547
- processLoop (LoopList, i, i - 1 , DependencyMatrix , CostMap);
577
+ bool Interchanged = processLoop (LoopList, i, i - 1 , DependencyMatrix,
578
+ NegStatusVec , CostMap);
548
579
ChangedPerIter |= Interchanged;
549
580
Changed |= Interchanged;
550
581
}
@@ -559,6 +590,8 @@ struct LoopInterchange {
559
590
bool processLoop (SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
560
591
unsigned OuterLoopId,
561
592
std::vector<std::vector<char >> &DependencyMatrix,
593
+
594
+ const std::vector<NegatedStatus> &NegStatusVec,
562
595
const DenseMap<const Loop *, unsigned > &CostMap) {
563
596
Loop *OuterLoop = LoopList[OuterLoopId];
564
597
Loop *InnerLoop = LoopList[InnerLoopId];
@@ -572,7 +605,7 @@ struct LoopInterchange {
572
605
LLVM_DEBUG (dbgs () << " Loops are legal to interchange\n " );
573
606
LoopInterchangeProfitability LIP (OuterLoop, InnerLoop, SE, ORE);
574
607
if (!LIP.isProfitable (InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
575
- DependencyMatrix, CostMap, CC)) {
608
+ DependencyMatrix, NegStatusVec, CostMap, CC)) {
576
609
LLVM_DEBUG (dbgs () << " Interchanging loops not profitable.\n " );
577
610
return false ;
578
611
}
@@ -1197,27 +1230,71 @@ LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1197
1230
return std::nullopt;
1198
1231
}
1199
1232
1233
+ static char flipDirection (char Dir) {
1234
+ switch (Dir) {
1235
+ case ' <' :
1236
+ return ' >' ;
1237
+ case ' >' :
1238
+ return ' <' ;
1239
+ case ' =' :
1240
+ case ' I' :
1241
+ case ' *' :
1242
+ return Dir;
1243
+ default :
1244
+ llvm_unreachable (" Unknown direction" );
1245
+ }
1246
+ }
1247
+
1248
+ // / Ensure that there are no negative direction dependencies corresponding to \p
1249
+ // / Dir.
1250
+ bool NegatedStatus::isNonNegativeDir (char Dir) const {
1251
+ assert ((Original || Negated) && " Cannot restore the original direction" );
1252
+
1253
+ // If both flag is true, it means that there is both as-is and negated
1254
+ // direction. In this case only `=` or `I` don't have negative direction
1255
+ // dependency.
1256
+ if (Original && Negated)
1257
+ return Dir == ' =' || Dir == ' I' ;
1258
+
1259
+ char Restored = Negated ? flipDirection (Dir) : Dir;
1260
+ return Restored == ' =' || Restored == ' I' || Restored == ' <' ;
1261
+ }
1262
+
1200
1263
// / Return true if we can vectorize the loop specified by \p LoopId.
1201
- static bool canVectorize (const CharMatrix &DepMatrix, unsigned LoopId) {
1264
+ static bool canVectorize (const CharMatrix &DepMatrix,
1265
+ const std::vector<NegatedStatus> &NegStatusVec,
1266
+ unsigned LoopId) {
1267
+ // The loop can be vectorized if there are no negative dependencies. Consider
1268
+ // the dependency of `j` in the following example.
1269
+ //
1270
+ // Positive: ... = A[i][j] Negative: ... = A[i][j-1]
1271
+ // A[i][j-1] = ... A[i][j] = ...
1272
+ //
1273
+ // In the right case, vectorizing the loop can change the loaded value from
1274
+ // `A[i][j-1]`. At the moment we don't take into account the distance of the
1275
+ // dependency and vector width.
1276
+ // TODO: Considering the dependency distance and the vector width can give a
1277
+ // more accurate result. For example, the following loop can be vectorized if
1278
+ // the vector width is less than or equal to 4 x sizeof(A[0][0]).
1202
1279
for (unsigned I = 0 ; I != DepMatrix.size (); I++) {
1203
1280
char Dir = DepMatrix[I][LoopId];
1204
- if (Dir != ' I ' && Dir != ' = ' )
1281
+ if (!NegStatusVec[I]. isNonNegativeDir ( Dir) )
1205
1282
return false ;
1206
1283
}
1207
1284
return true ;
1208
1285
}
1209
1286
1210
1287
std::optional<bool > LoopInterchangeProfitability::isProfitableForVectorization (
1211
- unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
1212
- // If the outer loop is not loop independent it is not profitable to move
1213
- // this to inner position, since doing so would not enable inner loop
1214
- // parallelism .
1215
- if (!canVectorize (DepMatrix, OuterLoopId))
1288
+ unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix,
1289
+ const std::vector<NegatedStatus> &NegStatusVec) {
1290
+ // If the outer loop cannot be vectorized, it is not profitable to move this
1291
+ // to inner position .
1292
+ if (!canVectorize (DepMatrix, NegStatusVec, OuterLoopId))
1216
1293
return false ;
1217
1294
1218
- // If inner loop has dependence and outer loop is loop independent then it is
1295
+ // If inner loop cannot be vectorized and outer loop can be then it is
1219
1296
// profitable to interchange to enable inner loop parallelism.
1220
- if (!canVectorize (DepMatrix, InnerLoopId))
1297
+ if (!canVectorize (DepMatrix, NegStatusVec, InnerLoopId))
1221
1298
return true ;
1222
1299
1223
1300
// If both the inner and the outer loop can be vectorized, it is necessary to
@@ -1231,6 +1308,7 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1231
1308
bool LoopInterchangeProfitability::isProfitable (
1232
1309
const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1233
1310
unsigned OuterLoopId, CharMatrix &DepMatrix,
1311
+ const std::vector<NegatedStatus> &NegStatusVec,
1234
1312
const DenseMap<const Loop *, unsigned > &CostMap,
1235
1313
std::unique_ptr<CacheCost> &CC) {
1236
1314
// isProfitable() is structured to avoid endless loop interchange. If the
@@ -1252,8 +1330,8 @@ bool LoopInterchangeProfitability::isProfitable(
1252
1330
shouldInterchange = isProfitablePerInstrOrderCost ();
1253
1331
break ;
1254
1332
case RuleTy::ForVectorization:
1255
- shouldInterchange =
1256
- isProfitableForVectorization (InnerLoopId, OuterLoopId, DepMatrix);
1333
+ shouldInterchange = isProfitableForVectorization (InnerLoopId, OuterLoopId,
1334
+ DepMatrix, NegStatusVec );
1257
1335
break ;
1258
1336
}
1259
1337
0 commit comments