-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[DependenceAnalysis] Extending SIV to handle separate loops #128782
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: Alireza Torabian (1997alireza) Changes: When there is a dependency between two memory instructions in separate loops, SIV will be able to test them and compute the direction and the distance of the dependency. Patch is 48.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128782.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 426ac757b4b0d..8e86c091c60a2 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -97,10 +97,11 @@ namespace llvm {
bool PeelFirst : 1; // Peeling the first iteration will break dependence.
bool PeelLast : 1; // Peeling the last iteration will break the dependence.
bool Splitable : 1; // Splitting the loop will break dependence.
+ bool SeparateLoops : 1; // Is performed across two separate loop nests.
const SCEV *Distance = nullptr; // NULL implies no distance available.
DVEntry()
: Direction(ALL), Scalar(true), PeelFirst(false), PeelLast(false),
- Splitable(false) {}
+ Splitable(false), SeparateLoops(false) {}
};
/// getSrc - Returns the source instruction for this dependence.
@@ -182,6 +183,10 @@ namespace llvm {
/// the dependence.
virtual bool isSplitable(unsigned Level) const { return false; }
+ /// inSeparateLoops - Returns true if this level is performed across
+ /// two separate loop nests.
+ virtual bool inSeparateLoops(unsigned Level) const { return false; }
+
/// isScalar - Returns true if a particular level is scalar; that is,
/// if no subscript in the source or destination mention the induction
/// variable associated with the loop at this level.
@@ -275,6 +280,10 @@ namespace llvm {
/// the dependence.
bool isSplitable(unsigned Level) const override;
+ /// inSeparateLoops - Returns true if this level is performed across
+ /// two separate loop nests.
+ bool inSeparateLoops(unsigned Level) const override;
+
/// isScalar - Returns true if a particular level is scalar; that is,
/// if no subscript in the source or destination mention the induction
/// variable associated with the loop at this level.
@@ -405,7 +414,8 @@ namespace llvm {
const SCEV *A;
const SCEV *B;
const SCEV *C;
- const Loop *AssociatedLoop;
+ const Loop *AssociatedSrcLoop;
+ const Loop *AssociatedDstLoop;
public:
/// isEmpty - Return true if the constraint is of kind Empty.
@@ -449,18 +459,25 @@ namespace llvm {
/// Otherwise assert.
const SCEV *getD() const;
- /// getAssociatedLoop - Returns the loop associated with this constraint.
- const Loop *getAssociatedLoop() const;
+ /// getAssociatedSrcLoop - Returns the source loop associated with this
+ /// constraint.
+ const Loop *getAssociatedSrcLoop() const;
+
+ /// getAssociatedDstLoop - Returns the destination loop associated with
+ /// this constraint.
+ const Loop *getAssociatedDstLoop() const;
/// setPoint - Change a constraint to Point.
- void setPoint(const SCEV *X, const SCEV *Y, const Loop *CurrentLoop);
+ void setPoint(const SCEV *X, const SCEV *Y, const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop);
/// setLine - Change a constraint to Line.
- void setLine(const SCEV *A, const SCEV *B,
- const SCEV *C, const Loop *CurrentLoop);
+ void setLine(const SCEV *A, const SCEV *B, const SCEV *C,
+ const Loop *CurrentSrcLoop, const Loop *CurrentDstLoop);
/// setDistance - Change a constraint to Distance.
- void setDistance(const SCEV *D, const Loop *CurrentLoop);
+ void setDistance(const SCEV *D, const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop);
/// setEmpty - Change a constraint to Empty.
void setEmpty();
@@ -473,6 +490,10 @@ namespace llvm {
void dump(raw_ostream &OS) const;
};
+ /// Returns true if two loops are the same or they have the same upperbound
+ /// and depth
+ bool areLoopsSimilar(const Loop *SrcLoop, const Loop *DstLoop) const;
+
/// establishNestingLevels - Examines the loop nesting of the Src and Dst
/// instructions and establishes their shared loops. Sets the variables
/// CommonLevels, SrcLevels, and MaxLevels.
@@ -523,10 +544,22 @@ namespace llvm {
/// e - 5
/// f - 6
/// g - 7 = MaxLevels
- void establishNestingLevels(const Instruction *Src,
- const Instruction *Dst);
-
- unsigned CommonLevels, SrcLevels, MaxLevels;
+ /// If ConsiderSeparateLoops is true then we also want to consider similar
+ /// seperate loops. Assume that loop nests at level c and e are similar,
+ /// meaning that they have the same upperbound and depth. Then we consider
+ /// them as a common level.
+ /// a - 1
+ /// b - 2
+ /// <c, e> - 3 = CommonLevels
+ /// d - 4 = SrcLevels
+ /// f - 5
+ /// g - 6 = MaxLevels
+ /// SeparateLevels means that how many of the last common levels are
+ /// separated, which is 1 in this case.
+ void establishNestingLevels(const Instruction *Src, const Instruction *Dst,
+ bool ConsiderSeparateLoops = false);
+
+ unsigned CommonLevels, SrcLevels, MaxLevels, SeparateLevels;
/// mapSrcLoop - Given one of the loops containing the source, return
/// its level index in our numbering scheme.
@@ -668,7 +701,8 @@ namespace llvm {
bool strongSIVtest(const SCEV *Coeff,
const SCEV *SrcConst,
const SCEV *DstConst,
- const Loop *CurrentLoop,
+ const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop,
unsigned Level,
FullDependence &Result,
Constraint &NewConstraint) const;
@@ -686,7 +720,8 @@ namespace llvm {
bool weakCrossingSIVtest(const SCEV *SrcCoeff,
const SCEV *SrcConst,
const SCEV *DstConst,
- const Loop *CurrentLoop,
+ const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop,
unsigned Level,
FullDependence &Result,
Constraint &NewConstraint,
@@ -705,7 +740,8 @@ namespace llvm {
const SCEV *DstCoeff,
const SCEV *SrcConst,
const SCEV *DstConst,
- const Loop *CurrentLoop,
+ const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop,
unsigned Level,
FullDependence &Result,
Constraint &NewConstraint) const;
@@ -723,7 +759,8 @@ namespace llvm {
bool weakZeroSrcSIVtest(const SCEV *DstCoeff,
const SCEV *SrcConst,
const SCEV *DstConst,
- const Loop *CurrentLoop,
+ const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop,
unsigned Level,
FullDependence &Result,
Constraint &NewConstraint) const;
@@ -741,7 +778,8 @@ namespace llvm {
bool weakZeroDstSIVtest(const SCEV *SrcCoeff,
const SCEV *SrcConst,
const SCEV *DstConst,
- const Loop *CurrentLoop,
+ const Loop *CurrentSrcLoop,
+ const Loop *CurrentDstLoop,
unsigned Level,
FullDependence &Result,
Constraint &NewConstraint) const;
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index dc0ed22dbcc0b..b947e92a6375b 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -104,6 +104,7 @@ STATISTIC(GCDindependence, "GCD independence");
STATISTIC(BanerjeeApplications, "Banerjee applications");
STATISTIC(BanerjeeIndependence, "Banerjee independence");
STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+STATISTIC(SeparateLoopsConsidered, "Separate loops considered");
static cl::opt<bool>
Delinearize("da-delinearize", cl::init(true), cl::Hidden,
@@ -377,6 +378,13 @@ bool FullDependence::isSplitable(unsigned Level) const {
}
+// Returns true if this level is performed across two separate loop nests.
+bool FullDependence::inSeparateLoops(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].SeparateLoops;
+}
+
+
//===----------------------------------------------------------------------===//
// DependenceInfo::Constraint methods
@@ -431,37 +439,52 @@ const SCEV *DependenceInfo::Constraint::getD() const {
}
-// Returns the loop associated with this constraint.
-const Loop *DependenceInfo::Constraint::getAssociatedLoop() const {
+// Returns the source loop associated with this constraint.
+const Loop *DependenceInfo::Constraint::getAssociatedSrcLoop() const {
+ assert((Kind == Distance || Kind == Line || Kind == Point) &&
+ "Kind should be Distance, Line, or Point");
+ return AssociatedSrcLoop;
+}
+
+
+// Returns the destination loop associated with this constraint.
+const Loop *DependenceInfo::Constraint::getAssociatedDstLoop() const {
assert((Kind == Distance || Kind == Line || Kind == Point) &&
"Kind should be Distance, Line, or Point");
- return AssociatedLoop;
+ return AssociatedDstLoop;
}
+
void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y,
- const Loop *CurLoop) {
+ const Loop *CurSrcLoop,
+ const Loop *CurDstLoop) {
Kind = Point;
A = X;
B = Y;
- AssociatedLoop = CurLoop;
+ AssociatedSrcLoop = CurSrcLoop;
+ AssociatedDstLoop = CurDstLoop;
}
void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB,
- const SCEV *CC, const Loop *CurLoop) {
+ const SCEV *CC, const Loop *CurSrcLoop,
+ const Loop *CurDstLoop) {
Kind = Line;
A = AA;
B = BB;
C = CC;
- AssociatedLoop = CurLoop;
+ AssociatedSrcLoop = CurSrcLoop;
+ AssociatedDstLoop = CurDstLoop;
}
void DependenceInfo::Constraint::setDistance(const SCEV *D,
- const Loop *CurLoop) {
+ const Loop *CurSrcLoop,
+ const Loop *CurDstLoop) {
Kind = Distance;
A = SE->getOne(D->getType());
B = SE->getNegativeSCEV(A);
C = SE->getNegativeSCEV(D);
- AssociatedLoop = CurLoop;
+ AssociatedSrcLoop = CurSrcLoop;
+ AssociatedDstLoop = CurDstLoop;
}
void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }
@@ -608,8 +631,8 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
++DeltaSuccesses;
return true;
}
- if (const SCEVConstant *CUB =
- collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
+ if (const SCEVConstant *CUB = collectConstantUpperBound(
+ X->getAssociatedSrcLoop(), Prod1->getType())) {
const APInt &UpperBound = CUB->getAPInt();
LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
@@ -620,7 +643,8 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
}
X->setPoint(SE->getConstant(Xq),
SE->getConstant(Yq),
- X->getAssociatedLoop());
+ X->getAssociatedSrcLoop(),
+ X->getAssociatedDstLoop());
++DeltaSuccesses;
return true;
}
@@ -656,6 +680,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
// For debugging purposes. Dumps a dependence to OS.
void Dependence::dump(raw_ostream &OS) const {
bool Splitable = false;
+ bool SeparatesStarted = false;
if (isConfused())
OS << "confused";
else {
@@ -672,6 +697,10 @@ void Dependence::dump(raw_ostream &OS) const {
unsigned Levels = getLevels();
OS << " [";
for (unsigned II = 1; II <= Levels; ++II) {
+ if (!SeparatesStarted && inSeparateLoops(II)) {
+ SeparatesStarted = true;
+ OS << "/ ";
+ }
if (isSplitable(II))
Splitable = true;
if (isPeelFirst(II))
@@ -758,6 +787,35 @@ bool isLoadOrStore(const Instruction *I) {
return false;
}
+// Returns true if two loops are the same or they have the same tripcount and
+// depth
+bool DependenceInfo::areLoopsSimilar(const Loop *SrcLoop,
+ const Loop *DstLoop) const {
+ if (SrcLoop == DstLoop)
+ return true;
+
+ if (SrcLoop->getLoopDepth() != DstLoop->getLoopDepth())
+ return false;
+
+ if (!SrcLoop || !SrcLoop->getLoopLatch() || !DstLoop ||
+ !DstLoop->getLoopLatch())
+ return false;
+
+ const SCEV *SrcUB, *DstUP;
+ if (SE->hasLoopInvariantBackedgeTakenCount(SrcLoop))
+ SrcUB = SE->getBackedgeTakenCount(SrcLoop);
+ if (SE->hasLoopInvariantBackedgeTakenCount(DstLoop))
+ DstUP = SE->getBackedgeTakenCount(DstLoop);
+
+ if (SrcUB == nullptr || DstUP == nullptr)
+ return false;
+
+ if (SE->isKnownPredicate(ICmpInst::ICMP_EQ, SrcUB, DstUP))
+ return true;
+
+ return false;
+}
+
// Examines the loop nesting of the Src and Dst
// instructions and establishes their shared loops. Sets the variables
@@ -809,8 +867,21 @@ bool isLoadOrStore(const Instruction *I) {
// e - 5
// f - 6
// g - 7 = MaxLevels
+// If ConsiderSeparateLoops is true then we also want to consider similar
+// seperate loops. Assume that loop nests at level c and e are similar,
+// meaning that they have the same tripcount and depth. Then we consider
+// them as a common level.
+// a - 1
+// b - 2
+// <c, e> - 3 = CommonLevels
+// d - 4 = SrcLevels
+// f - 5
+// g - 6 = MaxLevels
+// SeparateLevels means that how many of the last common levels are
+// separated, which is 1 in this case.
void DependenceInfo::establishNestingLevels(const Instruction *Src,
- const Instruction *Dst) {
+ const Instruction *Dst,
+ bool ConsiderSeparateLoops) {
const BasicBlock *SrcBlock = Src->getParent();
const BasicBlock *DstBlock = Dst->getParent();
unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
@@ -819,6 +890,7 @@ void DependenceInfo::establishNestingLevels(const Instruction *Src,
const Loop *DstLoop = LI->getLoopFor(DstBlock);
SrcLevels = SrcLevel;
MaxLevels = SrcLevel + DstLevel;
+ SeparateLevels = 0;
while (SrcLevel > DstLevel) {
SrcLoop = SrcLoop->getParentLoop();
SrcLevel--;
@@ -827,11 +899,23 @@ void DependenceInfo::establishNestingLevels(const Instruction *Src,
DstLoop = DstLoop->getParentLoop();
DstLevel--;
}
- while (SrcLoop != DstLoop) {
- SrcLoop = SrcLoop->getParentLoop();
- DstLoop = DstLoop->getParentLoop();
- SrcLevel--;
- }
+ if (ConsiderSeparateLoops) {
+ while (!areLoopsSimilar(SrcLoop, DstLoop)) {
+ SrcLoop = SrcLoop->getParentLoop();
+ DstLoop = DstLoop->getParentLoop();
+ SrcLevel--;
+ }
+ while (SrcLoop != DstLoop) {
+ SrcLoop = SrcLoop->getParentLoop();
+ DstLoop = DstLoop->getParentLoop();
+ SeparateLevels++;
+ }
+ } else
+ while (SrcLoop != DstLoop) {
+ SrcLoop = SrcLoop->getParentLoop();
+ DstLoop = DstLoop->getParentLoop();
+ SrcLevel--;
+ }
CommonLevels = SrcLevel;
MaxLevels -= CommonLevels;
}
@@ -1223,8 +1307,9 @@ bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
//
// Return true if dependence disproved.
bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
- const SCEV *DstConst, const Loop *CurLoop,
- unsigned Level, FullDependence &Result,
+ const SCEV *DstConst, const Loop *CurSrcLoop,
+ const Loop *CurDstLoop, unsigned Level,
+ FullDependence &Result,
Constraint &NewConstraint) const {
LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
@@ -1242,7 +1327,8 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
// check that |Delta| < iteration count
- if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ if (const SCEV *UpperBound =
+ collectUpperBound(CurSrcLoop, Delta->getType())) {
LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
const SCEV *AbsDelta =
@@ -1275,7 +1361,8 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
return true;
}
Result.DV[Level].Distance = SE->getConstant(Distance);
- NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
+ NewConstraint.setDistance(SE->getConstant(Distance), CurSrcLoop,
+ CurDstLoop);
if (Distance.sgt(0))
Result.DV[Level].Direction &= Dependence::DVEntry::LT;
else if (Distance.slt(0))
@@ -1287,7 +1374,7 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
else if (Delta->isZero()) {
// since 0/X == 0
Result.DV[Level].Distance = Delta;
- NewConstraint.setDistance(Delta, CurLoop);
+ NewConstraint.setDistance(Delta, CurSrcLoop, CurDstLoop);
Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
++StrongSIVsuccesses;
}
@@ -1295,13 +1382,12 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
if (Coeff->isOne()) {
LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
Result.DV[Level].Distance = Delta; // since X/1 == X
- NewConstraint.setDistance(Delta, CurLoop);
+ NewConstraint.setDistance(Delta, CurSrcLoop, CurDstLoop);
}
else {
Result.Consistent = false;
- NewConstraint.setLine(Coeff,
- SE->getNegativeSCEV(Coeff),
- SE->getNegativeSCEV(Delta), CurLoop);
+ NewConstraint.setLine(Coeff, SE->getNegativeSCEV(Coeff),
+ SE->getNegativeSCEV(Delta), CurSrcLoop, CurDstLoop);
}
// maybe we can get a useful direction
@@ -1360,8 +1446,9 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
// Return true if dependence disproved.
bool DependenceInfo::weakCrossingSIVtest(
const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,
- const Loop *CurLoop, unsigned Level, FullDependence &Result,
- Constraint &NewConstraint, const SCEV *&SplitIter) const {
+ const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
+ FullDependence &Result, Constraint &NewConstraint,
+ const SCEV *&SplitIter) const {
LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
@@ -1372,7 +1459,7 @@ bool DependenceInfo::weakCrossingSIVtest(
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
- NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
+ NewConstraint.setLine(Coeff, Coeff, Delta, CurSrcLoop, CurDstLoop);
if (Delta->isZero()) {
Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
@@ -1420,7 +1507,8 @@ bool DependenceInfo::weakCrossingSIVtest(
// We're certain that Delta > 0 and ConstCoeff > 0.
// Check Delta/(2*ConstCoeff) against ...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
68d430c
to
f840335
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Effectively, with SeparateLoops=true, this will handle loops as if they are fused.
But it also means that the result is plain wrong for non-fused loops. E.g. the dependence distance `d` computed by e.g. `strongSIVtest` may result in `d > 0`, where in reality all the instances of the write occur before the read because their loops are sequential, not nested.
Another concern is that the analysis makes the decision on how loop fusion occurs. The FuseLoops pass may want to fuse loops with non-equal trip counts; then it has to make the decision which iterations are executed with which ones. Even in cases where the trip count matches, such as
for (int i = 0; i < n; +=1)
A[i+1] += 1;
for (int i = 0; i < n; +=1)
A[i] += 2;
loop fusion would optimally be
for (int i = 0; i < n+1; +=1) {
if (i > 0) A[i] += 1;
if (i < n) A[i] += 2;
}
or after LoopBoundSplitPass etc.
A[0] += 2;
for (int i = 0; i < n+1; +=1)
A[i] += 3;
A[n] += 1;
i.e. not as naive as DA would assume. Ideally, we would choose an approach that allows us to extend FuseLoops over time.
I think instead of a `SeparateLoops` parameter, DA should receive the info which loops are considered to be fused from FuseLoops -- otherwise they might disagree. "SeparateLoops" isn't a good name anyway. It goes back to the old problem that we want to analyze the result of an optimization without the optimization having been applied first. It would be great if we could leave pass-specific concerns out of DA itself; it does not scale well with the number of passes, but I concede that sometimes it might be a pragmatic solution.
/// It's used to help calculate distinct loops referenced by the | ||
/// destination. Here's the map from loops to levels: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nit] Please avoid unrelated changes
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
f840335
to
3cfd2e0
Compare
Good point! To avoid the confusion among the original distances or directions and the new information we provide by this patch, now I will provide them in a different array. I added a new array of
Loop fusion or any other optimization pass that wants to use the analysis results can decide how to use them. For the case you mentioned, loop fusion can peel the iterations first to make the trip counts the same and then apply DA.
We prefer the DA to provide information across two loops, enabling loop fusion to identify dependencies before applying the optimization. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be clear: I don't yet think this is worth integrating for a special case of loop fusion, especially if we don't even have the corresponding PR that makes use of this yet.
;; for (long int i = 0; i < n; i++) { | ||
;; for (long int j = 0; j < n; j++) { | ||
;; for (long int k = 0; k < n; k++) { | ||
;; A[i][j][k] = i; | ||
;; } | ||
;; for (long int k = 0; k < n; k++) { | ||
;; *B++ = A[i + 3][j + 2][k + 1]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the same case as @p2
from PreliminaryNoValidityCheckFixedSize.ll ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is replaced with another test case in which there are two separate levels now.
// MIV is not handled yet on separate loops; check if there is any MIV test | ||
for (unsigned P = 0; P < Pairs; ++P) { | ||
Pair[P].Loops.resize(MaxLevels + 1); | ||
auto classification = classifyPair( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[Style] Local variables start with capital letters; No Almost-Always-Auto
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
// a - 1 | ||
// b - 2 = CommonLevels | ||
// <c, e> - 3 : A SeparateLevel | ||
// d - 4 = SrcLevels | ||
// f - 6 | ||
// g - 7 = MaxLevels |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What happened with level 5?
There are lots of references to the loop numbering scheme other than SIV that don't account for the change, e.g. `getSplitIteration`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have now updated my changes such that they do not affect the loop numbering scheme anymore. This API now only provides one additional piece of information, which is `SeparateLevels`.
auto classification = classifyPair( | ||
Pair[P].Src, LI->getLoopFor(Src->getParent()), Pair[P].Dst, | ||
LI->getLoopFor(Dst->getParent()), Pair[P].Loops); | ||
if (classification == Subscript::MIV) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`MIV` gets its special treatment here, with reverting to the old scheme, but what about RDIV, ZIV?
`classifyPair` itself relies on the old counting scheme, and will classify e.g. what was RDIV before to SIV.
There is also the case that with non-fused interpretation, we actually might be able to resolve dependencies, but do not with ConsiderSeparateLoops. What ensures that does not happen?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SIV, RDIV or ZIV are able to handle pairs from different loops. RDIV and ZIV could do that before and SIV is able to handle it by this patch.
I cannot see a case where we may miss resolving any dependencies by considering separate loops optimization. Could you please clarify on that?
// the separate level extraction at the end of the depends api we have | ||
// a - 1 | ||
// b - 2 = CommonLevels | ||
// <c, e> - 3 : A SeparateLevel |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// <c, e> - 3 : A SeparateLevel | |
// <c, e> - 3 = SeparateLevels |
This introduced 3 different meanings of levels and it is not clear what interpretation at what point is the "correct" one. Better integrate into the text above, where SeparateLevels is zero unless ConsiderSeparateLoops.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed the confusion as mentioned in another comment above.
assert(Levels < Level && Level <= Levels + SeparateLevels && | ||
"Separate level out of range"); | ||
return DVSeparate[Level - Levels - 1].Direction; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not introduce a helper function that returns the correct index into `DVSeparate` so you don't need the if/else here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the comment. Added.
3cfd2e0
to
9d511f5
Compare
9d511f5
to
8e263a3
Compare
Thanks Michael for your time and comments. I tried to resolve them and provide a more clean patch along with changes applied to the loop fusion pass to use the info provided by DA. Please take a look and let me know what you think. I apologize for the delay, I was caught up with some other projects. |
b3df426
to
4420416
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are several unrelated formatting changes in there. If it is easier, we can commit a NFC clang-format change so you don't have to worry about clang-format introducing changes anymore.
With not seeing the loop fusion code, I meant to open a separate commit that modifies loop fusion, so this PR does mix two concerns. See https://llvm.org/docs/GitHub.html#stacked-pull-requests.
@@ -825,16 +874,23 @@ void DependenceInfo::establishNestingLevels(const Instruction *Src, | |||
DstLoop = DstLoop->getParentLoop(); | |||
DstLevel--; | |||
} | |||
// find the first separate similar level |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This goes from innermost loops to outer ones, and stops at the innermost unequal one. But what if the second-innermost is equal and just the innermost is not:
for (int i = 0; i < 42; ++i) { // fusable
for (int j = 0; j < 21; ++j) { // not fusable
}
}
for (int i = 0; i < 42; ++i) { // fusable
for (int j = 0; j < 42; ++j) {
}
}
I think the i-loop should be a common level, just the j-loops not.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch! It is fixed now in this code fragment.
/// SeparateLevels counts the number of loop levels after the common levels | ||
/// that are not identical but are considered similar. Two levels are | ||
/// considered similar if they have the same trip count and the same | ||
/// nesting depth. | ||
/// For example, if loops `c` and `e` are similar, then they contribute to | ||
/// the SeparateLevels count and SeparateLevels is set to 1. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not messing up with the level count is a good thing.
/// this loop will break this dependence. | ||
bool isPeelFirst(unsigned Level) const override; | ||
/// this loop will break this dependence. If Separate is set to true, | ||
/// information about a separate level is provided. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is nothing in the header file that even explains what a "separate level" actually is. Rather than having this sentence not saying anything here, consider central documentation of what it means, e.g. as a class comment.
Could contain:
- Number of loops inside common loops that are "similar" (+definition of similar)
- Have different `llvm::Loop` objects but can be interpreted as a single fused loop with `Separate=true`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comment is added in the header file.
@@ -20,7 +20,7 @@ define void @p2(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp { | |||
; CHECK-NEXT: Src: store i64 %i.011, ptr %arrayidx8, align 8 --> Dst: store i64 %i.011, ptr %arrayidx8, align 8 | |||
; CHECK-NEXT: da analyze - none! | |||
; CHECK-NEXT: Src: store i64 %i.011, ptr %arrayidx8, align 8 --> Dst: %0 = load i64, ptr %arrayidx17, align 8 | |||
; CHECK-NEXT: da analyze - flow [-3 -2]! | |||
; CHECK-NEXT: da analyze - flow [-3 -2 / -1]! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rather than merging two DA interpretations into the same output, have you considered keeping them separate for better understandability? E.g.:
da analyze - flow [-3 -2]! / assuming 1 fused loop: [-3 -2 -1]!
Because of how FileCheck works, this test wouldn't even need to be changed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Applied.
540703a
to
17552ab
Compare
When there is a dependency between two memory instructions in separate loops, SIV will be able to test them and compute the direction and the distance of the dependency.
17552ab
to
21ef00f
Compare
The loop fusion pass will use the information provided by DA to detect loop-carried dependencies and fuse the loops if it is legal.
A separate commit and PR has been created for the changes applied to the loop fusion pass. |
Do you mean 1997alireza#1 ? You already merged it into this PR so https://github.com/llvm/llvm-project/pull/128782/files includes the LoopFuse changes again. The point of Stacked PRs is to not merge them. |
When there is a dependency between two memory instructions in separate loops, SIV will be able to test them and compute the direction and the distance of the dependency. The loop fusion pass will use this information to detect loop-carried dependencies and fuse the loops if it is legal.