-
Notifications
You must be signed in to change notification settings - Fork 14.1k
[VPlan] Consistently use (Part, 0) for first lane scalar values #80271
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f4dabdf
b08e892
f56e217
172dbf6
d2c51ec
82d74df
c6797e6
53f2937
a166da5
8b6fd60
865da64
f71f752
ddf5f75
d6538e0
6429fdb
89f7a80
567faea
d9760f1
d72a629
fa8f747
57b4229
8b48685
e038070
031df8e
4c2f243
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -259,9 +259,10 @@ struct VPTransformState { | |
DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars; | ||
} Data; | ||
|
||
/// Get the generated Value for the given VPValue \p Def and the given \p Part. | ||
/// \see set. | ||
Value *get(VPValue *Def, unsigned Part); | ||
/// Get the generated vector Value for a given VPValue \p Def and a given \p | ||
/// Part if \p IsScalar is false, otherwise return the generated scalar | ||
/// for \p Part. \See set. | ||
Value *get(VPValue *Def, unsigned Part, bool IsScalar = false); | ||
|
||
/// Get the generated Value for a given VPValue and given Part and Lane. | ||
Value *get(VPValue *Def, const VPIteration &Instance); | ||
|
@@ -282,14 +283,22 @@ struct VPTransformState { | |
I->second[Instance.Part][CacheIdx]; | ||
} | ||
|
||
/// Set the generated Value for a given VPValue and a given Part. | ||
void set(VPValue *Def, Value *V, unsigned Part) { | ||
/// Set the generated vector Value for a given VPValue and a given Part, if \p | ||
/// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0). | ||
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) { | ||
if (IsScalar) { | ||
set(Def, V, VPIteration(Part, 0)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: can early return, leaving the rest intact, consistent with get() above. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||
return; | ||
} | ||
assert((VF.isScalar() || V->getType()->isVectorTy()) && | ||
"scalar values must be stored as (Part, 0)"); | ||
if (!Data.PerPartOutput.count(Def)) { | ||
DataState::PerPartValuesTy Entry(UF); | ||
Data.PerPartOutput[Def] = Entry; | ||
} | ||
Data.PerPartOutput[Def][Part] = V; | ||
} | ||
|
||
/// Reset an existing vector value for \p Def and a given \p Part. | ||
void reset(VPValue *Def, Value *V, unsigned Part) { | ||
auto Iter = Data.PerPartOutput.find(Def); | ||
|
@@ -1376,6 +1385,13 @@ class VPScalarCastRecipe : public VPSingleDefRecipe { | |
|
||
/// Returns the result type of the cast. | ||
Type *getResultType() const { return ResultTy; } | ||
|
||
bool onlyFirstLaneUsed(const VPValue *Op) const override { | ||
// At the moment, only uniform codegen is implemented. | ||
assert(is_contained(operands(), Op) && | ||
"Op must be an operand of the recipe"); | ||
return true; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can be pushed separately but only testable with rest of patch? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes exactly. |
||
}; | ||
|
||
/// A recipe for widening Call instructions. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -279,11 +279,12 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, | |
Builder.SetCurrentDebugLocation(getDebugLoc()); | ||
|
||
if (Instruction::isBinaryOp(getOpcode())) { | ||
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); | ||
if (Part != 0 && vputils::onlyFirstPartUsed(this)) | ||
return State.get(this, 0); | ||
return State.get(this, 0, OnlyFirstLaneUsed); | ||
|
||
Value *A = State.get(getOperand(0), Part); | ||
Value *B = State.get(getOperand(1), Part); | ||
Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed); | ||
Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed); | ||
auto *Res = | ||
Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name); | ||
if (auto *I = dyn_cast<Instruction>(Res)) | ||
|
@@ -385,8 +386,8 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, | |
if (Part != 0) | ||
return nullptr; | ||
// First create the compare. | ||
Value *IV = State.get(getOperand(0), Part); | ||
Value *TC = State.get(getOperand(1), Part); | ||
Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true); | ||
Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true); | ||
Value *Cond = Builder.CreateICmpEQ(IV, TC); | ||
|
||
// Now create the branch. | ||
|
@@ -407,7 +408,7 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, | |
} | ||
case VPInstruction::ComputeReductionResult: { | ||
if (Part != 0) | ||
return State.get(this, 0); | ||
return State.get(this, 0, /*IsScalar*/ true); | ||
|
||
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary | ||
// and will be removed by breaking up the recipe further. | ||
|
@@ -424,7 +425,7 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, | |
Type *PhiTy = OrigPhi->getType(); | ||
VectorParts RdxParts(State.UF); | ||
for (unsigned Part = 0; Part < State.UF; ++Part) | ||
RdxParts[Part] = State.get(LoopExitingDef, Part); | ||
RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop()); | ||
|
||
// If the vector reduction can be performed in a smaller type, we truncate | ||
// then extend the loop exit value to enable InstCombine to evaluate the | ||
|
@@ -512,9 +513,15 @@ void VPInstruction::execute(VPTransformState &State) { | |
if (!hasResult()) | ||
continue; | ||
assert(GeneratedValue && "generateInstruction must produce a value"); | ||
State.set(this, GeneratedValue, Part); | ||
|
||
bool IsVector = GeneratedValue->getType()->isVectorTy(); | ||
State.set(this, GeneratedValue, Part, !IsVector); | ||
assert((IsVector || getOpcode() == VPInstruction::ComputeReductionResult || | ||
State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) && | ||
"scalar value but not only first lane used"); | ||
} | ||
} | ||
|
||
bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | ||
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); | ||
if (Instruction::isBinaryOp(getOpcode())) | ||
|
@@ -530,8 +537,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |
case VPInstruction::CalculateTripCountMinusVF: | ||
case VPInstruction::CanonicalIVIncrementForPart: | ||
case VPInstruction::BranchOnCount: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add case ComputeReductionResult? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ComputeReductionResult combines the partial reduction vector values, so it should use more than the first lane per operand. It produces a single scalar value though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ahh, right. Got confused with onlyFirstLaneDefined. |
||
// TODO: Cover additional operands. | ||
return getOperand(0) == Op; | ||
return true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is fine, can probably be pushed separately, if testable. (written before the separate push ;-) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Left as is for now, as this only is needed for this patch at the moment. |
||
}; | ||
llvm_unreachable("switch should return"); | ||
} | ||
|
@@ -1344,7 +1350,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) { | |
PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); | ||
} | ||
|
||
State.set(this, PartPtr, Part); | ||
State.set(this, PartPtr, Part, /*IsScalar*/ true); | ||
} | ||
} | ||
|
||
|
@@ -1640,7 +1646,7 @@ void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) { | |
EntryPart->addIncoming(Start, VectorPH); | ||
EntryPart->setDebugLoc(getDebugLoc()); | ||
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) | ||
State.set(this, EntryPart, Part); | ||
State.set(this, EntryPart, Part, /*IsScalar*/ true); | ||
} | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
|
@@ -1711,7 +1717,7 @@ void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent, | |
#endif | ||
|
||
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { | ||
Value *CanonicalIV = State.get(getOperand(0), 0); | ||
Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true); | ||
Type *STy = CanonicalIV->getType(); | ||
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); | ||
ElementCount VF = State.VF; | ||
|
@@ -1801,7 +1807,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) { | |
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { | ||
Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi"); | ||
EntryPart->insertBefore(HeaderBB->getFirstInsertionPt()); | ||
State.set(this, EntryPart, Part); | ||
State.set(this, EntryPart, Part, IsInLoop); | ||
} | ||
|
||
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); | ||
|
@@ -1833,7 +1839,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) { | |
} | ||
|
||
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { | ||
Value *EntryPart = State.get(this, Part); | ||
Value *EntryPart = State.get(this, Part, IsInLoop); | ||
// Make sure to add the reduction start value only to the | ||
// first unroll part. | ||
Value *StartVal = (Part == 0) ? StartV : Iden; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added, thanks!