-
Notifications
You must be signed in to change notification settings - Fork 11.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Use pointer to member 0 as VPInterleaveRecipe's pointer arg. #106431
Changes from 9 commits
ea52fe4
f23772b
5ad888a
d2fd005
3c896cb
0f2bdc0
e7c09a9
3f64c75
0b10868
45a5ea1
8a3cd0e
a361393
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -972,7 +972,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { | |||||
DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {} | ||||||
}; | ||||||
|
||||||
protected: | ||||||
struct GEPFlagsTy { | ||||||
char IsInBounds : 1; | ||||||
GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {} | ||||||
|
@@ -1323,6 +1322,13 @@ class VPInstruction : public VPRecipeWithIRFlags, | |||||
assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint"); | ||||||
} | ||||||
|
||||||
VPInstruction(VPValue *Ptr, VPValue *Offset, GEPFlagsTy Flags = {false}, | ||||||
DebugLoc DL = {}, const Twine &Name = "") | ||||||
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, | ||||||
ArrayRef<VPValue *>({Ptr, Offset}), | ||||||
artagnon marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
GEPFlagsTy(Flags), DL), | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||||||
Opcode(VPInstruction::PtrAdd), Name(Name.str()) {} | ||||||
|
||||||
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, | ||||||
FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = ""); | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -653,7 +653,8 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
"can only generate first lane for PtrAdd"); | ||
Value *Ptr = State.get(getOperand(0), /* IsScalar */ true); | ||
Value *Addend = State.get(getOperand(1), /* IsScalar */ true); | ||
return Builder.CreatePtrAdd(Ptr, Addend, Name); | ||
return isInBounds() ? Builder.CreateInBoundsPtrAdd(Ptr, Addend, Name) | ||
: Builder.CreatePtrAdd(Ptr, Addend, Name); | ||
} | ||
case VPInstruction::ResumePhi: { | ||
Value *IncomingFromVPlanPred = | ||
|
@@ -2478,51 +2479,37 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { | |
unsigned InterleaveFactor = Group->getFactor(); | ||
auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor); | ||
|
||
// Prepare for the new pointers. | ||
unsigned Index = Group->getIndex(Instr); | ||
|
||
// TODO: extend the masked interleaved-group support to reversed access. | ||
VPValue *BlockInMask = getMask(); | ||
assert((!BlockInMask || !Group->isReverse()) && | ||
"Reversed masked interleave-group not supported."); | ||
|
||
Value *Idx; | ||
Value *Index; | ||
// If the group is reverse, adjust the index to refer to the last vector lane | ||
// instead of the first. We adjust the index from the first vector lane, | ||
// rather than directly getting the pointer for lane VF - 1, because the | ||
// pointer operand of the interleaved access is supposed to be uniform. | ||
if (Group->isReverse()) { | ||
Value *RuntimeVF = | ||
getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF); | ||
Idx = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1)); | ||
Idx = State.Builder.CreateMul(Idx, | ||
State.Builder.getInt32(Group->getFactor())); | ||
Idx = State.Builder.CreateAdd(Idx, State.Builder.getInt32(Index)); | ||
Idx = State.Builder.CreateNeg(Idx); | ||
} else | ||
Idx = State.Builder.getInt32(-Index); | ||
Index = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1)); | ||
Index = State.Builder.CreateMul(Index, | ||
State.Builder.getInt32(Group->getFactor())); | ||
Index = State.Builder.CreateNeg(Index); | ||
} else { | ||
// TODO: Drop redundant 0-index GEP as follow-up. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indeed, better keep Idx null if not needed, to refrain from generating a redundant gep with zero. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could pull in here at the cost of a number of additional test changes or land separately. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Your call; generating gep with zero also causes some test discrepancies. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, I'll leave it as is for now, then drop separately. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dropped in 3ec6f80 |
||
Index = State.Builder.getInt32(0); | ||
} | ||
|
||
VPValue *Addr = getAddr(); | ||
Value *ResAddr = State.get(Addr, VPIteration(0, 0)); | ||
if (auto *I = dyn_cast<Instruction>(ResAddr)) | ||
State.setDebugLocFrom(I->getDebugLoc()); | ||
|
||
// Notice current instruction could be any index. Need to adjust the address | ||
// to the member of index 0. | ||
// | ||
// E.g. a = A[i+1]; // Member of index 1 (Current instruction) | ||
// b = A[i]; // Member of index 0 | ||
// Current pointer is pointed to A[i+1], adjust it to A[i]. | ||
// | ||
// E.g. A[i+1] = a; // Member of index 1 | ||
// A[i] = b; // Member of index 0 | ||
// A[i+2] = c; // Member of index 2 (Current instruction) | ||
// Current pointer is pointed to A[i+2], adjust it to A[i]. | ||
|
||
bool InBounds = false; | ||
if (auto *gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts())) | ||
InBounds = gep->isInBounds(); | ||
ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Idx, "", InBounds); | ||
ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds); | ||
|
||
State.setDebugLocFrom(Instr->getDebugLoc()); | ||
Value *PoisonVec = PoisonValue::get(VecTy); | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -1584,14 +1584,19 @@ void VPlanTransforms::dropPoisonGeneratingRecipes( | |||||
} | ||||||
|
||||||
void VPlanTransforms::createInterleaveGroups( | ||||||
const SmallPtrSetImpl<const InterleaveGroup<Instruction> *> &InterleaveGroups, | ||||||
VPlan &Plan, | ||||||
const SmallPtrSetImpl<const InterleaveGroup<Instruction> *> | ||||||
&InterleaveGroups, | ||||||
VPRecipeBuilder &RecipeBuilder, bool ScalarEpilogueAllowed) { | ||||||
if (InterleaveGroups.empty()) | ||||||
return; | ||||||
|
||||||
// Interleave memory: for each Interleave Group we marked earlier as relevant | ||||||
// for this VPlan, replace the Recipes widening its memory instructions with a | ||||||
// single VPInterleaveRecipe at its insertion point. | ||||||
VPDominatorTree VPDT; | ||||||
VPDT.recalculate(Plan); | ||||||
for (const auto *IG : InterleaveGroups) { | ||||||
auto *Recipe = | ||||||
cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos())); | ||||||
SmallVector<VPValue *, 4> StoredValues; | ||||||
for (unsigned i = 0; i < IG->getFactor(); ++i) | ||||||
if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) { | ||||||
|
@@ -1601,9 +1606,38 @@ void VPlanTransforms::createInterleaveGroups( | |||||
|
||||||
bool NeedsMaskForGaps = | ||||||
IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed; | ||||||
auto *VPIG = new VPInterleaveRecipe(IG, Recipe->getAddr(), StoredValues, | ||||||
Recipe->getMask(), NeedsMaskForGaps); | ||||||
VPIG->insertBefore(Recipe); | ||||||
|
||||||
Instruction *IRInsertPos = IG->getInsertPos(); | ||||||
auto *InsertPos = | ||||||
cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IRInsertPos)); | ||||||
|
||||||
// Get or create the start address for the interleave group. | ||||||
auto *Start = | ||||||
cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getMember(0))); | ||||||
VPValue *Addr = Start->getAddr(); | ||||||
if (!VPDT.properlyDominates(Addr->getDefiningRecipe(), InsertPos)) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. then hoist Addr's defining recipe to insert pos, possibly hoisting additional defining recipes as needed? And/or sink loads above member zero to join it. In general, all members of an interleave group conceptually move to its insert pos, so may as well perform actual movement. This should facilitate subsequent SLP'ing. Interleave stores are inserted at the last member, so the address of any member should be available there. Interleaved loads are inserted at the first member, so only its address is guaranteed to be available there, although others may as well. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, should be done as potential follow-up? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, worth leaving behind a TODO. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also handle(s) a live-in Addr? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done by checking if there's a defining recipe, thanks! |
||||||
bool InBounds = false; | ||||||
if (auto *Gep = dyn_cast<GetElementPtrInst>( | ||||||
getLoadStorePointerOperand(IRInsertPos)->stripPointerCasts())) | ||||||
InBounds = Gep->isInBounds(); | ||||||
|
||||||
// We cannot re-use the address of the first member because it does not | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
(may be confusing: insertion point appears "first" (for loads, last for stores); here we mean "first" in terms of memory addresses.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated, thanks! |
||||||
// dominate the insert position. Use the address of the insert position | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated thanks. |
||||||
// and create a PtrAdd to adjust the index to start at the first member. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated, thanks! |
||||||
APInt Offset(32, | ||||||
getLoadStoreType(IRInsertPos)->getScalarSizeInBits() / 8 * | ||||||
IG->getIndex(IRInsertPos), | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Worth asserting that Offset or the index of IRInsertPos is non-zero? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added assert, thanks! |
||||||
/*IsSigned=*/true); | ||||||
VPValue *OffsetVPV = Plan.getOrAddLiveIn( | ||||||
ConstantInt::get(IRInsertPos->getParent()->getContext(), -Offset)); | ||||||
VPBuilder B(InsertPos); | ||||||
Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV) | ||||||
: B.createPtrAdd(InsertPos->getAddr(), OffsetVPV); | ||||||
} | ||||||
auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues, | ||||||
InsertPos->getMask(), NeedsMaskForGaps); | ||||||
VPIG->insertBefore(InsertPos); | ||||||
|
||||||
unsigned J = 0; | ||||||
for (unsigned i = 0; i < IG->getFactor(); ++i) | ||||||
if (Instruction *Member = IG->getMember(i)) { | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The default is unneeded and may lead to potential ambiguity?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!