[LV][EVL] Generate negative strided load/store for reversed load/store #123608
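For context, the pattern this patch targets is a loop that walks memory backward. Under EVL tail folding, the vectorizer previously lowered such a reversed access as a contiguous vp.load or vp.store plus an experimental.vp.reverse of the data (and of the mask); with this change it instead emits a single experimental.vp.strided.load or experimental.vp.strided.store whose byte stride is the negated element size. A minimal illustrative loop (a hypothetical example, not taken from the patch or its tests):

// Hypothetical reversed-access loop of the shape this patch affects: each
// iteration touches the element just below the previous one, so a widened
// load sees its lanes in reverse memory order.
void reverse_add(int *dst, const int *src, int n) {
  for (int i = n - 1; i >= 0; --i)
    dst[i] = src[i] + 1;
}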
@@ -2603,17 +2603,6 @@ void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif

-/// Use all-true mask for reverse rather than actual mask, as it avoids a
-/// dependence w/o affecting the result.
-static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
-                                     Value *EVL, const Twine &Name) {
-  VectorType *ValTy = cast<VectorType>(Operand->getType());
-  Value *AllTrueMask =
-      Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
-  return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
-                                 {Operand, AllTrueMask, EVL}, nullptr, Name);
-}
-
 void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   auto *LI = cast<LoadInst>(&Ingredient);
@@ -2628,18 +2617,26 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   Value *EVL = State.get(getEVL(), VPLane(0));
   Value *Addr = State.get(getAddr(), !CreateGather);
   Value *Mask = nullptr;
-  if (VPValue *VPMask = getMask()) {
+  if (VPValue *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
-  } else {
+  else
     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
-  }

   if (CreateGather) {
     NewLI =
         Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
                                 nullptr, "wide.masked.gather");
+  } else if (isReverse()) {
+    auto *EltTy = DataTy->getElementType();
+    auto *PtrTy = Addr->getType();
+    Value *Operands[] = {
+        Addr,
+        ConstantInt::getSigned(Builder.getInt32Ty(),
+                               -LI->getDataLayout().getTypeAllocSize(EltTy)),
+        Mask, EVL};
+    NewLI = Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
+                                    {DataTy, PtrTy, Builder.getInt32Ty()},
+                                    Operands, nullptr, "vp.reverse.load");
   } else {
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);

Review comment on the new strided-load branch: I have tested TSCV.
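To see why the replacement is sound, it helps to spell out the strided-load semantics the new branch relies on: per the LangRef description of llvm.experimental.vp.strided.load, active lane i reads from Addr + i * Stride (stride in bytes), so a stride of minus one element size yields exactly the reversed element order that the deleted vp.load-plus-vp.reverse sequence produced. A minimal scalar model of that addressing rule (plain C++ with a hypothetical helper name, ignoring masked-off lanes):

#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar model of a strided load: lane I reads Base + I * StrideBytes.
// With StrideBytes = -(element size) and Addr pointing at the last element
// of the chunk, lane 0 reads Addr[0], lane 1 reads Addr[-1], and so on,
// matching a reversed contiguous load for the EVL active lanes.
std::vector<std::int32_t> stridedLoadModel(const std::int32_t *Addr,
                                           std::ptrdiff_t StrideBytes,
                                           unsigned EVL) {
  std::vector<std::int32_t> Lanes(EVL);
  const char *Base = reinterpret_cast<const char *>(Addr);
  for (unsigned I = 0; I < EVL; ++I)
    Lanes[I] = *reinterpret_cast<const std::int32_t *>(
        Base + static_cast<std::ptrdiff_t>(I) * StrideBytes);
  return Lanes;
}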
@@ -2650,8 +2647,6 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
       0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
   State.addMetadata(NewLI, LI);
   Instruction *Res = NewLI;
-  if (isReverse())
-    Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
   State.set(this, Res);
 }
@@ -2670,14 +2665,13 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
-      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
   if (!Reverse)
-    return Cost;
+    return Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+                                         AS, Ctx.CostKind);

-  return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                       cast<VectorType>(Ty), {}, Ctx.CostKind,
-                                       0);
+  return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
+                                        getAddr()->getUnderlyingValue(), false,
+                                        Alignment, Ctx.CostKind);
 }

 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

Review thread on lines +2672 to +2674:
- What will happen if the target does not support strided memory accesses?
- Then the cost of the VPlan will be invalid and we emit a marker?
- Emitting Invalid in computeCost will cause the plan to be discarded, making vectorization impossible. At this stage, if we discover that the target does not support strided memory accesses, it is too late to fall back to using widen load + reverse for vectorizing reverse accesses. Additionally, on targets where strided memory accesses have worse performance, this would increase the vectorization cost, potentially leading to vectorization being abandoned.
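The reply above leans on how LLVM's InstructionCost propagates its invalid state. A standalone sketch of that behavior (the InstructionCost type and its arithmetic are real; the surrounding function is made up for illustration):

#include "llvm/Support/InstructionCost.h"
using namespace llvm;

// Arithmetic involving an invalid InstructionCost stays invalid, so a single
// recipe returning InstructionCost::getInvalid() makes the whole plan's cost
// invalid; the plan is then discarded rather than falling back to another
// lowering, which is the concern discussed in the thread above.
InstructionCost planCost(InstructionCost RecipeCost) {
  InstructionCost Total = 10; // made-up cost of the rest of the plan
  Total += RecipeCost;        // if RecipeCost is invalid, Total is invalid
  return Total;               // callers must check Total.isValid()
}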
@@ -2749,21 +2743,29 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   CallInst *NewSI = nullptr;
   Value *StoredVal = State.get(StoredValue);
   Value *EVL = State.get(getEVL(), VPLane(0));
-  if (isReverse())
-    StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
   Value *Mask = nullptr;
-  if (VPValue *VPMask = getMask()) {
+  if (VPValue *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
-  } else {
+  else
     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
-  }

   Value *Addr = State.get(getAddr(), !CreateScatter);
   if (CreateScatter) {
     NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
                                     Intrinsic::vp_scatter,
                                     {StoredVal, Addr, Mask, EVL});
+  } else if (isReverse()) {
+    Type *StoredValTy = StoredVal->getType();
+    auto *EltTy = cast<VectorType>(StoredValTy)->getElementType();
+    auto *PtrTy = Addr->getType();
+    Value *Operands[] = {
+        StoredVal, Addr,
+        ConstantInt::getSigned(Builder.getInt32Ty(),
+                               -SI->getDataLayout().getTypeAllocSize(EltTy)),
+        Mask, EVL};
+    NewSI = Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
+                                    {StoredValTy, PtrTy, Builder.getInt32Ty()},
+                                    Operands);
   } else {
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);
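One detail worth noting in both new branches: getTypeAllocSize returns an unsigned byte count, and the stride operand must be its signed negation, which is why the constant is created with ConstantInt::getSigned. A plain-C++ sketch of the intended arithmetic (hypothetical helper, not LLVM API):

#include <cassert>
#include <cstdint>

// The element's alloc size is an unsigned byte count; the stride must be its
// signed negation, e.g. 4-byte elements give stride -4. Negating the unsigned
// value without a signed interpretation would leave a huge positive number,
// so the cast to a signed type happens before negation (mirroring the effect
// of ConstantInt::getSigned in the diff above).
std::int32_t negativeStride(std::uint64_t EltAllocSizeInBytes) {
  assert(EltAllocSizeInBytes <= INT32_MAX && "element size must fit in i32");
  return -static_cast<std::int32_t>(EltAllocSizeInBytes);
}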
@@ -2791,14 +2793,13 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
-      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
   if (!Reverse)
-    return Cost;
+    return Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+                                         AS, Ctx.CostKind);

-  return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                       cast<VectorType>(Ty), {}, Ctx.CostKind,
-                                       0);
+  return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
+                                        getAddr()->getUnderlyingValue(), false,
+                                        Alignment, Ctx.CostKind);
 }

 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)