Commit 7b66981

[LV][VPlan] For reverse iterative continuous load and store access, use unit load/store
1 parent b6d5fa0

File tree

2 files changed, +50 -41 lines
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp


llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (8 additions, 1 deletion)

@@ -7487,6 +7487,13 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
       // comparing against the legacy cost isn't desirable.
       if (isa<VPPartialReductionRecipe>(&R))
         return true;
+
+      // The VPlan-based cost model may calculate the cost of strided load/store
+      // which can't be modeled in the legacy cost model.
+      if (isa<VPWidenLoadEVLRecipe, VPWidenStoreEVLRecipe>(&R))
+        if (cast<VPWidenMemoryRecipe>(&R)->isReverse())
+          return true;
+
       if (Instruction *UI = GetInstructionForCost(&R))
         SeenInstrs.insert(UI);
     }
@@ -8344,7 +8351,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
   auto *GEP = dyn_cast<GetElementPtrInst>(
       Ptr->getUnderlyingValue()->stripPointerCasts());
   VPSingleDefRecipe *VectorPtr;
-  if (Reverse) {
+  if (Reverse && !CM.foldTailWithEVL()) {
     // When folding the tail, we may compute an address that we don't in the
     // original scalar loop and it may not be inbounds. Drop Inbounds in that
     // case.
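
Note: the kind of loop this change targets is a contiguous access walked in
reverse order under EVL-based tail folding. The snippet below is an
illustrative example only (not taken from the patch or its tests); with this
commit, such accesses no longer get a separate reverse vector-pointer recipe,
and the widened recipe rebases the address itself.

// Illustrative C++ source loop: unit-stride accesses iterated backwards.
// Under EVL tail folding, src[i] and dst[i] become the reverse consecutive
// load/store handled by the recipes changed below.
void scale_reverse(int *dst, const int *src, int n) {
  for (int i = n - 1; i >= 0; --i)
    dst[i] = src[i] * 2;
}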

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (42 additions, 40 deletions)

@@ -2640,17 +2640,6 @@ void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-/// Use all-true mask for reverse rather than actual mask, as it avoids a
-/// dependence w/o affecting the result.
-static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
-                                     Value *EVL, const Twine &Name) {
-  VectorType *ValTy = cast<VectorType>(Operand->getType());
-  Value *AllTrueMask =
-      Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
-  return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
-                                 {Operand, AllTrueMask, EVL}, nullptr, Name);
-}
-
 void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   auto *LI = cast<LoadInst>(&Ingredient);
 
@@ -2665,19 +2654,26 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   Value *EVL = State.get(getEVL(), VPLane(0));
   Value *Addr = State.get(getAddr(), !CreateGather);
   Value *Mask = nullptr;
-  if (VPValue *VPMask = getMask()) {
+  if (VPValue *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
-  } else {
+  else
     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
-  }
 
   if (CreateGather) {
     NewLI =
         Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
                                 nullptr, "wide.masked.gather");
   } else {
+    if (isReverse()) {
+      auto *EltTy = DataTy->getElementType();
+      // if (EltTy->getScalarSizeInBits() !=
+      //     EVL->getType()->getScalarSizeInBits())
+      //   EVL = ConstantInt::getSigned(EVL->getType(),
+      //       static_cast<int64_t>(EltTy->getScalarSizeInBits()) / 8);
+      auto *GEP = dyn_cast<GetElementPtrInst>(Addr->stripPointerCasts());
+      Value *Offset = Builder.CreateSub(State.Builder.getInt32(1), EVL);
+      Addr = Builder.CreateGEP(EltTy, Addr, Offset, "", GEP->isInBounds());
+    }
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);
     NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
@@ -2686,10 +2682,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   NewLI->addParamAttr(
       0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
   State.addMetadata(NewLI, LI);
-  Instruction *Res = NewLI;
-  if (isReverse())
-    Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
-  State.set(this, Res);
+  State.set(this, NewLI);
 }
 
 InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
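
Note: for the load path, a minimal scalar sketch of what the rewritten recipe
emits (assumed vp.load semantics, hypothetical helper name, not an LLVM API):
the consecutive address for the current iteration is moved back by EVL - 1
elements via the (1 - EVL) GEP, and a unit-stride masked load of EVL lanes is
issued in memory order, with no vp.reverse applied to the result.

#include <cstdint>
#include <vector>

// Hypothetical model of the rebased unit-stride EVL load. Inactive lanes are
// modeled as default-initialized here; in IR they would be poison.
template <typename T>
std::vector<T> simulateReverseEVLLoad(const T *Base, uint32_t EVL,
                                      const std::vector<bool> &Mask) {
  const T *Start = Base - (static_cast<int64_t>(EVL) - 1); // GEP by (1 - EVL)
  std::vector<T> Lanes(EVL, T{});
  for (uint32_t Lane = 0; Lane < EVL; ++Lane)
    if (Mask[Lane])
      Lanes[Lane] = Start[Lane]; // unit stride, memory order
  return Lanes;
}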
@@ -2707,14 +2700,13 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
-      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
   if (!Reverse)
-    return Cost;
+    return Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+                                         AS, Ctx.CostKind);
 
-  return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                       cast<VectorType>(Ty), {}, Ctx.CostKind,
-                                       0);
+  return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
+                                        getAddr()->getUnderlyingValue(), false,
+                                        Alignment, Ctx.CostKind);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
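
Note: the cost change can be read as the standalone helper below (a sketch
mirroring the new logic, not code from the patch). A reverse consecutive EVL
access is now priced as a strided memory operation instead of a masked memory
operation plus an SK_Reverse shuffle.

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Hypothetical helper restating the costing decision the recipes now make.
static InstructionCost
costEVLConsecutiveAccess(const TargetTransformInfo &TTI, unsigned Opcode,
                         Type *VecTy, const Value *Ptr, bool Reverse,
                         Align Alignment, unsigned AddrSpace,
                         TargetTransformInfo::TargetCostKind CostKind) {
  if (!Reverse)
    return TTI.getMaskedMemoryOpCost(Opcode, VecTy, Alignment, AddrSpace,
                                     CostKind);
  // Reverse consecutive accesses are priced like strided accesses.
  return TTI.getStridedMemoryOpCost(Opcode, VecTy, Ptr, /*VariableMask=*/false,
                                    Alignment, CostKind);
}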
@@ -2775,7 +2767,8 @@ void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
 
 void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   auto *SI = cast<StoreInst>(&Ingredient);
-
+  Type *ScalarDataTy = getLoadStoreType(&Ingredient);
+  auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   VPValue *StoredValue = getStoredValue();
   bool CreateScatter = !isConsecutive();
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
@@ -2786,22 +2779,32 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   CallInst *NewSI = nullptr;
   Value *StoredVal = State.get(StoredValue);
   Value *EVL = State.get(getEVL(), VPLane(0));
-  if (isReverse())
-    StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
   Value *Mask = nullptr;
-  if (VPValue *VPMask = getMask()) {
+  if (VPValue *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
-  } else {
+  else
     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
-  }
+
   Value *Addr = State.get(getAddr(), !CreateScatter);
   if (CreateScatter) {
     NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
                                     Intrinsic::vp_scatter,
                                     {StoredVal, Addr, Mask, EVL});
   } else {
+    if (isReverse()) {
+      auto *EltTy = DataTy->getElementType();
+      // FIXME: we may need not deal with the size, the InstCombine will deal
+      // with the Offset Type if (EltTy->getScalarSizeInBits() !=
+      //     EVL->getType()->getScalarSizeInBits())
+      //   EVL = ConstantInt::getSigned(EVL->getType(),
+      //       static_cast<int64_t>(EltTy->getScalarSizeInBits()) / 8);
+      auto *GEP = dyn_cast<GetElementPtrInst>(Addr->stripPointerCasts());
+      // Value *Offset =
+      //     Builder.CreateSub(State.Builder.getIntN(EVL->getType()->getScalarSizeInBits(),
+      //     1), EVL);
+      Value *Offset = Builder.CreateSub(State.Builder.getInt32(1), EVL);
+      Addr = Builder.CreateGEP(EltTy, Addr, Offset, "", GEP->isInBounds());
+    }
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);
     NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
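
Note: the store path mirrors the load path. A minimal scalar sketch under the
same assumptions (hypothetical name, vp.store semantics where masked-off lanes
leave memory untouched):

#include <cstdint>

// Hypothetical model of the rebased unit-stride EVL store: only the first EVL
// lanes whose mask bit is set are written, in memory order.
template <typename T>
void simulateReverseEVLStore(T *Base, const T *Lanes, uint32_t EVL,
                             const bool *Mask) {
  T *Start = Base - (static_cast<int64_t>(EVL) - 1); // GEP by (1 - EVL)
  for (uint32_t Lane = 0; Lane < EVL; ++Lane)
    if (Mask[Lane])
      Start[Lane] = Lanes[Lane]; // masked-off lanes are not modified
}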
@@ -2828,14 +2831,13 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
-      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
   if (!Reverse)
-    return Cost;
+    return Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+                                         AS, Ctx.CostKind);
 
-  return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                       cast<VectorType>(Ty), {}, Ctx.CostKind,
-                                       0);
+  return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
+                                        getAddr()->getUnderlyingValue(), false,
+                                        Alignment, Ctx.CostKind);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
