Skip to content

Commit

Permalink
[LV][VPlan] Use VF VPValue in VPVectorPointerRecipe
Browse files Browse the repository at this point in the history
Refactors VPVectorPointerRecipe to use the VF VPValue to obtain the
runtime VF, similar to #95305.

Since only reverse vector pointers require the runtime VF, the patch
sets VPUnrollPart::PartOpIndex to 1 for vector pointers and 2 for reverse vector
pointers. As a result, the generation of reverse vector pointers is
moved into a separate recipe.
  • Loading branch information
arcbbb committed Oct 3, 2024
1 parent 6c331e5 commit 24ec15c
Show file tree
Hide file tree
Showing 12 changed files with 314 additions and 256 deletions.
13 changes: 10 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4442,6 +4442,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPInstructionSC:
case VPDef::VPCanonicalIVPHISC:
case VPDef::VPVectorPointerSC:
case VPDef::VPReverseVectorPointerSC:
case VPDef::VPExpandSCEVSC:
case VPDef::VPEVLBasedIVPHISC:
case VPDef::VPPredInstPHISC:
Expand Down Expand Up @@ -8160,9 +8161,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
if (Consecutive) {
auto *GEP = dyn_cast<GetElementPtrInst>(
Ptr->getUnderlyingValue()->stripPointerCasts());
auto *VectorPtr = new VPVectorPointerRecipe(
Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
I->getDebugLoc());
VPSingleDefRecipe *VectorPtr;
if (Reverse)
VectorPtr = new VPReverseVectorPointerRecipe(
Ptr, &Plan.getVF(), getLoadStoreType(I),
GEP ? GEP->isInBounds() : false, I->getDebugLoc());
else
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
GEP ? GEP->isInBounds() : false,
I->getDebugLoc());
Builder.getInsertBlock()->appendRecipe(VectorPtr);
Ptr = VectorPtr;
}
Expand Down
63 changes: 55 additions & 8 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPReplicateSC:
case VPRecipeBase::VPScalarIVStepsSC:
case VPRecipeBase::VPVectorPointerSC:
case VPRecipeBase::VPReverseVectorPointerSC:
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
Expand Down Expand Up @@ -1078,6 +1079,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
}

Expand Down Expand Up @@ -1785,20 +1787,65 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
#endif
};

/// A recipe to compute the pointers for widened memory accesses of IndexTy
/// in reverse order per part.
class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<2> {
Type *IndexedTy;

public:
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
bool IsInBounds, DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
ArrayRef<VPValue *>({Ptr, VF}),
GEPFlagsTy(IsInBounds), DL),
IndexedTy(IndexedTy) {}

VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)

VPValue *getVFValue() { return getOperand(1); }
const VPValue *getVFValue() const { return getOperand(1); }

void execute(VPTransformState &State) override;

bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}

/// Returns true if the recipe only uses the first part of operand \p Op.
bool onlyFirstPartUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
assert(getNumOperands() <= 2 && "must have at most two operands");
return true;
}

VPReverseVectorPointerRecipe *clone() override {
return new VPReverseVectorPointerRecipe(
getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe to compute the pointers for widened memory accesses of IndexTy for
/// all parts. If IsReverse is true, compute pointers for accessing the input in
/// reverse order per part.
/// all parts.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<1> {
Type *IndexedTy;
bool IsReverse;

public:
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
bool IsInBounds, DebugLoc DL)
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
GEPFlagsTy(IsInBounds), DL),
IndexedTy(IndexedTy), IsReverse(IsReverse) {}
IndexedTy(IndexedTy) {}

VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)

Expand All @@ -1819,8 +1866,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
}

VPVectorPointerRecipe *clone() override {
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
isInBounds(), getDebugLoc());
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
getDebugLoc());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
return inferScalarType(R->getOperand(0));
})
VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>(
[this](const VPRecipeBase *R) {
return inferScalarType(R->getOperand(0));
})
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
Expand Down
65 changes: 43 additions & 22 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1813,38 +1813,61 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif

void VPReverseVectorPointerRecipe ::execute(VPTransformState &State) {
auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());
unsigned CurrentPart = getUnrollPart(*this);
// Use i32 for the gep index type when the value is constant,
// or query DataLayout for a more suitable index type otherwise.
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
Type *IndexTy = State.VF.isScalable()
? DL.getIndexType(IndexedTy->getPointerTo())
: Builder.getInt32Ty();
Value *Ptr = State.get(getOperand(0), VPLane(0));
bool InBounds = isInBounds();

// the wide store needs to start at the last vector element.
// RunTimeVF = VScale * VF.getKnownMinValue()
// For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
if (IndexTy != RunTimeVF->getType())
RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
// NumElt = -CurrentPart * RunTimeVF
Value *NumElt = Builder.CreateMul(
ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
// LastLane = 1 - RunTimeVF
Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);

State.set(this, ResultPtr, /*IsScalar*/ true);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent;
printAsOperand(O, SlotTracker);
O << " = reverse-vector-pointer ";
printOperands(O, SlotTracker);
}
#endif

void VPVectorPointerRecipe ::execute(VPTransformState &State) {
auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());
unsigned CurrentPart = getUnrollPart(*this);
// Use i32 for the gep index type when the value is constant,
// or query DataLayout for a more suitable index type otherwise.
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
Type *IndexTy = State.VF.isScalable() && (CurrentPart > 0)
? DL.getIndexType(Builder.getPtrTy(0))
: Builder.getInt32Ty();
Value *Ptr = State.get(getOperand(0), VPLane(0));
bool InBounds = isInBounds();

Value *ResultPtr = nullptr;
if (IsReverse) {
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
// RunTimeVF = VScale * VF.getKnownMinValue()
// For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
// NumElt = -CurrentPart * RunTimeVF
Value *NumElt = Builder.CreateMul(
ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
// LastLane = 1 - RunTimeVF
Value *LastLane =
Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
} else {
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
}
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);

State.set(this, ResultPtr, /*IsScalar*/ true);
}
Expand All @@ -1855,8 +1878,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
O << Indent;
printAsOperand(O, SlotTracker);
O << " = vector-pointer ";
if (IsReverse)
O << "(reverse) ";

printOperands(O, SlotTracker);
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
// Add operand indicating the part to generate code for, to recipes still
// requiring it.
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
VPVectorPointerRecipe>(Copy) ||
VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
m_VPValue())))
Copy->addOperand(getConstantVPV(Part));

if (isa<VPVectorPointerRecipe>(R))
if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
Copy->setOperand(0, R.getOperand(0));
}
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,7 @@ class VPDef {
VPScalarCastSC,
VPScalarIVStepsSC,
VPVectorPointerSC,
VPReverseVectorPointerSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
Expand Down
Loading

0 comments on commit 24ec15c

Please sign in to comment.