Skip to content

[VPlan] Replace VPWidenCastRecipe by VPInstructionWithType (NFC) (WIP). #129712

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,15 +246,13 @@ class VPBuilder {
new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
}

/// Create a VPScalarCastRecipe that applies the cast \p Opcode to \p Op with
/// result type \p ResultTy and debug location \p DL, and hand it to
/// tryInsertInstruction, whose result is returned.
VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
Type *ResultTy, DebugLoc DL) {
return tryInsertInstruction(
new VPScalarCastRecipe(Opcode, Op, ResultTy, DL));
}

VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
Type *ResultTy) {
return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy));
/// Create a VPInstructionWithType for the cast \p Opcode with the single
/// operand \p Op, result type \p ResultTy, debug location \p DL and name
/// \p Name. \p CI (null by default) is recorded as the underlying value of the
/// new recipe before it is handed to tryInsertInstruction, whose result is
/// returned.
VPInstructionWithType *createCast(Instruction::CastOps Opcode, VPValue *Op,
Type *ResultTy, DebugLoc DL = {},
const Twine &Name = "",
Instruction *CI = nullptr) {
auto *VPI = new VPInstructionWithType(Opcode, {Op}, ResultTy, DL, Name);
// Set unconditionally; with the default argument this stores a null
// underlying value.
VPI->setUnderlyingValue(CI);
return tryInsertInstruction(VPI);
}

VPScalarIVStepsRecipe *
Expand Down
48 changes: 30 additions & 18 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4434,8 +4434,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
[](const auto *R) { return Instruction::Load; })
.Case<VPWidenCallRecipe, VPWidenIntrinsicRecipe>(
[](const auto *R) { return Instruction::Call; })
.Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCastRecipe>(
.Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe>(
[](const auto *R) { return R->getOpcode(); })
.Case<VPInterleaveRecipe>([](const VPInterleaveRecipe *R) {
return R->getStoredValues().empty() ? Instruction::Load
Expand Down Expand Up @@ -4496,15 +4495,11 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
if (EphemeralRecipes.contains(&R))
continue;
// Continue early if the recipe is considered to not produce a vector
// result. Note that this includes VPInstruction where some opcodes may
// produce a vector, to preserve existing behavior as VPInstructions model
// aspects not directly mapped to existing IR instructions.
// result.
switch (R.getVPDefID()) {
case VPDef::VPDerivedIVSC:
case VPDef::VPScalarIVStepsSC:
case VPDef::VPScalarCastSC:
case VPDef::VPReplicateSC:
case VPDef::VPInstructionSC:
case VPDef::VPCanonicalIVPHISC:
case VPDef::VPVectorPointerSC:
case VPDef::VPReverseVectorPointerSC:
Expand All @@ -4517,7 +4512,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPActiveLaneMaskPHISC:
case VPDef::VPWidenCallSC:
case VPDef::VPWidenCanonicalIVSC:
case VPDef::VPWidenCastSC:
case VPDef::VPWidenGEPSC:
case VPDef::VPWidenIntrinsicSC:
case VPDef::VPWidenSC:
Expand All @@ -4534,6 +4528,15 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPWidenStoreEVLSC:
case VPDef::VPWidenStoreSC:
break;
case VPDef::VPInstructionSC: {
// Note that for VPInstruction some opcodes may produce a vector. To
// preserve existing behavior only consider them vector-generating if
// they are casts with an underlying value.
if (Instruction::isCast(cast<VPInstruction>(&R)->getOpcode()) &&
R.getVPSingleValue()->getUnderlyingValue())
break;
continue;
}
default:
llvm_unreachable("unhandled recipe");
}
Expand Down Expand Up @@ -8938,8 +8941,15 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
}

if (auto *CI = dyn_cast<CastInst>(Instr)) {
return new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(),
*CI);
auto *VPI =
isa<PossiblyNonNegInst>(CI)
? new VPInstructionWithType(CI->getOpcode(), {Operands[0]},
CI->getType(), {CI->hasNonNeg()}, {})
: new VPInstructionWithType(CI->getOpcode(), {Operands[0]},
CI->getType(), {});

VPI->setUnderlyingValue(CI);
return VPI;
}

return tryToWiden(Instr, Operands);
Expand Down Expand Up @@ -9061,9 +9071,9 @@ static VPInstruction *addResumePhiRecipeForInduction(
// the widest induction) and thus may be wider than the induction here.
Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV);
if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) {
EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue,
ScalarTypeOfWideIV,
WideIV->getDebugLoc());
EndValue =
VectorPHBuilder.createCast(Instruction::Trunc, EndValue,
ScalarTypeOfWideIV, WideIV->getDebugLoc());
}

auto *ResumePhiRecipe =
Expand Down Expand Up @@ -9861,12 +9871,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
RdxDesc.getRecurrenceKind())) {
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
Type *RdxTy = RdxDesc.getRecurrenceType();
auto *Trunc =
new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
auto *Trunc = new VPInstructionWithType(Instruction::Trunc, NewExitingVPV,
RdxTy, {});
auto *Extnd =
RdxDesc.isSigned()
? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy)
: new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy);
? new VPInstructionWithType(Instruction::SExt, Trunc, PhiTy, {})
: new VPInstructionWithType(Instruction::ZExt, Trunc, PhiTy, {});

Trunc->insertAfter(NewExitingVPV->getDefiningRecipe());
Extnd->insertAfter(Trunc);
Expand Down Expand Up @@ -10396,8 +10406,10 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
assert(all_of(IV->users(),
[](const VPUser *U) {
return isa<VPScalarIVStepsRecipe>(U) ||
isa<VPScalarCastRecipe>(U) ||
isa<VPDerivedIVRecipe>(U) ||
Instruction::isCast(
cast<VPInstruction>(U)->getOpcode()) ||

cast<VPInstruction>(U)->getOpcode() ==
Instruction::Add;
}) &&
Expand Down
177 changes: 71 additions & 106 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPReverseVectorPointerSC:
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
Expand All @@ -533,7 +532,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenIntOrFpInductionSC:
case VPRecipeBase::VPWidenPointerInductionSC:
case VPRecipeBase::VPReductionPHISC:
case VPRecipeBase::VPScalarCastSC:
case VPRecipeBase::VPScalarPHISC:
case VPRecipeBase::VPPartialReductionSC:
return true;
Expand Down Expand Up @@ -599,13 +597,15 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
};

/// Holds a single one-bit NonNeg flag. Implicitly constructible from a bool;
/// the flag defaults to false when no argument is given.
struct NonNegFlagsTy {
char NonNeg : 1;
NonNegFlagsTy(bool IsNonNeg = false) : NonNeg(IsNonNeg) {}
};

private:
/// Holds a single one-bit IsExact flag.
struct ExactFlagsTy {
char IsExact : 1;
};
/// Holds a single one-bit NonNeg flag; this variant declares no constructor.
struct NonNegFlagsTy {
char NonNeg : 1;
};
struct FastMathFlagsTy {
char AllowReassoc : 1;
char NoNaNs : 1;
Expand Down Expand Up @@ -699,6 +699,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
: VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
DisjointFlags(DisjointFlags) {}

/// Construct a recipe with recipe ID \p SC over \p Operands and debug
/// location \p DL, tagging it as OperationType::NonNegOp and storing
/// \p NonNegFlags as its flags.
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
NonNegFlagsTy NonNegFlags, DebugLoc DL = {})
: VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::NonNegOp),
NonNegFlags(NonNegFlags) {}

protected:
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
Expand All @@ -711,7 +717,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
Expand Down Expand Up @@ -954,6 +959,12 @@ class VPInstruction : public VPRecipeWithIRFlags,
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");

/// Construct a VPInstruction with \p Opcode over \p Operands that carries
/// \p NonNegFlags; the flags, operands and \p DL are forwarded to the
/// VPRecipeWithIRFlags base, and \p Name is stored as a string copy.
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
NonNegFlagsTy NonNegFlags, DebugLoc DL = {},
const Twine &Name = "")
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, NonNegFlags, DL),
Opcode(Opcode), Name(Name.str()) {}

VP_CLASSOF_IMPL(VPDef::VPInstructionSC)

VPInstruction *clone() override {
Expand Down Expand Up @@ -1026,6 +1037,60 @@ class VPInstruction : public VPRecipeWithIRFlags,
StringRef getName() const { return Name; }
};

/// A specialization of VPInstruction augmenting it with a dedicated result
/// type, to be used when the opcode and operands of the VPInstruction don't
/// directly determine the result type.
class VPInstructionWithType : public VPInstruction {
/// Scalar result type produced by the recipe.
Type *ResultTy;

/// Private helper taking the transform state; declared here and defined out
/// of line.
Value *generate(VPTransformState &State);

public:
/// Construct a recipe with \p Opcode over \p Operands whose result type is
/// \p ResultTy, with debug location \p DL and optional \p Name. No IR flags
/// are passed to the VPInstruction base.
VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
Type *ResultTy, DebugLoc DL, const Twine &Name = "")
: VPInstruction(Opcode, Operands, DL, Name), ResultTy(ResultTy) {}

/// Same as above, but additionally forwards the non-neg \p Flags to the
/// VPInstruction base.
VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
Type *ResultTy, NonNegFlagsTy Flags, DebugLoc DL,
const Twine &Name = "")
: VPInstruction(Opcode, Operands, Flags, DL, Name), ResultTy(ResultTy) {}

/// Returns true if \p R is a VPInstruction whose opcode is a cast opcode.
/// Note that the test is based on the opcode only; there is no dedicated
/// recipe ID checked here.
static inline bool classof(const VPRecipeBase *R) {
auto *VPI = dyn_cast<VPInstruction>(R);
return VPI && Instruction::isCast(VPI->getOpcode());
}

/// VPUser overload; casts \p R to VPRecipeBase and defers to the overload
/// above.
static inline bool classof(const VPUser *R) {
return isa<VPInstructionWithType>(cast<VPRecipeBase>(R));
}

/// Create a copy of this recipe. Only operand 0 is passed to the copy. The
/// `{}` argument selects the constructor overload taking NonNegFlagsTy; the
/// underlying value is copied and transferFlags is then called with this
/// recipe as the source.
VPInstruction *clone() override {
auto *New =
new VPInstructionWithType(getOpcode(), {getOperand(0)}, getResultType(),
{}, getDebugLoc(), getName());
New->setUnderlyingValue(getUnderlyingValue());
New->transferFlags(*this);
return New;
}

/// Generate code for this recipe; defined out of line.
void execute(VPTransformState &State) override;

/// Return the cost of this VPInstructionWithType.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

/// Returns the scalar result type of the recipe.
Type *getResultType() const { return ResultTy; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif

/// Override of the base-class query for operand \p Op; defined out of line.
bool onlyFirstLaneUsed(const VPValue *Op) const override;
};

/// A recipe to wrap an original IR instruction not to be modified during
/// execution, except for PHIs. For PHIs, a single VPValue operand is allowed,
/// and it is used to add a new incoming value for the single predecessor VPBB.
Expand Down Expand Up @@ -1131,106 +1196,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
#endif
};

/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Cast instruction opcode.
Instruction::CastOps Opcode;

/// Result type for the cast.
Type *ResultTy;

public:
/// Construct a cast recipe for operand \p Op that is associated with the IR
/// cast \p UI, which is forwarded to the VPRecipeWithIRFlags base. Asserts
/// that \p Opcode matches the opcode of \p UI.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
CastInst &UI)
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
ResultTy(ResultTy) {
assert(UI.getOpcode() == Opcode &&
"opcode of underlying cast doesn't match");
}

/// Construct a cast recipe for operand \p Op without an associated IR
/// instruction.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
ResultTy(ResultTy) {}

~VPWidenCastRecipe() override = default;

/// Create a copy of this recipe, using the constructor that takes the
/// underlying CastInst when an underlying value is present and the
/// instruction-less constructor otherwise.
VPWidenCastRecipe *clone() override {
if (auto *UV = getUnderlyingValue())
return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
*cast<CastInst>(UV));

return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
}

VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)

/// Produce widened copies of the cast.
void execute(VPTransformState &State) override;

/// Return the cost of this VPWidenCastRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif

/// Returns the cast opcode of the recipe.
Instruction::CastOps getOpcode() const { return Opcode; }

/// Returns the result type of the cast.
Type *getResultType() const { return ResultTy; }
};

/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
class VPScalarCastRecipe : public VPSingleDefRecipe {
/// Cast instruction opcode.
Instruction::CastOps Opcode;

/// Result type for the cast.
Type *ResultTy;

/// Private helper taking the transform state; declared here and defined out
/// of line.
Value *generate(VPTransformState &State);

public:
/// Construct a scalar cast recipe applying \p Opcode to the single operand
/// \p Op, with result type \p ResultTy and debug location \p DL.
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
DebugLoc DL)
: VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}, DL), Opcode(Opcode),
ResultTy(ResultTy) {}

~VPScalarCastRecipe() override = default;

/// Create a copy of this recipe with the same opcode, operand, result type
/// and debug location.
VPScalarCastRecipe *clone() override {
return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy,
getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)

/// Generate code for this recipe; defined out of line.
void execute(VPTransformState &State) override;

/// Return the cost of this VPScalarCastRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override {
// TODO: Compute accurate cost after retiring the legacy cost model.
return 0;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif

/// Returns the result type of the cast.
Type *getResultType() const { return ResultTy; }

/// Always returns true after asserting that \p Op is an operand of this
/// recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
// At the moment, only uniform codegen is implemented.
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}
};

/// A recipe for widening vector intrinsics.
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
/// ID of the vector intrinsic to widen.
Expand Down
9 changes: 2 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,20 +252,15 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
return inferScalarType(R->getOperand(0));
})
.Case<VPInstructionWithType, VPWidenIntrinsicRecipe>(
[](const auto *R) { return R->getResultType(); })
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
.Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
return R->getResultType();
})
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
// TODO: Use info from interleave group.
return V->getUnderlyingValue()->getType();
})
.Case<VPWidenCastRecipe>(
[](const VPWidenCastRecipe *R) { return R->getResultType(); })
.Case<VPScalarCastRecipe>(
[](const VPScalarCastRecipe *R) { return R->getResultType(); })
.Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
return R->getSCEV()->getType();
})
Expand Down
Loading
Loading