Skip to content

[VPlan] Compute cost for most opcodes in VPWidenRecipe (NFCI). #98764

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4356,8 +4356,8 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
SmallVector<RecipeVFPair> InvalidCosts;
for (const auto &Plan : VPlans) {
for (ElementCount VF : Plan->vectorFactors()) {
VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx,
CM);
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(),
LLVMCtx, CM);
auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (auto &R : *VPBB) {
Expand Down Expand Up @@ -7064,7 +7064,8 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
ElementCount VF) const {
InstructionCost Cost = 0;
LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM);
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(),
LLVMCtx, CM);

// Cost modeling for inductions is inaccurate in the legacy cost model
// compared to the recipes that are generated. To match here initially during
Expand Down
22 changes: 15 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -736,14 +736,16 @@ class VPLiveOut : public VPUser {
/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {
const TargetTransformInfo &TTI;
const TargetLibraryInfo &TLI;
VPTypeAnalysis Types;
LLVMContext &LLVMCtx;
LoopVectorizationCostModel &CM;
SmallPtrSet<Instruction *, 8> SkipCostComputation;

VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy,
LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)
: TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
Type *CanIVTy, LLVMContext &LLVMCtx,
LoopVectorizationCostModel &CM)
: TTI(TTI), TLI(TLI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}

/// Return the cost for \p UI with \p VF using the legacy cost model as
/// fallback until computing the cost of all recipes migrates to VPlan.
Expand Down Expand Up @@ -796,7 +798,7 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
/// Return the cost of this recipe, taking into account if the cost
/// computation should be skipped and the ForceTargetInstructionCost flag.
/// Also takes care of printing the cost for debugging.
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
InstructionCost cost(ElementCount VF, VPCostContext &Ctx);

/// Insert an unlinked recipe into a basic block immediately before
/// the specified recipe.
Expand Down Expand Up @@ -860,9 +862,11 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
DebugLoc getDebugLoc() const { return DL; }

protected:
/// Compute the cost of this recipe using the legacy cost model and the
/// underlying instructions.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
/// Compute the cost of this recipe either using a recipe's specialized
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"either using" >> "using either"
"or using" >> "or"

/// implementation or using the legacy cost model and the underlying
/// instructions.
virtual InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const;
};

// Helper macro to define common classof implementations for recipes.
Expand Down Expand Up @@ -1426,6 +1430,10 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
/// processing State.VF elements.
void execute(VPTransformState &State) override;

/// Return the cost of this VPWidenRecipe.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Return" >> "Compute and return"

InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

unsigned getOpcode() const { return Opcode; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down
74 changes: 74 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,80 @@ void VPWidenRecipe::execute(VPTransformState &State) {
#endif
}

/// Compute and return the cost of this VPWidenRecipe for vectorization factor
/// \p VF, using the analyses held by \p Ctx and dispatching on Opcode:
///  - FNeg, the listed binary opcodes and Freeze are costed with
///    TTI::getArithmeticInstrCost (Freeze is costed as if it were a Mul).
///  - UDiv/SDiv/SRem/URem are delegated to the legacy cost model.
///  - ICmp/FCmp are costed with TTI::getCmpSelInstrCost.
/// Any other opcode reaches llvm_unreachable.
InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
// Every TTI query below uses the reciprocal-throughput cost kind.
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
switch (Opcode) {
case Instruction::FNeg: {
// Widen the inferred scalar result type to a vector type for VF.
Type *VectorTy =
ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
return Ctx.TTI.getArithmeticInstrCost(
Opcode, VectorTy, CostKind,
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
}

case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::URem:
// More complex computation, let the legacy cost-model handle this for now.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"// TODO: ..."

return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
VPValue *RHS = getOperand(1);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"RHS" >> "Operand2" ?
"RHSInfo" >> "Operand2Info"?

// Certain instructions can be cheaper to vectorize if they have a constant
// second vector operand. One example of this is shifts on x86.
TargetTransformInfo::OperandValueInfo RHSInfo = {
TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};
// A live-in operand wraps an IR value, so TTI is asked to classify it.
if (RHS->isLiveIn())
RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());

// An operand still classified as OK_AnyValue that is defined outside the
// vector regions is reported to TTI as a uniform value.
if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
getOperand(1)->isDefinedOutsideVectorRegions())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"getOperand(1)" >> "RHS" (or "Operand2")

RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
Type *VectorTy =
ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
// The underlying instruction, when present, supplies its IR operands and is
// passed to TTI as the context instruction; Operands stays empty otherwise.
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

VPWidenRecipe must have an underlying value, which must be an Instruction?


SmallVector<const Value *, 4> Operands;
if (CtxI)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this branch always taken?

Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
return Ctx.TTI.getArithmeticInstrCost(
Opcode, VectorTy, CostKind,
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
RHSInfo, Operands, CtxI, &Ctx.TLI);
}
case Instruction::Freeze: {
// This opcode is unknown. Assume that it is the same as 'mul'.
Type *VectorTy =
ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
}
case Instruction::ICmp:
case Instruction::FCmp: {
// Compares are costed on the vector type built from the first operand, not
// from the result of the recipe.
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
CostKind, CtxI);
}
default:
llvm_unreachable("Unsupported opcode for instruction");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't want some kind of fall back?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think VPWidenRecipe recipes should only be created for the matched opcodes above and if there are any missed cases this should highlight them via a crash, similar to how we handle similar cases elsewhere.

}
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
Expand Down
Loading