Skip to content

[LV][EVL] Support icmp/fcmp instruction with EVL-vectorization #108533

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/include/llvm/IR/VectorBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ class VectorBuilder {
const Twine &Name = Twine());

/// Emit a VP reduction intrinsic call for recurrence kind.
/// \param RdxID The intrinsic ID of llvm.vector.reduce.*
/// \param ID The intrinsic ID of call Intrinsic
/// \param ValTy The type of operand which the reduction operation is
/// performed.
/// \param VecOpArray The operand list.
Value *createSimpleReduction(Intrinsic::ID RdxID, Type *ValTy,
Value *createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
ArrayRef<Value *> VecOpArray,
const Twine &Name = Twine());
};
Expand Down
9 changes: 4 additions & 5 deletions llvm/lib/IR/VectorBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,12 @@ Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy,
return createVectorInstructionImpl(VPID, ReturnTy, InstOpArray, Name);
}

Value *VectorBuilder::createSimpleReduction(Intrinsic::ID RdxID,
Type *ValTy,
Value *VectorBuilder::createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
ArrayRef<Value *> InstOpArray,
const Twine &Name) {
auto VPID = VPIntrinsic::getForIntrinsic(RdxID);
assert(VPReductionIntrinsic::isVPReduction(VPID) &&
"No VPIntrinsic for this reduction");
auto VPID = VPIntrinsic::getForIntrinsic(ID);
assert(VPIntrinsic::isVPIntrinsic(VPID) &&
"No VPIntrinsic for this Intrinsic");
return createVectorInstructionImpl(VPID, ValTy, InstOpArray, Name);
}

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1118,6 +1118,8 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
case Intrinsic::vp_udiv:
case Intrinsic::vp_urem:
case Intrinsic::vp_xor:
// TODO: add new patch for it.
case Intrinsic::vp_fneg:
// vp float arithmetic ops.
case Intrinsic::vp_fadd:
case Intrinsic::vp_fsub:
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1300,7 +1300,7 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
Type *SrcEltTy = SrcTy->getElementType();
Value *Iden = getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
Value *Ops[] = {Iden, Src};
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
}

Value *llvm::createReduction(IRBuilderBase &B,
Expand Down Expand Up @@ -1343,7 +1343,7 @@ Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
auto *SrcTy = cast<VectorType>(Src->getType());
Value *Ops[] = {Start, Src};
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
}

void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11720,7 +11720,7 @@ InstructionCost BoUpSLP::getSpillCost() const {
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
FMF = FPMO->getFastMathFlags();
IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), Tys,
FMF);
FMF, II);
InstructionCost IntrCost =
TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);
InstructionCost CallCost = TTI->getCallInstrCost(
Expand Down
79 changes: 13 additions & 66 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenEVLSC:
case VPRecipeBase::VPWidenSelectSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
Expand Down Expand Up @@ -1063,7 +1062,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
Expand Down Expand Up @@ -1436,16 +1434,11 @@ class VPIRInstruction : public VPRecipeBase {
class VPWidenRecipe : public VPRecipeWithIRFlags {
unsigned Opcode;

protected:
template <typename IterT>
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
iterator_range<IterT> Operands)
: VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}

public:
template <typename IterT>
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
: VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
Opcode(I.getOpcode()) {}

~VPWidenRecipe() override = default;

Expand All @@ -1455,15 +1448,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
return R;
}

static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
}

static inline bool classof(const VPUser *U) {
auto *R = dyn_cast<VPRecipeBase>(U);
return R && classof(R);
}
VP_CLASSOF_IMPL(VPDef::VPWidenSC)

/// Produce a widened instruction using the opcode and operands of the recipe,
/// processing State.VF elements.
Expand All @@ -1482,54 +1467,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
#endif
};

/// A recipe for widening operations with vector-predication intrinsics with
/// explicit vector length (EVL).
class VPWidenEVLRecipe : public VPWidenRecipe {
using VPRecipeWithIRFlags::transferFlags;

public:
template <typename IterT>
VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
: VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
addOperand(&EVL);
}
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
: VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
transferFlags(W);
}

~VPWidenEVLRecipe() override = default;

VPWidenRecipe *clone() override final {
llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
return nullptr;
}

VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);

VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }

/// Produce a vp-intrinsic using the opcode and operands of the recipe,
/// processing EVL elements.
void execute(VPTransformState &State) override final;

/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// EVL in that recipe is always the last operand, thus any use before means
// the VPValue should be vectorized.
return getEVL() == Op;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override final;
#endif
};

/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Cast instruction opcode.
Expand Down Expand Up @@ -1648,6 +1585,16 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
MayWriteToMemory(CI.mayWriteToMemory()),
MayHaveSideEffects(CI.mayHaveSideEffects()) {}

template <typename IterT>
VPWidenIntrinsicRecipe(Instruction &I, Intrinsic::ID VectorIntrinsicID,
iterator_range<IterT> Operands, Type *Ty,
DebugLoc DL = {})
: VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, Operands, I),
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
MayReadFromMemory(I.mayReadFromMemory()),
MayWriteToMemory(I.mayWriteToMemory()),
MayHaveSideEffects(I.mayHaveSideEffects()) {}

VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
ArrayRef<VPValue *> CallArguments, Type *Ty,
DebugLoc DL = {})
Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,9 +267,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
[this](const VPRecipeBase *R) {
return inferScalarType(R->getOperand(0));
})
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
.Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
return R->getResultType();
Expand Down
116 changes: 54 additions & 62 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenLoadSC:
case VPWidenPHISC:
case VPWidenSC:
case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
Expand Down Expand Up @@ -143,7 +142,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
case VPWidenSC:
case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
Expand Down Expand Up @@ -184,7 +182,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPWidenPHISC:
case VPWidenPointerInductionSC:
case VPWidenSC:
case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
Expand Down Expand Up @@ -970,24 +967,52 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
Args.push_back(Arg);
}

// Use vector version of the intrinsic.
Module *M = State.Builder.GetInsertBlock()->getModule();
Function *VectorF =
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
assert(VectorF && "Can't retrieve vector intrinsic.");
if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
VectorIntrinsicID != Intrinsic::vp_select) {
VectorBuilder VBuilder(State.Builder);
Value *Mask =
State.Builder.CreateVectorSplat(State.VF, State.Builder.getTrue());
VBuilder.setMask(Mask).setEVL(Args.back());
// Remove EVL from Args
Args.pop_back();

if (VPCmpIntrinsic::isVPCmp(VectorIntrinsicID)) {
auto &Ctx = State.Builder.getContext();
Value *Pred = MetadataAsValue::get(
Ctx, MDString::get(Ctx, CmpInst::getPredicateName(getPredicate())));
Args.push_back(Pred);
}

auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
SmallVector<OperandBundleDef, 1> OpBundles;
if (CI)
CI->getOperandBundlesAsDefs(OpBundles);
Value *VPInst = VBuilder.createSimpleIntrinsic(
VectorIntrinsicID, TysForDecl[0], Args, "vp.call");

CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
if (isa<FPMathOperator>(VPInst))
setFlags(cast<Instruction>(VPInst));

setFlags(V);
if (!VPInst->getType()->isVoidTy())
State.set(this, VPInst);
State.addMetadata(VPInst,
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
} else {
// Use vector version of the intrinsic.
Module *M = State.Builder.GetInsertBlock()->getModule();
Function *VectorF =
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
assert(VectorF && "Can't retrieve vector intrinsic.");

if (!V->getType()->isVoidTy())
State.set(this, V);
State.addMetadata(V, CI);
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
SmallVector<OperandBundleDef, 1> OpBundles;
if (CI)
CI->getOperandBundlesAsDefs(OpBundles);

CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);

setFlags(V);

if (!V->getType()->isVoidTy())
State.set(this, V);
State.addMetadata(V, CI);
}
}

InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
Expand Down Expand Up @@ -1019,6 +1044,18 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
ParamTys.push_back(
ToVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));

if (std::optional<unsigned> FOp =
VPIntrinsic::getFunctionalOpcodeForVP(VectorIntrinsicID)) {
if (VPCmpIntrinsic::isVPCmp(VectorIntrinsicID)) {
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
return Ctx.TTI.getCmpSelInstrCost(FOp.value(), VectorTy, nullptr,
getPredicate(), CostKind,
{TTI::OK_AnyValue, TTI::OP_None},
{TTI::OK_AnyValue, TTI::OP_None}, CtxI);
}
}

// TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
IntrinsicCostAttributes CostAttrs(
Expand Down Expand Up @@ -1430,42 +1467,6 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
}
}

void VPWidenEVLRecipe::execute(VPTransformState &State) {
unsigned Opcode = getOpcode();
// TODO: Support other opcodes
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");

State.setDebugLocFrom(getDebugLoc());

assert(State.get(getOperand(0))->getType()->isVectorTy() &&
"VPWidenEVLRecipe should not be used for scalars");

VPValue *EVL = getEVL();
Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
IRBuilderBase &BuilderIR = State.Builder;
VectorBuilder Builder(BuilderIR);
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());

SmallVector<Value *, 4> Ops;
for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
VPValue *VPOp = getOperand(I);
Ops.push_back(State.get(VPOp));
}

Builder.setMask(Mask).setEVL(EVLArg);
Value *VPInst =
Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
// Currently vp-intrinsics only accept FMF flags.
// TODO: Enable other flags when support is added.
if (isa<FPMathOperator>(VPInst))
setFlags(cast<Instruction>(VPInst));

State.set(this, VPInst);
State.addMetadata(VPInst,
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
Expand All @@ -1475,15 +1476,6 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
printFlags(O);
printOperands(O, SlotTracker);
}

void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
printAsOperand(O, SlotTracker);
O << " = vp." << Instruction::getOpcodeName(getOpcode());
printFlags(O);
printOperands(O, SlotTracker);
}
#endif

void VPWidenCastRecipe::execute(VPTransformState &State) {
Expand Down
11 changes: 8 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1465,10 +1465,15 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
})
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
unsigned Opcode = W->getOpcode();
if (!Instruction::isBinaryOp(Opcode) &&
!Instruction::isUnaryOp(Opcode))
if (Opcode == Instruction::Freeze)
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
auto *I = cast<Instruction>(W->getUnderlyingInstr());
SmallVector<VPValue *> Ops(W->operands());
Ops.push_back(&EVL);
Intrinsic::ID VPID = VPIntrinsic::getForOpcode(W->getOpcode());
return new VPWidenIntrinsicRecipe(
*I, VPID, make_range(Ops.begin(), Ops.end()), I->getType(),
I->getDebugLoc());
})
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
VPValue *NewMask = GetNewMask(Red->getCondOp());
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,6 @@ class VPDef {
VPWidenStoreEVLSC,
VPWidenStoreSC,
VPWidenSC,
VPWidenEVLSC,
VPWidenSelectSC,
VPBlendSC,
VPHistogramSC,
Expand Down
Loading
Loading