Skip to content
Merged
35 changes: 20 additions & 15 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4363,7 +4363,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
[](const auto *R) { return Instruction::Store; })
.Case<VPWidenLoadRecipe>(
[](const auto *R) { return Instruction::Load; })
.Case<VPWidenCallRecipe>(
.Case<VPWidenCallRecipe, VPWidenIntrinsicRecipe>(
[](const auto *R) { return Instruction::Call; })
.Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCastRecipe>(
Expand All @@ -4387,12 +4387,18 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
OS << (Pair.second == Subset.front().second ? "" : ", ") << Pair.second;
OS << "):";
if (Opcode == Instruction::Call) {
auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R);
Function *CalledFn =
WidenCall ? WidenCall->getCalledScalarFunction()
: cast<Function>(R->getOperand(R->getNumOperands() - 1)
->getLiveInIRValue());
OS << " call to " << CalledFn->getName();
StringRef Name = "";
if (auto *Int = dyn_cast<VPWidenIntrinsicRecipe>(R)) {
Name = Int->getIntrinsicName();
} else {
auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R);
Function *CalledFn =
WidenCall ? WidenCall->getCalledScalarFunction()
: cast<Function>(R->getOperand(R->getNumOperands() - 1)
->getLiveInIRValue());
Name = CalledFn->getName();
}
OS << " call to " << Name;
} else
OS << " " << Instruction::getOpcodeName(Opcode);
reportVectorizationInfo(OutString, "InvalidCost", ORE, OrigLoop, nullptr,
Expand Down Expand Up @@ -4443,6 +4449,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPWidenCanonicalIVSC:
case VPDef::VPWidenCastSC:
case VPDef::VPWidenGEPSC:
case VPDef::VPWidenIntrinsicSC:
case VPDef::VPWidenSC:
case VPDef::VPWidenSelectSC:
case VPDef::VPBlendSC:
Expand Down Expand Up @@ -8266,7 +8273,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
return new VPBlendRecipe(Phi, OperandsWithMask);
}

VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
ArrayRef<VPValue *> Operands,
VFRange &Range) {
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
Expand All @@ -8286,7 +8293,6 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
return nullptr;

SmallVector<VPValue *, 4> Ops(Operands.take_front(CI->arg_size()));
Ops.push_back(Operands.back());

// Is it beneficial to perform intrinsic call compared to lib call?
bool ShouldUseVectorIntrinsic =
Expand All @@ -8297,8 +8303,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
},
Range);
if (ShouldUseVectorIntrinsic)
return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()), ID,
CI->getDebugLoc());
return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(),
CI->getDebugLoc());

Function *Variant = nullptr;
std::optional<unsigned> MaskPos;
Expand Down Expand Up @@ -8350,9 +8356,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
Ops.insert(Ops.begin() + *MaskPos, Mask);
}

return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()),
Intrinsic::not_intrinsic, CI->getDebugLoc(),
Variant);
Ops.push_back(Operands.back());
return new VPWidenCallRecipe(CI, Variant, Ops, CI->getDebugLoc());
}

return nullptr;
Expand Down Expand Up @@ -9225,7 +9230,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
RecurrenceDescriptor::isFMulAddIntrinsic(CurrentLinkI) &&
"Expected instruction to be a call to the llvm.fmuladd intrinsic");
assert(((MinVF.isScalar() && isa<VPReplicateRecipe>(CurrentLink)) ||
isa<VPWidenCallRecipe>(CurrentLink)) &&
isa<VPWidenIntrinsicRecipe>(CurrentLink)) &&
CurrentLink->getOperand(2) == PreviousLink &&
"expected a call where the previous link is the added operand");

Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ class VPRecipeBuilder {
VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands);

/// Handle call instructions. If \p CI can be widened for \p Range.Start,
/// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same
/// decision from \p Range.Start to \p Range.End.
VPWidenCallRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
/// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
/// decreased to ensure same decision from \p Range.Start to \p Range.End.
VPSingleDefRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
VFRange &Range);

/// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
Expand Down
81 changes: 66 additions & 15 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenEVLSC:
case VPRecipeBase::VPWidenSelectSC:
Expand Down Expand Up @@ -1613,25 +1614,75 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
}
};

/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPRecipeWithIRFlags {
/// ID of the vector intrinsic to call when widening the call. If set the
/// Intrinsic::not_intrinsic, a library call will be used instead.
/// A recipe for widening vector intrinsics.
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
/// ID of the vector intrinsic to widen.
Intrinsic::ID VectorIntrinsicID;
/// If this recipe represents a library call, Variant stores a pointer to
/// the chosen function. There is a 1:1 mapping between a given VF and the
/// chosen vectorized variant, so there will be a different vplan for each
/// VF with a valid variant.

/// Scalar type of the result produced by the intrinsic.
Type *ResultTy;

bool MayWriteToMemory;
bool MayReadFromMemory;
bool MayHaveSideEffects;

public:
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
ArrayRef<VPValue *> CallArguments, Type *Ty,
DebugLoc DL = {})
: VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
MayWriteToMemory(CI.mayWriteToMemory()),
MayReadFromMemory(CI.mayReadFromMemory()),
MayHaveSideEffects(CI.mayHaveSideEffects()) {}

~VPWidenIntrinsicRecipe() override = default;

VPWidenIntrinsicRecipe *clone() override {
return new VPWidenIntrinsicRecipe(*cast<CallInst>(getUnderlyingValue()),
VectorIntrinsicID, {op_begin(), op_end()},
ResultTy, getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)

/// Produce a widened version of the vector intrinsic.
void execute(VPTransformState &State) override;

/// Return the cost of this vector intrinsic.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

Type *getResultTy() const { return ResultTy; }

/// Return to name of the intrinsic as string.
StringRef getIntrinsicName() const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
bool mayWriteToMemory() const { return MayWriteToMemory; }

bool mayReadFromMemory() const { return MayReadFromMemory; }

bool mayHaveSideEffects() const { return MayHaveSideEffects; }
};

/// A recipe for widening Call instructions using library calls.
class VPWidenCallRecipe : public VPRecipeWithIRFlags {
/// Variant stores a pointer to the chosen function. There is a 1:1 mapping
/// between a given VF and the chosen vectorized variant, so there will be a
/// different VPlan for each VF with a valid variant.
Function *Variant;

public:
template <typename IterT>
VPWidenCallRecipe(Value *UV, iterator_range<IterT> CallArguments,
Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
Function *Variant = nullptr)
VPWidenCallRecipe(Value *UV, Function *Variant,
ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
: VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
*cast<Instruction>(UV)),
VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {
Variant(Variant) {
assert(
isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
"last operand must be the called function");
Expand All @@ -1640,8 +1691,8 @@ class VPWidenCallRecipe : public VPRecipeWithIRFlags {
~VPWidenCallRecipe() override = default;

VPWidenCallRecipe *clone() override {
return new VPWidenCallRecipe(getUnderlyingValue(), operands(),
VectorIntrinsicID, getDebugLoc(), Variant);
return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
{op_begin(), op_end()}, getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
.Case<VPWidenIntrinsicRecipe>(
[](const VPWidenIntrinsicRecipe *R) { return R->getResultTy(); })
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
// TODO: Use info from interleave group.
return V->getUnderlyingValue()->getType();
Expand Down
Loading
Loading