@@ -883,6 +883,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
883883 case VPRecipeBase::VPScalarIVStepsSC:
884884 case VPRecipeBase::VPVectorPointerSC:
885885 case VPRecipeBase::VPWidenCallSC:
886+ case VPRecipeBase::VPWidenCallEVLSC:
886887 case VPRecipeBase::VPWidenCanonicalIVSC:
887888 case VPRecipeBase::VPWidenCastSC:
888889 case VPRecipeBase::VPWidenGEPSC:
@@ -1610,6 +1611,7 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
16101611
16111612/// A recipe for widening Call instructions.
16121613class VPWidenCallRecipe : public VPSingleDefRecipe {
1614+ public:
16131615 /// ID of the vector intrinsic to call when widening the call. If set to
16141616 /// Intrinsic::not_intrinsic, a library call will be used instead.
16151617 Intrinsic::ID VectorIntrinsicID;
@@ -1619,26 +1621,48 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
16191621 // / VF with a valid variant.
16201622 Function *Variant;
16211623
1622- public :
1624+ protected :
16231625 template <typename IterT>
1624- VPWidenCallRecipe (Value *UV, iterator_range<IterT> CallArguments,
1626+ VPWidenCallRecipe (unsigned VPDefOpcode, Value *UV,
1627+ iterator_range<IterT> CallArguments,
16251628 Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
16261629 Function *Variant = nullptr )
1627- : VPSingleDefRecipe(VPDef::VPWidenCallSC , CallArguments, UV, DL),
1630+ : VPSingleDefRecipe(VPDefOpcode , CallArguments, UV, DL),
16281631 VectorIntrinsicID (VectorIntrinsicID), Variant(Variant) {
16291632 assert (
16301633 isa<Function>(getOperand (getNumOperands () - 1 )->getLiveInIRValue ()) &&
16311634 " last operand must be the called function" );
16321635 }
16331636
1637+ public:
1638+ template <typename IterT>
1639+ VPWidenCallRecipe (Value *UV, iterator_range<IterT> CallArguments,
1640+ Intrinsic::ID VectorIntrinsicID, DebugLoc DL)
1641+ : VPWidenCallRecipe(VPDef::VPWidenCallSC, UV, CallArguments,
1642+ VectorIntrinsicID, DL) {}
1643+
1644+ template <typename IterT>
1645+ VPWidenCallRecipe (Value *UV, iterator_range<IterT> CallArguments,
1646+ Intrinsic::ID VectorIntrinsicID, DebugLoc DL,
1647+ Function *Variant)
1648+ : VPWidenCallRecipe(VPDef::VPWidenCallSC, UV, CallArguments,
1649+ VectorIntrinsicID, DL, Variant) {}
1650+
16341651 ~VPWidenCallRecipe () override = default ;
16351652
16361653 VPWidenCallRecipe *clone () override {
16371654 return new VPWidenCallRecipe (getUnderlyingValue (), operands (),
16381655 VectorIntrinsicID, getDebugLoc (), Variant);
16391656 }
1657+ static inline bool classof (const VPRecipeBase *R) {
1658+ return R->getVPDefID () == VPRecipeBase::VPWidenCallSC ||
1659+ R->getVPDefID () == VPRecipeBase::VPWidenCallEVLSC;
1660+ }
16401661
1641- VP_CLASSOF_IMPL (VPDef::VPWidenCallSC)
1662+ static inline bool classof (const VPUser *U) {
1663+ auto *R = dyn_cast<VPRecipeBase>(U);
1664+ return R && classof (R);
1665+ }
16421666
16431667 // / Produce a widened version of the call instruction.
16441668 void execute (VPTransformState &State) override ;
@@ -1665,6 +1689,74 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
16651689#endif
16661690};
16671691
1692+ // / A recipe for widening Call instructions with vector-predication intrinsics
1693+ // / with explicit vector length (EVL).
1694+ class VPWidenCallEVLRecipe : public VPWidenCallRecipe {
1695+ // using VPRecipeWithIRFlags::transferFlags;
1696+ // Intrinsic::ID VectorIntrinsicID;
1697+
1698+ public:
1699+ template <typename IterT>
1700+ VPWidenCallEVLRecipe (Value *UV, iterator_range<IterT> CallArguments,
1701+ Intrinsic::ID VectorIntrinsicID, DebugLoc DL,
1702+ VPValue &EVL)
1703+ : VPWidenCallRecipe(VPDef::VPWidenCallEVLSC, UV, CallArguments,
1704+ VectorIntrinsicID, DL) {
1705+ addOperand (&EVL);
1706+ }
1707+
1708+ VPWidenCallEVLRecipe (VPWidenCallRecipe &W, Intrinsic::ID VectorIntrinsicID,
1709+ DebugLoc DL, VPValue &EVL)
1710+ : VPWidenCallEVLRecipe(W.getUnderlyingValue(), W.operands(),
1711+ VectorIntrinsicID, DL, EVL) {}
1712+
1713+ ~VPWidenCallEVLRecipe () override = default ;
1714+
1715+ VPWidenCallEVLRecipe *clone () override {
1716+ llvm_unreachable (" VPWidenCallEVLRecipe cannot be cloned" );
1717+ return nullptr ;
1718+ }
1719+
1720+ VPValue *getEVL () { return getOperand (getNumOperands () - 1 ); }
1721+ const VPValue *getEVL () const { return getOperand (getNumOperands () - 1 ); }
1722+
1723+ // Intrinsic::ID getVectorIntrinsicID() {
1724+ // return VectorIntrinsicID;
1725+ // }
1726+
1727+ VP_CLASSOF_IMPL (VPDef::VPWidenCallEVLSC)
1728+
1729+ InstructionCost computeCost (ElementCount VF, VPCostContext &Ctx) const final ;
1730+
1731+ Function *getCalledScalarFunction () const {
1732+ return cast<Function>(getOperand (getNumOperands () - 2 )->getLiveInIRValue ());
1733+ }
1734+
1735+ operand_range arg_operands () {
1736+ return make_range (op_begin (), op_begin () + getNumOperands () - 2 );
1737+ }
1738+ const_operand_range arg_operands () const {
1739+ return make_range (op_begin (), op_begin () + getNumOperands () - 2 );
1740+ }
1741+ // / Produce a widened version of the call instruction.
1742+ void execute (VPTransformState &State) final ;
1743+
1744+ // / Returns true if the recipe only uses the first lane of operand \p Op.
1745+ bool onlyFirstLaneUsed (const VPValue *Op) const override {
1746+ assert (is_contained (operands (), Op) &&
1747+ " Op must be an operand of the recipe" );
1748+ // EVL in that recipe is always the last operand, thus any use before means
1749+ // the VPValue should be vectorized.
1750+ return getEVL () == Op;
1751+ }
1752+
1753+ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1754+ // / Print the recipe.
1755+ void print (raw_ostream &O, const Twine &Indent,
1756+ VPSlotTracker &SlotTracker) const final ;
1757+ #endif
1758+ };
1759+
16681760/// A recipe representing a sequence of load -> update -> store as part of
16691761/// a histogram operation. This means there may be aliasing between vector
16701762/// lanes, which is handled by the llvm.experimental.vector.histogram family
0 commit comments