@@ -962,17 +962,21 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
962962void VPWidenIntrinsicRecipe::execute (VPTransformState &State) {
963963 assert (State.VF .isVector () && " not widening" );
964964 State.setDebugLocFrom (getDebugLoc ());
965-
965+ Intrinsic::ID FuncID =
966+ VPIntrinsic::isVPIntrinsic (VectorIntrinsicID)
967+ ? VPIntrinsic::getFunctionalIntrinsicIDForVP (VectorIntrinsicID)
968+ .value ()
969+ : VectorIntrinsicID;
966970 SmallVector<Type *, 2 > TysForDecl;
967971 // Add return type if intrinsic is overloaded on it.
968- if (isVectorIntrinsicWithOverloadTypeAtArg (VectorIntrinsicID , -1 ))
972+ if (isVectorIntrinsicWithOverloadTypeAtArg (FuncID , -1 ))
969973 TysForDecl.push_back (VectorType::get (getResultType (), State.VF ));
970974 SmallVector<Value *, 4 > Args;
971- for (const auto &I : enumerate(operands ())) {
975+ for (const auto &I : enumerate(arg_operands ())) {
972976 // Some intrinsics have a scalar argument - don't replace it with a
973977 // vector.
974978 Value *Arg;
975- if (isVectorIntrinsicWithScalarOpAtArg (VectorIntrinsicID , I.index ()))
979+ if (isVectorIntrinsicWithScalarOpAtArg (FuncID , I.index ()))
976980 Arg = State.get (I.value (), VPLane (0 ));
977981 else
978982 Arg = State.get (I.value (), onlyFirstLaneUsed (I.value ()));
@@ -981,18 +985,34 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
981985 Args.push_back (Arg);
982986 }
983987
984- // Use vector version of the intrinsic.
985- Module *M = State.Builder .GetInsertBlock ()->getModule ();
986- Function *VectorF =
987- Intrinsic::getOrInsertDeclaration (M, VectorIntrinsicID, TysForDecl);
988- assert (VectorF && " Can't retrieve vector intrinsic." );
989-
988+ CallInst *V = nullptr ;
990989 auto *CI = cast_or_null<CallInst>(getUnderlyingValue ());
991990 SmallVector<OperandBundleDef, 1 > OpBundles;
992991 if (CI)
993992 CI->getOperandBundlesAsDefs (OpBundles);
994993
995- CallInst *V = State.Builder .CreateCall (VectorF, Args, OpBundles);
994+ if (VPIntrinsic::isVPIntrinsic (VectorIntrinsicID)) {
995+ // Use vector version of the vector predicate Intrinsic
996+ IRBuilderBase &BuilderIR = State.Builder ;
997+ VectorBuilder VBuilder (BuilderIR);
998+ Value *Mask = BuilderIR.CreateVectorSplat (State.VF , BuilderIR.getTrue ());
999+ // VPValue EVL = getOperand(getNumOperands() - 1);
1000+ VBuilder.setMask (Mask).setEVL (
1001+ State.get (getOperand (getNumOperands () - 1 ), /* NeedsScalar=*/ true ));
1002+ auto *TyReturn = VectorType::get (getResultType (), State.VF );
1003+ Value *VPInst = VBuilder.createSimpleIntrinsic (VectorIntrinsicID, TyReturn,
1004+ Args, " vp.call" );
1005+ if (VPInst) {
1006+ V = cast<CallInst>(VPInst);
1007+ }
1008+ } else {
1009+ // Use vector version of the intrinsic.
1010+ Module *M = State.Builder .GetInsertBlock ()->getModule ();
1011+ Function *VectorF =
1012+ Intrinsic::getOrInsertDeclaration (M, VectorIntrinsicID, TysForDecl);
1013+ assert (VectorF && " Can't retrieve vector intrinsic." );
1014+ V = State.Builder .CreateCall (VectorF, Args, OpBundles);
1015+ }
9961016
9971017 setFlags (V);
9981018
@@ -1011,7 +1031,7 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
10111031 // clear Arguments.
10121032 // TODO: Rework TTI interface to be independent of concrete IR values.
10131033 SmallVector<const Value *> Arguments;
1014- for (const auto &[Idx, Op] : enumerate(operands ())) {
1034+ for (const auto &[Idx, Op] : enumerate(arg_operands ())) {
10151035 auto *V = Op->getUnderlyingValue ();
10161036 if (!V) {
10171037 if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue ())) {
@@ -1042,6 +1062,14 @@ StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
10421062 return Intrinsic::getBaseName (VectorIntrinsicID);
10431063}
10441064
1065+ bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed (const VPValue *Op) const {
1066+ assert (is_contained (operands (), Op) && " Op must be an operand of the recipe" );
1067+ // Vector predication intrinsics only demand the first lane of the last
1068+ // operand (the EVL operand); for any other operand this returns false.
1069+ return VPIntrinsic::isVPIntrinsic (VectorIntrinsicID) &&
1070+ Op == getOperand (getNumOperands () - 1 );
1071+ }
1072+
10451073#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
10461074void VPWidenIntrinsicRecipe::print (raw_ostream &O, const Twine &Indent,
10471075 VPSlotTracker &SlotTracker) const {
0 commit comments