Commit 29a14c1 (parent b2d70e8)
[VPlan] Introduce VPInstructionWithType, use instead of VPScalarCast (NFC)
Some opcodes, casts among them, currently require specialized recipes because their result type is not implied by their operands. This leads to duplication from defining multiple full recipes.

This patch introduces a new VPInstructionWithType subclass that additionally stores the result type. The general idea is for opcodes that need to specify a result type to use this generic recipe. The current patch replaces VPScalarCastRecipe with VPInstructionWithType; a similar patch for VPWidenCastRecipe will follow soon.

A few proposed opcodes should also benefit, without the need for workarounds:

* llvm#129508
* llvm#119284
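To illustrate the intended use, a minimal sketch (not part of the patch; Builder, EVL, and Ctx stand for an in-scope VPBuilder, a VPValue *, and an LLVMContext):

  // Create a scalar zero-extend of the EVL to i64. Before this patch the
  // call returned a VPScalarCastRecipe *; it now returns a VPInstruction *
  // backed by a VPInstructionWithType carrying the i64 result type.
  VPInstruction *Cast = Builder.createScalarCast(
      Instruction::ZExt, EVL, IntegerType::get(Ctx, 64), DebugLoc());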

15 files changed (+135 −124 lines)

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h (+3 −3)

@@ -246,10 +246,10 @@ class VPBuilder {
         new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
   }
 
-  VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
-                                       Type *ResultTy, DebugLoc DL) {
+  VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
+                                  Type *ResultTy, DebugLoc DL) {
     return tryInsertInstruction(
-        new VPScalarCastRecipe(Opcode, Op, ResultTy, DL));
+        new VPInstructionWithType(Opcode, Op, ResultTy, DL));
   }
 
   VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+3 −2)

@@ -4502,7 +4502,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
     switch (R.getVPDefID()) {
     case VPDef::VPDerivedIVSC:
     case VPDef::VPScalarIVStepsSC:
-    case VPDef::VPScalarCastSC:
     case VPDef::VPReplicateSC:
     case VPDef::VPInstructionSC:
     case VPDef::VPCanonicalIVPHISC:
@@ -10396,8 +10395,10 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
   assert(all_of(IV->users(),
                 [](const VPUser *U) {
                   return isa<VPScalarIVStepsRecipe>(U) ||
-                         isa<VPScalarCastRecipe>(U) ||
                          isa<VPDerivedIVRecipe>(U) ||
+                         Instruction::isCast(
+                             cast<VPInstruction>(U)->getOpcode()) ||
                          cast<VPInstruction>(U)->getOpcode() ==
                              Instruction::Add;
                 }) &&

llvm/lib/Transforms/Vectorize/VPlan.h (+50 −49)

@@ -533,7 +533,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
   case VPRecipeBase::VPWidenIntOrFpInductionSC:
   case VPRecipeBase::VPWidenPointerInductionSC:
   case VPRecipeBase::VPReductionPHISC:
-  case VPRecipeBase::VPScalarCastSC:
   case VPRecipeBase::VPScalarPHISC:
   case VPRecipeBase::VPPartialReductionSC:
     return true;
@@ -1026,6 +1025,56 @@ class VPInstruction : public VPRecipeWithIRFlags,
   StringRef getName() const { return Name; }
 };
 
+/// A specialization of VPInstruction augmenting it with a dedicated result
+/// type, to be used when the opcode and operands of the VPInstruction don't
+/// directly determine the result type.
+class VPInstructionWithType : public VPInstruction {
+  /// Scalar result type produced by the recipe.
+  Type *ResultTy;
+
+  Value *generate(VPTransformState &State);
+
+public:
+  VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
+                        Type *ResultTy, DebugLoc DL, const Twine &Name = "")
+      : VPInstruction(Opcode, Operands, DL, Name), ResultTy(ResultTy) {}
+
+  static inline bool classof(const VPRecipeBase *R) {
+    auto *VPI = dyn_cast<VPInstruction>(R);
+    return VPI && Instruction::isCast(VPI->getOpcode());
+  }
+
+  static inline bool classof(const VPUser *R) {
+    return isa<VPInstructionWithType>(cast<VPRecipeBase>(R));
+  }
+
+  VPInstruction *clone() override {
+    auto *New =
+        new VPInstructionWithType(getOpcode(), {getOperand(0)}, getResultType(),
+                                  getDebugLoc(), getName());
+    New->setUnderlyingValue(getUnderlyingValue());
+    return New;
+  }
+
+  void execute(VPTransformState &State) override;
+
+  /// Return the cost of this VPInstructionWithType.
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override {
+    return 0;
+  }
+
+  Type *getResultType() const { return ResultTy; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+
+  bool onlyFirstLaneUsed(const VPValue *Op) const override;
+};
+
 /// A recipe to wrap on original IR instruction not to be modified during
 /// execution, execept for PHIs. For PHIs, a single VPValue operand is allowed,
 /// and it is used to add a new incoming value for the single predecessor VPBB.
@@ -1183,54 +1232,6 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
   Type *getResultType() const { return ResultTy; }
 };
 
-/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
-class VPScalarCastRecipe : public VPSingleDefRecipe {
-  Instruction::CastOps Opcode;
-
-  Type *ResultTy;
-
-  Value *generate(VPTransformState &State);
-
-public:
-  VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
-                     DebugLoc DL)
-      : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}, DL), Opcode(Opcode),
-        ResultTy(ResultTy) {}
-
-  ~VPScalarCastRecipe() override = default;
-
-  VPScalarCastRecipe *clone() override {
-    return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy,
-                                  getDebugLoc());
-  }
-
-  VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
-
-  void execute(VPTransformState &State) override;
-
-  /// Return the cost of this VPScalarCastRecipe.
-  InstructionCost computeCost(ElementCount VF,
-                              VPCostContext &Ctx) const override {
-    // TODO: Compute accurate cost after retiring the legacy cost model.
-    return 0;
-  }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  void print(raw_ostream &O, const Twine &Indent,
-             VPSlotTracker &SlotTracker) const override;
-#endif
-
-  /// Returns the result type of the cast.
-  Type *getResultType() const { return ResultTy; }
-
-  bool onlyFirstLaneUsed(const VPValue *Op) const override {
-    // At the moment, only uniform codegen is implemented.
-    assert(is_contained(operands(), Op) &&
-           "Op must be an operand of the recipe");
-    return true;
-  }
-};
-
 /// A recipe for widening vector intrinsics.
 class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
   /// ID of the vector intrinsic to widen.
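Note that VPInstructionWithType deliberately has no VPDef ID of its own: classof identifies the subclass purely by opcode, so any cast VPInstruction is treated as a VPInstructionWithType. A small sketch of what this means for clients (illustrative only; R stands for a VPRecipeBase in scope):

  // Opcode-based classof: a cast VPInstruction dispatches through the
  // subclass, exposing the result type stored on the recipe.
  if (auto *VPI = dyn_cast<VPInstructionWithType>(&R))
    dbgs() << "cast result type: " << *VPI->getResultType() << "\n";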

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+2 −5)

@@ -252,20 +252,17 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
             VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
         return inferScalarType(R->getOperand(0));
       })
+      .Case<VPInstructionWithType, VPWidenIntrinsicRecipe>(
+          [](const auto *R) { return R->getResultType(); })
       .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
             VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
           [this](const auto *R) { return inferScalarTypeForRecipe(R); })
-      .Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
-        return R->getResultType();
-      })
       .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
         // TODO: Use info from interleave group.
         return V->getUnderlyingValue()->getType();
       })
       .Case<VPWidenCastRecipe>(
          [](const VPWidenCastRecipe *R) { return R->getResultType(); })
-      .Case<VPScalarCastRecipe>(
-          [](const VPScalarCastRecipe *R) { return R->getResultType(); })
       .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
         return R->getSCEV()->getType();
       })

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+38 −34)

@@ -148,7 +148,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
   switch (getVPDefID()) {
   case VPDerivedIVSC:
   case VPPredInstPHISC:
-  case VPScalarCastSC:
   case VPReverseVectorPointerSC:
     return false;
   case VPInstructionSC:
@@ -413,7 +412,7 @@ bool VPInstruction::doesGeneratePerAllLanes() const {
 }
 
 bool VPInstruction::canGenerateScalarForFirstLane() const {
-  if (Instruction::isBinaryOp(getOpcode()))
+  if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
     return true;
   if (isSingleScalar() || isVectorToScalar())
     return true;
@@ -961,6 +960,43 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+Value *VPInstructionWithType::generate(VPTransformState &State) {
+  State.setDebugLocFrom(getDebugLoc());
+  assert(vputils::onlyFirstLaneUsed(this) &&
+         "Codegen only implemented for first lane.");
+  switch (getOpcode()) {
+  case Instruction::SExt:
+  case Instruction::ZExt:
+  case Instruction::Trunc: {
+    // Note: SExt/ZExt not used yet.
+    Value *Op = State.get(getOperand(0), VPLane(0));
+    return State.Builder.CreateCast(Instruction::CastOps(getOpcode()), Op,
+                                    ResultTy);
+  }
+  default:
+    llvm_unreachable("opcode not implemented yet");
+  }
+}
+
+void VPInstructionWithType::execute(VPTransformState &State) {
+  State.set(this, generate(State), VPLane(0));
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
+                                  VPSlotTracker &SlotTracker) const {
+  O << Indent << "EMIT ";
+  printAsOperand(O, SlotTracker);
+  O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
+  printOperands(O, SlotTracker);
+  O << " to " << *ResultTy;
+}
+#endif
+
+bool VPInstructionWithType::onlyFirstLaneUsed(const VPValue *Op) const {
+  return vputils::onlyFirstLaneUsed(this);
+}
+
 void VPIRInstruction::execute(VPTransformState &State) {
   assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
          "Only PHINodes can have extra operands");
@@ -2436,38 +2472,6 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
-  State.setDebugLocFrom(getDebugLoc());
-  assert(vputils::onlyFirstLaneUsed(this) &&
-         "Codegen only implemented for first lane.");
-  switch (Opcode) {
-  case Instruction::SExt:
-  case Instruction::ZExt:
-  case Instruction::Trunc: {
-    // Note: SExt/ZExt not used yet.
-    Value *Op = State.get(getOperand(0), VPLane(0));
-    return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
-  }
-  default:
-    llvm_unreachable("opcode not implemented yet");
-  }
-}
-
-void VPScalarCastRecipe ::execute(VPTransformState &State) {
-  State.set(this, generate(State), VPLane(0));
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
-                                VPSlotTracker &SlotTracker) const {
-  O << Indent << "SCALAR-CAST ";
-  printAsOperand(O, SlotTracker);
-  O << " = " << Instruction::getOpcodeName(Opcode) << " ";
-  printOperands(O, SlotTracker);
-  O << " to " << *ResultTy;
-}
-#endif
-
 void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
   State.setDebugLocFrom(getDebugLoc());
   assert(State.Lane && "Branch on Mask works only on single instance.");
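When such a recipe executes for the EVL-based IV increment, generate() emits a single scalar cast for lane 0, so the resulting IR looks roughly like this (illustrative value names, matching the zext-to-i64 pattern in the tests below):

  %cast = zext i32 %evl to i64

The dedicated SCALAR-CAST debug-print prefix is gone; these recipes now print with the generic EMIT prefix used by other VPInstructions, which is what the test updates below check.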

llvm/lib/Transforms/Vectorize/VPlanUtils.cpp (+10 −1)

@@ -113,7 +113,16 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
              all_of(R->operands(),
                     [](VPValue *Op) { return isUniformAcrossVFsAndUFs(Op); });
       })
-      .Case<VPScalarCastRecipe, VPWidenCastRecipe>([](const auto *R) {
+      .Case<VPInstruction>([](const auto *VPI) {
+        return Instruction::isCast(VPI->getOpcode())
+                   ? all_of(VPI->operands(),
+                            [](VPValue *Op) {
+                              return isUniformAcrossVFsAndUFs(Op);
+                            })
+                   : false;
+      })
+      .Case<VPWidenCastRecipe>([](const auto *R) {
         // A cast is uniform according to its operand.
         return isUniformAcrossVFsAndUFs(R->getOperand(0));
       })
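The uniformity query thus keys on the opcode rather than on a dedicated recipe class: a cast VPInstruction is uniform across VFs and UFs exactly when all of its operands are, while any non-cast VPInstruction here yields false. A minimal usage sketch (CastVPI is a hypothetical cast recipe, not from the patch):

  bool Uniform = vputils::isUniformAcrossVFsAndUFs(CastVPI);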

llvm/lib/Transforms/Vectorize/VPlanValue.h (−1)

@@ -332,7 +332,6 @@ class VPDef {
     VPReductionSC,
     VPPartialReductionSC,
     VPReplicateSC,
-    VPScalarCastSC,
     VPScalarIVStepsSC,
     VPVectorPointerSC,
     VPReverseVectorPointerSC,

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp (+2 −2)

@@ -146,8 +146,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
           .Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe,
                 VPScalarPHIRecipe>(
               [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
-          .Case<VPScalarCastRecipe>(
-              [&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); })
+          .Case<VPInstructionWithType>(
+              [&](const auto *S) { return VerifyEVLUse(*S, 0); })
          .Case<VPInstruction>([&](const VPInstruction *I) {
            if (I->getOpcode() != Instruction::Add) {
              errs() << "EVL is used as an operand in non-VPInstruction::Add\n";
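The verifier now routes EVL uses of cast recipes through the VPInstructionWithType case, still requiring the EVL to be operand 0 of the cast, as in the zext of the EVL checked by the tests below.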

llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll (+9 −9)

@@ -34,7 +34,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
 ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -90,7 +90,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
 ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -146,7 +146,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
 ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -202,7 +202,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
 ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -255,7 +255,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTLZ]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
 ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -306,7 +306,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTTZ]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
 ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -359,7 +359,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
 ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -414,7 +414,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
 ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -467,7 +467,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ABS]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
 ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
