Skip to content

Commit 760eb57

Browse files
committed
Interpret VPWidenPointerInductionRecipe as a scalar instead
1 parent d6837fe commit 760eb57

File tree

4 files changed

+41
-57
lines changed

4 files changed

+41
-57
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 11 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -959,10 +959,6 @@ class LoopVectorizationCostModel {
959959
return expectedCost(UserVF).isValid();
960960
}
961961

962-
/// \return True if maximizing vector bandwidth is enabled by the target or
963-
/// user options.
964-
bool useMaxBandwidth(TargetTransformInfo::RegisterKind RegKind);
965-
966962
/// \return The size (in bits) of the smallest and widest types in the code
967963
/// that needs to be vectorized. We ignore values that remain scalar such as
968964
/// 64 bit loop indices.
@@ -3948,14 +3944,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39483944
return FixedScalableVFPair::getNone();
39493945
}
39503946

3951-
bool LoopVectorizationCostModel::useMaxBandwidth(
3952-
TargetTransformInfo::RegisterKind RegKind) {
3953-
return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
3954-
(TTI.shouldMaximizeVectorBandwidth(RegKind) ||
3955-
(UseWiderVFIfCallVariantsPresent &&
3956-
Legal->hasVectorCallVariants())));
3957-
}
3958-
39593947
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39603948
unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
39613949
ElementCount MaxSafeVF, bool FoldTailByMasking) {
@@ -4021,7 +4009,10 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
40214009
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
40224010
: TargetTransformInfo::RGK_FixedWidthVector;
40234011
ElementCount MaxVF = MaxVectorElementCount;
4024-
if (useMaxBandwidth(RegKind)) {
4012+
if (MaximizeBandwidth ||
4013+
(MaximizeBandwidth.getNumOccurrences() == 0 &&
4014+
(TTI.shouldMaximizeVectorBandwidth(RegKind) ||
4015+
(UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants())))) {
40254016
auto MaxVectorElementCountMaxBW = ElementCount::get(
40264017
llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
40274018
ComputeScalableMaxVF);
@@ -4385,24 +4376,15 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43854376
for (auto &P : VPlans) {
43864377
ArrayRef<ElementCount> VFs(P->vectorFactors().begin(),
43874378
P->vectorFactors().end());
4388-
4389-
SmallVector<LoopVectorizationCostModel::RegisterUsage, 8> RUs;
4390-
if (CM.useMaxBandwidth(TargetTransformInfo::RGK_ScalableVector) ||
4391-
CM.useMaxBandwidth(TargetTransformInfo::RGK_FixedWidthVector))
4392-
RUs = ::calculateRegisterUsage(*P, VFs, TTI, CM.ValuesToIgnore);
4393-
4394-
for (unsigned I = 0; I < VFs.size(); I++) {
4395-
ElementCount VF = VFs[I];
4379+
auto RUs = ::calculateRegisterUsage(*P, VFs, TTI, CM.ValuesToIgnore);
4380+
for (auto [VF, RU] : zip_equal(VFs, RUs)) {
43964381
// The cost for scalar VF=1 is already calculated, so ignore it.
43974382
if (VF.isScalar())
43984383
continue;
43994384

44004385
/// Don't consider the VF if it exceeds the number of registers for the
44014386
/// target.
4402-
if (CM.useMaxBandwidth(VF.isScalable()
4403-
? TargetTransformInfo::RGK_ScalableVector
4404-
: TargetTransformInfo::RGK_FixedWidthVector) &&
4405-
RUs[I].exceedsMaxNumRegs(TTI))
4387+
if (RU.exceedsMaxNumRegs(TTI))
44064388
continue;
44074389

44084390
InstructionCost C = CM.expectedCost(VF);
@@ -4874,7 +4856,7 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
48744856

48754857
if (VFs[J].isScalar() ||
48764858
isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
4877-
VPScalarIVStepsRecipe>(R) ||
4859+
VPWidenPointerInductionRecipe, VPScalarIVStepsRecipe>(R) ||
48784860
(isa<VPInstruction>(R) &&
48794861
all_of(cast<VPSingleDefRecipe>(R)->users(),
48804862
[&](VPUser *U) {
@@ -7453,14 +7435,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
74537435
for (auto &P : VPlans) {
74547436
ArrayRef<ElementCount> VFs(P->vectorFactors().begin(),
74557437
P->vectorFactors().end());
7456-
7457-
SmallVector<LoopVectorizationCostModel::RegisterUsage, 8> RUs;
7458-
if (CM.useMaxBandwidth(TargetTransformInfo::RGK_ScalableVector) ||
7459-
CM.useMaxBandwidth(TargetTransformInfo::RGK_FixedWidthVector))
7460-
RUs = ::calculateRegisterUsage(*P, VFs, TTI, CM.ValuesToIgnore);
7461-
7462-
for (unsigned I = 0; I < VFs.size(); I++) {
7463-
ElementCount VF = VFs[I];
7438+
auto RUs = ::calculateRegisterUsage(*P, VFs, TTI, CM.ValuesToIgnore);
7439+
for (auto [VF, RU] : zip_equal(VFs, RUs)) {
74647440
if (VF.isScalar())
74657441
continue;
74667442
if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) {
@@ -7482,10 +7458,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
74827458
InstructionCost Cost = cost(*P, VF);
74837459
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
74847460

7485-
if (CM.useMaxBandwidth(VF.isScalable()
7486-
? TargetTransformInfo::RGK_ScalableVector
7487-
: TargetTransformInfo::RGK_FixedWidthVector) &&
7488-
RUs[I].exceedsMaxNumRegs(TTI)) {
7461+
if (RU.exceedsMaxNumRegs(TTI)) {
74897462
LLVM_DEBUG(dbgs() << "LV(REG): Not considering vector loop of width "
74907463
<< VF << " because it uses too many registers\n");
74917464
continue;

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33

44
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
55
; CHECK-LABEL: add
6-
; CHECK: LV(REG): Found max usage: 2 item
6+
; CHECK: LV(REG): VF = 8
7+
; CHECK-NEXT: LV(REG): Found max usage: 2 item
78
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
89
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
910
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44

55
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
66
; CHECK-LABEL: add
7-
; ZVFH: LV(REG): Found max usage: 2 item
7+
; ZVFH: LV(REG): VF = 8
8+
; ZVFH-NEXT: LV(REG): Found max usage: 2 item
89
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
910
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
1011
; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item
1112
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
12-
; ZVFHMIN: LV(REG): Found max usage: 2 item
13+
; ZVFHMIN: LV(REG): VF = 8
14+
; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item
1315
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
1416
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
1517
; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,24 +28,28 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea
2828
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::FPRRC, 2 registers
2929
; CHECK-SCALAR-NEXT: LV(REG): Found invariant usage: 1 item
3030
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
31-
; CHECK-LMUL1: LV(REG): Found max usage: 2 item
31+
; CHECK-LMUL1: LV(REG): VF = 2
32+
; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
3233
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
3334
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
3435
; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item
3536
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
36-
; CHECK-LMUL2: LV(REG): Found max usage: 2 item
37+
; CHECK-LMUL2: LV(REG): VF = 4
38+
; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
3739
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
38-
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
40+
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
3941
; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item
4042
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
41-
; CHECK-LMUL4: LV(REG): Found max usage: 2 item
43+
; CHECK-LMUL4: LV(REG): VF = 8
44+
; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
4245
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
43-
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
46+
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
4447
; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item
4548
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
46-
; CHECK-LMUL8: LV(REG): Found max usage: 2 item
49+
; CHECK-LMUL8: LV(REG): VF = 16
50+
; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
4751
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
48-
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
52+
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
4953
; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item
5054
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
5155

@@ -76,17 +80,21 @@ define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) {
7680
; CHECK-SCALAR: LV(REG): VF = 1
7781
; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 1 item
7882
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
79-
; CHECK-LMUL1: LV(REG): Found max usage: 2 item
80-
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
81-
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 1 registers
82-
; CHECK-LMUL2: LV(REG): Found max usage: 2 item
83-
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
83+
; CHECK-LMUL1: LV(REG): VF = 2
84+
; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
85+
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
86+
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
87+
; CHECK-LMUL2: LV(REG): VF = 4
88+
; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
89+
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
8490
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
85-
; CHECK-LMUL4: LV(REG): Found max usage: 2 item
86-
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
91+
; CHECK-LMUL4: LV(REG): VF = 8
92+
; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
93+
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
8794
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
88-
; CHECK-LMUL8: LV(REG): Found max usage: 2 item
89-
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
95+
; CHECK-LMUL8: LV(REG): VF = 16
96+
; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
97+
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
9098
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
9199
entry:
92100
%cmp3 = icmp sgt i32 %n, 0

0 commit comments

Comments
 (0)