@@ -959,10 +959,6 @@ class LoopVectorizationCostModel {
959
959
return expectedCost (UserVF).isValid ();
960
960
}
961
961
962
- /// \return True if maximizing vector bandwidth is enabled by the target or
963
- /// user options.
964
- bool useMaxBandwidth (TargetTransformInfo::RegisterKind RegKind);
965
-
966
962
/// \return The size (in bits) of the smallest and widest types in the code
967
963
/// that needs to be vectorized. We ignore values that remain scalar such as
968
964
/// 64 bit loop indices.
@@ -3948,14 +3944,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
3948
3944
return FixedScalableVFPair::getNone ();
3949
3945
}
3950
3946
3951
- bool LoopVectorizationCostModel::useMaxBandwidth (
3952
- TargetTransformInfo::RegisterKind RegKind) {
3953
- return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences () == 0 &&
3954
- (TTI.shouldMaximizeVectorBandwidth (RegKind) ||
3955
- (UseWiderVFIfCallVariantsPresent &&
3956
- Legal->hasVectorCallVariants ())));
3957
- }
3958
-
3959
3947
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget (
3960
3948
unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
3961
3949
ElementCount MaxSafeVF, bool FoldTailByMasking) {
@@ -4021,7 +4009,10 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
4021
4009
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
4022
4010
: TargetTransformInfo::RGK_FixedWidthVector;
4023
4011
ElementCount MaxVF = MaxVectorElementCount;
4024
- if (useMaxBandwidth (RegKind)) {
4012
+ if (MaximizeBandwidth ||
4013
+ (MaximizeBandwidth.getNumOccurrences () == 0 &&
4014
+ (TTI.shouldMaximizeVectorBandwidth (RegKind) ||
4015
+ (UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants ())))) {
4025
4016
auto MaxVectorElementCountMaxBW = ElementCount::get (
4026
4017
llvm::bit_floor (WidestRegister.getKnownMinValue () / SmallestType),
4027
4018
ComputeScalableMaxVF);
@@ -4385,24 +4376,15 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4385
4376
for (auto &P : VPlans) {
4386
4377
ArrayRef<ElementCount> VFs (P->vectorFactors ().begin (),
4387
4378
P->vectorFactors ().end ());
4388
-
4389
- SmallVector<LoopVectorizationCostModel::RegisterUsage, 8 > RUs;
4390
- if (CM.useMaxBandwidth (TargetTransformInfo::RGK_ScalableVector) ||
4391
- CM.useMaxBandwidth (TargetTransformInfo::RGK_FixedWidthVector))
4392
- RUs = ::calculateRegisterUsage (*P, VFs, TTI, CM.ValuesToIgnore );
4393
-
4394
- for (unsigned I = 0 ; I < VFs.size (); I++) {
4395
- ElementCount VF = VFs[I];
4379
+ auto RUs = ::calculateRegisterUsage (*P, VFs, TTI, CM.ValuesToIgnore );
4380
+ for (auto [VF, RU] : zip_equal (VFs, RUs)) {
4396
4381
// The cost for scalar VF=1 is already calculated, so ignore it.
4397
4382
if (VF.isScalar ())
4398
4383
continue ;
4399
4384
4400
4385
/// Don't consider the VF if it exceeds the number of registers for the
4401
4386
/// target.
4402
- if (CM.useMaxBandwidth (VF.isScalable ()
4403
- ? TargetTransformInfo::RGK_ScalableVector
4404
- : TargetTransformInfo::RGK_FixedWidthVector) &&
4405
- RUs[I].exceedsMaxNumRegs (TTI))
4387
+ if (RU.exceedsMaxNumRegs (TTI))
4406
4388
continue ;
4407
4389
4408
4390
InstructionCost C = CM.expectedCost (VF);
@@ -4874,7 +4856,7 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
4874
4856
4875
4857
if (VFs[J].isScalar () ||
4876
4858
isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
4877
- VPScalarIVStepsRecipe>(R) ||
4859
+ VPWidenPointerInductionRecipe, VPScalarIVStepsRecipe>(R) ||
4878
4860
(isa<VPInstruction>(R) &&
4879
4861
all_of (cast<VPSingleDefRecipe>(R)->users (),
4880
4862
[&](VPUser *U) {
@@ -7453,14 +7435,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
7453
7435
for (auto &P : VPlans) {
7454
7436
ArrayRef<ElementCount> VFs (P->vectorFactors ().begin (),
7455
7437
P->vectorFactors ().end ());
7456
-
7457
- SmallVector<LoopVectorizationCostModel::RegisterUsage, 8 > RUs;
7458
- if (CM.useMaxBandwidth (TargetTransformInfo::RGK_ScalableVector) ||
7459
- CM.useMaxBandwidth (TargetTransformInfo::RGK_FixedWidthVector))
7460
- RUs = ::calculateRegisterUsage (*P, VFs, TTI, CM.ValuesToIgnore );
7461
-
7462
- for (unsigned I = 0 ; I < VFs.size (); I++) {
7463
- ElementCount VF = VFs[I];
7438
+ auto RUs = ::calculateRegisterUsage (*P, VFs, TTI, CM.ValuesToIgnore );
7439
+ for (auto [VF, RU] : zip_equal (VFs, RUs)) {
7464
7440
if (VF.isScalar ())
7465
7441
continue ;
7466
7442
if (!ForceVectorization && !willGenerateVectors (*P, VF, TTI)) {
@@ -7482,10 +7458,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
7482
7458
InstructionCost Cost = cost (*P, VF);
7483
7459
VectorizationFactor CurrentFactor (VF, Cost, ScalarCost);
7484
7460
7485
- if (CM.useMaxBandwidth (VF.isScalable ()
7486
- ? TargetTransformInfo::RGK_ScalableVector
7487
- : TargetTransformInfo::RGK_FixedWidthVector) &&
7488
- RUs[I].exceedsMaxNumRegs (TTI)) {
7461
+ if (RU.exceedsMaxNumRegs (TTI)) {
7489
7462
LLVM_DEBUG (dbgs () << " LV(REG): Not considering vector loop of width "
7490
7463
<< VF << " because it uses too many registers\n " );
7491
7464
continue ;
0 commit comments