Skip to content

Commit 8150ab9

Browse files
[LoopVectorize] Use CodeSize as the cost kind for minsize (llvm#124119)
Functions marked with minsize should aim for minimum code size, so the vectorizer should use CodeSize as the cost kind. The cost we compare should also be the cost of the entire loop: it shouldn't be divided by the number of vector elements, and block costs shouldn't be scaled by the block probability. Possibly we should be doing this for optsize as well, but there are a lot of tests that assume the current behaviour, and the definition of optsize is less clear than that of minsize (for minsize the goal is to "keep the code size of this function as small as possible", whereas for optsize it's "keep the code size of this function low").
1 parent 63caaa2 commit 8150ab9

File tree

5 files changed

+2335
-10
lines changed

5 files changed

+2335
-10
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+13-5
Original file line numberDiff line numberDiff line change
@@ -989,9 +989,10 @@ class LoopVectorizationCostModel {
989989
InterleavedAccessInfo &IAI)
990990
: ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal),
991991
TTI(TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F),
992-
Hints(Hints), InterleaveInfo(IAI), CostKind(TTI::TCK_RecipThroughput) {
992+
Hints(Hints), InterleaveInfo(IAI) {
993993
if (TTI.supportsScalableVectors() || ForceTargetSupportsScalableVectors)
994994
initializeVScaleForTuning();
995+
CostKind = F->hasMinSize() ? TTI::TCK_CodeSize : TTI::TCK_RecipThroughput;
995996
}
996997

997998
/// \return An upper bound for the vectorization factors (both fixed and
@@ -3393,7 +3394,7 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
33933394
// Scale the cost by the probability of executing the predicated blocks.
33943395
// This assumes the predicated block for each vector lane is equally
33953396
// likely.
3396-
ScalarizationCost = ScalarizationCost / getReciprocalPredBlockProb();
3397+
ScalarizationCost = ScalarizationCost / getPredBlockCostDivisor(CostKind);
33973398
}
33983399
InstructionCost SafeDivisorCost = 0;
33993400

@@ -4311,6 +4312,13 @@ bool LoopVectorizationPlanner::isMoreProfitable(
43114312
EstimatedWidthB *= *VScale;
43124313
}
43134314

4315+
// When optimizing for size choose whichever is smaller, which will be the
4316+
// one with the smallest cost for the whole loop. On a tie pick the larger
4317+
// vector width, on the assumption that throughput will be greater.
4318+
if (CM.CostKind == TTI::TCK_CodeSize)
4319+
return CostA < CostB ||
4320+
(CostA == CostB && EstimatedWidthA > EstimatedWidthB);
4321+
43144322
// Assume vscale may be larger than 1 (or the value being tuned for),
43154323
// so that scalable vectorization is slightly favorable over fixed-width
43164324
// vectorization.
@@ -5553,7 +5561,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
55535561
}
55545562

55555563
// Scale the total scalar cost by block probability.
5556-
ScalarCost /= getReciprocalPredBlockProb();
5564+
ScalarCost /= getPredBlockCostDivisor(CostKind);
55575565

55585566
// Compute the discount. A non-negative discount means the vector version
55595567
// of the instruction costs more, and scalarizing would be beneficial.
@@ -5606,7 +5614,7 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
56065614
// cost by the probability of executing it. blockNeedsPredication from
56075615
// Legal is used so as to not include all blocks in tail folded loops.
56085616
if (VF.isScalar() && Legal->blockNeedsPredication(BB))
5609-
BlockCost /= getReciprocalPredBlockProb();
5617+
BlockCost /= getPredBlockCostDivisor(CostKind);
56105618

56115619
Cost += BlockCost;
56125620
}
@@ -5684,7 +5692,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
56845692
// conditional branches, but may not be executed for each vector lane. Scale
56855693
// the cost by the probability of executing the predicated block.
56865694
if (isPredicatedInst(I)) {
5687-
Cost /= getReciprocalPredBlockProb();
5695+
Cost /= getPredBlockCostDivisor(CostKind);
56885696

56895697
// Add the cost of an i1 extract and a branch
56905698
auto *VecI1Ty =

llvm/lib/Transforms/Vectorize/VPlan.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -827,7 +827,7 @@ InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
827827
// For the scalar case, we may not always execute the original predicated
828828
// block. Thus, scale the block's cost by the probability of executing it.
829829
if (VF.isScalar())
830-
return ThenCost / getReciprocalPredBlockProb();
830+
return ThenCost / getPredBlockCostDivisor(Ctx.CostKind);
831831

832832
return ThenCost;
833833
}

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

+11-4
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,20 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
4848
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
4949
int64_t Step);
5050

51-
/// A helper function that returns the reciprocal of the block probability of
52-
/// predicated blocks. If we return X, we are assuming the predicated block
53-
/// will execute once for every X iterations of the loop header.
51+
/// A helper function that returns how much we should divide the cost of a
52+
/// predicated block by. Typically this is the reciprocal of the block
53+
/// probability, i.e. if we return X we are assuming the predicated block will
54+
/// execute once for every X iterations of the loop header so the block should
55+
/// only contribute 1/X of its cost to the total cost calculation, but when
56+
/// optimizing for code size it will just be 1 as code size costs don't depend
57+
/// on execution probabilities.
5458
///
5559
/// TODO: We should use actual block probability here, if available. Currently,
5660
/// we always assume predicated blocks have a 50% chance of executing.
57-
inline unsigned getReciprocalPredBlockProb() { return 2; }
61+
inline unsigned
62+
getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind) {
63+
return CostKind == TTI::TCK_CodeSize ? 1 : 2;
64+
}
5865

5966
/// A range of powers-of-2 vectorization factors with fixed start and
6067
/// adjustable end. The range includes start and excludes end, e.g.,:

0 commit comments

Comments
 (0)