Skip to content

Commit 4ac2721

Browse files
authored
[AArch64] Add costs for ST3 and ST4 instructions, modelled as store(shuffle). (llvm#87934)
This tries to add some costs for the shuffle in a ST3/ST4 instruction, which is represented in LLVM IR as store(interleaving shuffle). In order to detect the store, getShuffleCost gains a CxtI context-instruction argument that is used to check the users of the shuffle. ST3 and ST4 are added; ST2 should be a zip1 shuffle, which will be added in another patch. It should help fix some of the regressions from llvm#87510.
1 parent e280407 commit 4ac2721

22 files changed

+121
-92
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

+13-13
Original file line numberDiff line numberDiff line change
@@ -1291,12 +1291,11 @@ class TargetTransformInfo {
12911291
/// passed through \p Args, which helps improve the cost estimation in some
12921292
/// cases, like in broadcast loads.
12931293
/// NOTE: For subvector extractions Tp represents the source type.
1294-
InstructionCost
1295-
getShuffleCost(ShuffleKind Kind, VectorType *Tp,
1296-
ArrayRef<int> Mask = std::nullopt,
1297-
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1298-
int Index = 0, VectorType *SubTp = nullptr,
1299-
ArrayRef<const Value *> Args = std::nullopt) const;
1294+
InstructionCost getShuffleCost(
1295+
ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
1296+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0,
1297+
VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
1298+
const Instruction *CxtI = nullptr) const;
13001299

13011300
/// Represents a hint about the context in which a cast is used.
13021301
///
@@ -2008,11 +2007,10 @@ class TargetTransformInfo::Concept {
20082007
const SmallBitVector &OpcodeMask,
20092008
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0;
20102009

2011-
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2012-
ArrayRef<int> Mask,
2013-
TTI::TargetCostKind CostKind,
2014-
int Index, VectorType *SubTp,
2015-
ArrayRef<const Value *> Args) = 0;
2010+
virtual InstructionCost
2011+
getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
2012+
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
2013+
ArrayRef<const Value *> Args, const Instruction *CxtI) = 0;
20162014
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
20172015
Type *Src, CastContextHint CCH,
20182016
TTI::TargetCostKind CostKind,
@@ -2647,8 +2645,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
26472645
ArrayRef<int> Mask,
26482646
TTI::TargetCostKind CostKind, int Index,
26492647
VectorType *SubTp,
2650-
ArrayRef<const Value *> Args) override {
2651-
return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
2648+
ArrayRef<const Value *> Args,
2649+
const Instruction *CxtI) override {
2650+
return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
2651+
CxtI);
26522652
}
26532653
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
26542654
CastContextHint CCH,

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

+25-19
Original file line numberDiff line numberDiff line change
@@ -579,10 +579,12 @@ class TargetTransformInfoImplBase {
579579
return InstructionCost::getInvalid();
580580
}
581581

582-
InstructionCost
583-
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
584-
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
585-
ArrayRef<const Value *> Args = std::nullopt) const {
582+
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
583+
ArrayRef<int> Mask,
584+
TTI::TargetCostKind CostKind, int Index,
585+
VectorType *SubTp,
586+
ArrayRef<const Value *> Args = std::nullopt,
587+
const Instruction *CxtI = nullptr) const {
586588
return 1;
587589
}
588590

@@ -1341,13 +1343,13 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
13411343
if (Shuffle->isExtractSubvectorMask(SubIndex))
13421344
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
13431345
Mask, CostKind, SubIndex, VecTy,
1344-
Operands);
1346+
Operands, Shuffle);
13451347

13461348
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
13471349
return TargetTTI->getShuffleCost(
13481350
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
13491351
FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
1350-
Operands);
1352+
Operands, Shuffle);
13511353

13521354
int ReplicationFactor, VF;
13531355
if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
@@ -1374,7 +1376,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
13741376

13751377
return TargetTTI->getShuffleCost(
13761378
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
1377-
AdjustMask, CostKind, 0, nullptr);
1379+
AdjustMask, CostKind, 0, nullptr, {}, Shuffle);
13781380
}
13791381

13801382
// Narrowing shuffle - perform shuffle at original wider width and
@@ -1383,49 +1385,53 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
13831385

13841386
InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
13851387
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
1386-
VecSrcTy, AdjustMask, CostKind, 0, nullptr);
1388+
VecSrcTy, AdjustMask, CostKind, 0, nullptr, {}, Shuffle);
13871389

13881390
SmallVector<int, 16> ExtractMask(Mask.size());
13891391
std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
1390-
return ShuffleCost + TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector,
1391-
VecSrcTy, ExtractMask,
1392-
CostKind, 0, VecTy);
1392+
return ShuffleCost + TargetTTI->getShuffleCost(
1393+
TTI::SK_ExtractSubvector, VecSrcTy,
1394+
ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
13931395
}
13941396

13951397
if (Shuffle->isIdentity())
13961398
return 0;
13971399

13981400
if (Shuffle->isReverse())
13991401
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
1400-
0, nullptr, Operands);
1402+
0, nullptr, Operands, Shuffle);
14011403

14021404
if (Shuffle->isSelect())
14031405
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
1404-
0, nullptr, Operands);
1406+
0, nullptr, Operands, Shuffle);
14051407

14061408
if (Shuffle->isTranspose())
14071409
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
1408-
CostKind, 0, nullptr, Operands);
1410+
CostKind, 0, nullptr, Operands,
1411+
Shuffle);
14091412

14101413
if (Shuffle->isZeroEltSplat())
14111414
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
1412-
CostKind, 0, nullptr, Operands);
1415+
CostKind, 0, nullptr, Operands,
1416+
Shuffle);
14131417

14141418
if (Shuffle->isSingleSource())
14151419
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
1416-
CostKind, 0, nullptr, Operands);
1420+
CostKind, 0, nullptr, Operands,
1421+
Shuffle);
14171422

14181423
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
14191424
return TargetTTI->getShuffleCost(
14201425
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1421-
FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands);
1426+
FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
1427+
Shuffle);
14221428

14231429
if (Shuffle->isSplice(SubIndex))
14241430
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
1425-
SubIndex, nullptr, Operands);
1431+
SubIndex, nullptr, Operands, Shuffle);
14261432

14271433
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
1428-
CostKind, 0, nullptr, Operands);
1434+
CostKind, 0, nullptr, Operands, Shuffle);
14291435
}
14301436
case Instruction::ExtractElement: {
14311437
auto *EEI = dyn_cast<ExtractElementInst>(U);

llvm/include/llvm/CodeGen/BasicTTIImpl.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -1018,7 +1018,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
10181018
ArrayRef<int> Mask,
10191019
TTI::TargetCostKind CostKind, int Index,
10201020
VectorType *SubTp,
1021-
ArrayRef<const Value *> Args = std::nullopt) {
1021+
ArrayRef<const Value *> Args = std::nullopt,
1022+
const Instruction *CxtI = nullptr) {
10221023
switch (improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp)) {
10231024
case TTI::SK_Broadcast:
10241025
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))

llvm/lib/Analysis/TargetTransformInfo.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -916,9 +916,9 @@ InstructionCost TargetTransformInfo::getAltInstrCost(
916916
InstructionCost TargetTransformInfo::getShuffleCost(
917917
ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
918918
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
919-
ArrayRef<const Value *> Args) const {
920-
InstructionCost Cost =
921-
TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind, Index, SubTp, Args);
919+
ArrayRef<const Value *> Args, const Instruction *CxtI) const {
920+
InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind,
921+
Index, SubTp, Args, CxtI);
922922
assert(Cost >= 0 && "TTI should not produce negative costs!");
923923
return Cost;
924924
}

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

+20-8
Original file line numberDiff line numberDiff line change
@@ -3815,18 +3815,29 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) {
38153815
return LegalizationCost * LT.first;
38163816
}
38173817

3818-
InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
3819-
VectorType *Tp,
3820-
ArrayRef<int> Mask,
3821-
TTI::TargetCostKind CostKind,
3822-
int Index, VectorType *SubTp,
3823-
ArrayRef<const Value *> Args) {
3818+
InstructionCost AArch64TTIImpl::getShuffleCost(
3819+
TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
3820+
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
3821+
ArrayRef<const Value *> Args, const Instruction *CxtI) {
38243822
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
3823+
38253824
// If we have a Mask, and the LT is being legalized somehow, split the Mask
38263825
// into smaller vectors and sum the cost of each shuffle.
38273826
if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
38283827
Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
38293828
Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
3829+
3830+
// Check for ST3/ST4 instructions, which are represented in llvm IR as
3831+
// store(interleaving-shuffle). The shuffle cost could potentially be free,
3832+
// but we model it with a cost of LT.first so that ST3/ST4 have a higher
3833+
// cost than just the store.
3834+
if (CxtI && CxtI->hasOneUse() && isa<StoreInst>(*CxtI->user_begin()) &&
3835+
(ShuffleVectorInst::isInterleaveMask(
3836+
Mask, 4, Tp->getElementCount().getKnownMinValue() * 2) ||
3837+
ShuffleVectorInst::isInterleaveMask(
3838+
Mask, 3, Tp->getElementCount().getKnownMinValue() * 2)))
3839+
return LT.first;
3840+
38303841
unsigned TpNumElts = Mask.size();
38313842
unsigned LTNumElts = LT.second.getVectorNumElements();
38323843
unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
@@ -3874,7 +3885,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
38743885
if (NumSources <= 2)
38753886
Cost += getShuffleCost(NumSources <= 1 ? TTI::SK_PermuteSingleSrc
38763887
: TTI::SK_PermuteTwoSrc,
3877-
NTp, NMask, CostKind, 0, nullptr, Args);
3888+
NTp, NMask, CostKind, 0, nullptr, Args, CxtI);
38783889
else if (any_of(enumerate(NMask), [&](const auto &ME) {
38793890
return ME.value() % LTNumElts == ME.index();
38803891
}))
@@ -4055,7 +4066,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
40554066
// Restore optimal kind.
40564067
if (IsExtractSubvector)
40574068
Kind = TTI::SK_ExtractSubvector;
4058-
return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
4069+
return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
4070+
CxtI);
40594071
}
40604072

40614073
static bool containsDecreasingPointers(Loop *TheLoop,

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
393393
ArrayRef<int> Mask,
394394
TTI::TargetCostKind CostKind, int Index,
395395
VectorType *SubTp,
396-
ArrayRef<const Value *> Args = std::nullopt);
396+
ArrayRef<const Value *> Args = std::nullopt,
397+
const Instruction *CxtI = nullptr);
397398

398399
InstructionCost getScalarizationOverhead(VectorType *Ty,
399400
const APInt &DemandedElts,

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -1127,7 +1127,8 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
11271127
VectorType *VT, ArrayRef<int> Mask,
11281128
TTI::TargetCostKind CostKind,
11291129
int Index, VectorType *SubTp,
1130-
ArrayRef<const Value *> Args) {
1130+
ArrayRef<const Value *> Args,
1131+
const Instruction *CxtI) {
11311132
Kind = improveShuffleKindFromMask(Kind, Mask, VT, Index, SubTp);
11321133
// Treat extractsubvector as single op permutation.
11331134
bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
234234
ArrayRef<int> Mask,
235235
TTI::TargetCostKind CostKind, int Index,
236236
VectorType *SubTp,
237-
ArrayRef<const Value *> Args = std::nullopt);
237+
ArrayRef<const Value *> Args = std::nullopt,
238+
const Instruction *CxtI = nullptr);
238239

239240
bool areInlineCompatible(const Function *Caller,
240241
const Function *Callee) const;

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -1212,7 +1212,8 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
12121212
VectorType *Tp, ArrayRef<int> Mask,
12131213
TTI::TargetCostKind CostKind,
12141214
int Index, VectorType *SubTp,
1215-
ArrayRef<const Value *> Args) {
1215+
ArrayRef<const Value *> Args,
1216+
const Instruction *CxtI) {
12161217
Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
12171218
// Treat extractsubvector as single op permutation.
12181219
bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
220220
ArrayRef<int> Mask,
221221
TTI::TargetCostKind CostKind, int Index,
222222
VectorType *SubTp,
223-
ArrayRef<const Value *> Args = std::nullopt);
223+
ArrayRef<const Value *> Args = std::nullopt,
224+
const Instruction *CxtI = nullptr);
224225

225226
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
226227
TTI::ReductionFlags Flags) const;

llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,8 @@ InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
230230
ArrayRef<int> Mask,
231231
TTI::TargetCostKind CostKind,
232232
int Index, Type *SubTp,
233-
ArrayRef<const Value *> Args) {
233+
ArrayRef<const Value *> Args,
234+
const Instruction *CxtI) {
234235
return 1;
235236
}
236237

llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
122122
ArrayRef<int> Mask,
123123
TTI::TargetCostKind CostKind, int Index,
124124
Type *SubTp,
125-
ArrayRef<const Value *> Args = std::nullopt);
125+
ArrayRef<const Value *> Args = std::nullopt,
126+
const Instruction *CxtI = nullptr);
126127
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
127128
const Value *Ptr, bool VariableMask,
128129
Align Alignment,

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -607,7 +607,8 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
607607
ArrayRef<int> Mask,
608608
TTI::TargetCostKind CostKind,
609609
int Index, Type *SubTp,
610-
ArrayRef<const Value *> Args) {
610+
ArrayRef<const Value *> Args,
611+
const Instruction *CxtI) {
611612

612613
InstructionCost CostFactor =
613614
vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
112112
ArrayRef<int> Mask,
113113
TTI::TargetCostKind CostKind, int Index,
114114
Type *SubTp,
115-
ArrayRef<const Value *> Args = std::nullopt);
115+
ArrayRef<const Value *> Args = std::nullopt,
116+
const Instruction *CxtI = nullptr);
116117
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
117118
TTI::CastContextHint CCH,
118119
TTI::TargetCostKind CostKind,

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
329329
VectorType *Tp, ArrayRef<int> Mask,
330330
TTI::TargetCostKind CostKind,
331331
int Index, VectorType *SubTp,
332-
ArrayRef<const Value *> Args) {
332+
ArrayRef<const Value *> Args,
333+
const Instruction *CxtI) {
333334
Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
334335

335336
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
146146
ArrayRef<int> Mask,
147147
TTI::TargetCostKind CostKind, int Index,
148148
VectorType *SubTp,
149-
ArrayRef<const Value *> Args = std::nullopt);
149+
ArrayRef<const Value *> Args = std::nullopt,
150+
const Instruction *CxtI = nullptr);
150151

151152
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
152153
TTI::TargetCostKind CostKind);

0 commit comments

Comments
 (0)