Commit bcc59ca

[DAG] shouldReduceLoadWidth - add optional<unsigned> byte offset argument
Based on feedback on #129695: we need to be able to determine the load offset of smaller loads when deciding whether a multiple-use load should be split (in particular for AVX subvector extractions). This patch adds a std::optional<unsigned> ByteOffset argument to shouldReduceLoadWidth calls where the constant offset is known, so that targets can make use of it in future patches.
1 parent c607180
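As a rough sketch of what the new argument enables (a hypothetical MyTargetLowering override, not code from this commit), a target could use the known byte offset to accept only splits that keep the narrow access naturally aligned:

bool MyTargetLowering::shouldReduceLoadWidth(
    SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
    std::optional<unsigned> ByteOffset) const {
  // Keep the generic heuristic: don't split a multi-use wide vector load.
  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT, ByteOffset))
    return false;
  // Hypothetical policy: when the combiner knows where the narrow load would
  // start, only narrow if that offset keeps the access naturally aligned
  // (fixed-width types assumed for this sketch).
  if (ByteOffset) {
    Align NarrowAlign =
        commonAlignment(cast<LoadSDNode>(Load)->getAlign(), *ByteOffset);
    return NarrowAlign.value() >= NewVT.getStoreSize().getFixedValue();
  }
  return true;
}

The offset is deliberately optional: call sites that cannot prove a constant offset (see scalarizeExtractedVectorLoad below) pass std::nullopt, so an override has to handle both cases.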

File tree: 12 files changed, +56 -42 lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 2 deletions
@@ -1824,8 +1824,10 @@ class TargetLoweringBase {

   /// Return true if it is profitable to reduce a load to a smaller type.
   /// Example: (i16 (trunc (i32 (load x))) -> i16 load x
-  virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
-                                     EVT NewVT) const {
+  /// Example: (i16 (trunc (srl (i32 (load x)), 16)) -> i16 load x+2
+  virtual bool shouldReduceLoadWidth(
+      SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+      std::optional<unsigned> ByteOffset = std::nullopt) const {
     // By default, assume that it is cheaper to extract a subvector from a wide
     // vector load rather than creating multiple narrow vector loads.
     if (NewVT.isVector() && !SDValue(Load, 0).hasOneUse())

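The new doc-comment example can be sanity-checked outside LLVM with a small standalone C++ program (assumes a little-endian host, matching the x+2 in the comment):

// Standalone check of the documented equivalence (plain C++, not LLVM code):
// (i16 (trunc (srl (i32 (load x)), 16))) reads the same bytes as an i16 load
// at x+2 on a little-endian machine, so the candidate narrow load has
// ByteOffset = 16 / 8 = 2.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Mem[4] = {0x11, 0x22, 0x33, 0x44}; // bytes at address x
  uint32_t Wide;
  std::memcpy(&Wide, Mem, sizeof(Wide));                 // i32 load x
  uint16_t ViaShift = static_cast<uint16_t>(Wide >> 16); // trunc(srl(..., 16))
  uint16_t Narrow;
  std::memcpy(&Narrow, Mem + 2, sizeof(Narrow));         // i16 load x+2
  assert(ViaShift == Narrow); // holds on little-endian hosts
  return 0;
}
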
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 5 deletions
@@ -6690,7 +6690,7 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
     return false;

-  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
+  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
     return false;

   return true;
@@ -6701,9 +6701,11 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
                                     unsigned ShAmt) {
   if (!LDST)
     return false;
+
   // Only allow byte offsets.
   if (ShAmt % 8)
     return false;
+  const unsigned ByteShAmt = ShAmt / 8;

   // Do not generate loads of non-round integer types since these can
   // be expensive (and would be wrong if the type is not byte sized).
@@ -6727,8 +6729,6 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,

   // Ensure that this isn't going to produce an unsupported memory access.
   if (ShAmt) {
-    assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
-    const unsigned ByteShAmt = ShAmt / 8;
     const Align LDSTAlign = LDST->getAlign();
     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
@@ -6768,7 +6768,7 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
       return false;

-    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
+    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
       return false;
   } else {
     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
@@ -25268,7 +25268,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);

   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
+  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT,
+                                 /*ByteOffset=*/Offset.getFixedValue()))
     return SDValue();

   // The narrow load will be offset from the base address of the old load if

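With these changes the combiner tells the target where the narrow access would begin: isLegalNarrowLdSt forwards ShAmt / 8, and narrowExtractedVectorLoad forwards the extracted subvector's byte offset. For the AVX subvector-extraction case mentioned in the commit message, that offset is just the subvector's store size times the subvector index; a standalone sketch with illustrative values (not taken from any test in this commit):

// Rough arithmetic mirroring VT.getStoreSize() * (Index / NumElts) above:
// extracting the upper v4f32 half (element index 4) of a v8f32 load.
#include <cassert>

int main() {
  unsigned NumElts = 4;      // elements in the extracted subvector type VT (v4f32)
  unsigned VTStoreSize = 16; // store size of VT in bytes
  unsigned Index = 4;        // index of the first extracted element
  unsigned ByteOffset = VTStoreSize * (Index / NumElts);
  assert(ByteOffset == 16);  // the narrow load starts 16 bytes into the v8f32
  return 0;
}
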
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 12 additions & 8 deletions
@@ -4726,8 +4726,6 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       // for the narrowed load.
       for (unsigned width = 8; width < origWidth; width *= 2) {
         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
-        if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
-          continue;
         APInt newMask = APInt::getLowBitsSet(maskWidth, width);
         // Avoid accessing any padding here for now (we could use memWidth
         // instead of origWidth here otherwise).
@@ -4737,8 +4735,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           unsigned ptrOffset =
               Layout.isLittleEndian() ? offset : memWidth - width - offset;
           unsigned IsFast = 0;
+          assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
           Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
-          if (allowsMemoryAccess(
+          if (shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT,
+                                    ptrOffset / 8) &&
+              allowsMemoryAccess(
                   *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
                   NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
               IsFast) {
@@ -12175,24 +12176,27 @@ SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,

   ISD::LoadExtType ExtTy =
       ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
-  if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
-      !shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+  if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
     return SDValue();

+  std::optional<unsigned> ByteOffset;
   Align Alignment = OriginalLoad->getAlign();
   MachinePointerInfo MPI;
   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
     int Elt = ConstEltNo->getZExtValue();
-    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
-    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
-    Alignment = commonAlignment(Alignment, PtrOff);
+    ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
+    MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
+    Alignment = commonAlignment(Alignment, *ByteOffset);
   } else {
     // Discard the pointer info except the address space because the memory
     // operand can't represent this new access since the offset is variable.
     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
     Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
   }

+  if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
+    return SDValue();
+
   unsigned IsFast = 0;
   if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
                           OriginalLoad->getAddressSpace(), Alignment,

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 4 deletions
@@ -16519,11 +16519,12 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   return false;
 }

-bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
-                                                  ISD::LoadExtType ExtTy,
-                                                  EVT NewVT) const {
+bool AArch64TargetLowering::shouldReduceLoadWidth(
+    SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+    std::optional<unsigned> ByteOffset) const {
   // TODO: This may be worth removing. Check regression tests for diffs.
-  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
+  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT,
+                                                 ByteOffset))
     return false;

   // If we're reducing the load width in order to avoid having to use an extra

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 2 deletions
@@ -695,8 +695,8 @@ class AArch64TargetLowering : public TargetLowering {
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;

-  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
-                             EVT NewVT) const override;
+  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+                             std::optional<unsigned> ByteOffset) const override;

   bool shouldRemoveRedundantExtend(SDValue Op) const override;

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 4 additions & 4 deletions
@@ -819,11 +819,11 @@ bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
   return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
 }

-bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
-                                                 ISD::LoadExtType ExtTy,
-                                                 EVT NewVT) const {
+bool AMDGPUTargetLowering::shouldReduceLoadWidth(
+    SDNode *N, ISD::LoadExtType ExtTy, EVT NewVT,
+    std::optional<unsigned> ByteOffset) const {
   // TODO: This may be worth removing. Check regression tests for diffs.
-  if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT))
+  if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT, ByteOffset))
     return false;

   unsigned NewSize = NewVT.getStoreSizeInBits();

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 2 additions & 3 deletions
@@ -215,9 +215,8 @@ class AMDGPUTargetLowering : public TargetLowering {
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
   bool ShouldShrinkFPConstant(EVT VT) const override;
-  bool shouldReduceLoadWidth(SDNode *Load,
-                             ISD::LoadExtType ExtType,
-                             EVT ExtVT) const override;
+  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtType, EVT ExtVT,
+                             std::optional<unsigned> ByteOffset) const override;

   bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
                                const MachineMemOperand &MMO) const final;

llvm/lib/Target/BPF/BPFISelLowering.h

Lines changed: 3 additions & 2 deletions
@@ -135,8 +135,9 @@ class BPFTargetLowering : public TargetLowering {
   //   ctx = ctx + reloc_offset
   //   ... (*(u8 *)(ctx + 1)) & 0x80 ...
   // which will be rejected by the verifier.
-  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
-                             EVT NewVT) const override {
+  bool
+  shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+                        std::optional<unsigned> ByteOffset) const override {
     return false;
   }

llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

Lines changed: 7 additions & 5 deletions
@@ -3821,19 +3821,21 @@ HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
   return TargetLowering::findRepresentativeClass(TRI, VT);
 }

-bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
-      ISD::LoadExtType ExtTy, EVT NewVT) const {
+bool HexagonTargetLowering::shouldReduceLoadWidth(
+    SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+    std::optional<unsigned> ByteOffset) const {
   // TODO: This may be worth removing. Check regression tests for diffs.
-  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
+  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT,
+                                                 ByteOffset))
     return false;

   auto *L = cast<LoadSDNode>(Load);
-  std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
+  std::pair<SDValue, int> BO = getBaseAndOffset(L->getBasePtr());
   // Small-data object, do not shrink.
   if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
     return false;
   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
-    auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
+    auto &HTM = static_cast<const HexagonTargetMachine &>(getTargetMachine());
     const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
     return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
   }

llvm/lib/Target/Hexagon/HexagonISelLowering.h

Lines changed: 2 additions & 2 deletions
@@ -342,8 +342,8 @@ class HexagonTargetLowering : public TargetLowering {
   SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
       const override;

-  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
-                             EVT NewVT) const override;
+  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+                             std::optional<unsigned> ByteOffset) const override;

   void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                      SDNode *Node) const override;

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 3 deletions
@@ -3258,9 +3258,9 @@ bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
   return false;
 }

-bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
-                                              ISD::LoadExtType ExtTy,
-                                              EVT NewVT) const {
+bool X86TargetLowering::shouldReduceLoadWidth(
+    SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+    std::optional<unsigned> ByteOffset) const {
   assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");

   // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 6 additions & 2 deletions
@@ -1346,6 +1346,9 @@ namespace llvm {
         Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);

       return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
+             Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD ||
+             (Op.getOpcode() == ISD::LOAD &&
+              getTargetConstantFromLoad(cast<LoadSDNode>(Op))) ||
              TargetLowering::isTargetCanonicalConstantNode(Op);
     }

@@ -1501,8 +1504,9 @@ namespace llvm {

     /// Return true if we believe it is correct and profitable to reduce the
     /// load node to a smaller type.
-    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
-                               EVT NewVT) const override;
+    bool
+    shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
+                          std::optional<unsigned> ByteOffset) const override;

     /// Return true if the specified scalar FP type is computed in an SSE
     /// register, not on the X87 floating point stack.
