[DAG] shouldReduceLoadWidth - add optional<unsigned> byte offset argument #136723

Merged · 7 commits · Apr 23, 2025

8 changes: 6 additions & 2 deletions llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1823,9 +1823,13 @@ class TargetLoweringBase {
virtual bool ShouldShrinkFPConstant(EVT) const { return true; }

/// Return true if it is profitable to reduce a load to a smaller type.
/// \p ByteOffset is only set if we know the pointer offset at compile time
/// otherwise we should assume that additional pointer math is required.
/// Example: (i16 (trunc (i32 (load x))) -> i16 load x
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
EVT NewVT) const {
/// Example: (i16 (trunc (srl (i32 (load x)), 16)) -> i16 load x+2
virtual bool shouldReduceLoadWidth(
SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
std::optional<unsigned> ByteOffset = std::nullopt) const {
// By default, assume that it is cheaper to extract a subvector from a wide
// vector load rather than creating multiple narrow vector loads.
if (NewVT.isVector() && !SDValue(Load, 0).hasOneUse())
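
The second doc-comment example is the new case this change enables: when the truncation sits above a right shift by a whole number of bytes, the narrow value lives at a fixed byte offset from the original pointer. A minimal standalone sketch of that equivalence (plain host C++, assuming a little-endian layout; not LLVM code):

```cpp
// Illustrative only: why (i16 (trunc (srl (i32 (load x)), 16))) can be
// rewritten as a narrower i16 load at x+2 on a little-endian target.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Mem[4] = {0x11, 0x22, 0x33, 0x44}; // bytes of a 32-bit value

  // Wide form: load i32, shift right by 16, truncate to i16.
  uint32_t Wide;
  std::memcpy(&Wide, Mem, sizeof(Wide));
  uint16_t FromWide = static_cast<uint16_t>(Wide >> 16);

  // Narrow form: load i16 directly at byte offset 16 / 8 == 2.
  uint16_t FromNarrow;
  std::memcpy(&FromNarrow, Mem + 2, sizeof(FromNarrow));

  assert(FromWide == FromNarrow); // holds on little-endian hosts
  return 0;
}
```
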
13 changes: 8 additions & 5 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6693,7 +6693,7 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
!TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
return false;

if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
return false;

return true;
@@ -6704,9 +6704,11 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
unsigned ShAmt) {
if (!LDST)
return false;

// Only allow byte offsets.
if (ShAmt % 8)
return false;
const unsigned ByteShAmt = ShAmt / 8;

// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
@@ -6730,8 +6732,6 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,

// Ensure that this isn't going to produce an unsupported memory access.
if (ShAmt) {
assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
const unsigned ByteShAmt = ShAmt / 8;
const Align LDSTAlign = LDST->getAlign();
const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
@@ -6771,7 +6771,7 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
return false;

if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
return false;
} else {
assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
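
isLegalNarrowLdSt now folds the shift amount into a byte offset up front and reuses it both for the alignment check and for the shouldReduceLoadWidth query. A rough standalone model of that arithmetic (hypothetical helper names; commonAlign only mirrors the spirit of llvm::commonAlignment for power-of-two alignments):

```cpp
// Only byte-aligned shift amounts are narrowing candidates, and the narrowed
// access must still be sufficiently aligned once the byte offset is folded in.
#include <cstdint>
#include <numeric>
#include <optional>

// Alignment (in bytes) known to hold at BaseAlign + ByteOffset.
static uint64_t commonAlign(uint64_t BaseAlign, uint64_t ByteOffset) {
  return ByteOffset == 0 ? BaseAlign : std::gcd(BaseAlign, ByteOffset);
}

// Returns the byte offset if the shift amount permits a narrowed load, or
// std::nullopt when sub-byte shifts rule it out.
static std::optional<unsigned> narrowedByteOffset(unsigned ShAmtBits) {
  if (ShAmtBits % 8 != 0) // only whole-byte offsets are considered
    return std::nullopt;
  return ShAmtBits / 8;
}

int main() {
  // srl by 16 bits on a 4-byte-aligned load -> 2-byte offset, 2-byte alignment.
  auto Off = narrowedByteOffset(16);
  return (Off && commonAlign(4, *Off) == 2) ? 0 : 1;
}
```
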
@@ -25268,9 +25268,12 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {

// It's fine to use TypeSize here as we know the offset will not be negative.
TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
std::optional<unsigned> ByteOffset;
if (Offset.isFixed())
ByteOffset = Offset.getFixedValue();

const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT, ByteOffset))
return SDValue();

// The narrow load will be offset from the base address of the old load if
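
For extract_subvector of a load, the byte offset is the subvector's store size times the subvector index, and it is only forwarded when that size is fixed (scalable offsets stay unknown). An illustrative standalone sketch of that computation, using hypothetical struct and helper names:

```cpp
// The extracted subvector's byte offset is a whole multiple of its own store
// size, so a fixed-width extract can hand it to shouldReduceLoadWidth directly.
#include <optional>

struct SubvectorExtract {
  unsigned SubVecStoreBytes; // store size of the extracted (narrow) type
  unsigned NumSubElts;       // elements per extracted subvector
  unsigned ExtractIndex;     // starting element index of the extract
};

static std::optional<unsigned> extractByteOffset(const SubvectorExtract &E,
                                                 bool FixedWidth) {
  if (!FixedWidth) // scalable offsets stay unknown, mirroring Offset.isFixed()
    return std::nullopt;
  return E.SubVecStoreBytes * (E.ExtractIndex / E.NumSubElts);
}

int main() {
  // Extracting the upper v4i32 half of a v8i32 load: 16 bytes * (4 / 4) == 16.
  SubvectorExtract E{/*SubVecStoreBytes=*/16, /*NumSubElts=*/4,
                     /*ExtractIndex=*/4};
  return extractByteOffset(E, /*FixedWidth=*/true) == 16u ? 0 : 1;
}
```
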
20 changes: 12 additions & 8 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4726,8 +4726,6 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// for the narrowed load.
for (unsigned width = 8; width < origWidth; width *= 2) {
EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
continue;
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
// Avoid accessing any padding here for now (we could use memWidth
// instead of origWidth here otherwise).
@@ -4737,8 +4735,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
unsigned ptrOffset =
Layout.isLittleEndian() ? offset : memWidth - width - offset;
unsigned IsFast = 0;
assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
if (allowsMemoryAccess(
if (shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT,
ptrOffset / 8) &&
allowsMemoryAccess(
*DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
IsFast) {
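
The SimplifySetCC change moves the shouldReduceLoadWidth query after the point where the byte-aligned pointer offset is known, including the big-endian adjustment memWidth - width - offset. A standalone model of that offset computation (hypothetical helper, not the LLVM routine):

```cpp
// The bit offset of the narrowed chunk depends on endianness; only
// byte-aligned offsets are forwarded to shouldReduceLoadWidth (as ptrOffset/8).
#include <cassert>

static unsigned narrowedPtrOffsetBits(bool LittleEndian, unsigned MemWidthBits,
                                      unsigned NarrowWidthBits,
                                      unsigned BitOffset) {
  return LittleEndian ? BitOffset
                      : MemWidthBits - NarrowWidthBits - BitOffset;
}

int main() {
  // Comparing against bits [16, 24) of a 32-bit load with an 8-bit narrow type.
  unsigned LE = narrowedPtrOffsetBits(true, 32, 8, 16);  // byte offset 16/8 = 2
  unsigned BE = narrowedPtrOffsetBits(false, 32, 8, 16); // 32-8-16 = 8 -> byte 1
  assert(LE % 8 == 0 && BE % 8 == 0);
  return (LE / 8 == 2 && BE / 8 == 1) ? 0 : 1;
}
```
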
@@ -12175,24 +12176,27 @@ SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,

ISD::LoadExtType ExtTy =
ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
return SDValue();

std::optional<unsigned> ByteOffset;
Align Alignment = OriginalLoad->getAlign();
MachinePointerInfo MPI;
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
Alignment = commonAlignment(Alignment, PtrOff);
ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
Alignment = commonAlignment(Alignment, *ByteOffset);
} else {
// Discard the pointer info except the address space because the memory
// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
}

if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
return SDValue();

unsigned IsFast = 0;
if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
OriginalLoad->getAddressSpace(), Alignment,
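
scalarizeExtractedVectorLoad now defers the shouldReduceLoadWidth query until it knows whether the element index is a constant: only then is a compile-time ByteOffset available, otherwise the target must assume extra pointer math. A small sketch of that split, under hypothetical names:

```cpp
// A constant extract index yields a compile-time byte offset; a variable index
// leaves it unset so the target has to assume additional pointer arithmetic.
#include <optional>

static std::optional<unsigned>
scalarizedEltByteOffset(unsigned EltSizeBits, std::optional<unsigned> ConstIdx) {
  if (!ConstIdx)
    return std::nullopt; // variable extract index: offset unknown
  return EltSizeBits * *ConstIdx / 8;
}

int main() {
  // Extracting element 3 of a <4 x i32> load: 32 * 3 / 8 == 12 bytes.
  bool OK = scalarizedEltByteOffset(32, 3) == 12u &&
            !scalarizedEltByteOffset(32, std::nullopt).has_value();
  return OK ? 0 : 1;
}
```
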
9 changes: 5 additions & 4 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16521,11 +16521,12 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
}

bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
bool AArch64TargetLowering::shouldReduceLoadWidth(
SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
std::optional<unsigned> ByteOffset) const {
// TODO: This may be worth removing. Check regression tests for diffs.
if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT,
ByteOffset))
return false;

// If we're reducing the load width in order to avoid having to use an extra
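
The AArch64 override threads the new ByteOffset through to the base implementation unchanged. As a sketch of what the extra parameter lets a target express, here is a hypothetical, standalone hook shape (not AArch64's actual heuristic and not the LLVM class hierarchy) that only narrows when the offset is known at compile time:

```cpp
#include <optional>

struct ExampleTargetHook {
  // Mirrors the new parameter: std::nullopt means extra pointer math is needed.
  bool shouldReduceLoadWidth(unsigned NewBits,
                             std::optional<unsigned> ByteOffset) const {
    if (NewBits < 8)
      return false;                // never narrow below a byte
    return ByteOffset.has_value(); // hypothetical policy: require known offset
  }
};

int main() {
  ExampleTargetHook T;
  return (T.shouldReduceLoadWidth(16, 2) && !T.shouldReduceLoadWidth(16, {}))
             ? 0
             : 1;
}
```
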
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -695,8 +695,8 @@ class AArch64TargetLowering : public TargetLowering {
MachineFunction &MF,
unsigned Intrinsic) const override;

bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
EVT NewVT) const override;
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
std::optional<unsigned> ByteOffset) const override;

bool shouldRemoveRedundantExtend(SDValue Op) const override;

8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -819,11 +819,11 @@ bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
}

bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
bool AMDGPUTargetLowering::shouldReduceLoadWidth(
SDNode *N, ISD::LoadExtType ExtTy, EVT NewVT,
std::optional<unsigned> ByteOffset) const {
// TODO: This may be worth removing. Check regression tests for diffs.
if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT))
if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT, ByteOffset))
return false;

unsigned NewSize = NewVT.getStoreSizeInBits();
5 changes: 2 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -215,9 +215,8 @@ class AMDGPUTargetLowering : public TargetLowering {
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
bool ShouldShrinkFPConstant(EVT VT) const override;
bool shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtType,
EVT ExtVT) const override;
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtType, EVT ExtVT,
std::optional<unsigned> ByteOffset) const override;

bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
const MachineMemOperand &MMO) const final;
5 changes: 3 additions & 2 deletions llvm/lib/Target/BPF/BPFISelLowering.h
@@ -135,8 +135,9 @@ class BPFTargetLowering : public TargetLowering {
// ctx = ctx + reloc_offset
// ... (*(u8 *)(ctx + 1)) & 0x80 ...
// which will be rejected by the verifier.
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
EVT NewVT) const override {
bool
shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
std::optional<unsigned> ByteOffset) const override {
return false;
}

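
The BPF override keeps returning false for the reason given in the comment above: narrowing would materialise a new pointer such as ctx + 1, which the verifier rejects for context accesses. Purely as an illustration of that access-pattern difference (host C++, assuming a little-endian host; not a BPF program):

```cpp
// The narrowed form materialises a new pointer value (ctx + 1); that pointer
// adjustment is what the BPF verifier refuses for context accesses.
#include <cstdint>
#include <cstring>

static std::uint32_t wideForm(const unsigned char *Ctx) {
  std::uint32_t Word; // ... *(u32 *)ctx ... & 0x8000
  std::memcpy(&Word, Ctx, sizeof(Word));
  return Word & 0x8000u;
}

static std::uint32_t narrowedForm(const unsigned char *Ctx) {
  return (*(Ctx + 1)) & 0x80u; // ... (*(u8 *)(ctx + 1)) & 0x80
}

int main() {
  unsigned char Ctx[4] = {0x00, 0x80, 0x00, 0x00};
  // Same predicate result on a little-endian host, different pointer use.
  return (wideForm(Ctx) != 0) == (narrowedForm(Ctx) != 0) ? 0 : 1;
}
```
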
12 changes: 7 additions & 5 deletions llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3821,19 +3821,21 @@ HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
return TargetLowering::findRepresentativeClass(TRI, VT);
}

bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy, EVT NewVT) const {
bool HexagonTargetLowering::shouldReduceLoadWidth(
SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
std::optional<unsigned> ByteOffset) const {
// TODO: This may be worth removing. Check regression tests for diffs.
if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT,
ByteOffset))
return false;

auto *L = cast<LoadSDNode>(Load);
std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
std::pair<SDValue, int> BO = getBaseAndOffset(L->getBasePtr());
// Small-data object, do not shrink.
if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
return false;
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
auto &HTM = static_cast<const HexagonTargetMachine &>(getTargetMachine());
const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
}
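
Hexagon's override still refuses to shrink loads whose base address resolves to a small-data object, independent of the new ByteOffset; the real check walks the base via getBaseAndOffset and the small-section query shown above. A standalone sketch of that policy under hypothetical names:

```cpp
// Narrowing is refused when the load's base resolves to a small-data global,
// regardless of whether a byte offset is known.
#include <optional>

enum class BaseKind { SmallDataGlobal, OtherGlobal, Unknown };

static bool hexagonLikeShouldReduce(BaseKind Base,
                                    std::optional<unsigned> /*ByteOffset*/) {
  // Small-data objects: keep the wide access (mirrors the check above).
  return Base != BaseKind::SmallDataGlobal;
}

int main() {
  return (!hexagonLikeShouldReduce(BaseKind::SmallDataGlobal, 2) &&
          hexagonLikeShouldReduce(BaseKind::OtherGlobal, std::nullopt))
             ? 0
             : 1;
}
```
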
4 changes: 2 additions & 2 deletions llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -342,8 +342,8 @@ class HexagonTargetLowering : public TargetLowering {
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
const override;

bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
EVT NewVT) const override;
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
std::optional<unsigned> ByteOffset) const override;

void AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const override;
6 changes: 3 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3258,9 +3258,9 @@ bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return false;
}

bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
bool X86TargetLowering::shouldReduceLoadWidth(
SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
std::optional<unsigned> ByteOffset) const {
assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");

// "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5 changes: 3 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.h
@@ -1501,8 +1501,9 @@ namespace llvm {

/// Return true if we believe it is correct and profitable to reduce the
/// load node to a smaller type.
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
EVT NewVT) const override;
bool
shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
std::optional<unsigned> ByteOffset) const override;

/// Return true if the specified scalar FP type is computed in an SSE
/// register, not on the X87 floating point stack.