Skip to content

Commit 9fe563b

Browse files
committed
[SelectionDAG][X86] Remove unused elements from atomic vector.
After splitting, all elements are created. The two components must be found by looking at the upper and lower halves of EXTRACT_ELEMENT. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that unused elements can be removed. commit-id:b83937a8
1 parent eda6b72 commit 9fe563b

File tree

5 files changed

+90
-172
lines changed

5 files changed

+90
-172
lines changed

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1873,7 +1873,7 @@ class SelectionDAG {
18731873
/// chain to the token factor. This ensures that the new memory node will have
18741874
/// the same relative memory dependency position as the old load. Returns the
18751875
/// new merged load chain.
1876-
SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
1876+
SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
18771877

18781878
/// Topological-sort the AllNodes list and
18791879
/// assign a unique node id for each node in the DAG based on their
@@ -2311,7 +2311,7 @@ class SelectionDAG {
23112311
/// merged. Check that both are nonvolatile and if LD is loading
23122312
/// 'Bytes' bytes from a location that is 'Dist' units away from the
23132313
/// location that the 'Base' load is loading from.
2314-
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
2314+
bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
23152315
unsigned Bytes, int Dist) const;
23162316

23172317
/// Infer alignment of a load / store address. Return std::nullopt if it

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12215,7 +12215,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
1221512215
return TokenFactor;
1221612216
}
1221712217

12218-
SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
12218+
SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
1221912219
SDValue NewMemOp) {
1222012220
assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
1222112221
SDValue OldChain = SDValue(OldLoad, 1);
@@ -12905,17 +12905,21 @@ std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp(
1290512905
getBuildVector(NewOvVT, dl, OvScalars));
1290612906
}
1290712907

12908-
bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
12909-
LoadSDNode *Base,
12908+
bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
12909+
MemSDNode *Base,
1291012910
unsigned Bytes,
1291112911
int Dist) const {
1291212912
if (LD->isVolatile() || Base->isVolatile())
1291312913
return false;
12914-
// TODO: probably too restrictive for atomics, revisit
12915-
if (!LD->isSimple())
12916-
return false;
12917-
if (LD->isIndexed() || Base->isIndexed())
12918-
return false;
12914+
if (auto Ld = dyn_cast<LoadSDNode>(LD)) {
12915+
if (!Ld->isSimple())
12916+
return false;
12917+
if (Ld->isIndexed())
12918+
return false;
12919+
}
12920+
if (auto Ld = dyn_cast<LoadSDNode>(Base))
12921+
if (Ld->isIndexed())
12922+
return false;
1291912923
if (LD->getChain() != Base->getChain())
1292012924
return false;
1292112925
EVT VT = LD->getMemoryVT();

llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize,
195195
}
196196

197197
/// Parses tree in Ptr for base, index, offset addresses.
198-
static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
199-
const SelectionDAG &DAG) {
198+
template <typename T>
199+
static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
200200
SDValue Ptr = N->getBasePtr();
201201

202202
// (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
206206
bool IsIndexSignExt = false;
207207

208208
// pre-inc/pre-dec ops are components of EA.
209-
if (N->getAddressingMode() == ISD::PRE_INC) {
210-
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
211-
Offset += C->getSExtValue();
212-
else // If unknown, give up now.
213-
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
214-
} else if (N->getAddressingMode() == ISD::PRE_DEC) {
215-
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
216-
Offset -= C->getSExtValue();
217-
else // If unknown, give up now.
218-
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
209+
if constexpr (std::is_same_v<T, LSBaseSDNode>) {
210+
if (N->getAddressingMode() == ISD::PRE_INC) {
211+
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
212+
Offset += C->getSExtValue();
213+
else // If unknown, give up now.
214+
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
215+
} else if (N->getAddressingMode() == ISD::PRE_DEC) {
216+
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
217+
Offset -= C->getSExtValue();
218+
else // If unknown, give up now.
219+
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
220+
}
219221
}
220222

221223
// Consume constant adds & ors with appropriate masking.
@@ -300,8 +302,10 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
300302

301303
BaseIndexOffset BaseIndexOffset::match(const SDNode *N,
302304
const SelectionDAG &DAG) {
305+
if (const auto *AN = dyn_cast<AtomicSDNode>(N))
306+
return matchSDNode(AN, DAG);
303307
if (const auto *LS0 = dyn_cast<LSBaseSDNode>(N))
304-
return matchLSNode(LS0, DAG);
308+
return matchSDNode(LS0, DAG);
305309
if (const auto *LN = dyn_cast<LifetimeSDNode>(N)) {
306310
if (LN->hasOffset())
307311
return BaseIndexOffset(LN->getOperand(1), SDValue(), LN->getOffset(),

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7193,15 +7193,19 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
71937193
}
71947194

71957195
// Recurse to find a LoadSDNode or AtomicSDNode source and the accumulated ByteOffset.
7196-
static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
7197-
if (ISD::isNON_EXTLoad(Elt.getNode())) {
7198-
auto *BaseLd = cast<LoadSDNode>(Elt);
7199-
if (!BaseLd->isSimple())
7200-
return false;
7196+
static bool findEltLoadSrc(SDValue Elt, MemSDNode *&Ld, int64_t &ByteOffset) {
7197+
if (auto *BaseLd = dyn_cast<AtomicSDNode>(Elt)) {
72017198
Ld = BaseLd;
72027199
ByteOffset = 0;
72037200
return true;
7204-
}
7201+
} else if (auto *BaseLd = dyn_cast<LoadSDNode>(Elt))
7202+
if (ISD::isNON_EXTLoad(Elt.getNode())) {
7203+
if (!BaseLd->isSimple())
7204+
return false;
7205+
Ld = BaseLd;
7206+
ByteOffset = 0;
7207+
return true;
7208+
}
72057209

72067210
switch (Elt.getOpcode()) {
72077211
case ISD::BITCAST:
@@ -7254,7 +7258,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
72547258
APInt ZeroMask = APInt::getZero(NumElems);
72557259
APInt UndefMask = APInt::getZero(NumElems);
72567260

7257-
SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
7261+
SmallVector<MemSDNode *, 8> Loads(NumElems, nullptr);
72587262
SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
72597263

72607264
// For each element in the initializer, see if we've found a load, zero or an
@@ -7304,7 +7308,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
73047308
EVT EltBaseVT = EltBase.getValueType();
73057309
assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() &&
73067310
"Register/Memory size mismatch");
7307-
LoadSDNode *LDBase = Loads[FirstLoadedElt];
7311+
MemSDNode *LDBase = Loads[FirstLoadedElt];
73087312
assert(LDBase && "Did not find base load for merging consecutive loads");
73097313
unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits();
73107314
unsigned BaseSizeInBytes = BaseSizeInBits / 8;
@@ -7318,8 +7322,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
73187322

73197323
// Check to see if the element's load is consecutive to the base load
73207324
// or offset from a previous (already checked) load.
7321-
auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) {
7322-
LoadSDNode *Ld = Loads[EltIdx];
7325+
auto CheckConsecutiveLoad = [&](MemSDNode *Base, int EltIdx) {
7326+
MemSDNode *Ld = Loads[EltIdx];
73237327
int64_t ByteOffset = ByteOffsets[EltIdx];
73247328
if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) {
73257329
int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes);
@@ -7347,7 +7351,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
73477351
}
73487352
}
73497353

7350-
auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
7354+
auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, MemSDNode *LDBase) {
73517355
auto MMOFlags = LDBase->getMemOperand()->getFlags();
73527356
assert(LDBase->isSimple() &&
73537357
"Cannot merge volatile or atomic loads.");
@@ -9452,8 +9456,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
94529456
{
94539457
SmallVector<SDValue, 64> Ops(Op->ops().take_front(NumElems));
94549458
if (SDValue LD =
9455-
EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false))
9459+
EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false)) {
94569460
return LD;
9461+
}
94579462
}
94589463

94599464
// If this is a splat of pairs of 32-bit elements, we can use a narrower
@@ -60388,6 +60393,35 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG,
6038860393
return SDValue();
6038960394
}
6039060395

60396+
/// Try to replace an X86ISD::VZEXT_LOAD with the atomic load associated with
/// it through a TokenFactor.
///
/// The DAG is searched for a TokenFactor whose first or second operand is
/// produced by \p N; the node producing the other of those two operands is the
/// candidate. If the candidate is an ISD::ATOMIC_LOAD, the value of \p N is
/// rebuilt as BITCAST(SCALAR_TO_VECTOR(atomic load value)) and the chain of
/// \p N is replaced by the atomic load's chain.
///
/// \param N   The X86ISD::VZEXT_LOAD node being combined.
/// \param DAG The DAG that owns \p N.
/// \param DCI Combiner state used to install the replacement values.
/// \returns The replacement value, or an empty SDValue when no atomic load
///          candidate was found.
static SDValue combineVZEXT_LOAD(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  // Find the TokenFactor to locate the associated AtomicLoad.
  // NOTE(review): this visits every node in the DAG for each combine; walking
  // only the users of N looks sufficient and cheaper -- confirm and switch.
  SDNode *ALD = nullptr;
  for (SDNode &TF : DAG.allnodes()) {
    if (TF.getOpcode() != ISD::TokenFactor)
      continue;
    // Operand 1 is read below, so a TokenFactor with fewer than two operands
    // must be skipped instead of being indexed out of range.
    if (TF.getNumOperands() < 2)
      continue;

    SDNode *Lhs = TF.getOperand(0).getNode();
    SDNode *Rhs = TF.getOperand(1).getNode();
    if (Lhs == N)
      ALD = Rhs;
    else if (Rhs == N)
      ALD = Lhs;
  }

  if (!ALD)
    return SDValue();

  // Only an atomic *load* yields both a loaded value (result 0) and a chain
  // (result 1). Other AtomicSDNode opcodes must not be used here: an atomic
  // store, for instance, has only a chain result.
  if (ALD->getOpcode() != ISD::ATOMIC_LOAD)
    return SDValue();

  // Replace the VZEXT_LOAD with the AtomicLoad.
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue SV = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT.changeTypeToInteger(),
                           SDValue(ALD, 0));
  SDValue BC = DAG.getNode(ISD::BITCAST, dl, VT, SV);
  return DCI.CombineTo(N, BC, SDValue(ALD, 1));
}
60424+
6039160425
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
6039260426
DAGCombinerInfo &DCI) const {
6039360427
SelectionDAG &DAG = DCI.DAG;
@@ -60584,6 +60618,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
6058460618
case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
6058560619
case ISD::FP_TO_SINT_SAT:
6058660620
case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
60621+
case X86ISD::VZEXT_LOAD: return combineVZEXT_LOAD(N, DAG, DCI);
6058760622
// clang-format on
6058860623
}
6058960624

0 commit comments

Comments
 (0)