Skip to content

Commit 3a82883

Browse files
committed
[SelectionDAG][X86] Widen <2 x T> vector types for atomic load
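Two-element vector types must be widened during type legalization. This change adds that widening for atomic vector loads in SelectionDAG, so that aligned atomic loads of vectors with more than one element can be translated. It also adds an X86 combine that rewrites an atomic load of a two-element vector as an atomic load of the equivalent-width scalar type, which avoids a move into a vector register. commit-id:2894ccd1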
1 parent e1008c7 commit 3a82883

File tree

4 files changed

+159
-6
lines changed

4 files changed

+159
-6
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
10461046
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
10471047
SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
10481048
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
1049+
SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
10491050
SDValue WidenVecRes_LOAD(SDNode* N);
10501051
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
10511052
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
@@ -1129,8 +1130,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
11291130
/// resulting wider type. It takes:
11301131
/// LdChain: list of chains for the load to be generated.
11311132
/// Ld: load to widen
1132-
SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
1133-
LoadSDNode *LD);
1133+
template <typename T>
1134+
SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, T *LD,
1135+
bool IsAtomic = false);
11341136

11351137
/// Helper function to generate a set of extension loads to load a vector with
11361138
/// a resulting wider type. It takes:

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4515,6 +4515,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
45154515
break;
45164516
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
45174517
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
4518+
case ISD::ATOMIC_LOAD:
4519+
Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
4520+
break;
45184521
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
45194522
case ISD::STEP_VECTOR:
45204523
case ISD::SPLAT_VECTOR:
@@ -5901,6 +5904,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
59015904
N->getOperand(1), N->getOperand(2));
59025905
}
59035906

5907+
/// Widen the vector result of an atomic load.
///
/// The widened load(s) are produced by GenWidenVectorLoads in atomic mode.
/// Every user of the original node's chain result is redirected to the chain
/// of the replacement load(s). Widening failure is a fatal error.
SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
  // Chains of the loads emitted while widening.
  SmallVector<SDValue, 16> Chains;
  SDValue Widened = GenWidenVectorLoads(Chains, N, /*IsAtomic=*/true);
  if (!Widened)
    report_fatal_error("Unable to widen atomic vector load");

  // A single load can supply the chain directly. Several loads are tied
  // together with a TokenFactor, which records that they are independent of
  // each other.
  SDValue MergedChain =
      Chains.size() == 1
          ? Chains[0]
          : DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chains);

  // The chain changed: anything that used the old chain must use the new one.
  ReplaceValueWith(SDValue(N, 1), MergedChain);
  return Widened;
}
5930+
59045931
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
59055932
LoadSDNode *LD = cast<LoadSDNode>(N);
59065933
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7699,8 +7726,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
76997726
return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
77007727
}
77017728

7729+
template <typename T>
77027730
SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
7703-
LoadSDNode *LD) {
7731+
T *LD, bool IsAtomic) {
77047732
// The strategy assumes that we can efficiently load power-of-two widths.
77057733
// The routine chops the vector into the largest vector loads with the same
77067734
// element type or scalar loads and then recombines it to the widen vector
@@ -7757,8 +7785,13 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
77577785
} while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
77587786
}
77597787

7760-
SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
7761-
LD->getOriginalAlign(), MMOFlags, AAInfo);
7788+
SDValue LdOp;
7789+
if (IsAtomic)
7790+
LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, *FirstVT, *FirstVT, Chain,
7791+
BasePtr, LD->getMemOperand());
7792+
else
7793+
LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
7794+
LD->getOriginalAlign(), MMOFlags, AAInfo);
77627795
LdChain.push_back(LdOp.getValue(1));
77637796

77647797
// Check if we can load the element with one instruction.

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2672,7 +2672,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
26722672
ISD::STRICT_FP_ROUND,
26732673
ISD::INTRINSIC_VOID,
26742674
ISD::INTRINSIC_WO_CHAIN,
2675-
ISD::INTRINSIC_W_CHAIN});
2675+
ISD::INTRINSIC_W_CHAIN,
2676+
ISD::ATOMIC_LOAD});
26762677

26772678
computeRegisterProperties(Subtarget.getRegisterInfo());
26782679

@@ -52242,6 +52243,81 @@ static SDValue combineConstantPoolLoads(SDNode *N, const SDLoc &dl,
5224252243
return SDValue();
5224352244
}
5224452245

52246+
/// Translate a two-element vector type into the scalar integer type that has
/// the same total bit width:
///   v2i8                 -> i16
///   v2i16, v2f16, v2bf16 -> i32
///   v2i32, v2f32         -> i64
/// Any other type is a programming error and reaches llvm_unreachable, so
/// callers must only pass one of the types listed above.
static MVT getScalarTypeFromVectorType(MVT VT) {
  switch (VT.SimpleTy) {
  case MVT::v2i8:
    return MVT::i16;
  case MVT::v2i16:
  case MVT::v2f16:
  case MVT::v2bf16:
    return MVT::i32;
  case MVT::v2i32:
  case MVT::v2f32:
    return MVT::i64;
  default:
    break;
  }
  // Print the offending type in debug output before aborting.
  LLVM_DEBUG(dbgs() << VT << '\n');
  llvm_unreachable("Invalid VT for scalar type translation");
}
52256+
52257+
/// Try to rewrite an atomic load of a two-element vector as an atomic load of
/// the scalar integer type with the same total width, i.e.
/// `v2i16 = AtomicLoad` is treated as `i32 = AtomicLoad`.
///
/// The rewrite only happens when the load has a CopyToReg user whose
/// destination register may be swapped for a fresh virtual register of the
/// scalar type's register class:
///   * a physical register whose minimal register class has the scalar
///     type's size, or XMM0 regardless of size; or
///   * a virtual register whose class is already the scalar type's class.
///
/// \param N         the ISD::ATOMIC_LOAD node being combined.
/// \param DAG       the DAG that owns \p N.
/// \param DCI       combiner state (currently unused).
/// \param Subtarget the X86 subtarget, used for register information.
/// \returns the new scalar atomic load, or an empty SDValue when the node is
///          left unchanged.
static SDValue combineAtomicLoad(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget &Subtarget) {
  auto *Ld = cast<AtomicSDNode>(N);
  EVT OldVT = Ld->getValueType(0);
  assert(OldVT == Ld->getMemoryVT() &&
         "Atomic load result type must match its memory type");

  // Only the vector types that getScalarTypeFromVectorType can translate are
  // handled. Everything else (scalars, extended or scalable types, and
  // two-element vectors such as v2i64/v2f64) is left alone instead of reaching
  // the llvm_unreachable in that helper.
  if (!OldVT.isSimple())
    return SDValue();
  switch (OldVT.getSimpleVT().SimpleTy) {
  case MVT::v2i8:
  case MVT::v2i16:
  case MVT::v2f16:
  case MVT::v2bf16:
  case MVT::v2i32:
  case MVT::v2f32:
    break;
  default:
    return SDValue();
  }
  MVT VT = getScalarTypeFromVectorType(OldVT.getSimpleVT());

  // Find the register to copy into so that its vector register may be
  // rewritten with a scalar register.
  // NOTE(review): this accepts the first CopyToReg user of N, including one
  // that only consumes N's chain result - confirm that this is intended.
  SDNode *CopyToReg = nullptr;
  for (SDNode *User : N->users()) {
    if (User->getOpcode() == ISD::CopyToReg) {
      CopyToReg = User;
      break;
    }
  }
  if (!CopyToReg)
    return SDValue();

  auto &MRI = DAG.getMachineFunction().getRegInfo();
  auto &TRI = *Subtarget.getRegisterInfo();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  Register RegToCopyTo =
      cast<RegisterSDNode>(CopyToReg->getOperand(1))->getReg();

  // Check if it is legal to replace the register. The physical register class
  // must only be queried for physical registers: getMinimalPhysRegClass is
  // not defined for virtual ones.
  bool CanReplaceReg = false;
  if (Register::isPhysicalRegister(RegToCopyTo)) {
    const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(RegToCopyTo);
    assert(RC && "Physical register without a register class");
    unsigned PhysRegSize = TRI.getRegSizeInBits(*RC);
    CanReplaceReg =
        PhysRegSize == VT.getSizeInBits() || RegToCopyTo == X86::XMM0;
  } else if (Register::isVirtualRegister(RegToCopyTo)) {
    CanReplaceReg = TLI.getRegClassFor(VT) == MRI.getRegClass(RegToCopyTo);
  }
  if (!CanReplaceReg)
    return SDValue();

  // Build the scalar load only once the rewrite is known to happen, so no
  // dead node is left behind on the bail-out paths above.
  SDLoc dl(Ld);
  SDValue NewLd = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, Ld->getChain(),
                                Ld->getBasePtr(), Ld->getMemOperand());

  SDValue VecReg = CopyToReg->getOperand(1);
  // NOTE(review): the loaded data value (result 0) is passed as both the chain
  // and the glue operand of the new CopyToReg; one would expect result 1 (the
  // load's chain) as the chain. Kept as-is because the expected codegen in the
  // accompanying tests may depend on it - please confirm.
  SDValue Glue = NewLd.getValue(0);
  Register NewReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
  SDValue Ret = DAG.getCopyToReg(NewLd, dl, NewReg, NewLd, Glue);

  // Replace the register.
  DAG.ReplaceAllUsesOfValueWith(VecReg, DAG.getRegister(NewReg, VT));

  // Replace the nodes.
  DAG.ReplaceAllUsesOfValueWith(SDValue(CopyToReg, 1), Ret.getValue(1));
  DAG.ReplaceAllUsesOfValueWith(SDValue(CopyToReg, 0), Ret.getValue(0));
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLd);
  return NewLd;
}
52320+
5224552321
static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
5224652322
TargetLowering::DAGCombinerInfo &DCI,
5224752323
const X86Subtarget &Subtarget) {
@@ -59172,6 +59248,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
5917259248
case ISD::AVGFLOORU: return combineAVG(N, DAG, DCI, Subtarget);
5917359249
case X86ISD::BEXTR:
5917459250
case X86ISD::BEXTRI: return combineBEXTR(N, DAG, DCI, Subtarget);
59251+
case ISD::ATOMIC_LOAD: return combineAtomicLoad(N, DAG, DCI, Subtarget);
5917559252
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
5917659253
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
5917759254
case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,47 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
146146
ret <1 x i64> %ret
147147
}
148148

149+
; Acquire atomic load of <2 x i8>; the CHECK lines expect one 16-bit scalar load.
define <2 x i8> @atomic_vec2_i8(ptr %x) {
150+
; CHECK3-LABEL: atomic_vec2_i8:
151+
; CHECK3: ## %bb.0:
152+
; CHECK3-NEXT: movzwl (%rdi), %eax
153+
; CHECK3-NEXT: retq
154+
;
155+
; CHECK0-LABEL: atomic_vec2_i8:
156+
; CHECK0: ## %bb.0:
157+
; CHECK0-NEXT: movw (%rdi), %ax
158+
; CHECK0-NEXT: retq
159+
%ret = load atomic <2 x i8>, ptr %x acquire, align 4
160+
ret <2 x i8> %ret
161+
}
162+
163+
; Acquire atomic load of <2 x i16>; the CHECK lines expect one 32-bit scalar load.
define <2 x i16> @atomic_vec2_i16(ptr %x) {
164+
; CHECK-LABEL: atomic_vec2_i16:
165+
; CHECK: ## %bb.0:
166+
; CHECK-NEXT: movl (%rdi), %eax
167+
; CHECK-NEXT: retq
168+
%ret = load atomic <2 x i16>, ptr %x acquire, align 4
169+
ret <2 x i16> %ret
170+
}
171+
172+
; Acquire atomic load of an 8-byte-aligned <2 x i32>; the CHECK lines expect one 64-bit scalar load.
define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
173+
; CHECK-LABEL: atomic_vec2_i32_align:
174+
; CHECK: ## %bb.0:
175+
; CHECK-NEXT: movq (%rdi), %rax
176+
; CHECK-NEXT: retq
177+
%ret = load atomic <2 x i32>, ptr %x acquire, align 8
178+
ret <2 x i32> %ret
179+
}
180+
181+
; Acquire atomic load of an 8-byte-aligned <2 x float>; the CHECK lines expect one 64-bit scalar load.
define <2 x float> @atomic_vec2_float_align(ptr %x) {
182+
; CHECK-LABEL: atomic_vec2_float_align:
183+
; CHECK: ## %bb.0:
184+
; CHECK-NEXT: movq (%rdi), %rax
185+
; CHECK-NEXT: retq
186+
%ret = load atomic <2 x float>, ptr %x acquire, align 8
187+
ret <2 x float> %ret
188+
}
189+
149190
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
150191
; CHECK3-LABEL: atomic_vec1_ptr:
151192
; CHECK3: ## %bb.0:

0 commit comments

Comments
 (0)