Skip to content

Commit c8fe66e

Browse files
committed
[SelectionDAG] Widen <2 x T> vector types for atomic load
Vector types with 2 elements must be widened. This change widens the vector result types of atomic loads in SelectionDAG so that aligned atomic loads of vectors with more than one element can be translated. commit-id:2894ccd1
1 parent 63a3178 commit c8fe66e

File tree

3 files changed

+153
-23
lines changed

3 files changed

+153
-23
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
10621062
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
10631063
SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
10641064
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
1065+
SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
10651066
SDValue WidenVecRes_LOAD(SDNode* N);
10661067
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
10671068
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 74 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
46254625
break;
46264626
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
46274627
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
4628+
case ISD::ATOMIC_LOAD:
4629+
Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
4630+
break;
46284631
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
46294632
case ISD::STEP_VECTOR:
46304633
case ISD::SPLAT_VECTOR:
@@ -6014,6 +6017,74 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
60146017
N->getOperand(1), N->getOperand(2));
60156018
}
60166019

6020+
/// Either return the same load or provide appropriate casts
6021+
/// from the load and return that.
6022+
static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
6023+
TypeSize LdWidth, TypeSize FirstVTWidth,
6024+
SDLoc dl, SelectionDAG &DAG) {
6025+
assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
6026+
TypeSize WidenWidth = WidenVT.getSizeInBits();
6027+
if (!FirstVT.isVector()) {
6028+
unsigned NumElts =
6029+
WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
6030+
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
6031+
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
6032+
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
6033+
}
6034+
assert(FirstVT == WidenVT);
6035+
return LdOp;
6036+
}
6037+
6038+
static std::optional<EVT> findMemType(SelectionDAG &DAG,
6039+
const TargetLowering &TLI, unsigned Width,
6040+
EVT WidenVT, unsigned Align,
6041+
unsigned WidenEx);
6042+
6043+
SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
6044+
EVT WidenVT =
6045+
TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
6046+
EVT LdVT = LD->getMemoryVT();
6047+
SDLoc dl(LD);
6048+
assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors");
6049+
assert(LdVT.isScalableVector() == WidenVT.isScalableVector() &&
6050+
"Must be scalable");
6051+
assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() &&
6052+
"Expected equivalent element types");
6053+
6054+
// Load information
6055+
SDValue Chain = LD->getChain();
6056+
SDValue BasePtr = LD->getBasePtr();
6057+
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
6058+
AAMDNodes AAInfo = LD->getAAInfo();
6059+
6060+
TypeSize LdWidth = LdVT.getSizeInBits();
6061+
TypeSize WidenWidth = WidenVT.getSizeInBits();
6062+
TypeSize WidthDiff = WidenWidth - LdWidth;
6063+
6064+
// Find the vector type that can load from.
6065+
std::optional<EVT> FirstVT =
6066+
findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0,
6067+
WidthDiff.getKnownMinValue());
6068+
6069+
if (!FirstVT)
6070+
return SDValue();
6071+
6072+
SmallVector<EVT, 8> MemVTs;
6073+
TypeSize FirstVTWidth = FirstVT->getSizeInBits();
6074+
6075+
SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT,
6076+
Chain, BasePtr, LD->getMemOperand());
6077+
6078+
// Load the element with one instruction.
6079+
SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth,
6080+
FirstVTWidth, dl, DAG);
6081+
6082+
// Modified the chain - switch anything that used the old chain to use
6083+
// the new one.
6084+
ReplaceValueWith(SDValue(LD, 1), LdOp.getValue(1));
6085+
return Result;
6086+
}
6087+
60176088
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
60186089
LoadSDNode *LD = cast<LoadSDNode>(N);
60196090
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7896,29 +7967,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
78967967
LdChain.push_back(LdOp.getValue(1));
78977968

78987969
// Check if we can load the element with one instruction.
7899-
if (MemVTs.empty()) {
7900-
assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
7901-
if (!FirstVT->isVector()) {
7902-
unsigned NumElts =
7903-
WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
7904-
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
7905-
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
7906-
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
7907-
}
7908-
if (FirstVT == WidenVT)
7909-
return LdOp;
7910-
7911-
// TODO: We don't currently have any tests that exercise this code path.
7912-
assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
7913-
unsigned NumConcat =
7914-
WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
7915-
SmallVector<SDValue, 16> ConcatOps(NumConcat);
7916-
SDValue UndefVal = DAG.getUNDEF(*FirstVT);
7917-
ConcatOps[0] = LdOp;
7918-
for (unsigned i = 1; i != NumConcat; ++i)
7919-
ConcatOps[i] = UndefVal;
7920-
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
7921-
}
7970+
if (MemVTs.empty())
7971+
return coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl,
7972+
DAG);
79227973

79237974
// Load vector by using multiple loads from largest vector to scalar.
79247975
SmallVector<SDValue, 16> LdOps;

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,64 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
146146
ret <1 x i64> %ret
147147
}
148148

149+
define <2 x i8> @atomic_vec2_i8(ptr %x) {
150+
; CHECK3-LABEL: atomic_vec2_i8:
151+
; CHECK3: ## %bb.0:
152+
; CHECK3-NEXT: movzwl (%rdi), %eax
153+
; CHECK3-NEXT: movd %eax, %xmm0
154+
; CHECK3-NEXT: retq
155+
;
156+
; CHECK0-LABEL: atomic_vec2_i8:
157+
; CHECK0: ## %bb.0:
158+
; CHECK0-NEXT: movw (%rdi), %cx
159+
; CHECK0-NEXT: ## implicit-def: $eax
160+
; CHECK0-NEXT: movw %cx, %ax
161+
; CHECK0-NEXT: movd %eax, %xmm0
162+
; CHECK0-NEXT: retq
163+
%ret = load atomic <2 x i8>, ptr %x acquire, align 4
164+
ret <2 x i8> %ret
165+
}
166+
167+
define <2 x i16> @atomic_vec2_i16(ptr %x) {
168+
; CHECK-LABEL: atomic_vec2_i16:
169+
; CHECK: ## %bb.0:
170+
; CHECK-NEXT: movl (%rdi), %eax
171+
; CHECK-NEXT: movd %eax, %xmm0
172+
; CHECK-NEXT: retq
173+
%ret = load atomic <2 x i16>, ptr %x acquire, align 4
174+
ret <2 x i16> %ret
175+
}
176+
177+
define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
178+
; CHECK-LABEL: atomic_vec2_ptr270:
179+
; CHECK: ## %bb.0:
180+
; CHECK-NEXT: movq (%rdi), %rax
181+
; CHECK-NEXT: movq %rax, %xmm0
182+
; CHECK-NEXT: retq
183+
%ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8
184+
ret <2 x ptr addrspace(270)> %ret
185+
}
186+
187+
define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
188+
; CHECK-LABEL: atomic_vec2_i32_align:
189+
; CHECK: ## %bb.0:
190+
; CHECK-NEXT: movq (%rdi), %rax
191+
; CHECK-NEXT: movq %rax, %xmm0
192+
; CHECK-NEXT: retq
193+
%ret = load atomic <2 x i32>, ptr %x acquire, align 8
194+
ret <2 x i32> %ret
195+
}
196+
197+
define <2 x float> @atomic_vec2_float_align(ptr %x) {
198+
; CHECK-LABEL: atomic_vec2_float_align:
199+
; CHECK: ## %bb.0:
200+
; CHECK-NEXT: movq (%rdi), %rax
201+
; CHECK-NEXT: movq %rax, %xmm0
202+
; CHECK-NEXT: retq
203+
%ret = load atomic <2 x float>, ptr %x acquire, align 8
204+
ret <2 x float> %ret
205+
}
206+
149207
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
150208
; CHECK3-LABEL: atomic_vec1_ptr:
151209
; CHECK3: ## %bb.0:
@@ -295,6 +353,26 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
295353
ret <2 x i32> %ret
296354
}
297355

356+
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
357+
; CHECK-LABEL: atomic_vec4_i8:
358+
; CHECK: ## %bb.0:
359+
; CHECK-NEXT: movl (%rdi), %eax
360+
; CHECK-NEXT: movd %eax, %xmm0
361+
; CHECK-NEXT: retq
362+
%ret = load atomic <4 x i8>, ptr %x acquire, align 4
363+
ret <4 x i8> %ret
364+
}
365+
366+
define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
367+
; CHECK-LABEL: atomic_vec4_i16:
368+
; CHECK: ## %bb.0:
369+
; CHECK-NEXT: movq (%rdi), %rax
370+
; CHECK-NEXT: movq %rax, %xmm0
371+
; CHECK-NEXT: retq
372+
%ret = load atomic <4 x i16>, ptr %x acquire, align 8
373+
ret <4 x i16> %ret
374+
}
375+
298376
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
299377
; CHECK-LABEL: atomic_vec4_float_align:
300378
; CHECK: ## %bb.0:

0 commit comments

Comments
 (0)