Skip to content

Commit 40b0a4e

Browse files
committed
[SelectionDAG] Split vector types for atomic load
Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with element types bfloat and half. commit-id:3a045357
1 parent 539584c commit 40b0a4e

File tree

3 files changed

+209
-0
lines changed

3 files changed

+209
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
960960
void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
961961
void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
962962
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
963+
void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi);
963964
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
964965
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
965966
void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
11721172
SplitVecRes_STEP_VECTOR(N, Lo, Hi);
11731173
break;
11741174
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
1175+
case ISD::ATOMIC_LOAD:
1176+
SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi);
1177+
break;
11751178
case ISD::LOAD:
11761179
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
11771180
break;
@@ -1421,6 +1424,40 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
14211424
SetSplitVector(SDValue(N, ResNo), Lo, Hi);
14221425
}
14231426

1427+
/// Split the vector result of an ATOMIC_LOAD into a low and a high part.
///
/// The vector is not loaded piecewise: a single integer ATOMIC_LOAD covering
/// the full width of the vector is emitted, and the loaded integer is then
/// divided into two integers that are bitcast to the split vector types.
///
/// \param LD  The atomic load being legalized; it must be a non-extending
///            load.
/// \param Lo  Receives the low part, bitcast to the low split vector type.
/// \param Hi  Receives the high part, bitcast to the high split vector type.
void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo,
                                               SDValue &Hi) {
  assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
         "Extended load during type legalization!");
  SDLoc dl(LD);
  EVT VT = LD->getValueType(0);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);

  SDValue Ch = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // Load the whole vector as one integer of the same bit width so that only
  // a single ATOMIC_LOAD is issued for the entire value.
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
  EVT MemIntVT =
      EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits());
  SDValue ALD = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, MemIntVT, IntVT, Ch,
                                  Ptr, LD->getMemOperand());

  // Divide the loaded integer into pieces matching the split vector types.
  // SplitInteger shifts by the width of the low piece (rather than by half of
  // the total width), so the high piece stays correct even if the two split
  // types differ in size, and it selects an appropriate shift-amount type.
  // NOTE(review): the low integer bits are assigned to Lo irrespective of
  // target endianness - confirm this is the intended lane order on
  // big-endian targets.
  EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
  EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
  SDValue ExtractLo, ExtractHi;
  SplitInteger(ALD, LoIntVT, HiIntVT, ExtractLo, ExtractHi);

  Lo = DAG.getBitcast(LoVT, ExtractLo);
  Hi = DAG.getBitcast(HiVT, ExtractHi);

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1));
}
1460+
14241461
void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
14251462
MachinePointerInfo &MPI, SDValue &Ptr,
14261463
uint64_t *ScaledOffset) {

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
205205
ret <2 x float> %ret
206206
}
207207

208+
; Atomic acquire load of <2 x half> with 4-byte alignment. The expected
; output performs one 32-bit load (movl) and then separates the two half
; elements with a 16-bit right shift before inserting them into xmm registers.
define <2 x half> @atomic_vec2_half(ptr %x) {
209+
; CHECK3-LABEL: atomic_vec2_half:
210+
; CHECK3: ## %bb.0:
211+
; CHECK3-NEXT: movl (%rdi), %eax
212+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
213+
; CHECK3-NEXT: shrl $16, %eax
214+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
215+
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
216+
; CHECK3-NEXT: retq
217+
;
218+
; CHECK0-LABEL: atomic_vec2_half:
219+
; CHECK0: ## %bb.0:
220+
; CHECK0-NEXT: movl (%rdi), %eax
221+
; CHECK0-NEXT: movl %eax, %ecx
222+
; CHECK0-NEXT: shrl $16, %ecx
223+
; CHECK0-NEXT: movw %cx, %dx
224+
; CHECK0-NEXT: ## implicit-def: $ecx
225+
; CHECK0-NEXT: movw %dx, %cx
226+
; CHECK0-NEXT: ## implicit-def: $xmm1
227+
; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
228+
; CHECK0-NEXT: movw %ax, %cx
229+
; CHECK0-NEXT: ## implicit-def: $eax
230+
; CHECK0-NEXT: movw %cx, %ax
231+
; CHECK0-NEXT: ## implicit-def: $xmm0
232+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
233+
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
234+
; CHECK0-NEXT: retq
235+
%ret = load atomic <2 x half>, ptr %x acquire, align 4
236+
ret <2 x half> %ret
237+
}
238+
239+
; Atomic acquire load of <2 x bfloat> with 4-byte alignment. The expected
; output performs one 32-bit load (movl) and then separates the two bfloat
; elements with a 16-bit right shift before inserting them into xmm registers.
define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
240+
; CHECK3-LABEL: atomic_vec2_bfloat:
241+
; CHECK3: ## %bb.0:
242+
; CHECK3-NEXT: movl (%rdi), %eax
243+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
244+
; CHECK3-NEXT: shrl $16, %eax
245+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
246+
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
247+
; CHECK3-NEXT: retq
248+
;
249+
; CHECK0-LABEL: atomic_vec2_bfloat:
250+
; CHECK0: ## %bb.0:
251+
; CHECK0-NEXT: movl (%rdi), %eax
252+
; CHECK0-NEXT: movl %eax, %ecx
253+
; CHECK0-NEXT: shrl $16, %ecx
254+
; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
255+
; CHECK0-NEXT: movw %ax, %dx
256+
; CHECK0-NEXT: ## implicit-def: $eax
257+
; CHECK0-NEXT: movw %dx, %ax
258+
; CHECK0-NEXT: ## implicit-def: $xmm0
259+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
260+
; CHECK0-NEXT: ## implicit-def: $eax
261+
; CHECK0-NEXT: movw %cx, %ax
262+
; CHECK0-NEXT: ## implicit-def: $xmm1
263+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
264+
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
265+
; CHECK0-NEXT: retq
266+
%ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
267+
ret <2 x bfloat> %ret
268+
}
269+
208270
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
209271
; CHECK3-LABEL: atomic_vec1_ptr:
210272
; CHECK3: ## %bb.0:
@@ -377,6 +439,115 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
377439
ret <4 x i16> %ret
378440
}
379441

442+
; Atomic acquire load of <4 x half> with 8-byte alignment. The expected
; output performs one 64-bit load (movq) and then extracts the four half
; elements with right shifts of 16, 32 and 48 bits before packing them into
; an xmm register.
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
443+
; CHECK3-LABEL: atomic_vec4_half:
444+
; CHECK3: ## %bb.0:
445+
; CHECK3-NEXT: movq (%rdi), %rax
446+
; CHECK3-NEXT: movl %eax, %ecx
447+
; CHECK3-NEXT: shrl $16, %ecx
448+
; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
449+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
450+
; CHECK3-NEXT: movq %rax, %rcx
451+
; CHECK3-NEXT: shrq $32, %rcx
452+
; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
453+
; CHECK3-NEXT: shrq $48, %rax
454+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
455+
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
456+
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
457+
; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
458+
; CHECK3-NEXT: retq
459+
;
460+
; CHECK0-LABEL: atomic_vec4_half:
461+
; CHECK0: ## %bb.0:
462+
; CHECK0-NEXT: movq (%rdi), %rax
463+
; CHECK0-NEXT: movl %eax, %ecx
464+
; CHECK0-NEXT: shrl $16, %ecx
465+
; CHECK0-NEXT: movw %cx, %dx
466+
; CHECK0-NEXT: ## implicit-def: $ecx
467+
; CHECK0-NEXT: movw %dx, %cx
468+
; CHECK0-NEXT: ## implicit-def: $xmm2
469+
; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
470+
; CHECK0-NEXT: movw %ax, %dx
471+
; CHECK0-NEXT: ## implicit-def: $ecx
472+
; CHECK0-NEXT: movw %dx, %cx
473+
; CHECK0-NEXT: ## implicit-def: $xmm0
474+
; CHECK0-NEXT: pinsrw $0, %ecx, %xmm0
475+
; CHECK0-NEXT: movq %rax, %rcx
476+
; CHECK0-NEXT: shrq $32, %rcx
477+
; CHECK0-NEXT: movw %cx, %dx
478+
; CHECK0-NEXT: ## implicit-def: $ecx
479+
; CHECK0-NEXT: movw %dx, %cx
480+
; CHECK0-NEXT: ## implicit-def: $xmm1
481+
; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
482+
; CHECK0-NEXT: shrq $48, %rax
483+
; CHECK0-NEXT: movw %ax, %cx
484+
; CHECK0-NEXT: ## implicit-def: $eax
485+
; CHECK0-NEXT: movw %cx, %ax
486+
; CHECK0-NEXT: ## implicit-def: $xmm3
487+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm3
488+
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
489+
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
490+
; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
491+
; CHECK0-NEXT: retq
492+
%ret = load atomic <4 x half>, ptr %x acquire, align 8
493+
ret <4 x half> %ret
494+
}
495+
496+
; Atomic acquire load of <4 x bfloat> with 8-byte alignment. The expected
; output performs one 64-bit load (movq) and then extracts the four bfloat
; elements with right shifts of 16, 32 and 48 bits before packing them into
; an xmm register.
define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
497+
; CHECK3-LABEL: atomic_vec4_bfloat:
498+
; CHECK3: ## %bb.0:
499+
; CHECK3-NEXT: movq (%rdi), %rax
500+
; CHECK3-NEXT: movq %rax, %rcx
501+
; CHECK3-NEXT: movq %rax, %rdx
502+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
503+
; CHECK3-NEXT: ## kill: def $eax killed $eax killed $rax
504+
; CHECK3-NEXT: shrl $16, %eax
505+
; CHECK3-NEXT: shrq $32, %rcx
506+
; CHECK3-NEXT: shrq $48, %rdx
507+
; CHECK3-NEXT: pinsrw $0, %edx, %xmm1
508+
; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
509+
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
510+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
511+
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
512+
; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
513+
; CHECK3-NEXT: retq
514+
;
515+
; CHECK0-LABEL: atomic_vec4_bfloat:
516+
; CHECK0: ## %bb.0:
517+
; CHECK0-NEXT: movq (%rdi), %rax
518+
; CHECK0-NEXT: movl %eax, %ecx
519+
; CHECK0-NEXT: shrl $16, %ecx
520+
; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
521+
; CHECK0-NEXT: movw %ax, %dx
522+
; CHECK0-NEXT: movq %rax, %rsi
523+
; CHECK0-NEXT: shrq $32, %rsi
524+
; CHECK0-NEXT: ## kill: def $si killed $si killed $rsi
525+
; CHECK0-NEXT: shrq $48, %rax
526+
; CHECK0-NEXT: movw %ax, %di
527+
; CHECK0-NEXT: ## implicit-def: $eax
528+
; CHECK0-NEXT: movw %di, %ax
529+
; CHECK0-NEXT: ## implicit-def: $xmm0
530+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
531+
; CHECK0-NEXT: ## implicit-def: $eax
532+
; CHECK0-NEXT: movw %si, %ax
533+
; CHECK0-NEXT: ## implicit-def: $xmm1
534+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
535+
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
536+
; CHECK0-NEXT: ## implicit-def: $eax
537+
; CHECK0-NEXT: movw %dx, %ax
538+
; CHECK0-NEXT: ## implicit-def: $xmm0
539+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
540+
; CHECK0-NEXT: ## implicit-def: $eax
541+
; CHECK0-NEXT: movw %cx, %ax
542+
; CHECK0-NEXT: ## implicit-def: $xmm2
543+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
544+
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
545+
; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
546+
; CHECK0-NEXT: retq
547+
%ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
548+
ret <4 x bfloat> %ret
549+
}
550+
380551
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
381552
; CHECK-LABEL: atomic_vec4_float_align:
382553
; CHECK: ## %bb.0:

0 commit comments

Comments
 (0)