Skip to content

Commit d074cd3

Browse files
committed
[X86] Cast atomic vectors in IR to support floats
This commit casts floating-point types to integer types in atomic loads during AtomicExpand so that floating-point types are supported. It is also required to support 128-bit vectors in SSE/AVX. commit-id:80b9b6a7
1 parent 21475ae commit d074cd3

File tree

3 files changed

+172
-18
lines changed

3 files changed

+172
-18
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32070,6 +32070,13 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
3207032070
}
3207132071
}
3207232072

32073+
// Reports whether the atomic load LI should be cast to an integer type in IR
// (per the commit description, this cast is performed during AtomicExpand).
// The test is applied to getScalarType() of the loaded type rather than to
// the loaded type itself. Returns AtomicExpansionKind::CastToInteger when
// that scalar type is floating point, and AtomicExpansionKind::None
// otherwise.
TargetLowering::AtomicExpansionKind
32074+
X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
32075+
if (LI->getType()->getScalarType()->isFloatingPointTy())
32076+
return AtomicExpansionKind::CastToInteger;
32077+
return AtomicExpansionKind::None;
32078+
}
32079+
3207332080
LoadInst *
3207432081
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
3207532082
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1839,6 +1839,8 @@ namespace llvm {
18391839
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
18401840
TargetLoweringBase::AtomicExpansionKind
18411841
shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
1842+
// Override that takes an atomic load instruction and returns an
// AtomicExpansionKind describing whether the load should be cast in IR.
TargetLoweringBase::AtomicExpansionKind
1843+
shouldCastAtomicLoadInIR(LoadInst *LI) const override;
18421844
void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
18431845
void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
18441846

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 163 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -207,46 +207,43 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
207207
; CHECK-O3-LABEL: atomic_vec1_bfloat:
208208
; CHECK-O3: # %bb.0:
209209
; CHECK-O3-NEXT: movzwl (%rdi), %eax
210-
; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
210+
; CHECK-O3-NEXT: movd %eax, %xmm0
211211
; CHECK-O3-NEXT: retq
212212
;
213213
; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
214214
; CHECK-SSE-O3: # %bb.0:
215215
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
216-
; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
216+
; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
217217
; CHECK-SSE-O3-NEXT: retq
218218
;
219219
; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat:
220220
; CHECK-AVX-O3: # %bb.0:
221221
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
222-
; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
222+
; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
223223
; CHECK-AVX-O3-NEXT: retq
224224
;
225225
; CHECK-O0-LABEL: atomic_vec1_bfloat:
226226
; CHECK-O0: # %bb.0:
227227
; CHECK-O0-NEXT: movw (%rdi), %cx
228228
; CHECK-O0-NEXT: # implicit-def: $eax
229229
; CHECK-O0-NEXT: movw %cx, %ax
230-
; CHECK-O0-NEXT: # implicit-def: $xmm0
231-
; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
230+
; CHECK-O0-NEXT: movd %eax, %xmm0
232231
; CHECK-O0-NEXT: retq
233232
;
234233
; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
235234
; CHECK-SSE-O0: # %bb.0:
236235
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
237236
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
238237
; CHECK-SSE-O0-NEXT: movw %cx, %ax
239-
; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
240-
; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
238+
; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
241239
; CHECK-SSE-O0-NEXT: retq
242240
;
243241
; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat:
244242
; CHECK-AVX-O0: # %bb.0:
245243
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
246244
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
247245
; CHECK-AVX-O0-NEXT: movw %cx, %ax
248-
; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
249-
; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
246+
; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
250247
; CHECK-AVX-O0-NEXT: retq
251248
%ret = load atomic <1 x bfloat>, ptr %x acquire, align 2
252249
ret <1 x bfloat> %ret
@@ -377,6 +374,74 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
377374
ret <2 x float> %ret
378375
}
379376

377+
define <2 x half> @atomic_vec2_half(ptr %x) {
378+
; CHECK-O3-LABEL: atomic_vec2_half:
379+
; CHECK-O3: # %bb.0:
380+
; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
381+
; CHECK-O3-NEXT: retq
382+
;
383+
; CHECK-SSE-O3-LABEL: atomic_vec2_half:
384+
; CHECK-SSE-O3: # %bb.0:
385+
; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
386+
; CHECK-SSE-O3-NEXT: retq
387+
;
388+
; CHECK-AVX-O3-LABEL: atomic_vec2_half:
389+
; CHECK-AVX-O3: # %bb.0:
390+
; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
391+
; CHECK-AVX-O3-NEXT: retq
392+
;
393+
; CHECK-O0-LABEL: atomic_vec2_half:
394+
; CHECK-O0: # %bb.0:
395+
; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
396+
; CHECK-O0-NEXT: retq
397+
;
398+
; CHECK-SSE-O0-LABEL: atomic_vec2_half:
399+
; CHECK-SSE-O0: # %bb.0:
400+
; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
401+
; CHECK-SSE-O0-NEXT: retq
402+
;
403+
; CHECK-AVX-O0-LABEL: atomic_vec2_half:
404+
; CHECK-AVX-O0: # %bb.0:
405+
; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
406+
; CHECK-AVX-O0-NEXT: retq
407+
%ret = load atomic <2 x half>, ptr %x acquire, align 4
408+
ret <2 x half> %ret
409+
}
410+
411+
define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
412+
; CHECK-O3-LABEL: atomic_vec2_bfloat:
413+
; CHECK-O3: # %bb.0:
414+
; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
415+
; CHECK-O3-NEXT: retq
416+
;
417+
; CHECK-SSE-O3-LABEL: atomic_vec2_bfloat:
418+
; CHECK-SSE-O3: # %bb.0:
419+
; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
420+
; CHECK-SSE-O3-NEXT: retq
421+
;
422+
; CHECK-AVX-O3-LABEL: atomic_vec2_bfloat:
423+
; CHECK-AVX-O3: # %bb.0:
424+
; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
425+
; CHECK-AVX-O3-NEXT: retq
426+
;
427+
; CHECK-O0-LABEL: atomic_vec2_bfloat:
428+
; CHECK-O0: # %bb.0:
429+
; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
430+
; CHECK-O0-NEXT: retq
431+
;
432+
; CHECK-SSE-O0-LABEL: atomic_vec2_bfloat:
433+
; CHECK-SSE-O0: # %bb.0:
434+
; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
435+
; CHECK-SSE-O0-NEXT: retq
436+
;
437+
; CHECK-AVX-O0-LABEL: atomic_vec2_bfloat:
438+
; CHECK-AVX-O0: # %bb.0:
439+
; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
440+
; CHECK-AVX-O0-NEXT: retq
441+
%ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
442+
ret <2 x bfloat> %ret
443+
}
444+
380445
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
381446
; CHECK-O3-LABEL: atomic_vec1_ptr:
382447
; CHECK-O3: # %bb.0:
@@ -457,46 +522,43 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
457522
; CHECK-O3-LABEL: atomic_vec1_half:
458523
; CHECK-O3: # %bb.0:
459524
; CHECK-O3-NEXT: movzwl (%rdi), %eax
460-
; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
525+
; CHECK-O3-NEXT: movd %eax, %xmm0
461526
; CHECK-O3-NEXT: retq
462527
;
463528
; CHECK-SSE-O3-LABEL: atomic_vec1_half:
464529
; CHECK-SSE-O3: # %bb.0:
465530
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
466-
; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
531+
; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
467532
; CHECK-SSE-O3-NEXT: retq
468533
;
469534
; CHECK-AVX-O3-LABEL: atomic_vec1_half:
470535
; CHECK-AVX-O3: # %bb.0:
471536
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
472-
; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
537+
; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
473538
; CHECK-AVX-O3-NEXT: retq
474539
;
475540
; CHECK-O0-LABEL: atomic_vec1_half:
476541
; CHECK-O0: # %bb.0:
477542
; CHECK-O0-NEXT: movw (%rdi), %cx
478543
; CHECK-O0-NEXT: # implicit-def: $eax
479544
; CHECK-O0-NEXT: movw %cx, %ax
480-
; CHECK-O0-NEXT: # implicit-def: $xmm0
481-
; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
545+
; CHECK-O0-NEXT: movd %eax, %xmm0
482546
; CHECK-O0-NEXT: retq
483547
;
484548
; CHECK-SSE-O0-LABEL: atomic_vec1_half:
485549
; CHECK-SSE-O0: # %bb.0:
486550
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
487551
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
488552
; CHECK-SSE-O0-NEXT: movw %cx, %ax
489-
; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
490-
; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
553+
; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
491554
; CHECK-SSE-O0-NEXT: retq
492555
;
493556
; CHECK-AVX-O0-LABEL: atomic_vec1_half:
494557
; CHECK-AVX-O0: # %bb.0:
495558
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
496559
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
497560
; CHECK-AVX-O0-NEXT: movw %cx, %ax
498-
; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
499-
; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
561+
; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
500562
; CHECK-AVX-O0-NEXT: retq
501563
%ret = load atomic <1 x half>, ptr %x acquire, align 2
502564
ret <1 x half> %ret
@@ -841,6 +903,89 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
841903
ret <4 x i16> %ret
842904
}
843905

906+
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
907+
; CHECK-LABEL: atomic_vec4_half:
908+
; CHECK: # %bb.0:
909+
; CHECK-NEXT: movq (%rdi), %xmm0
910+
; CHECK-NEXT: retq
911+
%ret = load atomic <4 x half>, ptr %x acquire, align 8
912+
ret <4 x half> %ret
913+
}
914+
915+
define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
916+
; CHECK-LABEL: atomic_vec4_bfloat:
917+
; CHECK: # %bb.0:
918+
; CHECK-NEXT: movq (%rdi), %xmm0
919+
; CHECK-NEXT: retq
920+
%ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
921+
ret <4 x bfloat> %ret
922+
}
923+
924+
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
925+
; CHECK-O3-LABEL: atomic_vec4_float_align:
926+
; CHECK-O3: # %bb.0:
927+
; CHECK-O3-NEXT: pushq %rax
928+
; CHECK-O3-NEXT: movl $2, %esi
929+
; CHECK-O3-NEXT: callq __atomic_load_16@PLT
930+
; CHECK-O3-NEXT: movq %rdx, %xmm1
931+
; CHECK-O3-NEXT: movq %rax, %xmm0
932+
; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
933+
; CHECK-O3-NEXT: popq %rax
934+
; CHECK-O3-NEXT: retq
935+
;
936+
; CHECK-SSE-O3-LABEL: atomic_vec4_float_align:
937+
; CHECK-SSE-O3: # %bb.0:
938+
; CHECK-SSE-O3-NEXT: pushq %rbx
939+
; CHECK-SSE-O3-NEXT: xorl %eax, %eax
940+
; CHECK-SSE-O3-NEXT: xorl %edx, %edx
941+
; CHECK-SSE-O3-NEXT: xorl %ecx, %ecx
942+
; CHECK-SSE-O3-NEXT: xorl %ebx, %ebx
943+
; CHECK-SSE-O3-NEXT: lock cmpxchg16b (%rdi)
944+
; CHECK-SSE-O3-NEXT: movq %rdx, %xmm1
945+
; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
946+
; CHECK-SSE-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
947+
; CHECK-SSE-O3-NEXT: popq %rbx
948+
; CHECK-SSE-O3-NEXT: retq
949+
;
950+
; CHECK-AVX-O3-LABEL: atomic_vec4_float_align:
951+
; CHECK-AVX-O3: # %bb.0:
952+
; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
953+
; CHECK-AVX-O3-NEXT: retq
954+
;
955+
; CHECK-O0-LABEL: atomic_vec4_float_align:
956+
; CHECK-O0: # %bb.0:
957+
; CHECK-O0-NEXT: pushq %rax
958+
; CHECK-O0-NEXT: movl $2, %esi
959+
; CHECK-O0-NEXT: callq __atomic_load_16@PLT
960+
; CHECK-O0-NEXT: movq %rdx, %xmm1
961+
; CHECK-O0-NEXT: movq %rax, %xmm0
962+
; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
963+
; CHECK-O0-NEXT: popq %rax
964+
; CHECK-O0-NEXT: retq
965+
;
966+
; CHECK-SSE-O0-LABEL: atomic_vec4_float_align:
967+
; CHECK-SSE-O0: # %bb.0:
968+
; CHECK-SSE-O0-NEXT: pushq %rbx
969+
; CHECK-SSE-O0-NEXT: xorl %eax, %eax
970+
; CHECK-SSE-O0-NEXT: movl %eax, %ebx
971+
; CHECK-SSE-O0-NEXT: movq %rbx, %rax
972+
; CHECK-SSE-O0-NEXT: movq %rbx, %rdx
973+
; CHECK-SSE-O0-NEXT: movq %rbx, %rcx
974+
; CHECK-SSE-O0-NEXT: lock cmpxchg16b (%rdi)
975+
; CHECK-SSE-O0-NEXT: movq %rdx, %xmm1
976+
; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
977+
; CHECK-SSE-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
978+
; CHECK-SSE-O0-NEXT: popq %rbx
979+
; CHECK-SSE-O0-NEXT: retq
980+
;
981+
; CHECK-AVX-O0-LABEL: atomic_vec4_float_align:
982+
; CHECK-AVX-O0: # %bb.0:
983+
; CHECK-AVX-O0-NEXT: vmovaps (%rdi), %xmm0
984+
; CHECK-AVX-O0-NEXT: retq
985+
%ret = load atomic <4 x float>, ptr %x acquire, align 16
986+
ret <4 x float> %ret
987+
}
988+
844989
define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
845990
; CHECK-O3-LABEL: atomic_vec4_float:
846991
; CHECK-O3: # %bb.0:

0 commit comments

Comments
 (0)