@@ -207,46 +207,43 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
207
207
; CHECK-O3-LABEL: atomic_vec1_bfloat:
208
208
; CHECK-O3: # %bb.0:
209
209
; CHECK-O3-NEXT: movzwl (%rdi), %eax
210
- ; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
210
+ ; CHECK-O3-NEXT: movd %eax, %xmm0
211
211
; CHECK-O3-NEXT: retq
212
212
;
213
213
; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
214
214
; CHECK-SSE-O3: # %bb.0:
215
215
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
216
- ; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
216
+ ; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
217
217
; CHECK-SSE-O3-NEXT: retq
218
218
;
219
219
; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat:
220
220
; CHECK-AVX-O3: # %bb.0:
221
221
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
222
- ; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0 , %xmm0
222
+ ; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
223
223
; CHECK-AVX-O3-NEXT: retq
224
224
;
225
225
; CHECK-O0-LABEL: atomic_vec1_bfloat:
226
226
; CHECK-O0: # %bb.0:
227
227
; CHECK-O0-NEXT: movw (%rdi), %cx
228
228
; CHECK-O0-NEXT: # implicit-def: $eax
229
229
; CHECK-O0-NEXT: movw %cx, %ax
230
- ; CHECK-O0-NEXT: # implicit-def: $xmm0
231
- ; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
230
+ ; CHECK-O0-NEXT: movd %eax, %xmm0
232
231
; CHECK-O0-NEXT: retq
233
232
;
234
233
; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
235
234
; CHECK-SSE-O0: # %bb.0:
236
235
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
237
236
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
238
237
; CHECK-SSE-O0-NEXT: movw %cx, %ax
239
- ; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
240
- ; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
238
+ ; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
241
239
; CHECK-SSE-O0-NEXT: retq
242
240
;
243
241
; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat:
244
242
; CHECK-AVX-O0: # %bb.0:
245
243
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
246
244
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
247
245
; CHECK-AVX-O0-NEXT: movw %cx, %ax
248
- ; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
249
- ; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
246
+ ; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
250
247
; CHECK-AVX-O0-NEXT: retq
251
248
%ret = load atomic <1 x bfloat>, ptr %x acquire , align 2
252
249
ret <1 x bfloat> %ret
@@ -377,6 +374,74 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
377
374
ret <2 x float > %ret
378
375
}
379
376
377
; Acquire atomic load of <2 x half> (align 4). Every configuration is expected
; to lower to one scalar load into xmm0 (element 0 from memory, rest zeroed).
; The CHECK-AVX-* prefixes use the VEX mnemonics (vmovss / vmovd), in line with
; the other CHECK-AVX-* blocks of this test; FileCheck matches substrings, so
; the legacy spelling would not distinguish the two encodings.
+ define <2 x half> @atomic_vec2_half(ptr %x) {
378
+ ; CHECK-O3-LABEL: atomic_vec2_half:
379
+ ; CHECK-O3: # %bb.0:
380
+ ; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
381
+ ; CHECK-O3-NEXT: retq
382
+ ;
383
+ ; CHECK-SSE-O3-LABEL: atomic_vec2_half:
384
+ ; CHECK-SSE-O3: # %bb.0:
385
+ ; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
386
+ ; CHECK-SSE-O3-NEXT: retq
387
+ ;
388
+ ; CHECK-AVX-O3-LABEL: atomic_vec2_half:
389
+ ; CHECK-AVX-O3: # %bb.0:
390
+ ; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
391
+ ; CHECK-AVX-O3-NEXT: retq
392
+ ;
393
+ ; CHECK-O0-LABEL: atomic_vec2_half:
394
+ ; CHECK-O0: # %bb.0:
395
+ ; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
396
+ ; CHECK-O0-NEXT: retq
397
+ ;
398
+ ; CHECK-SSE-O0-LABEL: atomic_vec2_half:
399
+ ; CHECK-SSE-O0: # %bb.0:
400
+ ; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
401
+ ; CHECK-SSE-O0-NEXT: retq
402
+ ;
403
+ ; CHECK-AVX-O0-LABEL: atomic_vec2_half:
404
+ ; CHECK-AVX-O0: # %bb.0:
405
+ ; CHECK-AVX-O0-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
406
+ ; CHECK-AVX-O0-NEXT: retq
407
+ %ret = load atomic <2 x half>, ptr %x acquire, align 4
408
+ ret <2 x half> %ret
409
+ }
410
+
411
; Acquire atomic load of <2 x bfloat> (align 4). Every configuration is expected
; to lower to one scalar load into xmm0 (element 0 from memory, rest zeroed).
; The CHECK-AVX-* prefixes use the VEX mnemonics (vmovss / vmovd), in line with
; the other CHECK-AVX-* blocks of this test; FileCheck matches substrings, so
; the legacy spelling would not distinguish the two encodings.
+ define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
412
+ ; CHECK-O3-LABEL: atomic_vec2_bfloat:
413
+ ; CHECK-O3: # %bb.0:
414
+ ; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
415
+ ; CHECK-O3-NEXT: retq
416
+ ;
417
+ ; CHECK-SSE-O3-LABEL: atomic_vec2_bfloat:
418
+ ; CHECK-SSE-O3: # %bb.0:
419
+ ; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
420
+ ; CHECK-SSE-O3-NEXT: retq
421
+ ;
422
+ ; CHECK-AVX-O3-LABEL: atomic_vec2_bfloat:
423
+ ; CHECK-AVX-O3: # %bb.0:
424
+ ; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
425
+ ; CHECK-AVX-O3-NEXT: retq
426
+ ;
427
+ ; CHECK-O0-LABEL: atomic_vec2_bfloat:
428
+ ; CHECK-O0: # %bb.0:
429
+ ; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
430
+ ; CHECK-O0-NEXT: retq
431
+ ;
432
+ ; CHECK-SSE-O0-LABEL: atomic_vec2_bfloat:
433
+ ; CHECK-SSE-O0: # %bb.0:
434
+ ; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
435
+ ; CHECK-SSE-O0-NEXT: retq
436
+ ;
437
+ ; CHECK-AVX-O0-LABEL: atomic_vec2_bfloat:
438
+ ; CHECK-AVX-O0: # %bb.0:
439
+ ; CHECK-AVX-O0-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
440
+ ; CHECK-AVX-O0-NEXT: retq
441
+ %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
442
+ ret <2 x bfloat> %ret
443
+ }
444
+
380
445
define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
381
446
; CHECK-O3-LABEL: atomic_vec1_ptr:
382
447
; CHECK-O3: # %bb.0:
@@ -457,46 +522,43 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
457
522
; CHECK-O3-LABEL: atomic_vec1_half:
458
523
; CHECK-O3: # %bb.0:
459
524
; CHECK-O3-NEXT: movzwl (%rdi), %eax
460
- ; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
525
+ ; CHECK-O3-NEXT: movd %eax, %xmm0
461
526
; CHECK-O3-NEXT: retq
462
527
;
463
528
; CHECK-SSE-O3-LABEL: atomic_vec1_half:
464
529
; CHECK-SSE-O3: # %bb.0:
465
530
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
466
- ; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
531
+ ; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
467
532
; CHECK-SSE-O3-NEXT: retq
468
533
;
469
534
; CHECK-AVX-O3-LABEL: atomic_vec1_half:
470
535
; CHECK-AVX-O3: # %bb.0:
471
536
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
472
- ; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0 , %xmm0
537
+ ; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
473
538
; CHECK-AVX-O3-NEXT: retq
474
539
;
475
540
; CHECK-O0-LABEL: atomic_vec1_half:
476
541
; CHECK-O0: # %bb.0:
477
542
; CHECK-O0-NEXT: movw (%rdi), %cx
478
543
; CHECK-O0-NEXT: # implicit-def: $eax
479
544
; CHECK-O0-NEXT: movw %cx, %ax
480
- ; CHECK-O0-NEXT: # implicit-def: $xmm0
481
- ; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
545
+ ; CHECK-O0-NEXT: movd %eax, %xmm0
482
546
; CHECK-O0-NEXT: retq
483
547
;
484
548
; CHECK-SSE-O0-LABEL: atomic_vec1_half:
485
549
; CHECK-SSE-O0: # %bb.0:
486
550
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
487
551
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
488
552
; CHECK-SSE-O0-NEXT: movw %cx, %ax
489
- ; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
490
- ; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
553
+ ; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
491
554
; CHECK-SSE-O0-NEXT: retq
492
555
;
493
556
; CHECK-AVX-O0-LABEL: atomic_vec1_half:
494
557
; CHECK-AVX-O0: # %bb.0:
495
558
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
496
559
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
497
560
; CHECK-AVX-O0-NEXT: movw %cx, %ax
498
- ; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
499
- ; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
561
+ ; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
500
562
; CHECK-AVX-O0-NEXT: retq
501
563
%ret = load atomic <1 x half >, ptr %x acquire , align 2
502
564
ret <1 x half > %ret
@@ -841,6 +903,89 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
841
903
ret <4 x i16 > %ret
842
904
}
843
905
906
; Acquire atomic load of <4 x half> with 8-byte alignment. The checks below
; expect a single movq from (%rdi) straight into xmm0, followed by retq.
; NOTE(review): only the shared CHECK prefix is used here, and FileCheck matches
; substrings, so "movq" would also accept "vmovq". The RUN lines are not visible
; in this hunk - confirm the shared prefix is what the check generator emits.
+ define <4 x half > @atomic_vec4_half (ptr %x ) nounwind {
907
+ ; CHECK-LABEL: atomic_vec4_half:
908
+ ; CHECK: # %bb.0:
909
+ ; CHECK-NEXT: movq (%rdi), %xmm0
910
+ ; CHECK-NEXT: retq
911
+ %ret = load atomic <4 x half >, ptr %x acquire , align 8
912
+ ret <4 x half > %ret
913
+ }
914
+
915
; Acquire atomic load of <4 x bfloat> with 8-byte alignment. The checks below
; expect a single movq from (%rdi) straight into xmm0, followed by retq.
; NOTE(review): only the shared CHECK prefix is used here, and FileCheck matches
; substrings, so "movq" would also accept "vmovq". The RUN lines are not visible
; in this hunk - confirm the shared prefix is what the check generator emits.
+ define <4 x bfloat> @atomic_vec4_bfloat (ptr %x ) nounwind {
916
+ ; CHECK-LABEL: atomic_vec4_bfloat:
917
+ ; CHECK: # %bb.0:
918
+ ; CHECK-NEXT: movq (%rdi), %xmm0
919
+ ; CHECK-NEXT: retq
920
+ %ret = load atomic <4 x bfloat>, ptr %x acquire , align 8
921
+ ret <4 x bfloat> %ret
922
+ }
923
+
924
; Acquire atomic load of <4 x float> with 16-byte alignment. The checks below
; expect three different lowerings, depending on the check prefix:
;   * CHECK-O3 / CHECK-O0: a call to __atomic_load_16 with $2 in %esi
;     (presumably the acquire memory-order argument - confirm), after which the
;     result in rax/rdx is moved into xmm0/xmm1 and merged with punpcklqdq.
;   * CHECK-SSE-O3 / CHECK-SSE-O0: a lock cmpxchg16b on (%rdi) with
;     rax, rdx, rcx and rbx all zeroed, then the same rax/rdx -> xmm0 merge.
;     rbx is saved and restored around the sequence.
;   * CHECK-AVX-O3 / CHECK-AVX-O0: a single vmovaps load from (%rdi).
+ define <4 x float > @atomic_vec4_float_align (ptr %x ) nounwind {
925
+ ; CHECK-O3-LABEL: atomic_vec4_float_align:
926
+ ; CHECK-O3: # %bb.0:
927
+ ; CHECK-O3-NEXT: pushq %rax
928
+ ; CHECK-O3-NEXT: movl $2, %esi
929
+ ; CHECK-O3-NEXT: callq __atomic_load_16@PLT
930
+ ; CHECK-O3-NEXT: movq %rdx, %xmm1
931
+ ; CHECK-O3-NEXT: movq %rax, %xmm0
932
+ ; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
933
+ ; CHECK-O3-NEXT: popq %rax
934
+ ; CHECK-O3-NEXT: retq
935
+ ;
936
+ ; CHECK-SSE-O3-LABEL: atomic_vec4_float_align:
937
+ ; CHECK-SSE-O3: # %bb.0:
938
+ ; CHECK-SSE-O3-NEXT: pushq %rbx
939
+ ; CHECK-SSE-O3-NEXT: xorl %eax, %eax
940
+ ; CHECK-SSE-O3-NEXT: xorl %edx, %edx
941
+ ; CHECK-SSE-O3-NEXT: xorl %ecx, %ecx
942
+ ; CHECK-SSE-O3-NEXT: xorl %ebx, %ebx
943
+ ; CHECK-SSE-O3-NEXT: lock cmpxchg16b (%rdi)
944
+ ; CHECK-SSE-O3-NEXT: movq %rdx, %xmm1
945
+ ; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
946
+ ; CHECK-SSE-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
947
+ ; CHECK-SSE-O3-NEXT: popq %rbx
948
+ ; CHECK-SSE-O3-NEXT: retq
949
+ ;
950
+ ; CHECK-AVX-O3-LABEL: atomic_vec4_float_align:
951
+ ; CHECK-AVX-O3: # %bb.0:
952
+ ; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
953
+ ; CHECK-AVX-O3-NEXT: retq
954
+ ;
955
+ ; CHECK-O0-LABEL: atomic_vec4_float_align:
956
+ ; CHECK-O0: # %bb.0:
957
+ ; CHECK-O0-NEXT: pushq %rax
958
+ ; CHECK-O0-NEXT: movl $2, %esi
959
+ ; CHECK-O0-NEXT: callq __atomic_load_16@PLT
960
+ ; CHECK-O0-NEXT: movq %rdx, %xmm1
961
+ ; CHECK-O0-NEXT: movq %rax, %xmm0
962
+ ; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
963
+ ; CHECK-O0-NEXT: popq %rax
964
+ ; CHECK-O0-NEXT: retq
965
+ ;
966
+ ; CHECK-SSE-O0-LABEL: atomic_vec4_float_align:
967
+ ; CHECK-SSE-O0: # %bb.0:
968
+ ; CHECK-SSE-O0-NEXT: pushq %rbx
969
+ ; CHECK-SSE-O0-NEXT: xorl %eax, %eax
970
+ ; CHECK-SSE-O0-NEXT: movl %eax, %ebx
971
+ ; CHECK-SSE-O0-NEXT: movq %rbx, %rax
972
+ ; CHECK-SSE-O0-NEXT: movq %rbx, %rdx
973
+ ; CHECK-SSE-O0-NEXT: movq %rbx, %rcx
974
+ ; CHECK-SSE-O0-NEXT: lock cmpxchg16b (%rdi)
975
+ ; CHECK-SSE-O0-NEXT: movq %rdx, %xmm1
976
+ ; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
977
+ ; CHECK-SSE-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
978
+ ; CHECK-SSE-O0-NEXT: popq %rbx
979
+ ; CHECK-SSE-O0-NEXT: retq
980
+ ;
981
+ ; CHECK-AVX-O0-LABEL: atomic_vec4_float_align:
982
+ ; CHECK-AVX-O0: # %bb.0:
983
+ ; CHECK-AVX-O0-NEXT: vmovaps (%rdi), %xmm0
984
+ ; CHECK-AVX-O0-NEXT: retq
985
+ %ret = load atomic <4 x float >, ptr %x acquire , align 16
986
+ ret <4 x float > %ret
987
+ }
988
+
844
989
define <4 x float > @atomic_vec4_float (ptr %x ) nounwind {
845
990
; CHECK-O3-LABEL: atomic_vec4_float:
846
991
; CHECK-O3: # %bb.0:
0 commit comments