@@ -205,6 +205,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
205
205
ret <2 x float > %ret
206
206
}
207
207
208
+ ; Test: acquire atomic load of a <2 x half> vector (align 4) through %x; the loaded
+ ; vector is returned unchanged.
+ ; Both expected-assembly blocks below start with one 32-bit movl from (%rdi); the
+ ; upper element is obtained with shrl $16 and each element is placed in an xmm
+ ; register with pinsrw before the final punpcklwd.
+ ; NOTE(review): the CHECK3 / CHECK0 prefixes presumably correspond to RUN lines
+ ; outside this view (likely -O3 and -O0) - confirm at the top of the file.
+ ; NOTE(review): the assertions look tool-generated; regenerate rather than
+ ; hand-edit if codegen changes - confirm against the file header.
+ define <2 x half > @atomic_vec2_half (ptr %x ) {
209
+ ; CHECK3-LABEL: atomic_vec2_half:
210
+ ; CHECK3: ## %bb.0:
211
+ ; CHECK3-NEXT: movl (%rdi), %eax
212
+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
213
+ ; CHECK3-NEXT: shrl $16, %eax
214
+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
215
+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
216
+ ; CHECK3-NEXT: retq
217
+ ;
218
+ ; CHECK0-LABEL: atomic_vec2_half:
219
+ ; CHECK0: ## %bb.0:
220
+ ; CHECK0-NEXT: movl (%rdi), %eax
221
+ ; CHECK0-NEXT: movl %eax, %ecx
222
+ ; CHECK0-NEXT: shrl $16, %ecx
223
+ ; CHECK0-NEXT: movw %cx, %dx
224
+ ; CHECK0-NEXT: ## implicit-def: $ecx
225
+ ; CHECK0-NEXT: movw %dx, %cx
226
+ ; CHECK0-NEXT: ## implicit-def: $xmm1
227
+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
228
+ ; CHECK0-NEXT: movw %ax, %cx
229
+ ; CHECK0-NEXT: ## implicit-def: $eax
230
+ ; CHECK0-NEXT: movw %cx, %ax
231
+ ; CHECK0-NEXT: ## implicit-def: $xmm0
232
+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
233
+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
234
+ ; CHECK0-NEXT: retq
235
+ %ret = load atomic <2 x half >, ptr %x acquire , align 4
236
+ ret <2 x half > %ret
237
+ }
238
+
239
+ ; Test: acquire atomic load of a <2 x bfloat> vector (align 4) through %x; the
+ ; loaded vector is returned unchanged.
+ ; Both expected-assembly blocks below start with one 32-bit movl from (%rdi); the
+ ; upper element is obtained with shrl $16 and each element is placed in an xmm
+ ; register with pinsrw before the final punpcklwd.
+ ; NOTE(review): the CHECK3 / CHECK0 prefixes presumably correspond to RUN lines
+ ; outside this view (likely -O3 and -O0) - confirm at the top of the file.
+ ; NOTE(review): the assertions look tool-generated; regenerate rather than
+ ; hand-edit if codegen changes - confirm against the file header.
+ define <2 x bfloat> @atomic_vec2_bfloat (ptr %x ) {
240
+ ; CHECK3-LABEL: atomic_vec2_bfloat:
241
+ ; CHECK3: ## %bb.0:
242
+ ; CHECK3-NEXT: movl (%rdi), %eax
243
+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
244
+ ; CHECK3-NEXT: shrl $16, %eax
245
+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
246
+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
247
+ ; CHECK3-NEXT: retq
248
+ ;
249
+ ; CHECK0-LABEL: atomic_vec2_bfloat:
250
+ ; CHECK0: ## %bb.0:
251
+ ; CHECK0-NEXT: movl (%rdi), %eax
252
+ ; CHECK0-NEXT: movl %eax, %ecx
253
+ ; CHECK0-NEXT: shrl $16, %ecx
254
+ ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
255
+ ; CHECK0-NEXT: movw %ax, %dx
256
+ ; CHECK0-NEXT: ## implicit-def: $eax
257
+ ; CHECK0-NEXT: movw %dx, %ax
258
+ ; CHECK0-NEXT: ## implicit-def: $xmm0
259
+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
260
+ ; CHECK0-NEXT: ## implicit-def: $eax
261
+ ; CHECK0-NEXT: movw %cx, %ax
262
+ ; CHECK0-NEXT: ## implicit-def: $xmm1
263
+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
264
+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
265
+ ; CHECK0-NEXT: retq
266
+ %ret = load atomic <2 x bfloat>, ptr %x acquire , align 4
267
+ ret <2 x bfloat> %ret
268
+ }
269
+
208
270
define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
209
271
; CHECK3-LABEL: atomic_vec1_ptr:
210
272
; CHECK3: ## %bb.0:
@@ -377,6 +439,115 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
377
439
ret <4 x i16 > %ret
378
440
}
379
441
442
+ ; Test: acquire atomic load of a <4 x half> vector (align 8) through %x; the loaded
+ ; vector is returned unchanged. The function is marked nounwind.
+ ; Both expected-assembly blocks below start with one 64-bit movq from (%rdi); the
+ ; remaining elements are obtained with shifts by 16, 32 and 48 bits, placed in xmm
+ ; registers with pinsrw, paired with punpcklwd, and then merged into xmm0
+ ; (punpckldq in the CHECK3 block, unpcklps in the CHECK0 block).
+ ; NOTE(review): the CHECK3 / CHECK0 prefixes presumably correspond to RUN lines
+ ; outside this view (likely -O3 and -O0) - confirm at the top of the file.
+ ; NOTE(review): the assertions look tool-generated; regenerate rather than
+ ; hand-edit if codegen changes - confirm against the file header.
+ define <4 x half > @atomic_vec4_half (ptr %x ) nounwind {
443
+ ; CHECK3-LABEL: atomic_vec4_half:
444
+ ; CHECK3: ## %bb.0:
445
+ ; CHECK3-NEXT: movq (%rdi), %rax
446
+ ; CHECK3-NEXT: movl %eax, %ecx
447
+ ; CHECK3-NEXT: shrl $16, %ecx
448
+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
449
+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
450
+ ; CHECK3-NEXT: movq %rax, %rcx
451
+ ; CHECK3-NEXT: shrq $32, %rcx
452
+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
453
+ ; CHECK3-NEXT: shrq $48, %rax
454
+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
455
+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
456
+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
457
+ ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
458
+ ; CHECK3-NEXT: retq
459
+ ;
460
+ ; CHECK0-LABEL: atomic_vec4_half:
461
+ ; CHECK0: ## %bb.0:
462
+ ; CHECK0-NEXT: movq (%rdi), %rax
463
+ ; CHECK0-NEXT: movl %eax, %ecx
464
+ ; CHECK0-NEXT: shrl $16, %ecx
465
+ ; CHECK0-NEXT: movw %cx, %dx
466
+ ; CHECK0-NEXT: ## implicit-def: $ecx
467
+ ; CHECK0-NEXT: movw %dx, %cx
468
+ ; CHECK0-NEXT: ## implicit-def: $xmm2
469
+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
470
+ ; CHECK0-NEXT: movw %ax, %dx
471
+ ; CHECK0-NEXT: ## implicit-def: $ecx
472
+ ; CHECK0-NEXT: movw %dx, %cx
473
+ ; CHECK0-NEXT: ## implicit-def: $xmm0
474
+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm0
475
+ ; CHECK0-NEXT: movq %rax, %rcx
476
+ ; CHECK0-NEXT: shrq $32, %rcx
477
+ ; CHECK0-NEXT: movw %cx, %dx
478
+ ; CHECK0-NEXT: ## implicit-def: $ecx
479
+ ; CHECK0-NEXT: movw %dx, %cx
480
+ ; CHECK0-NEXT: ## implicit-def: $xmm1
481
+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
482
+ ; CHECK0-NEXT: shrq $48, %rax
483
+ ; CHECK0-NEXT: movw %ax, %cx
484
+ ; CHECK0-NEXT: ## implicit-def: $eax
485
+ ; CHECK0-NEXT: movw %cx, %ax
486
+ ; CHECK0-NEXT: ## implicit-def: $xmm3
487
+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm3
488
+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
489
+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
490
+ ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
491
+ ; CHECK0-NEXT: retq
492
+ %ret = load atomic <4 x half >, ptr %x acquire , align 8
493
+ ret <4 x half > %ret
494
+ }
495
+
496
+ ; Test: acquire atomic load of a <4 x bfloat> vector (align 8) through %x; the
+ ; loaded vector is returned unchanged. The function is marked nounwind.
+ ; Both expected-assembly blocks below start with one 64-bit movq from (%rdi); the
+ ; remaining elements are obtained with shifts by 16, 32 and 48 bits, placed in xmm
+ ; registers with pinsrw, paired with punpcklwd, and then merged into xmm0
+ ; (punpckldq in the CHECK3 block, unpcklps in the CHECK0 block).
+ ; NOTE(review): the CHECK3 / CHECK0 prefixes presumably correspond to RUN lines
+ ; outside this view (likely -O3 and -O0) - confirm at the top of the file.
+ ; NOTE(review): the assertions look tool-generated; regenerate rather than
+ ; hand-edit if codegen changes - confirm against the file header.
+ define <4 x bfloat> @atomic_vec4_bfloat (ptr %x ) nounwind {
497
+ ; CHECK3-LABEL: atomic_vec4_bfloat:
498
+ ; CHECK3: ## %bb.0:
499
+ ; CHECK3-NEXT: movq (%rdi), %rax
500
+ ; CHECK3-NEXT: movq %rax, %rcx
501
+ ; CHECK3-NEXT: movq %rax, %rdx
502
+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
503
+ ; CHECK3-NEXT: ## kill: def $eax killed $eax killed $rax
504
+ ; CHECK3-NEXT: shrl $16, %eax
505
+ ; CHECK3-NEXT: shrq $32, %rcx
506
+ ; CHECK3-NEXT: shrq $48, %rdx
507
+ ; CHECK3-NEXT: pinsrw $0, %edx, %xmm1
508
+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
509
+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
510
+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
511
+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
512
+ ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
513
+ ; CHECK3-NEXT: retq
514
+ ;
515
+ ; CHECK0-LABEL: atomic_vec4_bfloat:
516
+ ; CHECK0: ## %bb.0:
517
+ ; CHECK0-NEXT: movq (%rdi), %rax
518
+ ; CHECK0-NEXT: movl %eax, %ecx
519
+ ; CHECK0-NEXT: shrl $16, %ecx
520
+ ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
521
+ ; CHECK0-NEXT: movw %ax, %dx
522
+ ; CHECK0-NEXT: movq %rax, %rsi
523
+ ; CHECK0-NEXT: shrq $32, %rsi
524
+ ; CHECK0-NEXT: ## kill: def $si killed $si killed $rsi
525
+ ; CHECK0-NEXT: shrq $48, %rax
526
+ ; CHECK0-NEXT: movw %ax, %di
527
+ ; CHECK0-NEXT: ## implicit-def: $eax
528
+ ; CHECK0-NEXT: movw %di, %ax
529
+ ; CHECK0-NEXT: ## implicit-def: $xmm0
530
+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
531
+ ; CHECK0-NEXT: ## implicit-def: $eax
532
+ ; CHECK0-NEXT: movw %si, %ax
533
+ ; CHECK0-NEXT: ## implicit-def: $xmm1
534
+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
535
+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
536
+ ; CHECK0-NEXT: ## implicit-def: $eax
537
+ ; CHECK0-NEXT: movw %dx, %ax
538
+ ; CHECK0-NEXT: ## implicit-def: $xmm0
539
+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
540
+ ; CHECK0-NEXT: ## implicit-def: $eax
541
+ ; CHECK0-NEXT: movw %cx, %ax
542
+ ; CHECK0-NEXT: ## implicit-def: $xmm2
543
+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
544
+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
545
+ ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
546
+ ; CHECK0-NEXT: retq
547
+ %ret = load atomic <4 x bfloat>, ptr %x acquire , align 8
548
+ ret <4 x bfloat> %ret
549
+ }
550
+
380
551
define <4 x float > @atomic_vec4_float_align (ptr %x ) nounwind {
381
552
; CHECK-LABEL: atomic_vec4_float_align:
382
553
; CHECK: ## %bb.0:
0 commit comments