@@ -174,25 +174,7 @@ define <vscale x 4 x i32> @usdot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a,
174
174
;
175
175
; CHECK-NEWLOWERING-LABEL: usdot:
176
176
; CHECK-NEWLOWERING: // %bb.0: // %entry
177
- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z1.b
178
- ; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b
179
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.h, z1.b
180
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
181
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.s
182
- ; CHECK-NEWLOWERING-NEXT: uunpklo z5.s, z3.h
183
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
184
- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
185
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
186
- ; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z1.h
187
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
188
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z2.h
189
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
190
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s
191
- ; CHECK-NEWLOWERING-NEXT: mul z3.s, z3.s, z4.s
192
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s
193
- ; CHECK-NEWLOWERING-NEXT: movprfx z1, z3
194
- ; CHECK-NEWLOWERING-NEXT: mla z1.s, p0/m, z7.s, z24.s
195
- ; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
177
+ ; CHECK-NEWLOWERING-NEXT: usdot z0.s, z1.b, z2.b
196
178
; CHECK-NEWLOWERING-NEXT: ret
197
179
entry:
198
180
%a.wide = zext <vscale x 16 x i8 > %a to <vscale x 16 x i32 >
@@ -233,25 +215,7 @@ define <vscale x 4 x i32> @sudot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a,
233
215
;
234
216
; CHECK-NEWLOWERING-LABEL: sudot:
235
217
; CHECK-NEWLOWERING: // %bb.0: // %entry
236
- ; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z1.b
237
- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b
238
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z1.h, z1.b
239
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
240
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.s
241
- ; CHECK-NEWLOWERING-NEXT: sunpklo z5.s, z3.h
242
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
243
- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
244
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
245
- ; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z1.h
246
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h
247
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z2.h
248
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
249
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s
250
- ; CHECK-NEWLOWERING-NEXT: mul z3.s, z3.s, z4.s
251
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s
252
- ; CHECK-NEWLOWERING-NEXT: movprfx z1, z3
253
- ; CHECK-NEWLOWERING-NEXT: mla z1.s, p0/m, z7.s, z24.s
254
- ; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
218
+ ; CHECK-NEWLOWERING-NEXT: usdot z0.s, z2.b, z1.b
255
219
; CHECK-NEWLOWERING-NEXT: ret
256
220
entry:
257
221
%a.wide = sext <vscale x 16 x i8 > %a to <vscale x 16 x i32 >
@@ -457,59 +421,12 @@ define <vscale x 4 x i64> @usdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
457
421
;
458
422
; CHECK-NEWLOWERING-LABEL: usdot_8to64:
459
423
; CHECK-NEWLOWERING: // %bb.0: // %entry
460
- ; CHECK-NEWLOWERING-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
461
- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #-2
462
- ; CHECK-NEWLOWERING-NEXT: str z9, [sp] // 16-byte Folded Spill
463
- ; CHECK-NEWLOWERING-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
464
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
465
- ; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
466
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
467
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
468
- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b
469
- ; CHECK-NEWLOWERING-NEXT: sunpklo z5.h, z3.b
470
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
471
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.h, z3.b
472
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
473
- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
474
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
475
- ; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z5.h
476
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h
477
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z2.h
478
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
479
- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h
480
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
481
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z26.d, z6.s
482
- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z6.s
483
- ; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z4.s
484
- ; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z7.s
485
- ; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z5.s
486
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
487
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s
488
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s
489
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z30.d, z24.s
490
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z31.d, z2.s
491
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.d, z24.s
492
- ; CHECK-NEWLOWERING-NEXT: uunpklo z2.d, z2.s
493
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z8.d, z25.s
494
- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.d, z25.s
495
- ; CHECK-NEWLOWERING-NEXT: sunpklo z9.d, z3.s
496
- ; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
497
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
498
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
499
- ; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
500
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
501
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
502
- ; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
503
- ; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
504
- ; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
505
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
506
- ; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
507
- ; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
508
- ; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
509
- ; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
510
- ; CHECK-NEWLOWERING-NEXT: add z1.d, z3.d, z1.d
511
- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #2
512
- ; CHECK-NEWLOWERING-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
424
+ ; CHECK-NEWLOWERING-NEXT: mov z4.s, #0 // =0x0
425
+ ; CHECK-NEWLOWERING-NEXT: usdot z4.s, z2.b, z3.b
426
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z4.s
427
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z4.s
428
+ ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
429
+ ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
513
430
; CHECK-NEWLOWERING-NEXT: ret
514
431
entry:
515
432
%a.wide = zext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
@@ -590,59 +507,12 @@ define <vscale x 4 x i64> @sudot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
590
507
;
591
508
; CHECK-NEWLOWERING-LABEL: sudot_8to64:
592
509
; CHECK-NEWLOWERING: // %bb.0: // %entry
593
- ; CHECK-NEWLOWERING-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
594
- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #-2
595
- ; CHECK-NEWLOWERING-NEXT: str z9, [sp] // 16-byte Folded Spill
596
- ; CHECK-NEWLOWERING-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
597
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
598
- ; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
599
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
600
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
601
- ; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b
602
- ; CHECK-NEWLOWERING-NEXT: uunpklo z5.h, z3.b
603
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
604
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.h, z3.b
605
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
606
- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
607
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
608
- ; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z5.h
609
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h
610
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z2.h
611
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
612
- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h
613
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
614
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z26.d, z6.s
615
- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z6.s
616
- ; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z4.s
617
- ; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z7.s
618
- ; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z5.s
619
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
620
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s
621
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s
622
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z30.d, z24.s
623
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z31.d, z2.s
624
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.d, z24.s
625
- ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z2.s
626
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z8.d, z25.s
627
- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.d, z25.s
628
- ; CHECK-NEWLOWERING-NEXT: uunpklo z9.d, z3.s
629
- ; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
630
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
631
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
632
- ; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
633
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
634
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
635
- ; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
636
- ; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
637
- ; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
638
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
639
- ; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
640
- ; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
641
- ; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
642
- ; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
643
- ; CHECK-NEWLOWERING-NEXT: add z1.d, z3.d, z1.d
644
- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #2
645
- ; CHECK-NEWLOWERING-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
510
+ ; CHECK-NEWLOWERING-NEXT: mov z4.s, #0 // =0x0
511
+ ; CHECK-NEWLOWERING-NEXT: usdot z4.s, z3.b, z2.b
512
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z4.s
513
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z4.s
514
+ ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
515
+ ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
646
516
; CHECK-NEWLOWERING-NEXT: ret
647
517
entry:
648
518
%a.wide = sext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
0 commit comments