@@ -106,25 +106,7 @@ define <vscale x 4 x i32> @usdot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a,
106
106
;
107
107
; CHECK-NEWLOWERING-LABEL: usdot:
108
108
; CHECK-NEWLOWERING: // %bb.0: // %entry
109
- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z1.b
110
- ; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b
111
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.h, z1.b
112
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
113
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.s
114
- ; CHECK-NEWLOWERING-NEXT: uunpklo z5.s, z3.h
115
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
116
- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
117
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
118
- ; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z1.h
119
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
120
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z2.h
121
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
122
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s
123
- ; CHECK-NEWLOWERING-NEXT: mul z3.s, z3.s, z4.s
124
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s
125
- ; CHECK-NEWLOWERING-NEXT: movprfx z1, z3
126
- ; CHECK-NEWLOWERING-NEXT: mla z1.s, p0/m, z7.s, z24.s
127
- ; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
109
+ ; CHECK-NEWLOWERING-NEXT: usdot z0.s, z1.b, z2.b
128
110
; CHECK-NEWLOWERING-NEXT: ret
129
111
entry:
130
112
%a.wide = zext <vscale x 16 x i8 > %a to <vscale x 16 x i32 >
@@ -165,25 +147,7 @@ define <vscale x 4 x i32> @sudot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a,
165
147
;
166
148
; CHECK-NEWLOWERING-LABEL: sudot:
167
149
; CHECK-NEWLOWERING: // %bb.0: // %entry
168
- ; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z1.b
169
- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b
170
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z1.h, z1.b
171
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
172
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.s
173
- ; CHECK-NEWLOWERING-NEXT: sunpklo z5.s, z3.h
174
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
175
- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
176
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
177
- ; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z1.h
178
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h
179
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z2.h
180
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
181
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s
182
- ; CHECK-NEWLOWERING-NEXT: mul z3.s, z3.s, z4.s
183
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s
184
- ; CHECK-NEWLOWERING-NEXT: movprfx z1, z3
185
- ; CHECK-NEWLOWERING-NEXT: mla z1.s, p0/m, z7.s, z24.s
186
- ; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
150
+ ; CHECK-NEWLOWERING-NEXT: usdot z0.s, z2.b, z1.b
187
151
; CHECK-NEWLOWERING-NEXT: ret
188
152
entry:
189
153
%a.wide = sext <vscale x 16 x i8 > %a to <vscale x 16 x i32 >
@@ -389,59 +353,12 @@ define <vscale x 4 x i64> @usdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
389
353
;
390
354
; CHECK-NEWLOWERING-LABEL: usdot_8to64:
391
355
; CHECK-NEWLOWERING: // %bb.0: // %entry
392
- ; CHECK-NEWLOWERING-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
393
- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #-2
394
- ; CHECK-NEWLOWERING-NEXT: str z9, [sp] // 16-byte Folded Spill
395
- ; CHECK-NEWLOWERING-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
396
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
397
- ; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
398
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
399
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
400
- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b
401
- ; CHECK-NEWLOWERING-NEXT: sunpklo z5.h, z3.b
402
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
403
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.h, z3.b
404
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
405
- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
406
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
407
- ; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z5.h
408
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h
409
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z2.h
410
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
411
- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h
412
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
413
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z26.d, z6.s
414
- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z6.s
415
- ; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z4.s
416
- ; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z7.s
417
- ; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z5.s
418
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
419
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s
420
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s
421
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z30.d, z24.s
422
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z31.d, z2.s
423
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.d, z24.s
424
- ; CHECK-NEWLOWERING-NEXT: uunpklo z2.d, z2.s
425
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z8.d, z25.s
426
- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.d, z25.s
427
- ; CHECK-NEWLOWERING-NEXT: sunpklo z9.d, z3.s
428
- ; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
429
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
430
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
431
- ; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
432
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
433
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
434
- ; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
435
- ; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
436
- ; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
437
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
438
- ; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
439
- ; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
440
- ; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
441
- ; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
442
- ; CHECK-NEWLOWERING-NEXT: add z1.d, z3.d, z1.d
443
- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #2
444
- ; CHECK-NEWLOWERING-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
356
+ ; CHECK-NEWLOWERING-NEXT: mov z4.s, #0 // =0x0
357
+ ; CHECK-NEWLOWERING-NEXT: usdot z4.s, z2.b, z3.b
358
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z4.s
359
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z4.s
360
+ ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
361
+ ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
445
362
; CHECK-NEWLOWERING-NEXT: ret
446
363
entry:
447
364
%a.wide = zext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
@@ -522,59 +439,12 @@ define <vscale x 4 x i64> @sudot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
522
439
;
523
440
; CHECK-NEWLOWERING-LABEL: sudot_8to64:
524
441
; CHECK-NEWLOWERING: // %bb.0: // %entry
525
- ; CHECK-NEWLOWERING-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
526
- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #-2
527
- ; CHECK-NEWLOWERING-NEXT: str z9, [sp] // 16-byte Folded Spill
528
- ; CHECK-NEWLOWERING-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
529
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
530
- ; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
531
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
532
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
533
- ; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b
534
- ; CHECK-NEWLOWERING-NEXT: uunpklo z5.h, z3.b
535
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
536
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.h, z3.b
537
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
538
- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
539
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
540
- ; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z5.h
541
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h
542
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z2.h
543
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
544
- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h
545
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
546
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z26.d, z6.s
547
- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z6.s
548
- ; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z4.s
549
- ; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z7.s
550
- ; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z5.s
551
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
552
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s
553
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s
554
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z30.d, z24.s
555
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z31.d, z2.s
556
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.d, z24.s
557
- ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z2.s
558
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z8.d, z25.s
559
- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.d, z25.s
560
- ; CHECK-NEWLOWERING-NEXT: uunpklo z9.d, z3.s
561
- ; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
562
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
563
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
564
- ; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
565
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
566
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
567
- ; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
568
- ; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
569
- ; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
570
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
571
- ; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
572
- ; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
573
- ; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
574
- ; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
575
- ; CHECK-NEWLOWERING-NEXT: add z1.d, z3.d, z1.d
576
- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #2
577
- ; CHECK-NEWLOWERING-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
442
+ ; CHECK-NEWLOWERING-NEXT: mov z4.s, #0 // =0x0
443
+ ; CHECK-NEWLOWERING-NEXT: usdot z4.s, z3.b, z2.b
444
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z4.s
445
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z4.s
446
+ ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
447
+ ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
578
448
; CHECK-NEWLOWERING-NEXT: ret
579
449
entry:
580
450
%a.wide = sext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
0 commit comments