Skip to content

Commit 130813b

Browse files
committed
Fix up tests
1 parent 8019197 commit 130813b

File tree

2 files changed

+155
-87
lines changed

2 files changed

+155
-87
lines changed

llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll

Lines changed: 56 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -397,45 +397,45 @@ define <vscale x 4 x i64> @usdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
397397
; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
398398
; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
399399
; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
400-
; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b
400+
; CHECK-NEWLOWERING-NEXT: uunpkhi z4.h, z2.b
401+
; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z2.b
401402
; CHECK-NEWLOWERING-NEXT: sunpklo z5.h, z3.b
402-
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
403403
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.h, z3.b
404404
; CHECK-NEWLOWERING-NEXT: ptrue p0.d
405405
; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
406+
; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z2.h
407+
; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z5.h
408+
; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h
406409
; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
407-
; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z5.h
408-
; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h
409-
; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z2.h
410410
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
411-
; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h
412411
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
413-
; CHECK-NEWLOWERING-NEXT: uunpkhi z26.d, z6.s
414-
; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z6.s
415-
; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z4.s
416-
; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z7.s
417-
; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z5.s
412+
; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h
413+
; CHECK-NEWLOWERING-NEXT: uunpklo z26.d, z6.s
414+
; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z7.s
415+
; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s
416+
; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z24.s
417+
; CHECK-NEWLOWERING-NEXT: sunpkhi z24.d, z24.s
418+
; CHECK-NEWLOWERING-NEXT: uunpkhi z6.d, z6.s
419+
; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z25.s
420+
; CHECK-NEWLOWERING-NEXT: sunpkhi z25.d, z25.s
421+
; CHECK-NEWLOWERING-NEXT: uunpklo z30.d, z4.s
418422
; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
419-
; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s
423+
; CHECK-NEWLOWERING-NEXT: uunpklo z31.d, z2.s
424+
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
425+
; CHECK-NEWLOWERING-NEXT: sunpklo z8.d, z3.s
426+
; CHECK-NEWLOWERING-NEXT: sunpklo z9.d, z5.s
420427
; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s
421-
; CHECK-NEWLOWERING-NEXT: uunpkhi z30.d, z24.s
422-
; CHECK-NEWLOWERING-NEXT: uunpkhi z31.d, z2.s
423-
; CHECK-NEWLOWERING-NEXT: uunpklo z24.d, z24.s
424-
; CHECK-NEWLOWERING-NEXT: uunpklo z2.d, z2.s
425-
; CHECK-NEWLOWERING-NEXT: sunpkhi z8.d, z25.s
426-
; CHECK-NEWLOWERING-NEXT: sunpklo z25.d, z25.s
427-
; CHECK-NEWLOWERING-NEXT: sunpklo z9.d, z3.s
428-
; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
429-
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
428+
; CHECK-NEWLOWERING-NEXT: mul z7.d, z7.d, z24.d
429+
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z28.d
430430
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
431-
; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
432-
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
433-
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
434-
; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
435-
; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
431+
; CHECK-NEWLOWERING-NEXT: mul z6.d, z6.d, z25.d
432+
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z29.d
433+
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z5.d
434+
; CHECK-NEWLOWERING-NEXT: movprfx z2, z7
435+
; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z31.d, z9.d
436436
; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
437-
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
438-
; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
437+
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z3.d
438+
; CHECK-NEWLOWERING-NEXT: movprfx z3, z6
439439
; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
440440
; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
441441
; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
@@ -530,45 +530,45 @@ define <vscale x 4 x i64> @sudot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
530530
; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
531531
; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
532532
; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
533-
; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b
533+
; CHECK-NEWLOWERING-NEXT: sunpkhi z4.h, z2.b
534+
; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z2.b
534535
; CHECK-NEWLOWERING-NEXT: uunpklo z5.h, z3.b
535-
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
536536
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.h, z3.b
537537
; CHECK-NEWLOWERING-NEXT: ptrue p0.d
538538
; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
539+
; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z2.h
540+
; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z5.h
541+
; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h
539542
; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
540-
; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z5.h
541-
; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h
542-
; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z2.h
543543
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
544-
; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h
545544
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
546-
; CHECK-NEWLOWERING-NEXT: sunpkhi z26.d, z6.s
547-
; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z6.s
548-
; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z4.s
549-
; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z7.s
550-
; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z5.s
545+
; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h
546+
; CHECK-NEWLOWERING-NEXT: sunpklo z26.d, z6.s
547+
; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z7.s
548+
; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s
549+
; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z24.s
550+
; CHECK-NEWLOWERING-NEXT: uunpkhi z24.d, z24.s
551+
; CHECK-NEWLOWERING-NEXT: sunpkhi z6.d, z6.s
552+
; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z25.s
553+
; CHECK-NEWLOWERING-NEXT: uunpkhi z25.d, z25.s
554+
; CHECK-NEWLOWERING-NEXT: sunpklo z30.d, z4.s
551555
; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
552-
; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s
556+
; CHECK-NEWLOWERING-NEXT: sunpklo z31.d, z2.s
557+
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
558+
; CHECK-NEWLOWERING-NEXT: uunpklo z8.d, z3.s
559+
; CHECK-NEWLOWERING-NEXT: uunpklo z9.d, z5.s
553560
; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s
554-
; CHECK-NEWLOWERING-NEXT: sunpkhi z30.d, z24.s
555-
; CHECK-NEWLOWERING-NEXT: sunpkhi z31.d, z2.s
556-
; CHECK-NEWLOWERING-NEXT: sunpklo z24.d, z24.s
557-
; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z2.s
558-
; CHECK-NEWLOWERING-NEXT: uunpkhi z8.d, z25.s
559-
; CHECK-NEWLOWERING-NEXT: uunpklo z25.d, z25.s
560-
; CHECK-NEWLOWERING-NEXT: uunpklo z9.d, z3.s
561-
; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
562-
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
561+
; CHECK-NEWLOWERING-NEXT: mul z7.d, z7.d, z24.d
562+
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z28.d
563563
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
564-
; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
565-
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
566-
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
567-
; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
568-
; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
564+
; CHECK-NEWLOWERING-NEXT: mul z6.d, z6.d, z25.d
565+
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z29.d
566+
; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z5.d
567+
; CHECK-NEWLOWERING-NEXT: movprfx z2, z7
568+
; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z31.d, z9.d
569569
; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
570-
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
571-
; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
570+
; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z3.d
571+
; CHECK-NEWLOWERING-NEXT: movprfx z3, z6
572572
; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
573573
; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
574574
; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d

llvm/test/CodeGen/AArch64/sve-partial-reduce-wide-add.ll

Lines changed: 99 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -172,15 +172,35 @@ entry:
172172
}
173173

174174
define <vscale x 2 x i32> @signed_wide_add_nxv4i16(<vscale x 2 x i32> %acc, <vscale x 4 x i16> %input){
175-
; CHECK-LABEL: signed_wide_add_nxv4i16:
176-
; CHECK: // %bb.0: // %entry
177-
; CHECK-NEXT: ptrue p0.s
178-
; CHECK-NEXT: sxth z1.s, p0/m, z1.s
179-
; CHECK-NEXT: uunpklo z2.d, z1.s
180-
; CHECK-NEXT: uunpkhi z1.d, z1.s
181-
; CHECK-NEXT: add z0.d, z0.d, z2.d
182-
; CHECK-NEXT: add z0.d, z1.d, z0.d
183-
; CHECK-NEXT: ret
175+
; CHECK-SVE2-LABEL: signed_wide_add_nxv4i16:
176+
; CHECK-SVE2: // %bb.0: // %entry
177+
; CHECK-SVE2-NEXT: ptrue p0.s
178+
; CHECK-SVE2-NEXT: sxth z1.s, p0/m, z1.s
179+
; CHECK-SVE2-NEXT: uunpklo z2.d, z1.s
180+
; CHECK-SVE2-NEXT: uunpkhi z1.d, z1.s
181+
; CHECK-SVE2-NEXT: add z0.d, z0.d, z2.d
182+
; CHECK-SVE2-NEXT: add z0.d, z1.d, z0.d
183+
; CHECK-SVE2-NEXT: ret
184+
;
185+
; CHECK-SVE-LABEL: signed_wide_add_nxv4i16:
186+
; CHECK-SVE: // %bb.0: // %entry
187+
; CHECK-SVE-NEXT: ptrue p0.s
188+
; CHECK-SVE-NEXT: sxth z1.s, p0/m, z1.s
189+
; CHECK-SVE-NEXT: uunpklo z2.d, z1.s
190+
; CHECK-SVE-NEXT: uunpkhi z1.d, z1.s
191+
; CHECK-SVE-NEXT: add z0.d, z0.d, z2.d
192+
; CHECK-SVE-NEXT: add z0.d, z1.d, z0.d
193+
; CHECK-SVE-NEXT: ret
194+
;
195+
; CHECK-NEWLOWERING-LABEL: signed_wide_add_nxv4i16:
196+
; CHECK-NEWLOWERING: // %bb.0: // %entry
197+
; CHECK-NEWLOWERING-NEXT: ptrue p0.s
198+
; CHECK-NEWLOWERING-NEXT: sxth z1.s, p0/m, z1.s
199+
; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z1.s
200+
; CHECK-NEWLOWERING-NEXT: sunpkhi z1.d, z1.s
201+
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
202+
; CHECK-NEWLOWERING-NEXT: add z0.d, z1.d, z0.d
203+
; CHECK-NEWLOWERING-NEXT: ret
184204
entry:
185205
%input.wide = sext <vscale x 4 x i16> %input to <vscale x 4 x i32>
186206
%partial.reduce = tail call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv4i32(<vscale x 2 x i32> %acc, <vscale x 4 x i32> %input.wide)
@@ -203,35 +223,83 @@ entry:
203223
}
204224

205225
define <vscale x 4 x i64> @signed_wide_add_nxv8i32(<vscale x 4 x i64> %acc, <vscale x 8 x i32> %input){
206-
; CHECK-LABEL: signed_wide_add_nxv8i32:
207-
; CHECK: // %bb.0: // %entry
208-
; CHECK-NEXT: sunpkhi z4.d, z2.s
209-
; CHECK-NEXT: sunpklo z2.d, z2.s
210-
; CHECK-NEXT: sunpkhi z5.d, z3.s
211-
; CHECK-NEXT: sunpklo z3.d, z3.s
212-
; CHECK-NEXT: add z0.d, z0.d, z2.d
213-
; CHECK-NEXT: add z1.d, z1.d, z4.d
214-
; CHECK-NEXT: add z0.d, z3.d, z0.d
215-
; CHECK-NEXT: add z1.d, z5.d, z1.d
216-
; CHECK-NEXT: ret
226+
; CHECK-SVE2-LABEL: signed_wide_add_nxv8i32:
227+
; CHECK-SVE2: // %bb.0: // %entry
228+
; CHECK-SVE2-NEXT: sunpkhi z4.d, z2.s
229+
; CHECK-SVE2-NEXT: sunpklo z2.d, z2.s
230+
; CHECK-SVE2-NEXT: sunpkhi z5.d, z3.s
231+
; CHECK-SVE2-NEXT: sunpklo z3.d, z3.s
232+
; CHECK-SVE2-NEXT: add z0.d, z0.d, z2.d
233+
; CHECK-SVE2-NEXT: add z1.d, z1.d, z4.d
234+
; CHECK-SVE2-NEXT: add z0.d, z3.d, z0.d
235+
; CHECK-SVE2-NEXT: add z1.d, z5.d, z1.d
236+
; CHECK-SVE2-NEXT: ret
237+
;
238+
; CHECK-SVE-LABEL: signed_wide_add_nxv8i32:
239+
; CHECK-SVE: // %bb.0: // %entry
240+
; CHECK-SVE-NEXT: sunpkhi z4.d, z2.s
241+
; CHECK-SVE-NEXT: sunpklo z2.d, z2.s
242+
; CHECK-SVE-NEXT: sunpkhi z5.d, z3.s
243+
; CHECK-SVE-NEXT: sunpklo z3.d, z3.s
244+
; CHECK-SVE-NEXT: add z0.d, z0.d, z2.d
245+
; CHECK-SVE-NEXT: add z1.d, z1.d, z4.d
246+
; CHECK-SVE-NEXT: add z0.d, z3.d, z0.d
247+
; CHECK-SVE-NEXT: add z1.d, z5.d, z1.d
248+
; CHECK-SVE-NEXT: ret
249+
;
250+
; CHECK-NEWLOWERING-LABEL: signed_wide_add_nxv8i32:
251+
; CHECK-NEWLOWERING: // %bb.0: // %entry
252+
; CHECK-NEWLOWERING-NEXT: sunpklo z4.d, z2.s
253+
; CHECK-NEWLOWERING-NEXT: sunpklo z5.d, z3.s
254+
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
255+
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
256+
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z4.d
257+
; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z5.d
258+
; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
259+
; CHECK-NEWLOWERING-NEXT: add z1.d, z3.d, z1.d
260+
; CHECK-NEWLOWERING-NEXT: ret
217261
entry:
218262
%input.wide = sext <vscale x 8 x i32> %input to <vscale x 8 x i64>
219263
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv8i64(<vscale x 4 x i64> %acc, <vscale x 8 x i64> %input.wide)
220264
ret <vscale x 4 x i64> %partial.reduce
221265
}
222266

223267
define <vscale x 4 x i64> @unsigned_wide_add_nxv8i32(<vscale x 4 x i64> %acc, <vscale x 8 x i32> %input){
224-
; CHECK-LABEL: unsigned_wide_add_nxv8i32:
225-
; CHECK: // %bb.0: // %entry
226-
; CHECK-NEXT: uunpkhi z4.d, z2.s
227-
; CHECK-NEXT: uunpklo z2.d, z2.s
228-
; CHECK-NEXT: uunpkhi z5.d, z3.s
229-
; CHECK-NEXT: uunpklo z3.d, z3.s
230-
; CHECK-NEXT: add z0.d, z0.d, z2.d
231-
; CHECK-NEXT: add z1.d, z1.d, z4.d
232-
; CHECK-NEXT: add z0.d, z3.d, z0.d
233-
; CHECK-NEXT: add z1.d, z5.d, z1.d
234-
; CHECK-NEXT: ret
268+
; CHECK-SVE2-LABEL: unsigned_wide_add_nxv8i32:
269+
; CHECK-SVE2: // %bb.0: // %entry
270+
; CHECK-SVE2-NEXT: uunpkhi z4.d, z2.s
271+
; CHECK-SVE2-NEXT: uunpklo z2.d, z2.s
272+
; CHECK-SVE2-NEXT: uunpkhi z5.d, z3.s
273+
; CHECK-SVE2-NEXT: uunpklo z3.d, z3.s
274+
; CHECK-SVE2-NEXT: add z0.d, z0.d, z2.d
275+
; CHECK-SVE2-NEXT: add z1.d, z1.d, z4.d
276+
; CHECK-SVE2-NEXT: add z0.d, z3.d, z0.d
277+
; CHECK-SVE2-NEXT: add z1.d, z5.d, z1.d
278+
; CHECK-SVE2-NEXT: ret
279+
;
280+
; CHECK-SVE-LABEL: unsigned_wide_add_nxv8i32:
281+
; CHECK-SVE: // %bb.0: // %entry
282+
; CHECK-SVE-NEXT: uunpkhi z4.d, z2.s
283+
; CHECK-SVE-NEXT: uunpklo z2.d, z2.s
284+
; CHECK-SVE-NEXT: uunpkhi z5.d, z3.s
285+
; CHECK-SVE-NEXT: uunpklo z3.d, z3.s
286+
; CHECK-SVE-NEXT: add z0.d, z0.d, z2.d
287+
; CHECK-SVE-NEXT: add z1.d, z1.d, z4.d
288+
; CHECK-SVE-NEXT: add z0.d, z3.d, z0.d
289+
; CHECK-SVE-NEXT: add z1.d, z5.d, z1.d
290+
; CHECK-SVE-NEXT: ret
291+
;
292+
; CHECK-NEWLOWERING-LABEL: unsigned_wide_add_nxv8i32:
293+
; CHECK-NEWLOWERING: // %bb.0: // %entry
294+
; CHECK-NEWLOWERING-NEXT: uunpklo z4.d, z2.s
295+
; CHECK-NEWLOWERING-NEXT: uunpklo z5.d, z3.s
296+
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
297+
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
298+
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z4.d
299+
; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z5.d
300+
; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
301+
; CHECK-NEWLOWERING-NEXT: add z1.d, z3.d, z1.d
302+
; CHECK-NEWLOWERING-NEXT: ret
235303
entry:
236304
%input.wide = zext <vscale x 8 x i32> %input to <vscale x 8 x i64>
237305
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv8i64(<vscale x 4 x i64> %acc, <vscale x 8 x i64> %input.wide)

0 commit comments

Comments
 (0)