@@ -332,24 +332,25 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
332
332
; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
333
333
; CHECK-NEXT: ptrue pn8.b
334
334
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
335
- ; CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4 , mul vl] // 32-byte Folded Spill
336
- ; CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8 , mul vl] // 32-byte Folded Spill
335
+ ; CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2 , mul vl] // 32-byte Folded Spill
336
+ ; CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4 , mul vl] // 32-byte Folded Spill
337
337
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
338
- ; CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12 , mul vl] // 32-byte Folded Spill
339
- ; CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16 , mul vl] // 32-byte Folded Spill
338
+ ; CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6 , mul vl] // 32-byte Folded Spill
339
+ ; CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8 , mul vl] // 32-byte Folded Spill
340
340
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
341
- ; CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20 , mul vl] // 32-byte Folded Spill
342
- ; CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24 , mul vl] // 32-byte Folded Spill
341
+ ; CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10 , mul vl] // 32-byte Folded Spill
342
+ ; CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12 , mul vl] // 32-byte Folded Spill
343
343
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
344
- ; CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28 , mul vl] // 32-byte Folded Spill
344
+ ; CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14 , mul vl] // 32-byte Folded Spill
345
345
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
346
346
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
347
347
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
348
348
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
349
349
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
350
350
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
351
351
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
352
- ; CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
352
+ ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
353
+ ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
353
354
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
354
355
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
355
356
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
@@ -372,15 +373,16 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
372
373
; CHECK-NEXT: addvl sp, sp, #1
373
374
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
374
375
; CHECK-NEXT: ptrue pn8.b
376
+ ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
377
+ ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
378
+ ; CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
379
+ ; CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
380
+ ; CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
381
+ ; CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
382
+ ; CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
383
+ ; CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
384
+ ; CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
375
385
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
376
- ; CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
377
- ; CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
378
- ; CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
379
- ; CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
380
- ; CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
381
- ; CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
382
- ; CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
383
- ; CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
384
386
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
385
387
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
386
388
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -427,24 +429,25 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
427
429
; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
428
430
; FP-CHECK-NEXT: ptrue pn8.b
429
431
; FP-CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
430
- ; FP-CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4 , mul vl] // 32-byte Folded Spill
431
- ; FP-CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8 , mul vl] // 32-byte Folded Spill
432
+ ; FP-CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2 , mul vl] // 32-byte Folded Spill
433
+ ; FP-CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4 , mul vl] // 32-byte Folded Spill
432
434
; FP-CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
433
- ; FP-CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12 , mul vl] // 32-byte Folded Spill
434
- ; FP-CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16 , mul vl] // 32-byte Folded Spill
435
+ ; FP-CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6 , mul vl] // 32-byte Folded Spill
436
+ ; FP-CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8 , mul vl] // 32-byte Folded Spill
435
437
; FP-CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
436
- ; FP-CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20 , mul vl] // 32-byte Folded Spill
437
- ; FP-CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24 , mul vl] // 32-byte Folded Spill
438
+ ; FP-CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10 , mul vl] // 32-byte Folded Spill
439
+ ; FP-CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12 , mul vl] // 32-byte Folded Spill
438
440
; FP-CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
439
- ; FP-CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28 , mul vl] // 32-byte Folded Spill
441
+ ; FP-CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14 , mul vl] // 32-byte Folded Spill
440
442
; FP-CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
441
443
; FP-CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
442
444
; FP-CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
443
445
; FP-CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
444
446
; FP-CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
445
447
; FP-CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
446
448
; FP-CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
447
- ; FP-CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
449
+ ; FP-CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
450
+ ; FP-CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
448
451
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
449
452
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
450
453
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
@@ -465,15 +468,16 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
465
468
; FP-CHECK-NEXT: .cfi_restore vg
466
469
; FP-CHECK-NEXT: addvl sp, sp, #1
467
470
; FP-CHECK-NEXT: ptrue pn8.b
471
+ ; FP-CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
472
+ ; FP-CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
473
+ ; FP-CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
474
+ ; FP-CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
475
+ ; FP-CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
476
+ ; FP-CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
477
+ ; FP-CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
478
+ ; FP-CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
479
+ ; FP-CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
468
480
; FP-CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
469
- ; FP-CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
470
- ; FP-CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
471
- ; FP-CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
472
- ; FP-CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
473
- ; FP-CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
474
- ; FP-CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
475
- ; FP-CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
476
- ; FP-CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
477
481
; FP-CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
478
482
; FP-CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
479
483
; FP-CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
0 commit comments