Skip to content

Commit dc23185

Browse files
[SVE] Ensure SVE call operands passed via memory are correctly initialised.
The stores created when passing operands via memory don't typically maintain the chain, because they can be performed in any order. Instead, the stores are collated and a new chain is created from all of them. The stores for SVE parameters passed via memory don't follow this idiom and instead try to maintain the chain, which unfortunately can result in those stores being incorrectly eliminated as dead code when the chain is recreated. This patch brings the SVE side in line with the non-SVE side, ensuring no stores are lost whilst also allowing greater flexibility when ordering the stores.
1 parent d61ba03 commit dc23185

File tree

3 files changed

+32
-26
lines changed

3 files changed

+32
-26
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7510,7 +7510,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
75107510
// Ensure we generate all stores for each tuple part, whilst updating the
75117511
// pointer after each store correctly using vscale.
75127512
while (NumParts) {
7513-
Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
7513+
SDValue Store = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
7514+
MemOpChains.push_back(Store);
7515+
75147516
NumParts--;
75157517
if (NumParts > 0) {
75167518
SDValue BytesIncrement;

llvm/test/CodeGen/AArch64/arm64ec-varargs.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,15 @@ define void @varargs_caller() nounwind {
3535
; CHECK-NEXT: sub sp, sp, #48
3636
; CHECK-NEXT: mov x4, sp
3737
; CHECK-NEXT: add x8, sp, #16
38-
; CHECK-NEXT: mov x9, #4617315517961601024
39-
; CHECK-NEXT: mov x0, #4607182418800017408
40-
; CHECK-NEXT: mov w1, #2
41-
; CHECK-NEXT: mov x2, #4613937818241073152
42-
; CHECK-NEXT: mov w3, #4
43-
; CHECK-NEXT: mov w5, #16
38+
; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000
39+
; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
40+
; CHECK-NEXT: mov w1, #2 // =0x2
41+
; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
42+
; CHECK-NEXT: mov w3, #4 // =0x4
43+
; CHECK-NEXT: mov w5, #16 // =0x10
4444
; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
45-
; CHECK-NEXT: stp x8, xzr, [sp, #8]
46-
; CHECK-NEXT: str x9, [sp]
45+
; CHECK-NEXT: stp x9, x8, [sp]
46+
; CHECK-NEXT: str xzr, [sp, #16]
4747
; CHECK-NEXT: bl varargs_callee
4848
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
4949
; CHECK-NEXT: add sp, sp, #48
@@ -70,17 +70,17 @@ define void @varargs_many_argscalleer() nounwind {
7070
; CHECK: // %bb.0:
7171
; CHECK-NEXT: sub sp, sp, #64
7272
; CHECK-NEXT: movi v0.2d, #0000000000000000
73-
; CHECK-NEXT: mov x4, sp
74-
; CHECK-NEXT: mov x8, #4618441417868443648
73+
; CHECK-NEXT: mov x8, #4618441417868443648 // =0x4018000000000000
7574
; CHECK-NEXT: add x9, sp, #16
7675
; CHECK-NEXT: add x3, sp, #32
77-
; CHECK-NEXT: mov x0, #4607182418800017408
78-
; CHECK-NEXT: mov x1, #4611686018427387904
79-
; CHECK-NEXT: mov x2, #4613937818241073152
80-
; CHECK-NEXT: mov w5, #16
76+
; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
77+
; CHECK-NEXT: mov x1, #4611686018427387904 // =0x4000000000000000
78+
; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
79+
; CHECK-NEXT: mov x4, sp
80+
; CHECK-NEXT: mov w5, #16 // =0x10
8181
; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
82-
; CHECK-NEXT: stp q0, q0, [sp, #16]
8382
; CHECK-NEXT: stp x9, x8, [sp]
83+
; CHECK-NEXT: stp q0, q0, [sp, #16]
8484
; CHECK-NEXT: bl varargs_many_argscallee
8585
; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
8686
; CHECK-NEXT: add sp, sp, #64

llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ define float @foo1(ptr %x0, ptr %x1, ptr %x2) nounwind {
1818
; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1]
1919
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x2]
2020
; CHECK-NEXT: ptrue p0.d
21-
; CHECK-NEXT: st1d { z16.d }, p0, [sp]
22-
; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
23-
; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl]
2421
; CHECK-NEXT: st1d { z19.d }, p0, [sp, #3, mul vl]
22+
; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl]
23+
; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
24+
; CHECK-NEXT: st1d { z16.d }, p0, [sp]
2525
; CHECK-NEXT: bl callee1
2626
; CHECK-NEXT: addvl sp, sp, #4
2727
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
@@ -73,10 +73,10 @@ define float @foo2(ptr %x0, ptr %x1) nounwind {
7373
; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1]
7474
; CHECK-NEXT: ptrue p0.d
7575
; CHECK-NEXT: mov w1, #1 // =0x1
76-
; CHECK-NEXT: st1d { z16.d }, p0, [x8]
77-
; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl]
76+
; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl]
7877
; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl]
79-
; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl]
78+
; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl]
79+
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
8080
; CHECK-NEXT: str x8, [sp]
8181
; CHECK-NEXT: bl callee2
8282
; CHECK-NEXT: addvl sp, sp, #4
@@ -121,9 +121,9 @@ define float @foo3(ptr %x0, ptr %x1, ptr %x2) nounwind {
121121
; CHECK-NEXT: ld3d { z16.d - z18.d }, p0/z, [x1]
122122
; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2]
123123
; CHECK-NEXT: ptrue p0.d
124-
; CHECK-NEXT: st1d { z16.d }, p0, [sp]
125-
; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
126124
; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl]
125+
; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
126+
; CHECK-NEXT: st1d { z16.d }, p0, [sp]
127127
; CHECK-NEXT: bl callee3
128128
; CHECK-NEXT: addvl sp, sp, #3
129129
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
@@ -704,17 +704,21 @@ define void @verify_all_operands_are_initialised() {
704704
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
705705
; CHECK-NEXT: .cfi_offset w30, -8
706706
; CHECK-NEXT: .cfi_offset w29, -16
707+
; CHECK-NEXT: ptrue p0.s
707708
; CHECK-NEXT: movi d0, #0000000000000000
708709
; CHECK-NEXT: fmov s1, #1.00000000
710+
; CHECK-NEXT: fmov z16.s, #9.00000000
709711
; CHECK-NEXT: mov w8, #1090519040 // =0x41000000
712+
; CHECK-NEXT: add x0, sp, #16
710713
; CHECK-NEXT: fmov s2, #2.00000000
711714
; CHECK-NEXT: fmov s3, #3.00000000
712-
; CHECK-NEXT: add x0, sp, #16
715+
; CHECK-NEXT: add x9, sp, #16
713716
; CHECK-NEXT: fmov s4, #4.00000000
714717
; CHECK-NEXT: fmov s5, #5.00000000
715-
; CHECK-NEXT: str w8, [sp]
716718
; CHECK-NEXT: fmov s6, #6.00000000
717719
; CHECK-NEXT: fmov s7, #7.00000000
720+
; CHECK-NEXT: st1w { z16.s }, p0, [x9]
721+
; CHECK-NEXT: str w8, [sp]
718722
; CHECK-NEXT: bl func_f8_and_v0_passed_via_memory
719723
; CHECK-NEXT: addvl sp, sp, #1
720724
; CHECK-NEXT: add sp, sp, #16

0 commit comments

Comments
 (0)