Skip to content

Commit 2c04bdb

Browse files
paulwalker-arm authored and tru committed
[SVE] Ensure SVE call operands passed via memory are correctly initialised. (#66070)
The stores created when passing operands via memory don't typically maintain the chain, because they can be performed in any order. Instead, a new chain is created from all of the collated stores. SVE parameters passed via memory don't follow this idiom and instead try to maintain the chain, which unfortunately can result in their stores being incorrectly removed as dead code when the chain is recreated. This patch brings the SVE side in line with the non-SVE side, ensuring that no stores are lost whilst also allowing greater flexibility when ordering the stores.
1 parent 701620d commit 2c04bdb

File tree

3 files changed

+29
-23
lines changed

3 files changed

+29
-23
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7388,7 +7388,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
73887388
// Ensure we generate all stores for each tuple part, whilst updating the
73897389
// pointer after each store correctly using vscale.
73907390
while (NumParts) {
7391-
Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
7391+
SDValue Store = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
7392+
MemOpChains.push_back(Store);
7393+
73927394
NumParts--;
73937395
if (NumParts > 0) {
73947396
SDValue BytesIncrement;

llvm/test/CodeGen/AArch64/arm64ec-varargs.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,15 @@ define void @varargs_caller() nounwind {
3535
; CHECK-NEXT: sub sp, sp, #48
3636
; CHECK-NEXT: mov x4, sp
3737
; CHECK-NEXT: add x8, sp, #16
38-
; CHECK-NEXT: mov x9, #4617315517961601024
39-
; CHECK-NEXT: mov x0, #4607182418800017408
40-
; CHECK-NEXT: mov w1, #2
41-
; CHECK-NEXT: mov x2, #4613937818241073152
42-
; CHECK-NEXT: mov w3, #4
43-
; CHECK-NEXT: mov w5, #16
38+
; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000
39+
; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
40+
; CHECK-NEXT: mov w1, #2 // =0x2
41+
; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
42+
; CHECK-NEXT: mov w3, #4 // =0x4
43+
; CHECK-NEXT: mov w5, #16 // =0x10
4444
; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
45-
; CHECK-NEXT: stp x8, xzr, [sp, #8]
46-
; CHECK-NEXT: str x9, [sp]
45+
; CHECK-NEXT: stp x9, x8, [sp]
46+
; CHECK-NEXT: str xzr, [sp, #16]
4747
; CHECK-NEXT: bl varargs_callee
4848
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
4949
; CHECK-NEXT: add sp, sp, #48
@@ -71,16 +71,16 @@ define void @varargs_many_argscalleer() nounwind {
7171
; CHECK-NEXT: sub sp, sp, #64
7272
; CHECK-NEXT: movi v0.2d, #0000000000000000
7373
; CHECK-NEXT: mov x4, sp
74-
; CHECK-NEXT: mov x8, #4618441417868443648
74+
; CHECK-NEXT: mov x8, #4618441417868443648 // =0x4018000000000000
7575
; CHECK-NEXT: add x9, sp, #16
7676
; CHECK-NEXT: add x3, sp, #32
77-
; CHECK-NEXT: mov x0, #4607182418800017408
78-
; CHECK-NEXT: mov x1, #4611686018427387904
79-
; CHECK-NEXT: mov x2, #4613937818241073152
80-
; CHECK-NEXT: mov w5, #16
77+
; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
78+
; CHECK-NEXT: mov x1, #4611686018427387904 // =0x4000000000000000
79+
; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
80+
; CHECK-NEXT: mov w5, #16 // =0x10
8181
; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
82-
; CHECK-NEXT: stp q0, q0, [sp, #16]
8382
; CHECK-NEXT: stp x9, x8, [sp]
83+
; CHECK-NEXT: stp q0, q0, [sp, #16]
8484
; CHECK-NEXT: bl varargs_many_argscallee
8585
; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
8686
; CHECK-NEXT: add sp, sp, #64

llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ define float @foo1(ptr %x0, ptr %x1, ptr %x2) nounwind {
1818
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x2]
1919
; CHECK-NEXT: mov x0, sp
2020
; CHECK-NEXT: ptrue p0.d
21-
; CHECK-NEXT: st1d { z16.d }, p0, [sp]
22-
; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
23-
; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl]
2421
; CHECK-NEXT: st1d { z19.d }, p0, [sp, #3, mul vl]
22+
; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl]
23+
; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
24+
; CHECK-NEXT: st1d { z16.d }, p0, [sp]
2525
; CHECK-NEXT: bl callee1
2626
; CHECK-NEXT: addvl sp, sp, #4
2727
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
@@ -73,10 +73,10 @@ define float @foo2(ptr %x0, ptr %x1) nounwind {
7373
; CHECK-NEXT: mov w7, #7 // =0x7
7474
; CHECK-NEXT: add x9, sp, #16
7575
; CHECK-NEXT: ptrue p0.d
76-
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
77-
; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl]
78-
; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl]
7976
; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl]
77+
; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl]
78+
; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl]
79+
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
8080
; CHECK-NEXT: str x8, [sp]
8181
; CHECK-NEXT: bl callee2
8282
; CHECK-NEXT: addvl sp, sp, #4
@@ -121,9 +121,9 @@ define float @foo3(ptr %x0, ptr %x1, ptr %x2) nounwind {
121121
; CHECK-NEXT: fmov s1, #2.00000000
122122
; CHECK-NEXT: mov x0, sp
123123
; CHECK-NEXT: ptrue p0.d
124-
; CHECK-NEXT: st1d { z16.d }, p0, [sp]
125-
; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
126124
; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl]
125+
; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
126+
; CHECK-NEXT: st1d { z16.d }, p0, [sp]
127127
; CHECK-NEXT: bl callee3
128128
; CHECK-NEXT: addvl sp, sp, #3
129129
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
@@ -714,6 +714,10 @@ define void @verify_all_operands_are_initialised() {
714714
; CHECK-NEXT: fmov s6, #6.00000000
715715
; CHECK-NEXT: fmov s7, #7.00000000
716716
; CHECK-NEXT: add x0, sp, #16
717+
; CHECK-NEXT: add x9, sp, #16
718+
; CHECK-NEXT: ptrue p0.s
719+
; CHECK-NEXT: fmov z16.s, #9.00000000
720+
; CHECK-NEXT: st1w { z16.s }, p0, [x9]
717721
; CHECK-NEXT: str w8, [sp]
718722
; CHECK-NEXT: bl func_f8_and_v0_passed_via_memory
719723
; CHECK-NEXT: addvl sp, sp, #1

0 commit comments

Comments
 (0)