Skip to content

Commit 9a6844c

Browse files
committed
[ARM] Skip storing of stack arguments when lowering tail calls
1 parent 71d8b22 commit 9a6844c

File tree

5 files changed

+115
-10
lines changed

5 files changed

+115
-10
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2422,6 +2422,45 @@ static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
24222422
CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
24232423
}
24242424

2425+
/// Check whether a stack argument requires lowering in a tail call.
///
/// Returns true when the argument must be lowered (stored) as usual, and
/// false only when \p Arg turns out to be a load of an immutable frame object
/// whose offset equals \p CallOffset and whose size equals the loaded size,
/// i.e. the value is already sitting in the slot it would be stored to.
///
/// \param MF         Function being lowered; only its MachineFrameInfo is read.
/// \param VA         Location assignment of the argument (not consulted by the
///                   current body).
/// \param Arg        The outgoing argument value.
/// \param Flags      Argument flags; zero-/sign-extended arguments are always
///                   reported as requiring lowering.
/// \param CallOffset Offset of the outgoing argument's stack slot, compared
///                   against the frame object's offset.
2426+
static bool shouldLowerTailCallStackArg(const MachineFunction &MF,
2427+
const CCValAssign &VA, SDValue Arg,
2428+
ISD::ArgFlagsTy Flags, int CallOffset) {
2429+
// FIXME: We should be able to handle this case, but it's not clear how to.
// Until then, arguments flagged zeroext/signext are conservatively lowered.
2430+
if (Flags.isZExt() || Flags.isSExt())
2431+
return true;
2432+
2433+
for (;;) {
2434+
// Look through nodes that don't alter the bits of the incoming value:
// keep stripping extension, bitcast and assertion wrappers until the
// underlying node is reached.
2435+
unsigned Op = Arg.getOpcode();
2436+
if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2437+
Op == ISD::AssertZext || Op == ISD::AssertSext) {
2438+
Arg = Arg.getOperand(0);
2439+
continue;
2440+
}
2441+
break;
2442+
}
2443+
2444+
// If the argument is a load from the same immutable stack slot, we can reuse
2445+
// it. Every check below that fails falls back to "must lower".
2446+
if (auto *LoadNode = dyn_cast<LoadSDNode>(Arg)) {
2447+
if (auto *FINode = dyn_cast<FrameIndexSDNode>(LoadNode->getBasePtr())) {
2448+
const MachineFrameInfo &MFI = MF.getFrameInfo();
2449+
int FI = FINode->getIndex();
// The frame object must be immutable.
2450+
if (!MFI.isImmutableObjectIndex(FI))
2451+
return true;
// The frame object must sit at exactly the outgoing argument's offset.
2452+
if (CallOffset != MFI.getObjectOffset(FI))
2453+
return true;
// The loaded size (converted from bits to bytes) must equal the size of
// the frame object.
2454+
uint64_t SizeInBits = LoadNode->getMemoryVT().getFixedSizeInBits();
2455+
if (SizeInBits / 8 != MFI.getObjectSize(FI))
2456+
return true;
// Same immutable slot, same offset, same size: no lowering needed.
2457+
return false;
2458+
}
2459+
}
2460+
2461+
// Anything that is not a direct load from a frame index must be lowered.
return true;
2462+
}
2463+
24252464
/// LowerCall - Lowering a call into a callseq_start <-
24262465
/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
24272466
/// nodes.
@@ -2783,6 +2822,13 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
27832822
}
27842823
} else {
27852824
assert(VA.isMemLoc());
2825+
2826+
// When the frame pointer is perfectly aligned for the tail call and the
2827+
// same stack argument is passed down intact, we can reuse it.
2828+
if (!SPDiff && !shouldLowerTailCallStackArg(MF, VA, Arg, Flags,
2829+
VA.getLocMemOffset()))
2830+
continue;
2831+
27862832
SDValue DstAddr;
27872833
MachinePointerInfo DstInfo;
27882834
std::tie(DstAddr, DstInfo) =

llvm/test/CodeGen/ARM/debug-frame.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ declare void @_ZSt9terminatev()
175175
; CHECK-FP: .cfi_offset r4, -36
176176
; CHECK-FP: add r11, sp, #28
177177
; CHECK-FP: .cfi_def_cfa r11, 8
178-
; CHECK-FP: sub sp, sp, #44
178+
; CHECK-FP: sub sp, sp, #36
179179
; CHECK-FP: .cfi_endproc
180180

181181
; CHECK-FP-ELIM-LABEL: _Z4testiiiiiddddd:
@@ -240,7 +240,7 @@ declare void @_ZSt9terminatev()
240240
; CHECK-THUMB-FP: .cfi_offset r4, -20
241241
; CHECK-THUMB-FP: add r7, sp, #12
242242
; CHECK-THUMB-FP: .cfi_def_cfa r7, 8
243-
; CHECK-THUMB-FP: sub sp, #60
243+
; CHECK-THUMB-FP: sub sp, #52
244244
; CHECK-THUMB-FP: .cfi_endproc
245245

246246
; CHECK-THUMB-FP-ELIM-LABEL: _Z4testiiiiiddddd:

llvm/test/CodeGen/ARM/ehabi.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,8 @@ declare void @_ZSt9terminatev()
167167
; CHECK-FP: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
168168
; CHECK-FP: .setfp r11, sp, #28
169169
; CHECK-FP: add r11, sp, #28
170-
; CHECK-FP: .pad #44
171-
; CHECK-FP: sub sp, sp, #44
170+
; CHECK-FP: .pad #36
171+
; CHECK-FP: sub sp, sp, #36
172172
; CHECK-FP: .personality __gxx_personality_v0
173173
; CHECK-FP: .handlerdata
174174
; CHECK-FP: .fnend
@@ -226,7 +226,7 @@ declare void @_ZSt9terminatev()
226226
; DWARF-FP: .cfi_offset r4, -36
227227
; DWARF-FP: add r11, sp, #28
228228
; DWARF-FP: .cfi_def_cfa r11, 8
229-
; DWARF-FP: sub sp, sp, #44
229+
; DWARF-FP: sub sp, sp, #36
230230
; DWARF-FP: sub sp, r11, #28
231231
; DWARF-FP: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
232232
; DWARF-FP: mov pc, lr

llvm/test/CodeGen/ARM/fp16-vector-argument.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -155,9 +155,7 @@ define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x hal
155155
; SOFT-NEXT: vld1.64 {d18, d19}, [r12]
156156
; SOFT-NEXT: add r12, sp, #16
157157
; SOFT-NEXT: vmul.f16 q8, q9, q8
158-
; SOFT-NEXT: vst1.64 {d16, d17}, [r12]
159-
; SOFT-NEXT: vldr d16, [sp]
160-
; SOFT-NEXT: vstr d16, [sp]
158+
; SOFT-NEXT: vst1.64 {d16, d17}, [r12]
161159
; SOFT-NEXT: str r3, [sp, #8]
162160
; SOFT-NEXT: b use
163161
;
@@ -185,10 +183,8 @@ define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x hal
185183
; SOFTEB-NEXT: add r12, sp, #16
186184
; SOFTEB-NEXT: vrev64.16 q9, q9
187185
; SOFTEB-NEXT: vmul.f16 q8, q9, q8
188-
; SOFTEB-NEXT: vldr d18, [sp]
189186
; SOFTEB-NEXT: vrev64.16 q8, q8
190187
; SOFTEB-NEXT: vst1.64 {d16, d17}, [r12]
191-
; SOFTEB-NEXT: vstr d18, [sp]
192188
; SOFTEB-NEXT: str r3, [sp, #8]
193189
; SOFTEB-NEXT: b use
194190
;
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; RUN: llc %s -o - | FileCheck %s
2+
3+
; Tail calls which have stack arguments in the same offsets as the caller do not
4+
; need to load and store the arguments from the stack.
5+
6+
target triple = "armv7"
7+
8+
declare void @func(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j)
9+
10+
; CHECK-LABEL: wrapper_func:
11+
define void @wrapper_func(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j) {
12+
; CHECK: @ %bb.
13+
; CHECK-NEXT: b func
14+
tail call void @func(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j)
15+
ret void
16+
}
17+
18+
; CHECK-LABEL: wrapper_func_zero_arg:
19+
define void @wrapper_func_zero_arg(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j) {
20+
; CHECK: @ %bb.
21+
; CHECK-NEXT: mov r12, #0
22+
; CHECK-NEXT: str r12, [sp, #20]
23+
; CHECK-NEXT: b func
24+
tail call void @func(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 0)
25+
ret void
26+
}
27+
28+
; CHECK-LABEL: wrapper_func_overriden_arg:
29+
define void @wrapper_func_overriden_arg(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j) {
30+
; CHECK: @ %bb.
31+
; CHECK-NEXT: push {r11, lr}
32+
; CHECK-NEXT: ldr r12, [sp, #24]
33+
; CHECK-NEXT: mov lr, #0
34+
; CHECK-NEXT: str lr, [sp, #24]
35+
; CHECK-NEXT: str r12, [sp, #28]
36+
; CHECK-NEXT: pop {r11, lr}
37+
; CHECK-NEXT: b func
38+
tail call void @func(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 0, i32 %i)
39+
ret void
40+
}
41+
42+
declare void @func_i1(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i1 %j)
43+
44+
; CHECK-LABEL: wrapper_func_i1:
45+
define void @wrapper_func_i1(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i1 %j) {
46+
; CHECK: @ %bb.
47+
; CHECK-NEXT: b func_i1
48+
tail call void @func_i1(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i1 %j)
49+
ret void
50+
}
51+
52+
declare void @func_signext_i1(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i1 signext %j)
53+
54+
; FIXME: Support zero/sign-extended stack arguments.
55+
; CHECK-LABEL: wrapper_func_i8:
56+
define void @wrapper_func_i8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i1 signext %j) {
57+
; CHECK: @ %bb.
58+
; CHECK-NEXT: ldr r12, [sp, #20]
59+
; CHECK-NEXT: str r12, [sp, #20]
60+
; CHECK-NEXT: b func_signext_i1
61+
tail call void @func_signext_i1(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i1 signext %j)
62+
ret void
63+
}

0 commit comments

Comments
 (0)