Skip to content

Commit 84b2753

Browse files
[CGP] Consider arguments and ret values in dupRetToEnableTailCallOpts
Hint further tail call optimization opportunities when the examined return value is either the return value of a call instruction or a function argument. Moreover, take into account the cases in which phi-node incoming values that are not directly produced by tail call instructions may still be simplified. Fixes: #75455.
1 parent b9c852a commit 84b2753

File tree

2 files changed

+85
-36
lines changed

2 files changed

+85
-36
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 64 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2522,8 +2522,39 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
25222522
return false;
25232523
}
25242524

2525+
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2526+
const CallInst *CI) {
2527+
assert(CI && CI->use_empty());
2528+
2529+
if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2530+
switch (II->getIntrinsicID()) {
2531+
case Intrinsic::memset:
2532+
case Intrinsic::memcpy:
2533+
case Intrinsic::memmove:
2534+
return true;
2535+
default:
2536+
return false;
2537+
}
2538+
2539+
LibFunc LF;
2540+
Function *Callee = CI->getCalledFunction();
2541+
if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2542+
switch (LF) {
2543+
case LibFunc_strcpy:
2544+
case LibFunc_strncpy:
2545+
case LibFunc_strcat:
2546+
case LibFunc_strncat:
2547+
return true;
2548+
default:
2549+
return false;
2550+
}
2551+
2552+
return false;
2553+
}
2554+
25252555
/// Look for opportunities to duplicate return instructions to the predecessor
2526-
/// to enable tail call optimizations. The case it is currently looking for is:
2556+
/// to enable tail call optimizations. The cases it is currently looking for are
2557+
/// simple return of call values, function arguments, or phi nodes as follows:
25272558
/// @code
25282559
/// bb0:
25292560
/// %tmp0 = tail call i32 @f0()
@@ -2580,7 +2611,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
25802611
}
25812612

25822613
PN = dyn_cast<PHINode>(V);
2583-
if (!PN)
2614+
if (!PN && !isa<Argument>(V) && !isa<CallInst>(V))
25842615
return false;
25852616
}
25862617

@@ -2620,8 +2651,31 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
26202651
// Make sure the phi value is indeed produced by the tail call.
26212652
if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
26222653
TLI->mayBeEmittedAsTailCall(CI) &&
2623-
attributesPermitTailCall(F, CI, RetI, *TLI))
2654+
attributesPermitTailCall(F, CI, RetI, *TLI)) {
26242655
TailCallBBs.push_back(PredBB);
2656+
} else {
2657+
/// Consider the cases in which the phi value is indirectly produced by
2658+
/// the tail call, for example when encountering memset(), memmove(),
2659+
/// strcpy(), whose return value may have been optimized out. In such
2660+
/// cases, the value needs to be the first function argument.
2661+
/// @code
2662+
/// bb0:
2663+
/// tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
2664+
/// br label %return
2665+
/// return:
2666+
/// %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
2667+
/// @endcode
2668+
if (PredBB && PredBB->getSingleSuccessor() == BB)
2669+
CI = dyn_cast_or_null<CallInst>(
2670+
PredBB->getTerminator()->getPrevNonDebugInstruction(true));
2671+
2672+
if (CI && CI->use_empty() && CI->hasArgument(IncomingVal) &&
2673+
IncomingVal == CI->getArgOperand(0) &&
2674+
isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2675+
TLI->mayBeEmittedAsTailCall(CI) &&
2676+
attributesPermitTailCall(F, CI, RetI, *TLI))
2677+
TailCallBBs.push_back(PredBB);
2678+
}
26252679
}
26262680
} else {
26272681
SmallPtrSet<BasicBlock *, 4> VisitedBBs;
@@ -2631,8 +2685,13 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
26312685
if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
26322686
CallInst *CI = dyn_cast<CallInst>(I);
26332687
if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2634-
attributesPermitTailCall(F, CI, RetI, *TLI))
2635-
TailCallBBs.push_back(Pred);
2688+
attributesPermitTailCall(F, CI, RetI, *TLI)) {
2689+
bool RVIsArgOrCI =
2690+
isa_and_nonnull<Argument>(V) || isa_and_nonnull<CallInst>(V);
2691+
if (!RVIsArgOrCI ||
2692+
(RVIsArgOrCI && isIntrinsicOrLFToBeTailCalled(TLInfo, CI)))
2693+
TailCallBBs.push_back(Pred);
2694+
}
26362695
}
26372696
}
26382697
}

llvm/test/CodeGen/X86/tailcall-cgp-dup.ll

Lines changed: 21 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -188,18 +188,14 @@ return:
188188
define ptr @memset_tailc(ptr %ret_val, i64 %sz) nounwind {
189189
; CHECK-LABEL: memset_tailc:
190190
; CHECK: ## %bb.0: ## %entry
191-
; CHECK-NEXT: pushq %rbx
192-
; CHECK-NEXT: movq %rdi, %rbx
193191
; CHECK-NEXT: testq %rdi, %rdi
194-
; CHECK-NEXT: je LBB4_2
195-
; CHECK-NEXT: ## %bb.1: ## %if.then
192+
; CHECK-NEXT: je LBB4_1
193+
; CHECK-NEXT: ## %bb.2: ## %if.then
196194
; CHECK-NEXT: movq %rsi, %rdx
197-
; CHECK-NEXT: movq %rbx, %rdi
198195
; CHECK-NEXT: xorl %esi, %esi
199-
; CHECK-NEXT: callq _memset
200-
; CHECK-NEXT: LBB4_2: ## %return
201-
; CHECK-NEXT: movq %rbx, %rax
202-
; CHECK-NEXT: popq %rbx
196+
; CHECK-NEXT: jmp _memset ## TAILCALL
197+
; CHECK-NEXT: LBB4_1: ## %return
198+
; CHECK-NEXT: movq %rdi, %rax
203199
; CHECK-NEXT: retq
204200
entry:
205201
%cmp = icmp eq ptr %ret_val, null
@@ -242,21 +238,15 @@ return:
242238
define ptr @memcpy_tailc(ptr %ret_val, i64 %sz, ptr %src) nounwind {
243239
; CHECK-LABEL: memcpy_tailc:
244240
; CHECK: ## %bb.0: ## %entry
245-
; CHECK-NEXT: pushq %rbx
246241
; CHECK-NEXT: testq %rsi, %rsi
247242
; CHECK-NEXT: je LBB6_1
248243
; CHECK-NEXT: ## %bb.2: ## %if.then
249244
; CHECK-NEXT: movq %rsi, %rax
250-
; CHECK-NEXT: movq %rdi, %rbx
251245
; CHECK-NEXT: movq %rdx, %rsi
252246
; CHECK-NEXT: movq %rax, %rdx
253-
; CHECK-NEXT: callq _memcpy
254-
; CHECK-NEXT: jmp LBB6_3
255-
; CHECK-NEXT: LBB6_1:
256-
; CHECK-NEXT: movq %rdx, %rbx
257-
; CHECK-NEXT: LBB6_3: ## %return
258-
; CHECK-NEXT: movq %rbx, %rax
259-
; CHECK-NEXT: popq %rbx
247+
; CHECK-NEXT: jmp _memcpy ## TAILCALL
248+
; CHECK-NEXT: LBB6_1: ## %return
249+
; CHECK-NEXT: movq %rdx, %rax
260250
; CHECK-NEXT: retq
261251
entry:
262252
%cmp = icmp eq i64 %sz, 0
@@ -277,25 +267,25 @@ define ptr @strcpy_legal_and_baz_illegal(ptr %arg, i64 %sz, ptr %2) nounwind {
277267
; CHECK-NEXT: pushq %r15
278268
; CHECK-NEXT: pushq %r14
279269
; CHECK-NEXT: pushq %rbx
280-
; CHECK-NEXT: movq %rdx, %r14
270+
; CHECK-NEXT: movq %rdx, %rbx
281271
; CHECK-NEXT: movq %rsi, %r15
282-
; CHECK-NEXT: movq %rdi, %rbx
272+
; CHECK-NEXT: movq %rdi, %r14
283273
; CHECK-NEXT: movq %rsi, %rdi
284274
; CHECK-NEXT: callq _malloc
285275
; CHECK-NEXT: testq %r15, %r15
286-
; CHECK-NEXT: je LBB7_2
287-
; CHECK-NEXT: ## %bb.1: ## %if.then
276+
; CHECK-NEXT: je LBB7_1
277+
; CHECK-NEXT: ## %bb.2: ## %if.then
288278
; CHECK-NEXT: movq %rax, %rdi
289-
; CHECK-NEXT: movq %r14, %rsi
290-
; CHECK-NEXT: movq %rax, %rbx
291-
; CHECK-NEXT: callq _strcpy
292-
; CHECK-NEXT: jmp LBB7_3
293-
; CHECK-NEXT: LBB7_2: ## %if.else
294-
; CHECK-NEXT: movq %rbx, %rdi
295-
; CHECK-NEXT: movq %r14, %rsi
279+
; CHECK-NEXT: movq %rbx, %rsi
280+
; CHECK-NEXT: popq %rbx
281+
; CHECK-NEXT: popq %r14
282+
; CHECK-NEXT: popq %r15
283+
; CHECK-NEXT: jmp _strcpy ## TAILCALL
284+
; CHECK-NEXT: LBB7_1: ## %if.else
285+
; CHECK-NEXT: movq %r14, %rdi
286+
; CHECK-NEXT: movq %rbx, %rsi
296287
; CHECK-NEXT: callq _baz
297-
; CHECK-NEXT: LBB7_3: ## %return
298-
; CHECK-NEXT: movq %rbx, %rax
288+
; CHECK-NEXT: movq %r14, %rax
299289
; CHECK-NEXT: popq %rbx
300290
; CHECK-NEXT: popq %r14
301291
; CHECK-NEXT: popq %r15

0 commit comments

Comments
 (0)