Skip to content

[CGP] Consider arguments and ret values in dupRetToEnableTailCallOpts #76613

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 64 additions & 6 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2522,8 +2522,40 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
return false;
}

static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
const CallInst *CI) {
assert(CI && CI->use_empty());

if (const auto *II = dyn_cast<IntrinsicInst>(CI))
switch (II->getIntrinsicID()) {
case Intrinsic::memset:
case Intrinsic::memcpy:
case Intrinsic::memmove:
return true;
default:
return false;
}

LibFunc LF;
Function *Callee = CI->getCalledFunction();
if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
switch (LF) {
case LibFunc_strcpy:
case LibFunc_strncpy:
case LibFunc_strcat:
case LibFunc_strncat:
return true;
default:
return false;
}

return false;
}

/// Look for opportunities to duplicate return instructions to the predecessor
/// to enable tail call optimizations. The case it is currently looking for is:
/// to enable tail call optimizations. The case it is currently looking for is
/// the following one. Known intrinsics or library functions that may be tail
/// called are taken into account as well.
/// @code
/// bb0:
/// %tmp0 = tail call i32 @f0()
Expand Down Expand Up @@ -2580,8 +2612,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
}

PN = dyn_cast<PHINode>(V);
if (!PN)
return false;
}

if (PN && PN->getParent() != BB)
Expand Down Expand Up @@ -2620,8 +2650,30 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
// Make sure the phi value is indeed produced by the tail call.
if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
TLI->mayBeEmittedAsTailCall(CI) &&
attributesPermitTailCall(F, CI, RetI, *TLI))
attributesPermitTailCall(F, CI, RetI, *TLI)) {
TailCallBBs.push_back(PredBB);
} else {
// Consider the cases in which the phi value is indirectly produced by
// the tail call, for example when encountering memset(), memmove(),
// strcpy(), whose return value may have been optimized out. In such
// cases, the value needs to be the first function argument.
//
// bb0:
// tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
// br label %return
// return:
// %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
if (PredBB && PredBB->getSingleSuccessor() == BB)
CI = dyn_cast_or_null<CallInst>(
PredBB->getTerminator()->getPrevNonDebugInstruction(true));

if (CI && CI->use_empty() &&
isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
IncomingVal == CI->getArgOperand(0) &&
TLI->mayBeEmittedAsTailCall(CI) &&
attributesPermitTailCall(F, CI, RetI, *TLI))
TailCallBBs.push_back(PredBB);
}
}
} else {
SmallPtrSet<BasicBlock *, 4> VisitedBBs;
Expand All @@ -2631,8 +2683,14 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
CallInst *CI = dyn_cast<CallInst>(I);
if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
attributesPermitTailCall(F, CI, RetI, *TLI))
TailCallBBs.push_back(Pred);
attributesPermitTailCall(F, CI, RetI, *TLI)) {
// Either we return void or the return value must be the first
// argument of a known intrinsic or library function.
if (!V || (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
V == CI->getArgOperand(0))) {
TailCallBBs.push_back(Pred);
}
}
}
}
}
Expand Down
183 changes: 183 additions & 0 deletions llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,186 @@ return:
%retval = phi ptr [ %ptr, %if.then ], [ %obj, %entry ]
ret ptr %retval
}

; Positive test: the function returns %ret_val, which is also the first
; (destination) argument of the llvm.memset call in %if.then. The expected
; assembly emits the memset as a tail call (jmp _memset).
define ptr @memset_tailc(ptr %ret_val, i64 %sz) nounwind {
; CHECK-LABEL: memset_tailc:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: je LBB4_1
; CHECK-NEXT: ## %bb.2: ## %if.then
; CHECK-NEXT: movq %rsi, %rdx
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: jmp _memset ## TAILCALL
; CHECK-NEXT: LBB4_1: ## %return
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
entry:
%cmp = icmp eq ptr %ret_val, null
br i1 %cmp, label %return, label %if.then

if.then:
; The intrinsic returns void; its destination operand is the value returned below.
tail call void @llvm.memset.p0.i64(ptr nonnull align 1 %ret_val, i8 0, i64 %sz, i1 false)
br label %return

return:
ret ptr %ret_val
}

; Positive test: the returned value is a phi whose incoming value from
; %if.then is %ret_val, the first (destination) argument of the llvm.memcpy
; call in that block. The expected assembly emits the memcpy as a tail call
; (jmp _memcpy), while the %entry edge returns %src directly.
define ptr @memcpy_tailc(ptr %ret_val, i64 %sz, ptr %src) nounwind {
; CHECK-LABEL: memcpy_tailc:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: je LBB5_1
; CHECK-NEXT: ## %bb.2: ## %if.then
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: movq %rdx, %rsi
; CHECK-NEXT: movq %rax, %rdx
; CHECK-NEXT: jmp _memcpy ## TAILCALL
; CHECK-NEXT: LBB5_1: ## %return
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: retq
entry:
%cmp = icmp eq i64 %sz, 0
br i1 %cmp, label %return, label %if.then

if.then:
tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 %ret_val, ptr align 1 %src, i64 %sz, i1 false)
br label %return

return:
; The %if.then incoming value matches the memcpy destination operand.
%phi = phi ptr [ %ret_val, %if.then ], [ %src, %entry ]
ret ptr %phi
}

; Mixed test: both predecessors of %return end in a call whose result is
; unused, and the phi takes each call's first argument as its incoming value.
; The expected assembly tail-calls strcpy (jmp _strcpy) but keeps baz as a
; regular call (callq _baz) followed by an explicit return of %arg.
define ptr @strcpy_legal_and_baz_illegal(ptr %arg, i64 %sz, ptr %2) nounwind {
; CHECK-LABEL: strcpy_legal_and_baz_illegal:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdx, %rbx
; CHECK-NEXT: movq %rsi, %r15
; CHECK-NEXT: movq %rdi, %r14
; CHECK-NEXT: movq %rsi, %rdi
; CHECK-NEXT: callq _malloc
; CHECK-NEXT: testq %r15, %r15
; CHECK-NEXT: je LBB6_1
; CHECK-NEXT: ## %bb.2: ## %if.then
; CHECK-NEXT: movq %rax, %rdi
; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: jmp _strcpy ## TAILCALL
; CHECK-NEXT: LBB6_1: ## %if.else
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: callq _baz
; CHECK-NEXT: movq %r14, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: retq
entry:
%strcpy_ret_val = tail call noalias ptr @malloc(i64 %sz)
%cmp = icmp eq i64 %sz, 0
br i1 %cmp, label %if.else, label %if.then

if.then:
; strcpy's result is unused; its first argument is the phi's incoming value.
%rv_unused = tail call ptr @strcpy(ptr dereferenceable(1) %strcpy_ret_val, ptr dereferenceable(1) %2)
br label %return

if.else:
; baz's result is unused too, but the expected output does not tail-call it.
%rv_unused_2 = tail call ptr @baz(ptr %arg, ptr %2)
br label %return

return:
%phi = phi ptr [ %strcpy_ret_val, %if.then ], [ %arg, %if.else ]
ret ptr %phi
}

; Negative test: the function returns %ret_val, which is the first argument of
; the @baz call in %if.then, yet the expected assembly keeps a regular call
; (callq _baz) and returns %ret_val explicitly afterwards.
; NOTE(review): presumably because @baz is an arbitrary external function
; rather than one of the recognized intrinsics/library functions -- confirm.
define ptr @baz_illegal_tailc(ptr %ret_val, ptr %arg) nounwind {
; CHECK-LABEL: baz_illegal_tailc:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: je LBB7_2
; CHECK-NEXT: ## %bb.1: ## %if.then
; CHECK-NEXT: movq %rbx, %rdi
; CHECK-NEXT: callq _baz
; CHECK-NEXT: LBB7_2: ## %return
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
entry:
%cmp = icmp eq ptr %ret_val, null
br i1 %cmp, label %return, label %if.then

if.then:
%rv = tail call ptr @baz(ptr %ret_val, ptr %arg)
br label %return

return:
ret ptr %ret_val
}

; Negative test: the phi's incoming value from %if.then is %ret_val_2, which is
; not the destination argument (%arg) of the llvm.memset call in that block.
; The expected assembly therefore keeps a regular call (callq _memset) and
; materializes the return value afterwards.
define ptr @memset_illegal_tailc(ptr %arg, i64 %sz, ptr %ret_val_1, ptr %ret_val_2) nounwind {
; CHECK-LABEL: memset_illegal_tailc:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: je LBB8_2
; CHECK-NEXT: ## %bb.1: ## %if.then
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rcx, %rbx
; CHECK-NEXT: movq %rsi, %rdx
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: callq _memset
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: LBB8_2: ## %return
; CHECK-NEXT: retq
entry:
%cmp = icmp eq i64 %sz, 0
br i1 %cmp, label %return, label %if.then

if.then:
tail call void @llvm.memset.p0.i64(ptr align 1 %arg, i8 0, i64 %sz, i1 false)
br label %return

return:
; Neither incoming value is the memset destination operand %arg.
%phi = phi ptr [ %ret_val_2, %if.then ], [ %ret_val_1, %entry ]
ret ptr %phi
}

; Negative test: the function returns %src, which is the second argument of
; the strcpy call in %if.then rather than the first (%dest). The expected
; assembly keeps a regular call (callq _strcpy) and returns %src explicitly.
define ptr @strcpy_illegal_tailc(ptr %dest, i64 %sz, ptr readonly returned %src) nounwind {
; CHECK-LABEL: strcpy_illegal_tailc:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdx, %rbx
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: je LBB9_2
; CHECK-NEXT: ## %bb.1: ## %if.then
; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: callq _strcpy
; CHECK-NEXT: LBB9_2: ## %return
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%cmp = icmp eq i64 %sz, 0
br i1 %cmp, label %return, label %if.then

if.then:
; The call result is unused; the returned value below is the second operand.
%6 = tail call ptr @strcpy(ptr dereferenceable(1) %dest, ptr dereferenceable(1) %src)
br label %return

return:
ret ptr %src
}

; External declarations for the intrinsics and functions called by the
; tail-call tests in this file.
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1)
declare noalias ptr @malloc(i64)
declare ptr @strcpy(ptr noalias returned writeonly, ptr noalias nocapture readonly)
declare ptr @baz(ptr, ptr)