
Commit 6448d5b

AMDGPU: Remove pointless libcall recognition of native_{divide|recip}
This was trying to constant fold these calls, and also to turn some of them into a regular fmul/fdiv. There's no point in doing that: the underlying library implementation should be using those in the first place. Even when the library does use the rcp intrinsics, the backend handles constant folding of those. The pass also only performed the folds under overly strict fast-everything-is-required conditions.

The one possible advantage this had over linking in the library is that, if you were using all fast math flags, it would propagate them to the new instructions. We could address that in the library by adding more fast math flags to the native implementations. The constant fold case also had no test coverage.

https://reviews.llvm.org/D156676
1 parent 44c876f commit 6448d5b
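
For context, this is the rewrite the removed fold performed on a scalar call with a constant divisor, reconstructed from the test update below (names and flags illustrative, not taken verbatim from the pass output):

  ; before this patch, under all-fast-math conditions the pass rewrote
  %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
  ; into a multiply by the constant-folded reciprocal of 3.0
  %div2mul = fmul fast float %tmp, 0x3FD5555560000000

After this patch the call is left untouched, and the folded form is expected to emerge from inlining the library implementation instead.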

2 files changed: +6 -60 lines

llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp

Lines changed: 0 additions & 58 deletions
@@ -65,12 +65,6 @@ class AMDGPULibCalls {
 
   /* Specialized optimizations */
 
-  // recip (half or native)
-  bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
-  // divide (half or native)
-  bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
   // pow/powr/pown
   bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
 
@@ -587,19 +581,6 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
   case AMDGPULibFunc::EI_COS:
   case AMDGPULibFunc::EI_SIN:
     return fold_sincos(FPOp, B, FInfo);
-  case AMDGPULibFunc::EI_RECIP:
-    // skip vector function
-    assert((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
-            FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
-           "recip must be an either native or half function");
-    return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
-
-  case AMDGPULibFunc::EI_DIVIDE:
-    // skip vector function
-    assert((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
-            FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
-           "divide must be an either native or half function");
-    return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
   case AMDGPULibFunc::EI_FMA:
   case AMDGPULibFunc::EI_MAD:
   case AMDGPULibFunc::EI_NFMA:
@@ -687,45 +668,6 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
   return false;
 }
 
-// [native_]half_recip(c) ==> 1.0/c
-bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
-                                const FuncInfo &FInfo) {
-  Value *opr0 = CI->getArgOperand(0);
-  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
-    // Just create a normal div. Later, InstCombine will be able
-    // to compute the divide into a constant (avoid check float infinity
-    // or subnormal at this point).
-    Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
-                               opr0,
-                               "recip2div");
-    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
-    replaceCall(CI, nval);
-    return true;
-  }
-  return false;
-}
-
-// [native_]half_divide(x, c) ==> x/c
-bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
-                                 const FuncInfo &FInfo) {
-  Value *opr0 = CI->getArgOperand(0);
-  Value *opr1 = CI->getArgOperand(1);
-  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
-  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
-
-  if ((CF0 && CF1) || // both are constants
-      (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
-      // CF1 is constant && f32 divide
-  {
-    Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
-                                opr1, "__div2recip");
-    Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
-    replaceCall(CI, nval);
-    return true;
-  }
-  return false;
-}
-
 namespace llvm {
 static double log2(double V) {
 #if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L

llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll

Lines changed: 6 additions & 2 deletions
@@ -156,8 +156,10 @@ entry:
 
 declare float @_Z10half_recipf(float)
 
+; Do nothing, the underlying implementation will optimize correctly
+; after inlining.
 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide
-; GCN: fmul fast float %tmp, 0x3FD5555560000000
+; GCN: %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
 define amdgpu_kernel void @test_native_divide(ptr addrspace(1) nocapture %a) {
 entry:
   %tmp = load float, ptr addrspace(1) %a, align 4
@@ -168,8 +170,10 @@ entry:
 
 declare float @_Z13native_divideff(float, float)
 
+; Do nothing, the optimization will naturally happen after inlining.
+
 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide
-; GCN: fmul fast float %tmp, 0x3FD5555560000000
+; GCN: %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
 define amdgpu_kernel void @test_half_divide(ptr addrspace(1) nocapture %a) {
 entry:
   %tmp = load float, ptr addrspace(1) %a, align 4
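
Why the fold was redundant: once the device library is linked and inlined, a native_divide call already reduces to a multiply by the hardware reciprocal intrinsic, which the backend constant folds on its own. A minimal sketch of that shape, assuming a body built on llvm.amdgcn.rcp.f32 (the actual device-library implementation may differ):

  declare float @llvm.amdgcn.rcp.f32(float)

  ; hypothetical stand-in for the inlined body of native_divide(x, y)
  define float @native_divide_sketch(float %x, float %y) {
    %r = call float @llvm.amdgcn.rcp.f32(float %y) ; hardware reciprocal of y
    %q = fmul float %x, %r                         ; x * (1/y)
    ret float %q
  }

With a constant %y, the rcp call constant folds in the backend, so the only thing the removed IR-level fold added was fast-math flag propagation, which the commit message proposes handling in the library instead.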
