Skip to content

Commit ddc3346

Browse files
authored
clang/AMDGPU: Fix accidental behavior change for __builtin_amdgcn_ldexph (#66340)
1 parent 7976bdb commit ddc3346

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17392,14 +17392,22 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1739217392
case AMDGPU::BI__builtin_amdgcn_log_clampf:
1739317393
return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
1739417394
case AMDGPU::BI__builtin_amdgcn_ldexp:
17395-
case AMDGPU::BI__builtin_amdgcn_ldexpf:
17396-
case AMDGPU::BI__builtin_amdgcn_ldexph: {
17395+
case AMDGPU::BI__builtin_amdgcn_ldexpf: {
1739717396
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
1739817397
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
1739917398
llvm::Function *F =
1740017399
CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
1740117400
return Builder.CreateCall(F, {Src0, Src1});
1740217401
}
17402+
case AMDGPU::BI__builtin_amdgcn_ldexph: {
17403+
// The raw instruction has a different behavior for out of bounds exponent
17404+
// values (implicit truncation instead of saturate to short_min/short_max).
17405+
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17406+
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17407+
llvm::Function *F =
17408+
CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
17409+
return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
17410+
}
1740317411
case AMDGPU::BI__builtin_amdgcn_frexp_mant:
1740417412
case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
1740517413
case AMDGPU::BI__builtin_amdgcn_frexp_manth:

clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,8 @@ void test_cos_f16(global half* out, half a)
5252
}
5353

5454
// CHECK-LABEL: @test_ldexp_f16
55-
// CHECK: call half @llvm.ldexp.f16.i32
55+
// CHECK: [[TRUNC:%[0-9a-z]+]] = trunc i32
56+
// CHECK: call half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]])
5657
void test_ldexp_f16(global half* out, half a, int b)
5758
{
5859
*out = __builtin_amdgcn_ldexph(a, b);

0 commit comments

Comments
 (0)