Skip to content

[SYCL][CUDA] Change builtin selection for SYCL #9768

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Aug 17, 2023
Merged
7 changes: 4 additions & 3 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2329,9 +2329,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
bool ConstWithoutExceptions =
getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
if (FD->hasAttr<ConstAttr>() ||
((ConstWithoutErrnoAndExceptions || ConstWithoutExceptions) &&
(!ConstWithoutErrnoAndExceptions || (!getLangOpts().MathErrno)))) {
if ((FD->hasAttr<ConstAttr>() ||
((ConstWithoutErrnoAndExceptions || ConstWithoutExceptions) &&
(!ConstWithoutErrnoAndExceptions || (!getLangOpts().MathErrno)))) &&
!(getLangOpts().SYCLIsDevice && getTarget().getTriple().isNVPTX())) {
switch (BuiltinIDIfNoAsmLabel) {
case Builtin::BIceil:
case Builtin::BIceilf:
Expand Down
3 changes: 0 additions & 3 deletions clang/lib/Driver/ToolChains/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,6 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {
const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
const llvm::fltSemantics *FPType = nullptr) const override;

// math-errno should be the default for SYCL but not other OFK using CUDA TC
bool IsMathErrnoDefault() const override { return OK == Action::OFK_SYCL; }

void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;

Expand Down
142 changes: 142 additions & 0 deletions clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
// SYCL compilation uses libdevice in order to implement platform specific
// versions of funcs like cosf, logf, etc. In order for the libdevice funcs
// to be used, we need to make sure that llvm intrinsics such as llvm.cos.f32
// are not emitted since many backends do not have lowerings for such
// intrinsics. This allows the driver to link in the libdevice definitions for
// cosf etc. later in the driver flow.

// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s

#include "Inputs/sycl.hpp"

extern "C" {
float scalbnf(float x, int n);
float logf(float x);
float expf(float x);
float frexpf(float x, int *exp);
float ldexpf(float x, int exp);
float log10f(float x);
float modff(float x, float *intpart);
float exp2f(float x);
float expm1f(float x);
int ilogbf(float x);
float log1pf(float x);
float log2f(float x);
float logbf(float x);
float sqrtf(float x);
float cbrtf(float x);
float hypotf(float x, float y);
float erff(float x);
float erfcf(float x);
float tgammaf(float x);
float lgammaf(float x);
float fmodf(float x, float y);
float remainderf(float x, float y);
float remquof(float x, float y, int *q);
float nextafterf(float x, float y);
float fdimf(float x, float y);
float fmaf(float x, float y, float z);
float sinf(float x);
float cosf(float x);
float tanf(float x);
float powf(float x, float y);
float acosf(float x);
float asinf(float x);
float atanf(float x);
float atan2f(float x, float y);
float coshf(float x);
float sinhf(float x);
float tanhf(float x);
float acoshf(float x);
float asinhf(float x);
float atanhf(float x);
};

// CHECK-NOT: llvm.abs.
// CHECK-NOT: llvm.scalbn.
// CHECK-NOT: llvm.log.
// CHECK-NOT: llvm.exp.
// CHECK-NOT: llvm.frexp.
// CHECK-NOT: llvm.ldexp.
// CHECK-NOT: llvm.log10.
// CHECK-NOT: llvm.modf.
// CHECK-NOT: llvm.exp2.
// CHECK-NOT: llvm.expm1.
// CHECK-NOT: llvm.ilogb.
// CHECK-NOT: llvm.log1p.
// CHECK-NOT: llvm.log2.
// CHECK-NOT: llvm.logb.
// CHECK-NOT: llvm.sqrt.
// CHECK-NOT: llvm.cbrt.
// CHECK-NOT: llvm.hypot.
// CHECK-NOT: llvm.erf.
// CHECK-NOT: llvm.erfc.
// CHECK-NOT: llvm.tgamma.
// CHECK-NOT: llvm.lgamma.
// CHECK-NOT: llvm.fmod.
// CHECK-NOT: llvm.remainder.
// CHECK-NOT: llvm.remquo.
// CHECK-NOT: llvm.nextafter.
// CHECK-NOT: llvm.fdim.
// CHECK-NOT: llvm.fma.
// CHECK-NOT: llvm.sin.
// CHECK-NOT: llvm.cos.
// CHECK-NOT: llvm.tan.
// CHECK-NOT: llvm.pow.
// CHECK-NOT: llvm.acos.
// CHECK-NOT: llvm.asin.
// CHECK-NOT: llvm.atan.
// CHECK-NOT: llvm.atan2.
// CHECK-NOT: llvm.cosh.
// CHECK-NOT: llvm.sinh.
// CHECK-NOT: llvm.tanh.
// CHECK-NOT: llvm.acosh.
// CHECK-NOT: llvm.asinh.
// CHECK-NOT: llvm.atanh.
// Device entry point: submits a single-task kernel that invokes every one of
// the cmath functions declared above exactly once. Each result is stored back
// through `a[0]` and the pointer/int arguments come from `a`/`b`, so every
// call consumes runtime data. The generated IR for this kernel is what the
// test's pattern list above inspects: the calls must remain libc-named calls
// rather than being lowered to LLVM math intrinsics.
void sycl_kernel(float *a, int *b) {
  sycl::queue{}.submit([&](sycl::handler &cgh) {
    cgh.single_task<class kernel>([=]() {
      a[0] = scalbnf(a[0], b[0]);
      a[0] = logf(a[0]);
      a[0] = expf(a[0]);
      a[0] = frexpf(a[0], b);
      a[0] = ldexpf(a[0], b[0]);
      a[0] = log10f(a[0]);
      a[0] = modff(a[0], a);
      a[0] = exp2f(a[0]);
      a[0] = expm1f(a[0]);
      a[0] = ilogbf(a[0]);
      a[0] = log1pf(a[0]);
      a[0] = log2f(a[0]);
      a[0] = logbf(a[0]);
      a[0] = sqrtf(a[0]);
      a[0] = cbrtf(a[0]);
      a[0] = hypotf(a[0], a[0]);
      a[0] = erff(a[0]);
      a[0] = erfcf(a[0]);
      a[0] = tgammaf(a[0]);
      a[0] = lgammaf(a[0]);
      a[0] = fmodf(a[0], a[0]);
      a[0] = remainderf(a[0], a[0]);
      a[0] = remquof(a[0], a[0], b);
      a[0] = nextafterf(a[0], a[0]);
      a[0] = fdimf(a[0], a[0]);
      a[0] = fmaf(a[0], a[0], a[0]);
      a[0] = sinf(a[0]);
      a[0] = cosf(a[0]);
      a[0] = tanf(a[0]);
      a[0] = powf(a[0], a[0]);
      a[0] = acosf(a[0]);
      a[0] = asinf(a[0]);
      a[0] = atanf(a[0]);
      a[0] = atan2f(a[0], a[0]);
      a[0] = coshf(a[0]);
      a[0] = sinhf(a[0]);
      a[0] = tanhf(a[0]);
      a[0] = acoshf(a[0]);
      a[0] = asinhf(a[0]);
      a[0] = atanhf(a[0]);
    });
  });
}
12 changes: 12 additions & 0 deletions sycl/test-e2e/DeviceLib/cmath_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

// RUN: %{build} -fno-builtin -fsycl-device-lib-jit-link %{mathflags} -o %t.out
// RUN: %if !gpu %{ %{run} %t.out %}
//
// Check that -ffast-math works with cmath funcs for CUDA
// RUN: %if cuda %{ %{build} -fno-builtin %{mathflags} -o %t.out -ffast-math -DSYCL_E2E_FASTMATH %}
// RUN: %if cuda %{ %{run} %t.out %}

#include "math_utils.hpp"
#include <cmath>
Expand Down Expand Up @@ -92,6 +96,9 @@ template <class T> void device_cmath_test_1(s::queue &deviceQueue) {

res_access[i++] = !(std::signbit(infinity) == 0);
res_access[i++] = !(std::signbit(minus_infinity) != 0);

#ifndef SYCL_E2E_FASTMATH
// -ffast-math is not guaranteed to correctly detect nan etc.
res_access[i++] = !(std::isunordered(minus_nan, nan) != 0);
res_access[i++] = !(std::isunordered(minus_infinity, infinity) == 0);
res_access[i++] = !(std::isgreater(minus_infinity, infinity) == 0);
Expand All @@ -113,6 +120,11 @@ template <class T> void device_cmath_test_1(s::queue &deviceQueue) {
res_access[i++] = !(std::isnormal(minus_infinity) == 0);
res_access[i++] = !(std::isnormal(subnormal) == 0);
res_access[i++] = !(std::isnormal(1.0f) != 0);
#else
for (; i < static_cast<int>(TEST_NUM);) {
res_access[i++] = 0;
}
#endif
});
});
}
Expand Down