Skip to content

[libclc] Move sqrt to CLC library #128748

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libclc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
# CLC internal libraries
clc/lib/generic/SOURCES;
clc/lib/amdgcn/SOURCES;
clc/lib/amdgpu/SOURCES;
clc/lib/clspv/SOURCES;
clc/lib/spirv/SOURCES;
)
Expand Down
1 change: 0 additions & 1 deletion libclc/amdgpu/lib/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,3 @@ math/half_log2.cl
math/half_recip.cl
math/half_rsqrt.cl
math/half_sqrt.cl
math/sqrt.cl
4 changes: 3 additions & 1 deletion libclc/clc/include/clc/float/definitions.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#define MAXFLOAT 0x1.fffffep127f
#define HUGE_VALF __builtin_huge_valf()
#define INFINITY __builtin_inff()
#define NAN __builtin_nanf("")

#define FLT_DIG 6
#define FLT_MANT_DIG 24
Expand All @@ -13,6 +12,7 @@
#define FLT_MAX MAXFLOAT
#define FLT_MIN 0x1.0p-126f
#define FLT_EPSILON 0x1.0p-23f
#define FLT_NAN __builtin_nanf("")

#define FP_ILOGB0 (-2147483647 - 1)
#define FP_ILOGBNAN 2147483647
Expand Down Expand Up @@ -46,6 +46,7 @@
#define DBL_MAX 0x1.fffffffffffffp1023
#define DBL_MIN 0x1.0p-1022
#define DBL_EPSILON 0x1.0p-52
#define DBL_NAN __builtin_nan("")

#define M_E 0x1.5bf0a8b145769p+1
#define M_LOG2E 0x1.71547652b82fep+0
Expand Down Expand Up @@ -80,6 +81,7 @@
#define HALF_MAX 0x1.ffcp15h
#define HALF_MIN 0x1.0p-14h
#define HALF_EPSILON 0x1.0p-10h
#define HALF_NAN __builtin_nanf16("")

#define M_LOG2E_H 0x1.714p+0h

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
#include <clc/clcfunc.h>
#include <clc/clctypes.h>
#ifndef __CLC_MATH_CLC_SQRT_H__
#define __CLC_MATH_CLC_SQRT_H__

#define __CLC_FUNCTION __clc_sqrt
#define __CLC_BODY <clc/math/unary_decl.inc>
#define __CLC_FUNCTION __clc_sqrt

#include <clc/math/gentype.inc>

#undef __CLC_BODY
#undef __CLC_FUNCTION

#endif // __CLC_MATH_CLC_SQRT_H__
1 change: 1 addition & 0 deletions libclc/clc/lib/amdgpu/SOURCES
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
math/clc_sqrt_fp64.cl
Original file line number Diff line number Diff line change
Expand Up @@ -20,52 +20,43 @@
* THE SOFTWARE.
*/

#include "math/clc_sqrt.h"
#include <clc/clc.h>
#include <clc/clcmacro.h>

_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)

#endif
#include <clc/internal/clc.h>
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

#ifdef __AMDGCN__
#define __clc_builtin_rsq __builtin_amdgcn_rsq
#define __clc_builtin_rsq __builtin_amdgcn_rsq
#else
#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
#endif

_CLC_OVERLOAD _CLC_DEF double sqrt(double x) {

_CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) {
uint vcc = x < 0x1p-767;
uint exp0 = vcc ? 0x100 : 0;
unsigned exp1 = vcc ? 0xffffff80 : 0;

double v01 = ldexp(x, exp0);
double v01 = __clc_ldexp(x, exp0);
double v23 = __clc_builtin_rsq(v01);
double v45 = v01 * v23;
v23 = v23 * 0.5;

double v67 = fma(-v23, v45, 0.5);
v45 = fma(v45, v67, v45);
double v89 = fma(-v45, v45, v01);
v23 = fma(v23, v67, v23);
v45 = fma(v89, v23, v45);
v67 = fma(-v45, v45, v01);
v23 = fma(v67, v23, v45);
double v67 = __clc_fma(-v23, v45, 0.5);
v45 = __clc_fma(v45, v67, v45);
double v89 = __clc_fma(-v45, v45, v01);
v23 = __clc_fma(v23, v67, v23);
v45 = __clc_fma(v89, v23, v45);
v67 = __clc_fma(-v45, v45, v01);
v23 = __clc_fma(v67, v23, v45);

v23 = ldexp(v23, exp1);
return ((x == __builtin_inf()) || (x == 0.0)) ? v01 : v23;
v23 = __clc_ldexp(v23, exp1);
return (x == __builtin_inf() || (x == 0.0)) ? v01 : v23;
}

_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double);
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_sqrt, double);

#endif
1 change: 1 addition & 0 deletions libclc/clc/lib/generic/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ math/clc_nan.cl
math/clc_nextafter.cl
math/clc_rint.cl
math/clc_round.cl
math/clc_sqrt.cl
math/clc_sw_fma.cl
math/clc_trunc.cl
relational/clc_all.cl
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,8 @@
* THE SOFTWARE.
*/

#include <clc/clc.h>
#include <clc/float/definitions.h>
#include <clc/internal/clc.h>

// Map the llvm sqrt intrinsic to an OpenCL function.
#define __CLC_FUNCTION __clc_llvm_intr_sqrt
#define __CLC_INTRINSIC "llvm.sqrt"
#include <clc/math/unary_intrin.inc>
#undef __CLC_FUNCTION
#undef __CLC_INTRINSIC

#define __CLC_BODY <clc_sqrt_impl.inc>
#define __CLC_BODY <clc_sqrt.inc>
#include <clc/math/gentype.inc>
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,7 @@
* THE SOFTWARE.
*/

#if __CLC_FPSIZE == 64
#define __CLC_NAN __builtin_nan("")
#define ZERO 0.0
#elif __CLC_FPSIZE == 32
#define __CLC_NAN NAN
#define ZERO 0.0f
#elif __CLC_FPSIZE == 16
#define __CLC_NAN (half)NAN
#define ZERO 0.0h
#endif

_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) {
return val < ZERO ? __CLC_NAN : __clc_llvm_intr_sqrt(val);
__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
__clc_sqrt(__CLC_GENTYPE val) {
return __builtin_elementwise_sqrt(val);
}

#undef __CLC_NAN
#undef ZERO
1 change: 0 additions & 1 deletion libclc/generic/lib/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ math/sincos.cl
math/sincos_helpers.cl
math/sinh.cl
math/sinpi.cl
math/clc_sqrt.cl
math/sqrt.cl
math/clc_tan.cl
math/tan.cl
Expand Down
5 changes: 3 additions & 2 deletions libclc/generic/lib/math/clc_hypot.cl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <clc/math/clc_mad.h>
#include <clc/math/clc_subnormal_config.h>
#include <clc/math/math.h>
#include <clc/math/clc_sqrt.h>
#include <clc/relational/clc_isnan.h>
#include <clc/shared/clc_clamp.h>
#include <math/clc_hypot.h>
Expand All @@ -49,7 +50,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) {
float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
float fx = as_float(ux) * fi_exp;
float fy = as_float(uy) * fi_exp;
retval = sqrt(__clc_mad(fx, fx, fy * fy)) * fx_exp;
retval = __clc_sqrt(__clc_mad(fx, fx, fy * fy)) * fx_exp;

retval = ux > PINFBITPATT_SP32 | uy == 0 ? as_float(ux) : retval;
retval = ux == PINFBITPATT_SP32 | uy == PINFBITPATT_SP32
Expand Down Expand Up @@ -81,7 +82,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) {
double ay = y * preadjust;

// The post adjust may overflow, but this can't be avoided in any case
double r = sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;
double r = __clc_sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;

// If the difference in exponents between x and y is large
double s = x + y;
Expand Down
8 changes: 5 additions & 3 deletions libclc/generic/lib/math/sqrt.cl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
*/

#include <clc/clc.h>
#include "math/clc_sqrt.h"
#include <clc/math/clc_sqrt.h>

#define __CLC_FUNCTION sqrt
#include <clc/math/unary_builtin.inc>
#define FUNCTION sqrt
#define __CLC_BODY <clc/shared/unary_def.inc>

#include <clc/math/gentype.inc>