llvm · frasercrmck · Feb 27, 2025 · Feb 25, 2025 · Feb 27, 2025 · Feb 27, 2025
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
@@ -29,6 +29,7 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
   # CLC internal libraries
   clc/lib/generic/SOURCES;
   clc/lib/amdgcn/SOURCES;
+  clc/lib/amdgpu/SOURCES;
   clc/lib/clspv/SOURCES;
   clc/lib/spirv/SOURCES;
 )

diff --git a/libclc/amdgpu/lib/SOURCES b/libclc/amdgpu/lib/SOURCES
@@ -10,4 +10,3 @@ math/half_log2.cl
 math/half_recip.cl
 math/half_rsqrt.cl
 math/half_sqrt.cl
-math/sqrt.cl
diff --git a/libclc/clc/include/clc/float/definitions.h b/libclc/clc/include/clc/float/definitions.h
@@ -1,7 +1,6 @@
 #define MAXFLOAT 0x1.fffffep127f
 #define HUGE_VALF __builtin_huge_valf()
 #define INFINITY __builtin_inff()
-#define NAN __builtin_nanf("")
 
 #define FLT_DIG 6
 #define FLT_MANT_DIG 24
@@ -13,6 +12,7 @@
 #define FLT_MAX MAXFLOAT
 #define FLT_MIN 0x1.0p-126f
 #define FLT_EPSILON 0x1.0p-23f
+#define FLT_NAN __builtin_nanf("")
 
 #define FP_ILOGB0 (-2147483647 - 1)
 #define FP_ILOGBNAN 2147483647
@@ -46,6 +46,7 @@
 #define DBL_MAX 0x1.fffffffffffffp1023
 #define DBL_MIN 0x1.0p-1022
 #define DBL_EPSILON 0x1.0p-52
+#define DBL_NAN __builtin_nan("")
 
 #define M_E 0x1.5bf0a8b145769p+1
 #define M_LOG2E 0x1.71547652b82fep+0
@@ -80,6 +81,7 @@
 #define HALF_MAX 0x1.ffcp15h
 #define HALF_MIN 0x1.0p-14h
 #define HALF_EPSILON 0x1.0p-10h
+#define HALF_NAN __builtin_nanf16("")
 
 #define M_LOG2E_H 0x1.714p+0h
 

diff --git a/libclc/generic/include/math/clc_sqrt.h → libclc/clc/include/clc/math/clc_sqrt.h b/libclc/generic/include/math/clc_sqrt.h → libclc/clc/include/clc/math/clc_sqrt.h
@@ -1,8 +1,12 @@
-#include <clc/clcfunc.h>
-#include <clc/clctypes.h>
+#ifndef __CLC_MATH_CLC_SQRT_H__
+#define __CLC_MATH_CLC_SQRT_H__
 
-#define __CLC_FUNCTION __clc_sqrt
 #define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_sqrt
+
 #include <clc/math/gentype.inc>
+
 #undef __CLC_BODY
 #undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_SQRT_H__
diff --git a/libclc/clc/lib/amdgpu/SOURCES b/libclc/clc/lib/amdgpu/SOURCES
@@ -0,0 +1 @@
+math/clc_sqrt_fp64.cl
diff --git a/libclc/amdgpu/lib/math/sqrt.cl → libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl b/libclc/amdgpu/lib/math/sqrt.cl → libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl
@@ -20,52 +20,43 @@
  * THE SOFTWARE.
  */
 
-#include "math/clc_sqrt.h"
-#include <clc/clc.h>
 #include <clc/clcmacro.h>
-
-_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)
-
-#endif
+#include <clc/internal/clc.h>
+#include <clc/math/clc_fma.h>
+#include <clc/math/clc_ldexp.h>
 
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 #ifdef __AMDGCN__
-  #define __clc_builtin_rsq __builtin_amdgcn_rsq
+#define __clc_builtin_rsq __builtin_amdgcn_rsq
 #else
-  #define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
+#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
 #endif
 
-_CLC_OVERLOAD _CLC_DEF double sqrt(double x) {
-
+_CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) {
   uint vcc = x < 0x1p-767;
   uint exp0 = vcc ? 0x100 : 0;
   unsigned exp1 = vcc ? 0xffffff80 : 0;
 
-  double v01 = ldexp(x, exp0);
+  double v01 = __clc_ldexp(x, exp0);
   double v23 = __clc_builtin_rsq(v01);
   double v45 = v01 * v23;
   v23 = v23 * 0.5;
 
-  double v67 = fma(-v23, v45, 0.5);
-  v45 = fma(v45, v67, v45);
-  double v89 = fma(-v45, v45, v01);
-  v23 = fma(v23, v67, v23);
-  v45 = fma(v89, v23, v45);
-  v67 = fma(-v45, v45, v01);
-  v23 = fma(v67, v23, v45);
+  double v67 = __clc_fma(-v23, v45, 0.5);
+  v45 = __clc_fma(v45, v67, v45);
+  double v89 = __clc_fma(-v45, v45, v01);
+  v23 = __clc_fma(v23, v67, v23);
+  v45 = __clc_fma(v89, v23, v45);
+  v67 = __clc_fma(-v45, v45, v01);
+  v23 = __clc_fma(v67, v23, v45);
 
-  v23 = ldexp(v23, exp1);
-  return ((x == __builtin_inf()) || (x == 0.0)) ? v01 : v23;
+  v23 = __clc_ldexp(v23, exp1);
+  return (x == __builtin_inf() || (x == 0.0)) ? v01 : v23;
 }
 
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double);
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_sqrt, double);
 
 #endif
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
@@ -33,6 +33,7 @@ math/clc_nan.cl
 math/clc_nextafter.cl
 math/clc_rint.cl
 math/clc_round.cl
+math/clc_sqrt.cl
 math/clc_sw_fma.cl
 math/clc_trunc.cl
 relational/clc_all.cl

diff --git a/libclc/generic/lib/math/clc_sqrt.cl → libclc/clc/lib/generic/math/clc_sqrt.cl b/libclc/generic/lib/math/clc_sqrt.cl → libclc/clc/lib/generic/math/clc_sqrt.cl
@@ -20,14 +20,8 @@
  * THE SOFTWARE.
  */
 
-#include <clc/clc.h>
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
 
-// Map the llvm sqrt intrinsic to an OpenCL function.
-#define __CLC_FUNCTION __clc_llvm_intr_sqrt
-#define __CLC_INTRINSIC "llvm.sqrt"
-#include <clc/math/unary_intrin.inc>
-#undef __CLC_FUNCTION
-#undef __CLC_INTRINSIC
-
-#define __CLC_BODY <clc_sqrt_impl.inc>
+#define __CLC_BODY <clc_sqrt.inc>
 #include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/clc_sqrt_impl.inc → libclc/clc/lib/generic/math/clc_sqrt.inc b/libclc/generic/lib/math/clc_sqrt_impl.inc → libclc/clc/lib/generic/math/clc_sqrt.inc
@@ -20,20 +20,7 @@
  * THE SOFTWARE.
  */
 
-#if __CLC_FPSIZE == 64
-#define __CLC_NAN __builtin_nan("")
-#define ZERO 0.0
-#elif __CLC_FPSIZE == 32
-#define __CLC_NAN NAN
-#define ZERO 0.0f
-#elif __CLC_FPSIZE == 16
-#define __CLC_NAN (half)NAN
-#define ZERO 0.0h
-#endif
-
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) {
-  return val < ZERO ? __CLC_NAN : __clc_llvm_intr_sqrt(val);
+__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
+__clc_sqrt(__CLC_GENTYPE val) {
+  return __builtin_elementwise_sqrt(val);
 }
-
-#undef __CLC_NAN
-#undef ZERO
diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES
@@ -179,7 +179,6 @@ math/sincos.cl
 math/sincos_helpers.cl
 math/sinh.cl
 math/sinpi.cl
-math/clc_sqrt.cl
 math/sqrt.cl
 math/clc_tan.cl
 math/tan.cl

diff --git a/libclc/generic/lib/math/clc_hypot.cl b/libclc/generic/lib/math/clc_hypot.cl
@@ -27,6 +27,7 @@
 #include <clc/math/clc_mad.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
+#include <clc/math/clc_sqrt.h>
 #include <clc/relational/clc_isnan.h>
 #include <clc/shared/clc_clamp.h>
 #include <math/clc_hypot.h>
@@ -49,7 +50,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) {
   float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
   float fx = as_float(ux) * fi_exp;
   float fy = as_float(uy) * fi_exp;
-  retval = sqrt(__clc_mad(fx, fx, fy * fy)) * fx_exp;
+  retval = __clc_sqrt(__clc_mad(fx, fx, fy * fy)) * fx_exp;
 
   retval = ux > PINFBITPATT_SP32 | uy == 0 ? as_float(ux) : retval;
   retval = ux == PINFBITPATT_SP32 | uy == PINFBITPATT_SP32
@@ -81,7 +82,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) {
   double ay = y * preadjust;
 
   // The post adjust may overflow, but this can't be avoided in any case
-  double r = sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;
+  double r = __clc_sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;
 
   // If the difference in exponents between x and y is large
   double s = x + y;

diff --git a/libclc/generic/lib/math/sqrt.cl b/libclc/generic/lib/math/sqrt.cl
@@ -21,7 +21,9 @@
  */
 
 #include <clc/clc.h>
-#include "math/clc_sqrt.h"
+#include <clc/math/clc_sqrt.h>
 
-#define __CLC_FUNCTION sqrt
-#include <clc/math/unary_builtin.inc>
+#define FUNCTION sqrt
+#define __CLC_BODY <clc/shared/unary_def.inc>
+
+#include <clc/math/gentype.inc>