[libclc] Move fmin/fmax to the CLC library

frasercrmck · frasercrmck · commit 43d4d7d1fd03 · 2025-02-24T12:43:29.000Z
Note the CLC versions of these builtins don't offer the vector/scalar
forms, for simplicity. The OpenCL layer converts the vector/scalar form
to vector/vector.

The CLC builtins use clang's __builtin_elementwise_(min|max) which helps
us generate llvm.(min|max)num intrinsics directly. These intrinsics
select the non-NAN input over the NAN input, which adheres to the OpenCL
specification. Note that the OpenCL specification doesn't require
support for sNAN, so returning qNAN over sNAN is acceptable. Note also
that the intrinsics don't differentiate between -0.0 and +0.0; this does
not appear to be required - going by the OpenCL CTS, at least.

These intrinsics maintain the vector types, as opposed to scalarizing,
which was previously happening. This commit therefore helps to optimize
codegen for those targets.
diff --git a/libclc/clc/include/clc/math/binary_builtin.inc b/libclc/clc/include/clc/math/binary_builtin.inc
@@ -0,0 +1,27 @@
+#include <clc/clcmacro.h>
+#include <clc/utils.h>
+
+#ifndef __CLC_BUILTIN
+#define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION)
+#endif
+
+_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, __CLC_FUNCTION, __CLC_BUILTIN,
+                                        float, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, __CLC_FUNCTION, __CLC_BUILTIN,
+                                        double, double)
+
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, __CLC_FUNCTION, __CLC_BUILTIN,
+                                        half, half)
+
+#endif
diff --git a/libclc/clc/include/clc/math/clc_fmax.h b/libclc/clc/include/clc/math/clc_fmax.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_FMAX_H__
+#define __CLC_MATH_CLC_FMAX_H__
+
+#define __CLC_FUNCTION __clc_fmax
+#define __CLC_BODY <clc/shared/binary_decl_with_scalar_second_arg.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_FMAX_H__
diff --git a/libclc/clc/include/clc/math/clc_fmin.h b/libclc/clc/include/clc/math/clc_fmin.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_FMIN_H__
+#define __CLC_MATH_CLC_FMIN_H__
+
+#define __CLC_FUNCTION __clc_fmin
+#define __CLC_BODY <clc/shared/binary_decl_with_scalar_second_arg.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_FMIN_H__
diff --git a/libclc/clc/include/clc/shared/binary_decl_with_scalar_second_arg.inc b/libclc/clc/include/clc/shared/binary_decl_with_scalar_second_arg.inc
@@ -0,0 +1,7 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
+                                                     __CLC_GENTYPE y);
+
+#ifndef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
+                                                     __CLC_SCALAR_GENTYPE y);
+#endif
diff --git a/libclc/clc/include/clc/shared/binary_def_with_scalar_second_arg.inc b/libclc/clc/include/clc/shared/binary_def_with_scalar_second_arg.inc
@@ -0,0 +1,17 @@
+#include <clc/utils.h>
+
+#ifndef __CLC_FUNCTION
+#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a,
+                                              __CLC_GENTYPE b) {
+  return __CLC_FUNCTION(FUNCTION)(a, b);
+}
+
+#ifndef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a,
+                                              __CLC_SCALAR_GENTYPE b) {
+  return __CLC_FUNCTION(FUNCTION)(a, (__CLC_GENTYPE)b);
+}
+#endif
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
@@ -20,8 +20,10 @@ integer/clc_upsample.cl
 math/clc_ceil.cl
 math/clc_copysign.cl
 math/clc_fabs.cl
-math/clc_fma.cl
 math/clc_floor.cl
+math/clc_fma.cl
+math/clc_fmax.cl
+math/clc_fmin.cl
 math/clc_frexp.cl
 math/clc_mad.cl
 math/clc_modf.cl
diff --git a/libclc/clc/lib/generic/math/clc_fmax.cl b/libclc/clc/lib/generic/math/clc_fmax.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_fmax
+#define __CLC_BUILTIN __builtin_elementwise_max
+#include <clc/math/binary_builtin.inc>
diff --git a/libclc/clc/lib/generic/math/clc_fmin.cl b/libclc/clc/lib/generic/math/clc_fmin.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_fmin
+#define __CLC_BUILTIN __builtin_elementwise_min
+#include <clc/math/binary_builtin.inc>
diff --git a/libclc/generic/lib/math/fmax.cl b/libclc/generic/lib/math/fmax.cl
@@ -1,31 +1,8 @@
 #include <clc/clc.h>
 #include <clc/clcmacro.h>
+#include <clc/math/clc_fmax.h>
 
-_CLC_DEFINE_BINARY_BUILTIN(float, fmax, __builtin_fmaxf, float, float);
+#define FUNCTION fmax
+#define __CLC_BODY <clc/shared/binary_def_with_scalar_second_arg.inc>
 
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double);
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y)
-{
-   if (isnan(x))
-      return y;
-   if (isnan(y))
-      return x;
-   return (x < y) ? y : x;
-}
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half)
-
-#endif
-
-#define __CLC_BODY <fmax.inc>
 #include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/fmin.cl b/libclc/generic/lib/math/fmin.cl
@@ -1,30 +1,8 @@
 #include <clc/clc.h>
 #include <clc/clcmacro.h>
+#include <clc/math/clc_fmin.h>
 
-_CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float);
+#define FUNCTION fmin
+#define __CLC_BODY <clc/shared/binary_def_with_scalar_second_arg.inc>
 
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double);
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y)
-{
-   if (isnan(x))
-      return y;
-   if (isnan(y))
-      return x;
-   return (y < x) ? y : x;
-}
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half)
-
-#endif
-
-#define __CLC_BODY <fmin.inc>
 #include <clc/math/gentype.inc>