Skip to content

Commit 43d4d7d

Browse files
committed
[libclc] Move fmin/fmax to the CLC library
Note that the CLC versions of these builtins don't offer the vector/scalar forms, for simplicity. The OpenCL layer converts the vector/scalar form to vector/vector. The CLC builtins use clang's __builtin_elementwise_(min|max), which helps us generate llvm.(min|max)num intrinsics directly. These intrinsics select the non-NaN input over the NaN input, which adheres to the OpenCL specification. Note that the OpenCL specification doesn't require support for sNaN, so returning qNaN over sNaN is acceptable. Note also that the intrinsics don't differentiate between -0.0 and +0.0; distinguishing them does not appear to be required, at least going by the OpenCL CTS. These intrinsics maintain the vector types, as opposed to scalarizing, which was previously happening. This commit therefore helps to optimize codegen for those targets.
1 parent aca8a5c commit 43d4d7d

File tree

10 files changed

+96
-52
lines changed

10 files changed

+96
-52
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Shared body for implementing a binary builtin. The including file is
// expected to define __CLC_FUNCTION (and optionally __CLC_BUILTIN) first.
// It invokes _CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE (defined elsewhere) for
// float, and for double/half when the matching extension macro is defined.
#include <clc/clcmacro.h>
2+
#include <clc/utils.h>
3+
4+
// Default __CLC_BUILTIN to "__clc_" concatenated with __CLC_FUNCTION unless
// the includer already chose a builtin.
#ifndef __CLC_BUILTIN
5+
#define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION)
6+
#endif
7+
8+
// float variant: always emitted.
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, __CLC_FUNCTION, __CLC_BUILTIN,
9+
float, float)
10+
11+
// double variant: only when cl_khr_fp64 is defined.
#ifdef cl_khr_fp64
12+
13+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
14+
15+
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, __CLC_FUNCTION, __CLC_BUILTIN,
16+
double, double)
17+
18+
#endif
19+
20+
// half variant: only when cl_khr_fp16 is defined.
#ifdef cl_khr_fp16
21+
22+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
23+
24+
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, __CLC_FUNCTION, __CLC_BUILTIN,
25+
half, half)
26+
27+
#endif
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Declarations for __clc_fmax, protected by an include guard.
#ifndef __CLC_MATH_CLC_FMAX_H__
2+
#define __CLC_MATH_CLC_FMAX_H__
3+
4+
// __CLC_FUNCTION and __CLC_BODY parameterize the gentype.inc include below;
// both are undefined again afterwards so they do not leak into includers.
// NOTE(review): the body selected here is the "with_scalar_second_arg"
// declaration template, while the commit message states that the CLC builtins
// do not offer vector/scalar forms -- confirm that declaring a vector/scalar
// __clc_fmax overload is intended.
#define __CLC_FUNCTION __clc_fmax
5+
#define __CLC_BODY <clc/shared/binary_decl_with_scalar_second_arg.inc>
6+
7+
#include <clc/math/gentype.inc>
8+
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
11+
12+
#endif // __CLC_MATH_CLC_FMAX_H__
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Declarations for __clc_fmin, protected by an include guard.
#ifndef __CLC_MATH_CLC_FMIN_H__
2+
#define __CLC_MATH_CLC_FMIN_H__
3+
4+
// __CLC_FUNCTION and __CLC_BODY parameterize the gentype.inc include below;
// both are undefined again afterwards so they do not leak into includers.
// NOTE(review): the body selected here is the "with_scalar_second_arg"
// declaration template, while the commit message states that the CLC builtins
// do not offer vector/scalar forms -- confirm that declaring a vector/scalar
// __clc_fmin overload is intended.
#define __CLC_FUNCTION __clc_fmin
5+
#define __CLC_BODY <clc/shared/binary_decl_with_scalar_second_arg.inc>
6+
7+
#include <clc/math/gentype.inc>
8+
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
11+
12+
#endif // __CLC_MATH_CLC_FMIN_H__
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// Declaration template. Declares __CLC_FUNCTION taking two __CLC_GENTYPE
// operands and returning __CLC_GENTYPE. __CLC_FUNCTION and __CLC_GENTYPE must
// be provided by the including context.
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
2+
__CLC_GENTYPE y);
3+
4+
// When __CLC_SCALAR is not defined (presumably: the current gentype is a
// vector type -- confirm against gentype.inc), also declare an overload whose
// second operand is __CLC_SCALAR_GENTYPE.
#ifndef __CLC_SCALAR
5+
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
6+
__CLC_SCALAR_GENTYPE y);
7+
#endif
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Definition template. Defines FUNCTION(a, b) as a thin wrapper that forwards
// to the function named by __CLC_FUNCTION(FUNCTION). FUNCTION and
// __CLC_GENTYPE must be provided by the including context.
#include <clc/utils.h>
2+
3+
// Unless the includer supplied __CLC_FUNCTION, map a name x to "__clc_"
// concatenated with x.
// NOTE(review): __CLC_FUNCTION is invoked below as a function-like macro, so
// an object-like __CLC_FUNCTION still defined by an earlier header would
// bypass this default and break the calls -- confirm includers #undef it.
#ifndef __CLC_FUNCTION
4+
#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
5+
#endif
6+
7+
// Same-type overload: forwards both operands unchanged.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a,
8+
__CLC_GENTYPE b) {
9+
return __CLC_FUNCTION(FUNCTION)(a, b);
10+
}
11+
12+
// Scalar-second-argument overload, emitted only when __CLC_SCALAR is not
// defined. The scalar b is cast to __CLC_GENTYPE before forwarding, so the
// callee is always called with two __CLC_GENTYPE operands.
#ifndef __CLC_SCALAR
13+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a,
14+
__CLC_SCALAR_GENTYPE b) {
15+
return __CLC_FUNCTION(FUNCTION)(a, (__CLC_GENTYPE)b);
16+
}
17+
#endif

libclc/clc/lib/generic/SOURCES

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@ integer/clc_upsample.cl
2020
math/clc_ceil.cl
2121
math/clc_copysign.cl
2222
math/clc_fabs.cl
23-
math/clc_fma.cl
2423
math/clc_floor.cl
24+
math/clc_fma.cl
25+
math/clc_fmax.cl
26+
math/clc_fmin.cl
2527
math/clc_frexp.cl
2628
math/clc_mad.cl
2729
math/clc_modf.cl
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Implementation file for __clc_fmax: configures and includes the shared
// binary-builtin body.
#include <clc/internal/clc.h>
2+
3+
// Drop any earlier __CLC_FUNCTION definition, then select the function name
// and the clang builtin consumed by binary_builtin.inc.
#undef __CLC_FUNCTION
4+
#define __CLC_FUNCTION __clc_fmax
5+
#define __CLC_BUILTIN __builtin_elementwise_max
6+
#include <clc/math/binary_builtin.inc>
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Implementation file for __clc_fmin: configures and includes the shared
// binary-builtin body.
#include <clc/internal/clc.h>
2+
3+
// Drop any earlier __CLC_FUNCTION definition, then select the function name
// and the clang builtin consumed by binary_builtin.inc.
#undef __CLC_FUNCTION
4+
#define __CLC_FUNCTION __clc_fmin
5+
#define __CLC_BUILTIN __builtin_elementwise_min
6+
#include <clc/math/binary_builtin.inc>

libclc/generic/lib/math/fmax.cl

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,8 @@
11
#include <clc/clc.h>
22
#include <clc/clcmacro.h>
3+
#include <clc/math/clc_fmax.h>
34

4-
_CLC_DEFINE_BINARY_BUILTIN(float, fmax, __builtin_fmaxf, float, float);
5+
#define FUNCTION fmax
6+
#define __CLC_BODY <clc/shared/binary_def_with_scalar_second_arg.inc>
57

6-
#ifdef cl_khr_fp64
7-
8-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
9-
10-
_CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double);
11-
12-
#endif
13-
14-
#ifdef cl_khr_fp16
15-
16-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
17-
18-
_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y)
19-
{
20-
if (isnan(x))
21-
return y;
22-
if (isnan(y))
23-
return x;
24-
return (x < y) ? y : x;
25-
}
26-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half)
27-
28-
#endif
29-
30-
#define __CLC_BODY <fmax.inc>
318
#include <clc/math/gentype.inc>

libclc/generic/lib/math/fmin.cl

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,8 @@
11
#include <clc/clc.h>
22
#include <clc/clcmacro.h>
3+
#include <clc/math/clc_fmin.h>
34

4-
_CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float);
5+
#define FUNCTION fmin
6+
#define __CLC_BODY <clc/shared/binary_def_with_scalar_second_arg.inc>
57

6-
#ifdef cl_khr_fp64
7-
8-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
9-
10-
_CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double);
11-
12-
#endif
13-
#ifdef cl_khr_fp16
14-
15-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
16-
17-
_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y)
18-
{
19-
if (isnan(x))
20-
return y;
21-
if (isnan(y))
22-
return x;
23-
return (y < x) ? y : x;
24-
}
25-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half)
26-
27-
#endif
28-
29-
#define __CLC_BODY <fmin.inc>
308
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)