-
Notifications
You must be signed in to change notification settings - Fork 13.9k
[libclc] Move several 'native' builtins to CLC library #129679
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libclc] Move several 'native' builtins to CLC library #129679
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Fraser Cormack (frasercrmck) Changes: This commit moves the 'native' builtins that use asm statements to generate LLVM intrinsics to the CLC library. In doing so it converts them to use the appropriate elementwise builtin to generate the same intrinsic; there are no codegen changes to any target. This work forms part of #127196 and indeed with this commit there are no 'generic' builtins using/abusing asm statements - the remaining builtins are specific to the amdgpu and r600 targets. Patch is 21.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129679.diff 41 Files Affected:
diff --git a/libclc/amdgpu/lib/SOURCES b/libclc/amdgpu/lib/SOURCES
index d7782a2ae14dc..ab5da40711aa4 100644
--- a/libclc/amdgpu/lib/SOURCES
+++ b/libclc/amdgpu/lib/SOURCES
@@ -1,6 +1,3 @@
-math/native_exp.cl
-math/native_log.cl
-math/native_log10.cl
math/half_exp.cl
math/half_exp10.cl
math/half_exp2.cl
diff --git a/libclc/amdgpu/lib/math/native_exp.cl b/libclc/amdgpu/lib/math/native_exp.cl
deleted file mode 100644
index 71c9a2ea2fccf..0000000000000
--- a/libclc/amdgpu/lib/math/native_exp.cl
+++ /dev/null
@@ -1,5 +0,0 @@
-#include <clc/clc.h>
-
-#define __CLC_BODY <native_exp.inc>
-#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
diff --git a/libclc/amdgpu/lib/math/native_exp.inc b/libclc/amdgpu/lib/math/native_exp.inc
deleted file mode 100644
index c61ec0157aad8..0000000000000
--- a/libclc/amdgpu/lib/math/native_exp.inc
+++ /dev/null
@@ -1,3 +0,0 @@
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_exp(__CLC_GENTYPE val) {
- return native_exp2(val * M_LOG2E_F);
-}
diff --git a/libclc/amdgpu/lib/math/native_log.cl b/libclc/amdgpu/lib/math/native_log.cl
deleted file mode 100644
index 3c5592f602c35..0000000000000
--- a/libclc/amdgpu/lib/math/native_log.cl
+++ /dev/null
@@ -1,5 +0,0 @@
-#include <clc/clc.h>
-
-#define __CLC_BODY <native_log.inc>
-#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
diff --git a/libclc/amdgpu/lib/math/native_log.inc b/libclc/amdgpu/lib/math/native_log.inc
deleted file mode 100644
index e6818dc132614..0000000000000
--- a/libclc/amdgpu/lib/math/native_log.inc
+++ /dev/null
@@ -1,3 +0,0 @@
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_log(__CLC_GENTYPE val) {
- return native_log2(val) * (1.0f / M_LOG2E_F);
-}
diff --git a/libclc/amdgpu/lib/math/native_log10.cl b/libclc/amdgpu/lib/math/native_log10.cl
deleted file mode 100644
index 2e7c11ec1975e..0000000000000
--- a/libclc/amdgpu/lib/math/native_log10.cl
+++ /dev/null
@@ -1,5 +0,0 @@
-#include <clc/clc.h>
-
-#define __CLC_BODY <native_log10.inc>
-#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
diff --git a/libclc/amdgpu/lib/math/native_log10.inc b/libclc/amdgpu/lib/math/native_log10.inc
deleted file mode 100644
index 96b1a0a23c660..0000000000000
--- a/libclc/amdgpu/lib/math/native_log10.inc
+++ /dev/null
@@ -1,3 +0,0 @@
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_log10(__CLC_GENTYPE val) {
- return native_log2(val) * (M_LN2_F / M_LN10_F);
-}
diff --git a/libclc/clc/include/clc/math/clc_native_cos.h b/libclc/clc/include/clc/math/clc_native_cos.h
new file mode 100644
index 0000000000000..9563b7cc2f48f
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_native_cos.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_NATIVE_COS_H__
+#define __CLC_MATH_CLC_NATIVE_COS_H__
+
+#define __CLC_FUNCTION __clc_native_cos
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_NATIVE_COS_H__
diff --git a/libclc/clc/include/clc/math/clc_native_exp.h b/libclc/clc/include/clc/math/clc_native_exp.h
new file mode 100644
index 0000000000000..6c82cc778b454
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_native_exp.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_NATIVE_EXP_H__
+#define __CLC_MATH_CLC_NATIVE_EXP_H__
+
+#define __CLC_FUNCTION __clc_native_exp
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_NATIVE_EXP_H__
diff --git a/libclc/clc/include/clc/math/clc_native_exp2.h b/libclc/clc/include/clc/math/clc_native_exp2.h
new file mode 100644
index 0000000000000..b120eebb8a1e3
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_native_exp2.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_NATIVE_EXP2_H__
+#define __CLC_MATH_CLC_NATIVE_EXP2_H__
+
+#define __CLC_FUNCTION __clc_native_exp2
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_NATIVE_EXP2_H__
diff --git a/libclc/clc/include/clc/math/clc_native_log.h b/libclc/clc/include/clc/math/clc_native_log.h
new file mode 100644
index 0000000000000..db7cc2cb6eda7
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_native_log.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_NATIVE_LOG_H__
+#define __CLC_MATH_CLC_NATIVE_LOG_H__
+
+#define __CLC_FUNCTION __clc_native_log
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_NATIVE_LOG_H__
diff --git a/libclc/clc/include/clc/math/clc_native_log10.h b/libclc/clc/include/clc/math/clc_native_log10.h
new file mode 100644
index 0000000000000..0b33243084769
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_native_log10.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_NATIVE_LOG10_H__
+#define __CLC_MATH_CLC_NATIVE_LOG10_H__
+
+#define __CLC_FUNCTION __clc_native_log10
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_NATIVE_LOG10_H__
diff --git a/libclc/clc/include/clc/math/clc_native_log2.h b/libclc/clc/include/clc/math/clc_native_log2.h
new file mode 100644
index 0000000000000..e0fb84f8af63d
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_native_log2.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_NATIVE_LOG2_H__
+#define __CLC_MATH_CLC_NATIVE_LOG2_H__
+
+#define __CLC_FUNCTION __clc_native_log2
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_NATIVE_LOG2_H__
diff --git a/libclc/clc/include/clc/math/clc_native_sin.h b/libclc/clc/include/clc/math/clc_native_sin.h
new file mode 100644
index 0000000000000..37087faff1baa
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_native_sin.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_NATIVE_SIN_H__
+#define __CLC_MATH_CLC_NATIVE_SIN_H__
+
+#define __CLC_FUNCTION __clc_native_sin
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_NATIVE_SIN_H__
diff --git a/libclc/clc/include/clc/math/clc_native_sqrt.h b/libclc/clc/include/clc/math/clc_native_sqrt.h
new file mode 100644
index 0000000000000..21b26ba504b58
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_native_sqrt.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_NATIVE_SQRT_H__
+#define __CLC_MATH_CLC_NATIVE_SQRT_H__
+
+#define __CLC_FUNCTION __clc_native_sqrt
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_NATIVE_SQRT_H__
diff --git a/libclc/clc/include/clc/math/unary_intrin.inc b/libclc/clc/include/clc/math/unary_intrin.inc
deleted file mode 100644
index 5ea2246244bef..0000000000000
--- a/libclc/clc/include/clc/math/unary_intrin.inc
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <clc/clcfunc.h>
-#include <clc/clctypes.h>
-
-_CLC_OVERLOAD float __CLC_FUNCTION(float f) __asm(__CLC_INTRINSIC ".f32");
-_CLC_OVERLOAD float2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32");
-_CLC_OVERLOAD float3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32");
-_CLC_OVERLOAD float4 __CLC_FUNCTION(float4 f) __asm(__CLC_INTRINSIC ".v4f32");
-_CLC_OVERLOAD float8 __CLC_FUNCTION(float8 f) __asm(__CLC_INTRINSIC ".v8f32");
-_CLC_OVERLOAD float16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC
- ".v16f32");
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-_CLC_OVERLOAD double __CLC_FUNCTION(double d) __asm(__CLC_INTRINSIC ".f64");
-_CLC_OVERLOAD double2 __CLC_FUNCTION(double2 d) __asm(__CLC_INTRINSIC ".v2f64");
-_CLC_OVERLOAD double3 __CLC_FUNCTION(double3 d) __asm(__CLC_INTRINSIC ".v3f64");
-_CLC_OVERLOAD double4 __CLC_FUNCTION(double4 d) __asm(__CLC_INTRINSIC ".v4f64");
-_CLC_OVERLOAD double8 __CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64");
-_CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC
- ".v16f64");
-#endif
-
-#ifdef cl_khr_fp16
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_OVERLOAD half __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16");
-_CLC_OVERLOAD half2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16");
-_CLC_OVERLOAD half3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16");
-_CLC_OVERLOAD half4 __CLC_FUNCTION(half4 d) __asm(__CLC_INTRINSIC ".v4f16");
-_CLC_OVERLOAD half8 __CLC_FUNCTION(half8 d) __asm(__CLC_INTRINSIC ".v8f16");
-_CLC_OVERLOAD half16 __CLC_FUNCTION(half16 d) __asm(__CLC_INTRINSIC ".v16f16");
-#endif
-
-#undef __CLC_FUNCTION
-#undef __CLC_INTRINSIC
diff --git a/libclc/clc/lib/amdgpu/SOURCES b/libclc/clc/lib/amdgpu/SOURCES
index fd64a862021e8..dfb0c8005676c 100644
--- a/libclc/clc/lib/amdgpu/SOURCES
+++ b/libclc/clc/lib/amdgpu/SOURCES
@@ -1 +1,4 @@
+math/clc_native_log10.cl
+math/clc_native_log.cl
+math/clc_native_exp.cl
math/clc_sqrt_fp64.cl
diff --git a/libclc/clc/lib/amdgpu/math/clc_native_exp.cl b/libclc/clc/lib/amdgpu/math/clc_native_exp.cl
new file mode 100644
index 0000000000000..0f2fdaf2a2d8f
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/math/clc_native_exp.cl
@@ -0,0 +1,7 @@
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_native_exp2.h>
+
+#define __CLC_BODY <clc_native_exp.inc>
+#define __FLOAT_ONLY
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/amdgpu/math/clc_native_exp.inc b/libclc/clc/lib/amdgpu/math/clc_native_exp.inc
new file mode 100644
index 0000000000000..fa5685a339a89
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/math/clc_native_exp.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_exp(__CLC_GENTYPE val) {
+ return __clc_native_exp2(val * M_LOG2E_F);
+}
diff --git a/libclc/clc/lib/amdgpu/math/clc_native_log.cl b/libclc/clc/lib/amdgpu/math/clc_native_log.cl
new file mode 100644
index 0000000000000..f99ddfb91c83c
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/math/clc_native_log.cl
@@ -0,0 +1,7 @@
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_native_log2.h>
+
+#define __CLC_BODY <clc_native_log.inc>
+#define __FLOAT_ONLY
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/amdgpu/math/clc_native_log.inc b/libclc/clc/lib/amdgpu/math/clc_native_log.inc
new file mode 100644
index 0000000000000..7db3244e6ae6a
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/math/clc_native_log.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_log(__CLC_GENTYPE val) {
+ return __clc_native_log2(val) * (1.0f / M_LOG2E_F);
+}
diff --git a/libclc/clc/lib/amdgpu/math/clc_native_log10.cl b/libclc/clc/lib/amdgpu/math/clc_native_log10.cl
new file mode 100644
index 0000000000000..20c5a6fefacb6
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/math/clc_native_log10.cl
@@ -0,0 +1,7 @@
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_native_log2.h>
+
+#define __CLC_BODY <clc_native_log10.inc>
+#define __FLOAT_ONLY
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/amdgpu/math/clc_native_log10.inc b/libclc/clc/lib/amdgpu/math/clc_native_log10.inc
new file mode 100644
index 0000000000000..01b7e1154b64d
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/math/clc_native_log10.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_log10(__CLC_GENTYPE val) {
+ return __clc_native_log2(val) * (M_LN2_F / M_LN10_F);
+}
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index 206c7c18ce1a1..ff0e0b42f2f2f 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -30,6 +30,14 @@ math/clc_log2.cl
math/clc_mad.cl
math/clc_modf.cl
math/clc_nan.cl
+math/clc_native_cos.cl
+math/clc_native_exp.cl
+math/clc_native_exp2.cl
+math/clc_native_log.cl
+math/clc_native_log10.cl
+math/clc_native_log2.cl
+math/clc_native_sin.cl
+math/clc_native_sqrt.cl
math/clc_nextafter.cl
math/clc_rint.cl
math/clc_round.cl
diff --git a/libclc/clc/lib/generic/math/clc_native_cos.cl b/libclc/clc/lib/generic/math/clc_native_cos.cl
new file mode 100644
index 0000000000000..114f4ac301631
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_native_cos.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_native_cos
+#define __CLC_BUILTIN __builtin_elementwise_cos
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/clc/lib/generic/math/clc_native_exp.cl b/libclc/clc/lib/generic/math/clc_native_exp.cl
new file mode 100644
index 0000000000000..f56fc4578f601
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_native_exp.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_native_exp
+#define __CLC_BUILTIN __builtin_elementwise_exp
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/clc/lib/generic/math/clc_native_exp2.cl b/libclc/clc/lib/generic/math/clc_native_exp2.cl
new file mode 100644
index 0000000000000..cbc3ec2599ba5
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_native_exp2.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_native_exp2
+#define __CLC_BUILTIN __builtin_elementwise_exp2
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/clc/lib/generic/math/clc_native_log.cl b/libclc/clc/lib/generic/math/clc_native_log.cl
new file mode 100644
index 0000000000000..da10ece33bee1
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_native_log.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_native_log
+#define __CLC_BUILTIN __builtin_elementwise_log
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/clc/lib/generic/math/clc_native_log10.cl b/libclc/clc/lib/generic/math/clc_native_log10.cl
new file mode 100644
index 0000000000000..bec055d99e81c
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_native_log10.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_native_log10
+#define __CLC_BUILTIN __builtin_elementwise_log10
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/clc/lib/generic/math/clc_native_log2.cl b/libclc/clc/lib/generic/math/clc_native_log2.cl
new file mode 100644
index 0000000000000..bd58a9f977f2d
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_native_log2.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_native_log2
+#define __CLC_BUILTIN __builtin_elementwise_log2
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/clc/lib/generic/math/clc_native_sin.cl b/libclc/clc/lib/generic/math/clc_native_sin.cl
new file mode 100644
index 0000000000000..17647b1fc1dfd
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_native_sin.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_native_sin
+#define __CLC_BUILTIN __builtin_elementwise_sin
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/clc/lib/generic/math/clc_native_sqrt.cl b/libclc/clc/lib/generic/math/clc_native_sqrt.cl
new file mode 100644
index 0000000000000..28f55ec51cc73
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_native_sqrt.cl
@@ -0,0 +1,6 @@
+#include <clc/internal/clc.h>
+
+#undef __CLC_FUNCTION
+#define __CLC_FUNCTION __clc_native_sqrt
+#define __CLC_BUILTIN __builtin_elementwise_sqrt
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/generic/lib/math/native_cos.cl b/libclc/generic/lib/math/native_cos.cl
index 3a934272a2838..c839eb3b78d74 100644
--- a/libclc/generic/lib/math/native_cos.cl
+++ b/libclc/generic/lib/math/native_cos.cl
@@ -1,7 +1,6 @@
#include <clc/clc.h>
+#include <clc/math/clc_native_cos.h>
-#define __CLC_NATIVE_INTRINSIC cos
-
-#define __CLC_BODY <native_unary_intrinsic.inc>
#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
+#define __CLC_FUNCTION native_cos
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/generic/lib/math/native_exp.cl b/libclc/generic/lib/math/native_exp.cl
index 889bb135c0619..0e550072f2ce1 100644
--- a/libclc/generic/lib/math/native_exp.cl
+++ b/libclc/generic/lib/math/native_exp.cl
@@ -1,7 +1,6 @@
#include <clc/clc.h>
+#include <clc/math/clc_native_exp.h>
-#define __CLC_NATIVE_INTRINSIC exp
-
-#define __CLC_BODY <native_unary_intrinsic.inc>
#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
+#define __CLC_FUNCTION native_exp
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/generic/lib/math/native_exp2.cl b/libclc/generic/lib/math/native_exp2.cl
index 0312f998ebd8a..d42d0e8cacc47 100644
--- a/libclc/generic/lib/math/native_exp2.cl
+++ b/libclc/generic/lib/math/native_exp2.cl
@@ -1,7 +1,6 @@
#include <clc/clc.h>
+#include <clc/math/clc_native_exp2.h>
-#define __CLC_NATIVE_INTRINSIC exp2
-
-#define __CLC_BODY <native_unary_intrinsic.inc>
#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
+#define __CLC_FUNCTION native_exp2
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/generic/lib/math/native_log.cl b/libclc/generic/lib/math/native_log.cl
index 5708249a67078..334fcd32f4e35 100644
--- a/libclc/generic/lib/math/native_log.cl
+++ b/libclc/generic/lib/math/native_log.cl
@@ -21,9 +21,8 @@
*/
#include <clc/clc.h>
+#include <clc/math/clc_native_log.h>
-#define __CLC_NATIVE_INTRINSIC log
-
-#define __CLC_BODY <native_unary_intrinsic.inc>
#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
+#define __CLC_FUNCTION native_log
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/generic/lib/math/native_log10.cl b/libclc/generic/lib/math/native_log10.cl
index d69b7b608c3a1..2888ec4aac7f1 100644
--- a/libclc/generic/lib/math/native_log10.cl
+++ b/libclc/generic/lib/math/native_log10.cl
@@ -1,7 +1,6 @@
#include <clc/clc.h>
+#include <clc/math/clc_native_log10.h>
-#define __CLC_NATIVE_INTRINSIC log10
-
-#define __CLC_BODY <native_unary_intrinsic.inc>
#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
+#define __CLC_FUNCTION native_log10
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/generic/lib/math/native_log2.cl b/libclc/generic/lib/math/native_log2.cl
index b6104237ab2de..8ed762ddcbdae 100644
--- a/libclc/generic/lib/math/native_log2.cl
+++ b/libclc/generic/lib/math/native_log2.cl
@@ -21,8 +21,8 @@
*/
#include <clc/clc.h>
+#include <clc/math/clc_native_log2.h>
-#define __CLC_NATIVE_INTRINSIC log2
-#define __CLC_BODY <native_unary_intrinsic.inc>
#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
+#define __CLC_FUNCTION native_log2
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/generic/lib/math/native_sin.cl b/libclc/generic/lib/math/native_sin.cl
index fd9232f188efd..8430403567fce 100644
--- a/libclc/generic/lib/math/native_sin.cl
+++ b/libclc/generic/lib/math/native_sin.cl
@@ -1,7 +1,6 @@
#include <clc/clc.h>
+#include <clc/math/clc_native_sin.h>
-#define __CLC_NATIVE_INTRINSIC sin
-
-#define __CLC_BODY <native_unary_intrinsic.inc>
#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
+#define __CLC_FUNCTION native_sin
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/generic/lib/math/native_sqrt.cl b/libclc/generic/lib/math/native_sqrt.cl
index 92a2e1bef6e8c..f284a8b036da0 100644
--- a/libclc/generic/lib/math/native_sqrt.cl
+++ b/libclc/generic/lib/math/native_sqrt.cl
@@ -1,7 +1,6 @@
#include <clc/clc.h>
+#include <clc/math/clc_native_sqrt.h>
-#define __CLC_NATIVE_INTRINSIC sqrt
-
-#define __CLC_BODY <native_unary_intrinsic.inc>
#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
+#define __CLC_FUNCTION native_sqrt
+#include <clc/math/unary_builtin.inc>
diff --git a/libclc/generic/lib/math/native_unary_intrinsic.inc b/libclc/generic/lib/math/native_unary_intrinsic.inc
deleted file mode 100644
index c0a3efdc8b9a3..0000000000000
--- a/libclc/generic/lib/math/native_unary_intrinsic.inc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- ...
[truncated]
|
@arsenm it's very possible that AMDGPU no longer needs the custom implementations of exp/log/log10 but I kept them until you say otherwise. |
This came up during a discussion on llvm#129679, which has been split out as a preparatory commit. An example of the AMDGPU codegen is: define <2 x float> @_Z10native_expDv2_f(<2 x float> %x) { %0 = extractelement <2 x float> %x, i64 0 %mul.i4 = fmul afn float %0, 0x3FF7154760000000 %1 = tail call afn float @llvm.amdgcn.exp2.f32(float %mul.i4) %vecinit = insertelement <2 x float> poison, float %1, i64 0 %2 = extractelement <2 x float> %x, i64 1 %mul.i = fmul afn float %2, 0x3FF7154760000000 %3 = tail call afn float @llvm.amdgcn.exp2.f32(float %mul.i) %vecinit2 = insertelement <2 x float> %vecinit, float %3, i64 1 ret <2 x float> %vecinit2 }
This came up during a discussion on llvm#129679, which has been split out as a preparatory commit. An example of the AMDGPU codegen is: define <2 x float> @_Z10native_expDv2_f(<2 x float> %val) { entry: %mul = fmul afn <2 x float> %val, splat (float 0x3FF7154760000000) %0 = extractelement <2 x float> %mul, i64 0 %1 = tail call float @llvm.amdgcn.exp2.f32(float %0) %vecinit.i = insertelement <2 x float> poison, float %1, i64 0 %2 = extractelement <2 x float> %mul, i64 1 %3 = tail call float @llvm.amdgcn.exp2.f32(float %2) %vecinit2.i = insertelement <2 x float> %vecinit.i, float %3, i64 1 ret <2 x float> %vecinit2.i } define <2 x float> @_Z11native_exp2Dv2_f(<2 x float> %x) { entry: %0 = extractelement <2 x float> %x, i64 0 %1 = tail call float @llvm.amdgcn.exp2.f32(float %0) %vecinit = insertelement <2 x float> poison, float %1, i64 0 %2 = extractelement <2 x float> %x, i64 1 %3 = tail call float @llvm.amdgcn.exp2.f32(float %2) %vecinit2 = insertelement <2 x float> %vecinit, float %3, i64 1 ret <2 x float> %vecinit2 }
This came up during a discussion on #129679, which has been split out as a preparatory commit. An example of the AMDGPU codegen is: define <2 x float> @_Z10native_expDv2_f(<2 x float> %val) { %mul = fmul afn <2 x float> %val, splat (float 0x3FF7154760000000) %0 = extractelement <2 x float> %mul, i64 0 %1 = tail call float @llvm.amdgcn.exp2.f32(float %0) %vecinit.i = insertelement <2 x float> poison, float %1, i64 0 %2 = extractelement <2 x float> %mul, i64 1 %3 = tail call float @llvm.amdgcn.exp2.f32(float %2) %vecinit2.i = insertelement <2 x float> %vecinit.i, float %3, i64 1 ret <2 x float> %vecinit2.i } define <2 x float> @_Z11native_exp2Dv2_f(<2 x float> %x) { %0 = extractelement <2 x float> %x, i64 0 %1 = tail call float @llvm.amdgcn.exp2.f32(float %0) %vecinit = insertelement <2 x float> poison, float %1, i64 0 %2 = extractelement <2 x float> %x, i64 1 %3 = tail call float @llvm.amdgcn.exp2.f32(float %2) %vecinit2 = insertelement <2 x float> %vecinit, float %3, i64 1 ret <2 x float> %vecinit2 }
This commit moves the 'native' builtins that use asm statements to generate LLVM intrinsics to the CLC library. In doing so it converts them to use the appropriate elementwise builtin to generate the same intrinsic; there are no codegen changes to any target. This work forms part of llvm#127196 and indeed with this commit there are no 'generic' builtins using/abusing asm statements - the remaining builtins are specific to the amdgpu and r600 targets.
4ba508a
to
aabdf57
Compare
This commit moves the 'native' builtins that use asm statements to generate LLVM intrinsics to the CLC library. In doing so it converts them to use the appropriate elementwise builtin to generate the same intrinsic; there are no codegen changes to any target except to AMDGPU targets where `native_log` is no longer custom implemented and instead uses the clang elementwise builtin. This work forms part of llvm#127196 and indeed with this commit there are no 'generic' builtins using/abusing asm statements - the remaining builtins are specific to the amdgpu and r600 targets.
This commit moves the 'native' builtins that use asm statements to generate LLVM intrinsics to the CLC library. In doing so it converts them to use the appropriate elementwise builtin to generate the same intrinsic; there are no codegen changes to any target except to AMDGPU targets where
native_log
is no longer custom implemented and instead uses the clang elementwise builtin. This work forms part of #127196 and indeed with this commit there are no 'generic' builtins using/abusing asm statements - the remaining builtins are specific to the amdgpu and r600 targets.