|
20 | 20 | * THE SOFTWARE.
|
21 | 21 | */
|
22 | 22 |
|
23 |
| -#include "math/clc_sqrt.h" |
24 |
| -#include <clc/clc.h> |
25 | 23 | #include <clc/clcmacro.h>
|
26 |
| - |
27 |
| -_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float) |
28 |
| - |
29 |
| -#ifdef cl_khr_fp16 |
30 |
| - |
31 |
| -#pragma OPENCL EXTENSION cl_khr_fp16 : enable |
32 |
| -_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half) |
33 |
| - |
34 |
| -#endif |
| 24 | +#include <clc/internal/clc.h> |
| 25 | +#include <clc/math/clc_fma.h> |
| 26 | +#include <clc/math/clc_ldexp.h> |
35 | 27 |
|
36 | 28 | #ifdef cl_khr_fp64
|
37 | 29 |
|
38 | 30 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
39 | 31 |
|
40 | 32 | #ifdef __AMDGCN__
|
41 |
| - #define __clc_builtin_rsq __builtin_amdgcn_rsq |
| 33 | +#define __clc_builtin_rsq __builtin_amdgcn_rsq |
42 | 34 | #else
|
43 |
| - #define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee |
| 35 | +#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee |
44 | 36 | #endif
|
45 | 37 |
|
46 |
| -_CLC_OVERLOAD _CLC_DEF double sqrt(double x) { |
47 |
| - |
| 38 | +_CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) { |
48 | 39 | uint vcc = x < 0x1p-767;
|
49 | 40 | uint exp0 = vcc ? 0x100 : 0;
|
50 | 41 | unsigned exp1 = vcc ? 0xffffff80 : 0;
|
51 | 42 |
|
52 |
| - double v01 = ldexp(x, exp0); |
| 43 | + double v01 = __clc_ldexp(x, exp0); |
53 | 44 | double v23 = __clc_builtin_rsq(v01);
|
54 | 45 | double v45 = v01 * v23;
|
55 | 46 | v23 = v23 * 0.5;
|
56 | 47 |
|
57 |
| - double v67 = fma(-v23, v45, 0.5); |
58 |
| - v45 = fma(v45, v67, v45); |
59 |
| - double v89 = fma(-v45, v45, v01); |
60 |
| - v23 = fma(v23, v67, v23); |
61 |
| - v45 = fma(v89, v23, v45); |
62 |
| - v67 = fma(-v45, v45, v01); |
63 |
| - v23 = fma(v67, v23, v45); |
| 48 | + double v67 = __clc_fma(-v23, v45, 0.5); |
| 49 | + v45 = __clc_fma(v45, v67, v45); |
| 50 | + double v89 = __clc_fma(-v45, v45, v01); |
| 51 | + v23 = __clc_fma(v23, v67, v23); |
| 52 | + v45 = __clc_fma(v89, v23, v45); |
| 53 | + v67 = __clc_fma(-v45, v45, v01); |
| 54 | + v23 = __clc_fma(v67, v23, v45); |
64 | 55 |
|
65 |
| - v23 = ldexp(v23, exp1); |
66 |
| - return ((x == __builtin_inf()) || (x == 0.0)) ? v01 : v23; |
| 56 | + v23 = __clc_ldexp(v23, exp1); |
| 57 | + return (x == __builtin_inf() || (x == 0.0)) ? v01 : v23; |
67 | 58 | }
|
68 | 59 |
|
69 |
| -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double); |
| 60 | +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_sqrt, double); |
70 | 61 |
|
71 | 62 | #endif
|
0 commit comments