Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 1 addition & 27 deletions amd/device-libs/ocml/src/privF.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,30 +21,4 @@

/* Fast single-precision square root: expands directly to the
 * BUILTIN_AMDGPU_SQRT_F32 builtin with no extra input scaling or
 * result correction applied by this macro.
 * NOTE(review): accuracy/denormal behavior is whatever the builtin
 * provides -- confirm against the builtin's definition. */
#define MATH_FAST_SQRT(X) BUILTIN_AMDGPU_SQRT_F32(X)

/*
 * MATH_SQRT(X): single-precision square root written as a GNU statement
 * expression; the value of the expression is the final _sqrt_s.
 *
 * X is evaluated once and copied into the local float _sqrt_x.
 *
 * Steps, in order:
 *  1. Range scaling: if the input is below 0x1.0p-96f (this also covers
 *     negative inputs), it is multiplied by 2^32 before the root is taken
 *     and the result is multiplied by 2^-16 afterwards (sqrt(2^32) == 2^16).
 *  2a. When DAZ_OPT() is false: take BUILTIN_AMDGPU_SQRT_F32 as the initial
 *     estimate s, form the two neighbours sp / ss whose integer bit patterns
 *     are s-1 and s+1 (AS_INT / AS_FLOAT presumably reinterpret bits --
 *     confirm), compute the residuals x - sp*s and x - ss*s with FMA, and
 *     step down to sp when the first residual is <= 0 or up to ss when the
 *     second residual is > 0.
 *  2b. When DAZ_OPT() is true: start from r = BUILTIN_AMDGPU_RSQRT_F32(x),
 *     set s = x*r and h = 0.5*r, then apply FMA-based corrections:
 *       e = 0.5 - h*s;  h += h*e;  s += s*e;  d = x - s*s;  s += d*h.
 *  3. Undo the scaling from step 1.
 *  4. If the (possibly scaled) input classifies as CLASS_PZER, CLASS_NZER or
 *     CLASS_PINF, the input itself is returned instead of the computed
 *     value (presumably +0, -0 and +infinity -- confirm the class masks).
 *
 * NOTE(review): DAZ_OPT() presumably reports a denormals-are-zero build
 * option; verify where it is defined.
 * NOTE(review): X is used unparenthesized in the initializer of _sqrt_x.
 */
#define MATH_SQRT(X) ({ \
float _sqrt_x = X; \
bool _sqrt_b = _sqrt_x < 0x1.0p-96f; \
_sqrt_x *= _sqrt_b ? 0x1.0p+32f : 1.0f; \
float _sqrt_s; \
if (!DAZ_OPT()) { \
_sqrt_s = BUILTIN_AMDGPU_SQRT_F32(_sqrt_x); \
float _sqrt_sp = AS_FLOAT(AS_INT(_sqrt_s) - 1); \
float _sqrt_ss = AS_FLOAT(AS_INT(_sqrt_s) + 1); \
float _sqrt_vp = BUILTIN_FMA_F32(-_sqrt_sp, _sqrt_s, _sqrt_x); \
float _sqrt_vs = BUILTIN_FMA_F32(-_sqrt_ss, _sqrt_s, _sqrt_x); \
_sqrt_s = _sqrt_vp <= 0.0f ? _sqrt_sp : _sqrt_s; \
_sqrt_s = _sqrt_vs > 0.0f ? _sqrt_ss : _sqrt_s; \
} else { \
float _sqrt_r = BUILTIN_AMDGPU_RSQRT_F32(_sqrt_x); \
_sqrt_s = _sqrt_x * _sqrt_r; \
float _sqrt_h = 0.5f * _sqrt_r; \
float _sqrt_e = BUILTIN_FMA_F32(-_sqrt_h, _sqrt_s, 0.5f); \
_sqrt_h = BUILTIN_FMA_F32(_sqrt_h, _sqrt_e, _sqrt_h); \
_sqrt_s = BUILTIN_FMA_F32(_sqrt_s, _sqrt_e, _sqrt_s); \
float _sqrt_d = BUILTIN_FMA_F32(-_sqrt_s, _sqrt_s, _sqrt_x); \
_sqrt_s = BUILTIN_FMA_F32(_sqrt_d, _sqrt_h, _sqrt_s); \
} \
_sqrt_s *= _sqrt_b ? 0x1.0p-16f : 1.0f; \
_sqrt_s = BUILTIN_CLASS_F32(_sqrt_x, CLASS_PZER|CLASS_NZER|CLASS_PINF) ? _sqrt_x : _sqrt_s; \
_sqrt_s; \
})
/* MATH_SQRT(X): forwards to the __ocml_sqrt_f32 function instead of
 * expanding an inline implementation at each use site.
 * NOTE(review): this text comes from a diff view whose header reports
 * "1 addition & 27 deletions"; presumably this line is the single addition
 * and the only MATH_SQRT definition left in the resulting header -- confirm
 * against the actual file. */
#define MATH_SQRT(X) __ocml_sqrt_f32(X)