Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 175 additions & 0 deletions dag_vecMath_trig_constexpr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#pragma once

constexpr long double const_pi = 3.1415926535897932384626433832795028841972L;
constexpr long double const_half_pi = 1.5707963267948966192313216916397514420986L;

template <typename T> constexpr bool v_isnan(const T x) noexcept { return x != x; }

constexpr float v_infinity() noexcept { return __builtin_huge_val(); }

template <typename T> constexpr bool v_isneginf(const T x) noexcept { return (x == -__builtin_huge_val()); }

template <typename T> constexpr bool v_isposinf(const T x) noexcept { return (x == __builtin_huge_val()); }

template <typename T> constexpr bool v_isinf(const T x) noexcept { return (v_isneginf(x) || v_isposinf(x)); }

template <typename T> constexpr bool v_isfinite(const T x) noexcept { return (!v_isnan(x)) && (!v_isinf(x)); }

template <typename T> constexpr T v_abs_c(const T x) noexcept { return (x == T(0) ? T(0) : (x < T(0)) ? (-x) : x); }
constexpr bool v_isodd_c(const int64_t x) noexcept { return (x & 1U) != 0; }

namespace detail_pow_integral {
// forward declaration
template <typename T1, typename T2> constexpr T1 pow_integral_compute(const T1 base, const T2 exp_term) noexcept;

// integral-valued powers using method described in
// https://en.wikipedia.org/wiki/Exponentiation_by_squaring

template <typename T1, typename T2> constexpr T1 pow_integral_compute_recur(const T1 base, const T1 val, const T2 exp_term) noexcept {
if (exp_term > T2(1)) {
if (v_isodd_c(exp_term)) {
return pow_integral_compute_recur(base * base, val * base, exp_term / 2);
}

return pow_integral_compute_recur(base * base, val, exp_term / 2);
} else if (exp_term == T2(1)) {
return (val * base);
}

return val;
}

template <typename T1, typename T2>
constexpr T1 pow_integral_sgn_check(const T1 base, const T2 exp_term) noexcept {
return (pow_integral_compute_recur(base, T1(1), exp_term));
}

template <typename T1, typename T2> constexpr T1 pow_integral_compute(const T1 base, const T2 exp_term) noexcept {
if (exp_term == T2(3)) {
return (base * base * base);
} else if (exp_term == T2(2)) {
return (base * base);
} else if (exp_term == T2(1)) {
return base;
} else if (exp_term == T2(0)) {
return T1(1);
} else if (exp_term == std::numeric_limits<T2>::min()) {
return T1(0);
} else if (exp_term == std::numeric_limits<T2>::max()) {
return v_infinity();
} else {
return pow_integral_sgn_check(base, exp_term);
}
}

template <typename T1, typename T2>
constexpr T1 _pow_integral(const T1 base, const T2 exp_term) noexcept {
return pow_integral_compute(base, static_cast<int64_t>(exp_term));
}

} // namespace detail_pow_integral

namespace detail_floor {

template <typename T> constexpr int floor_resid(const T x, const T x_whole) noexcept { return ((x < T(0)) && (x < x_whole)); }

template <typename T> constexpr T floor_int(const T x, const T x_whole) noexcept { return (x_whole - static_cast<T>(floor_resid(x, x_whole))); }

template <typename T> constexpr T _floor(const T x) noexcept {
return (v_isnan(x) ? std::numeric_limits<T>::quiet_NaN() : !v_isfinite(x) ? x : std::numeric_limits<T>::min() > v_abs_c(x) ? x : floor_int(x, T(static_cast<int64_t>(x))));
}

} // namespace detail_floor

namespace detail_tan {
// this is based on a fourth-order expansion of tan(z) using Bernoulli numbers
template <typename T> constexpr T tan_series_exp_long(const T x) noexcept {
return (-1 / x // iter 1
+ (x / 3 // iter 2
+ (detail_pow_integral::_pow_integral(x, 3) / 45 // iter 3
+ (2 * detail_pow_integral::_pow_integral(x, 5) / 945 // iter 4
+ detail_pow_integral::_pow_integral(x, 7) / 4725) // iter 5
)));
}

template <typename T> constexpr T tan_series_exp(const T x) noexcept {
// the value tan(pi/2) is somewhat of a convention;
// technically the function is not defined at EXACTLY pi/2,
// but this is floating point pi/2
// otherwise we use an expansion around pi/2
return (std::numeric_limits<T>::min() > v_abs_c(x - T(const_half_pi)) ? T(1.633124e+16) : tan_series_exp_long(x - T(const_half_pi)));
}

template <typename T> constexpr T tan_cf_recur(const T xx, const int depth, const int max_depth) noexcept {
return (depth < max_depth ? T(2 * depth - 1) - xx / tan_cf_recur(xx, depth + 1, max_depth) : T(2 * depth - 1));
}

template <typename T> constexpr T tan_cf_main(const T x) noexcept {
return (((x > T(1.55) && x < T(1.60)))
? tan_series_exp(x)
: x > T(1.4) ? x / tan_cf_recur(x * x, 1, 45) : x > T(1) ? x / tan_cf_recur(x * x, 1, 35) : x / tan_cf_recur(x * x, 1, 25));
}

template <typename T> constexpr T tan_begin(const T x, const int count = 0) noexcept {
return (x > T(const_pi) ? count > 1 ? std::numeric_limits<T>::quiet_NaN() : (tan_begin(x - T(const_pi) * detail_floor::_floor(x / T(const_pi)), count + 1))
: tan_cf_main(x));
}

template <typename T> constexpr T _tan(const T x) noexcept {
return (v_isnan(x) ? std::numeric_limits<T>::quiet_NaN() : (std::numeric_limits<T>::min() > v_abs_c(x)) ? T(0) : x < T(0) ? (-tan_begin(-x)) : tan_begin(x));
}
} // namespace detail_tan

namespace detail_cos {

template <typename T> constexpr T cos_impl(const T x) noexcept { return (T(1) - x * x) / (T(1) + x * x); }

template <typename T> constexpr T _cos(const T x) noexcept {
return (v_isnan(x) ? std::numeric_limits<T>::quiet_NaN()
: (std::numeric_limits<T>::min() > v_abs_c(x))
? T(1) // indistinguishable from 0
: (std::numeric_limits<T>::min() > v_abs_c(x - T(const_half_pi)))
? T(0) // special cases: pi/2 and pi
: (std::numeric_limits<T>::min() > v_abs_c(x + T(const_half_pi)))
? T(0)
: (std::numeric_limits<T>::min() > v_abs_c(x - T(const_pi)))
? -T(1)
: (std::numeric_limits<T>::min() > v_abs_c(x + T(const_pi))) ? -T(1) : cos_impl(detail_tan::_tan(x / T(2))));
}
} // namespace detail_cos

namespace detail_sin {
template <typename T> constexpr T sin_compute(const T x) noexcept { return T(2) * x / (T(1) + x * x); }

template <typename T> constexpr T _sin(const T x) noexcept {
return (v_isnan(x) ? std::numeric_limits<T>::quiet_NaN()
: std::numeric_limits<T>::min() > v_abs_c(x)
? T(0)
: std::numeric_limits<T>::min() > v_abs_c(x - T(const_half_pi)) // special cases: pi/2 and pi
? T(1)
: std::numeric_limits<T>::min() > v_abs_c(x + T(const_half_pi))
? -T(1)
: std::numeric_limits<T>::min() > v_abs_c(x - T(const_pi))
? T(0)
: std::numeric_limits<T>::min() > v_abs_c(x + T(const_pi)) ? -T(0) : sin_compute(detail_tan::_tan(x / T(2))));
}

} // namespace detail_sin

template <typename T> constexpr T v_tan_c(const T x) noexcept { return detail_tan::_tan(static_cast<T>(x)); }
constexpr vec4f v_tan_m(const vec4f a) noexcept { return vec4f{detail_tan::_tan(a.m128_f32[0]),
detail_tan::_tan(a.m128_f32[1]),
detail_tan::_tan(a.m128_f32[2]),
detail_tan::_tan(a.m128_f32[3])}; }

template <typename T> constexpr T v_cos_c(const T x) noexcept { return detail_cos::_cos(static_cast<T>(x)); }
constexpr vec4f v_cos_m(const vec4f a) noexcept { return vec4f{detail_cos::_cos(a.m128_f32[0]),
detail_cos::_cos(a.m128_f32[1]),
detail_cos::_cos(a.m128_f32[2]),
detail_cos::_cos(a.m128_f32[3])}; }

template <typename T> constexpr T v_sin_c(const T x) noexcept { return detail_sin::_sin(static_cast<T>(x)); }
constexpr vec4f v_sin_m(const vec4f a) noexcept { return vec4f{detail_sin::_sin(a.m128_f32[0]),
detail_sin::_sin(a.m128_f32[1]),
detail_sin::_sin(a.m128_f32[2]),
detail_sin::_sin(a.m128_f32[3])}; }
89 changes: 89 additions & 0 deletions dag_vecMath_trig_constexpr_unittest.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#include <assert.h>

#include "dag_vecMath.h"
#include "dag_vecMath_pc_sse.h"
#include "dag_vecMath_trig_constexpr.h"
#include "dag_vecMath_trig.h"

#pragma warning( push )
#pragma warning( disable : 4995 ) // Allow deprecated functions, cos/sin/log etc...
#pragma warning( disable : 4220 ) // Disable warning treated as an error, for this test case only.

#include <cmath> // Using to check our compile-time math with std implementation.
void test_constexpr_trig() {
#define COMPILETIME_TEST_COMPARE_VALS(a, b, ...) \
{ \
constexpr auto avalue = a(__VA_ARGS__); \
const auto bvalue = b(__VA_ARGS__); \
assert(v_abs_c(avalue - bvalue) < 0.0000001); \
}

#define COMPILETIME_TEST_COMPARE_MVALS(a, b, ...) \
{ \
constexpr auto avalue = a(__VA_ARGS__); \
const auto bvalue = b(__VA_ARGS__); \
assert(v_abs_c<float>(avalue.m128_f32[0] - bvalue.m128_f32[0]) < 0.00001); \
assert(v_abs_c<float>(avalue.m128_f32[1] - bvalue.m128_f32[1]) < 0.00001); \
assert(v_abs_c<float>(avalue.m128_f32[2] - bvalue.m128_f32[2]) < 0.00001); \
assert(v_abs_c<float>(avalue.m128_f32[3] - bvalue.m128_f32[3]) < 0.00001); \
}

// abs
COMPILETIME_TEST_COMPARE_VALS(v_abs_c, std::fabs, 0.0);
COMPILETIME_TEST_COMPARE_VALS(v_abs_c, std::fabs, -0.0);
COMPILETIME_TEST_COMPARE_VALS(v_abs_c, std::fabs, 1.0);
COMPILETIME_TEST_COMPARE_VALS(v_abs_c, std::fabs, -1.0);

// cos
constexpr vec4f point0{ 0.f, 1.f, 2.f, 3.f };
constexpr vec4f point1{ -1.5f, -1.0f, 1.0f, 1.5f };
constexpr vec4f point2{ 0.0f, 0.0001f, 1.0001f, 1.5f };
constexpr vec4f point3{ 1.5f, -1.5f, 0.0001f, 0.0f };

COMPILETIME_TEST_COMPARE_VALS(v_cos_c, std::cos, -1.5);
COMPILETIME_TEST_COMPARE_VALS(v_cos_c, std::cos, 0.0);

// cos vec4f
auto cos_m = [](vec4f p) { return vec4f{ std::cos(p.m128_f32[0]), std::cos(p.m128_f32[1]), std::cos(p.m128_f32[2]), std::cos(p.m128_f32[3]) }; };
COMPILETIME_TEST_COMPARE_MVALS(v_cos_m, cos_m, point1);
COMPILETIME_TEST_COMPARE_MVALS(v_cos_m, cos_m, point0);

// tan

COMPILETIME_TEST_COMPARE_VALS(v_tan_c, std::tan, 0.0);
COMPILETIME_TEST_COMPARE_VALS(v_tan_c, std::tan, 0.001);
COMPILETIME_TEST_COMPARE_VALS(v_tan_c, std::tan, 1.001);
COMPILETIME_TEST_COMPARE_VALS(v_tan_c, std::tan, 1.5);
COMPILETIME_TEST_COMPARE_VALS(v_tan_c, std::tan, -1.5);

// tan vec4f
auto tan_m = [](vec4f p) { return vec4f{ std::tan(p.m128_f32[0]), std::tan(p.m128_f32[1]), std::tan(p.m128_f32[2]), std::tan(p.m128_f32[3]) }; };
COMPILETIME_TEST_COMPARE_MVALS(v_tan_m, tan_m, point2);
COMPILETIME_TEST_COMPARE_MVALS(v_tan_m, tan_m, point3);


// sin
COMPILETIME_TEST_COMPARE_VALS(v_sin_c, std::sin, -1.5f);
COMPILETIME_TEST_COMPARE_VALS(v_sin_c, std::sin, 0.0f);
COMPILETIME_TEST_COMPARE_VALS(v_sin_c, std::sin, 0.001f);
COMPILETIME_TEST_COMPARE_VALS(v_sin_c, std::sin, 1.001f);
COMPILETIME_TEST_COMPARE_VALS(v_sin_c, std::sin, 1.5f);
COMPILETIME_TEST_COMPARE_VALS(v_sin_c, std::sin, 11.1f);

// sin vec4f
auto sin_m = [](vec4f p) { return vec4f{ std::sin(p.m128_f32[0]), std::sin(p.m128_f32[1]), std::sin(p.m128_f32[2]), std::sin(p.m128_f32[3]) }; };
COMPILETIME_TEST_COMPARE_MVALS(v_sin_m, sin_m, point0);
COMPILETIME_TEST_COMPARE_MVALS(v_sin_m, sin_m, point1);

// last check
// rad: 0.f 1.f 2.f 3.f
// val: 0.00000000 0.841471016 0.909297407 0.141120046}
constexpr vec4f gold_sin = v_sin_m(vec4f{0.f, 1.f, 2.f, 3.f});
assert(gold_sin.m128_f32[0] < 0.000001
&& v_abs_c(gold_sin.m128_f32[1] - 0.841471016) < 0.000001
&& v_abs_c(gold_sin.m128_f32[2] - 0.909297407) < 0.000001
&& v_abs_c(gold_sin.m128_f32[3] - 0.141120046) < 0.000001);

printf("All tests passed: OK");
}
#pragma warning(pop)