-
-
Notifications
You must be signed in to change notification settings - Fork 2
algo: implement sine/cosine based on Intel SVML for single/double precision #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
2c6746f
18d9307
4ebebf9
c8af54e
77ccfab
182c52c
424ab26
21ac103
7167b9f
8a3778c
cbfde16
9039ab8
a672683
eb24f8b
afbf0c8
fe1d0f8
43fb58b
c800ad8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,179 @@ | ||||||
#ifndef NUMPY_SIMD_ROUTINES_NPSR_COMMON_H_ | ||||||
#define NUMPY_SIMD_ROUTINES_NPSR_COMMON_H_ | ||||||
|
||||||
#include "hwy/highway.h" | ||||||
|
||||||
#include <cfenv> | ||||||
#include <type_traits> | ||||||
|
||||||
namespace npsr { | ||||||
|
||||||
struct _NoLargeArgument {}; | ||||||
struct _NoSpecialCases {}; | ||||||
struct _NoExceptions {}; | ||||||
struct _LowAccuracy {}; | ||||||
constexpr auto kNoLargeArgument = _NoLargeArgument{}; | ||||||
constexpr auto kNoSpecialCases = _NoSpecialCases{}; | ||||||
constexpr auto kNoExceptions = _NoExceptions{}; | ||||||
constexpr auto kLowAccuracy = _LowAccuracy{}; | ||||||
|
||||||
struct Round { | ||||||
struct _Force {}; | ||||||
struct _Nearest {}; | ||||||
struct _Down {}; | ||||||
struct _Up {}; | ||||||
struct _Zero {}; | ||||||
static constexpr auto kForce = _Force{}; | ||||||
static constexpr auto kNearest = _Nearest{}; | ||||||
#if 0 // not used yet | ||||||
static constexpr auto kDown = _Down{}; | ||||||
static constexpr auto kUp = _Up{}; | ||||||
static constexpr auto kZero = _Zero{}; | ||||||
#endif | ||||||
}; | ||||||
|
||||||
struct Subnormal { | ||||||
struct _DAZ {}; | ||||||
struct _FTZ {}; | ||||||
struct _IEEE754 {}; | ||||||
#if 0 // not used yet | ||||||
static constexpr auto kDAZ = _DAZ{}; | ||||||
static constexpr auto kFTZ = _FTZ{}; | ||||||
#endif | ||||||
static constexpr auto kIEEE754 = _IEEE754{}; | ||||||
}; | ||||||
|
||||||
struct FPExceptions { | ||||||
static constexpr auto kNone = 0; | ||||||
static constexpr auto kInvalid = FE_INVALID; | ||||||
static constexpr auto kDivByZero = FE_DIVBYZERO; | ||||||
static constexpr auto kOverflow = FE_OVERFLOW; | ||||||
static constexpr auto kUnderflow = FE_UNDERFLOW; | ||||||
}; | ||||||
|
||||||
/** | ||||||
* @brief RAII floating-point precision control class | ||||||
* | ||||||
* The Precise class provides automatic management of floating-point environment | ||||||
* settings during its lifetime. It uses RAII principles to save the current | ||||||
* floating-point state on construction and restore it on destruction. | ||||||
* | ||||||
* The class is configured using variadic template arguments that specify | ||||||
* the desired floating-point behavior through tag types. | ||||||
* | ||||||
* **IMPORTANT PERFORMANCE NOTE**: Create the Precise object BEFORE loops, | ||||||
* not inside them. The constructor and destructor have overhead from saving | ||||||
* and restoring floating-point state, so it should be done once per | ||||||
* computational scope, not per iteration. | ||||||
* | ||||||
* @tparam Args Variadic template arguments for configuration flags | ||||||
* | ||||||
* @example | ||||||
* ```cpp | ||||||
* using namespace hwy::HWY_NAMESPACE; | ||||||
* using namespace npsr; | ||||||
* using namespace npsr::HWY_NAMESPACE; | ||||||
* | ||||||
* Precise precise = {kLowAccuracy, kNoSpecialCases, kNoLargeArgument}; | ||||||
* const ScalableTag<float> d; | ||||||
* typename V = Vec<DFromV<SclableTag>>; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is a typo in the documentation comment: 'SclableTag' should be corrected to 'ScalableTag'.
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||
* for (size_t i = 0; i < n; i += Lanes(d)) { | ||||||
* V input = LoadU(d, &input[i]); | ||||||
* V result = Sin(precise, input); | ||||||
* StoreU(result, d, &output[i]); | ||||||
* } | ||||||
* ``` | ||||||
*/ | ||||||
template <typename... Args> class Precise { | ||||||
public: | ||||||
Precise(Args...) { | ||||||
if constexpr (!kNoExceptions) { | ||||||
fegetexceptflag(&_exceptions, FE_ALL_EXCEPT); | ||||||
} | ||||||
if constexpr (kRoundForce) { | ||||||
_rounding_mode = fegetround(); | ||||||
int new_mode = _NewRoundingMode(); | ||||||
if (_rounding_mode != new_mode) { | ||||||
_retrieve_rounding_mode = true; | ||||||
fesetround(new_mode); | ||||||
} | ||||||
} | ||||||
} | ||||||
void FlushExceptions() { fesetexceptflag(&_exceptions, FE_ALL_EXCEPT); } | ||||||
|
||||||
void Raise(int errors) { | ||||||
static_assert(!kNoExceptions, | ||||||
"Cannot raise exceptions in NoExceptions mode"); | ||||||
_exceptions |= errors; | ||||||
} | ||||||
~Precise() { | ||||||
FlushExceptions(); | ||||||
if constexpr (kRoundForce) { | ||||||
if (_retrieve_rounding_mode) { | ||||||
fesetround(_rounding_mode); | ||||||
} | ||||||
} | ||||||
} | ||||||
static constexpr bool kNoExceptions = | ||||||
(std::is_same_v<_NoExceptions, Args> || ...); | ||||||
static constexpr bool kNoLargeArgument = | ||||||
(std::is_same_v<_NoLargeArgument, Args> || ...); | ||||||
static constexpr bool kNoSpecialCases = | ||||||
(std::is_same_v<_NoSpecialCases, Args> || ...); | ||||||
static constexpr bool kLowAccuracy = | ||||||
(std::is_same_v<_LowAccuracy, Args> || ...); | ||||||
// defaults to high accuracy if no low accuracy flag is set | ||||||
static constexpr bool kHighAccuracy = !kLowAccuracy; | ||||||
// defaults to large argument support if no no large argument flag is set | ||||||
static constexpr bool kLargeArgument = !kNoLargeArgument; | ||||||
// defaults to special cases support if no no special cases flag is set | ||||||
static constexpr bool kSpecialCases = !kNoSpecialCases; | ||||||
// defaults to exception support if no no exception flag is set | ||||||
static constexpr bool kException = !kNoExceptions; | ||||||
|
||||||
static constexpr bool kRoundForce = | ||||||
(std::is_same_v<Round::_Force, Args> || ...); | ||||||
static constexpr bool _kRoundNearest = | ||||||
(std::is_same_v<Round::_Nearest, Args> || ...); | ||||||
static constexpr bool kRoundZero = | ||||||
(std::is_same_v<Round::_Zero, Args> || ...); | ||||||
static constexpr bool kRoundDown = | ||||||
(std::is_same_v<Round::_Down, Args> || ...); | ||||||
static constexpr bool kRoundUp = (std::is_same_v<Round::_Up, Args> || ...); | ||||||
// only one rounding mode can be set | ||||||
static_assert((_kRoundNearest + kRoundDown + kRoundUp + kRoundZero) <= 1, | ||||||
"Only one rounding mode can be set at a time"); | ||||||
// if no rounding mode is set, default to round nearest | ||||||
static constexpr bool kRoundNearest = | ||||||
_kRoundNearest || (!kRoundDown && !kRoundUp && !kRoundZero); | ||||||
|
||||||
static constexpr bool kDAZ = (std::is_same_v<Subnormal::_DAZ, Args> || ...); | ||||||
static constexpr bool kFTZ = (std::is_same_v<Subnormal::_FTZ, Args> || ...); | ||||||
static constexpr bool _kIEEE754 = | ||||||
(std::is_same_v<Subnormal::_IEEE754, Args> || ...); | ||||||
static_assert(!_kIEEE754 || !(kDAZ || kFTZ), | ||||||
"IEEE754 mode cannot be used " | ||||||
"with Denormals Are Zero (DAZ) or Flush To Zero (FTZ) " | ||||||
"subnormal handling"); | ||||||
static constexpr bool kIEEE754 = _kIEEE754 || !(kDAZ || kFTZ); | ||||||
|
||||||
private: | ||||||
int _NewRoundingMode() const { | ||||||
if constexpr (kRoundDown) { | ||||||
return FE_DOWNWARD; | ||||||
} else if constexpr (kRoundUp) { | ||||||
return FE_UPWARD; | ||||||
} else if constexpr (kRoundZero) { | ||||||
return FE_TOWARDZERO; | ||||||
} else { | ||||||
return FE_TONEAREST; | ||||||
} | ||||||
} | ||||||
int _rounding_mode = 0; | ||||||
bool _retrieve_rounding_mode = false; | ||||||
fexcept_t _exceptions; | ||||||
}; | ||||||
|
||||||
} // namespace npsr | ||||||
|
||||||
#endif // NUMPY_SIMD_ROUTINES_NPSR_COMMON_H_ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
// To include them once per target, which is ensured by the toggle check. | ||
// clang-format off | ||
#if defined(_NPSR_NPSR_H_) == defined(HWY_TARGET_TOGGLE) // NOLINT | ||
#ifdef _NPSR_NPSR_H_ | ||
#undef _NPSR_NPSR_H_ | ||
#else | ||
#define _NPSR_NPSR_H_ | ||
#endif | ||
|
||
#include "npsr/trig/inl.h" | ||
|
||
#endif // _NPSR_NPSR_H_ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#include "npsr/common.h" | ||
|
||
#include "npsr/sincos/large-inl.h" | ||
#include "npsr/sincos/small-inl.h" | ||
|
||
// clang-format off | ||
#if defined(NPSR_TRIG_INL_H_) == defined(HWY_TARGET_TOGGLE) // NOLINT | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. indeed |
||
#ifdef NPSR_TRIG_INL_H_ | ||
#undef NPSR_TRIG_INL_H_ | ||
#else | ||
#define NPSR_TRIG_INL_H_ | ||
#endif | ||
|
||
HWY_BEFORE_NAMESPACE(); | ||
|
||
namespace npsr::HWY_NAMESPACE::sincos { | ||
template <bool IS_COS, typename V, typename Prec> | ||
HWY_API V SinCos(Prec &prec, V x) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Potentially worth creating a new macro for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggest a name? HWY_API just combine HWY_ATTR and HWY_INLINE. |
||
using namespace hwy::HWY_NAMESPACE; | ||
constexpr bool kIsSingle = std::is_same_v<TFromV<V>, float>; | ||
const DFromV<V> d; | ||
V ret; | ||
if constexpr (Prec::kLowAccuracy) { | ||
ret = SmallArgLow<IS_COS>(x); | ||
} | ||
else { | ||
ret = SmallArg<IS_COS>(x); | ||
} | ||
if constexpr (Prec::kLargeArgument) { | ||
// Identify inputs requiring extended precision (very large arguments) | ||
auto has_large_arg = Gt(Abs(x), Set(d, kIsSingle ? 10000.0 : 16777216.0)); | ||
if (HWY_UNLIKELY(!AllFalse(d, has_large_arg))) { | ||
// Use extended precision algorithm for large arguments | ||
ret = IfThenElse(has_large_arg, LargeArg<IS_COS>(x), ret); | ||
} | ||
} | ||
if constexpr (Prec::kSpecialCases || Prec::kException) { | ||
auto is_finite = IsFinite(x); | ||
ret = IfThenElse(is_finite, ret, NaN(d)); | ||
if constexpr (Prec::kException) { | ||
prec.Raise(!AllFalse(d, IsInf(x)) ? FPExceptions::kInvalid : 0); | ||
} | ||
} | ||
return ret; | ||
} | ||
} // namespace npsr::HWY_NAMESPACE::sincos | ||
|
||
namespace npsr::HWY_NAMESPACE { | ||
template <typename V, typename Prec> | ||
HWY_API V Sin(Prec &prec, V x) { | ||
return sincos::SinCos<false>(prec, x); | ||
} | ||
template <typename V, typename Prec> | ||
HWY_API V Cos(Prec &prec, V x) { | ||
return sincos::SinCos<true>(prec, x); | ||
} | ||
} // namespace npsr::HWY_NAMESPACE | ||
|
||
HWY_AFTER_NAMESPACE(); | ||
|
||
#endif // NPSR_TRIG_INL_H_ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we add these when we come across use cases for them?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sure, this good point .. I'm gonna remove them