-
Notifications
You must be signed in to change notification settings - Fork 1.6k
<random>: Implement modified ziggurat algorithm for normal_distribution
#6104
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
247f34d
8d3cfc3
68988ae
e5d43fa
85eb27e
d35ca8b
f8f2be2
7e51978
545401c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ | |
|
|
||
| #if _STL_COMPILER_PREPROCESSOR | ||
| #include <__msvc_int128.hpp> | ||
| #include <__msvc_random_ziggurat_tables.hpp> | ||
| #include <algorithm> | ||
| #include <cmath> | ||
| #include <cstdint> | ||
|
|
@@ -2645,53 +2646,90 @@ public: | |
|
|
||
| private: | ||
| template <class _Engine> | ||
| result_type _Eval(_Engine& _Eng, const param_type& _Par0) { | ||
| // compute next value | ||
| // Knuth, vol. 2, p. 122, alg. P | ||
| _Ty _Res; | ||
| if (_Valid) { | ||
| _Res = _Xx2; | ||
| _Valid = false; | ||
| } else { // generate two values, store one, return one | ||
| _Ty _Vx1; | ||
| _Ty _Vx2; | ||
| _Ty _Sx; | ||
| for (;;) { // reject bad values to avoid generating NaN/Inf on the next calculations | ||
| _Vx1 = 2 * _Nrand_impl<_Ty>(_Eng) - 1; | ||
| _Vx2 = 2 * _Nrand_impl<_Ty>(_Eng) - 1; | ||
| _Sx = _Vx1 * _Vx1 + _Vx2 * _Vx2; | ||
| if (_Sx < _Ty{1} && _Vx1 != _Ty{0} && _Vx2 != _Ty{0}) { | ||
| // good values! | ||
| break; | ||
| } | ||
| } | ||
| _NODISCARD result_type _Eval_std(_Engine& _Eng) { | ||
| // McFarland, C. D. (2015). A modified ziggurat algorithm for generating exponentially and normally distributed | ||
| // pseudorandom numbers. Journal of Statistical Computation and Simulation, 86(7), 1281-1294. | ||
| // https://doi.org/10.1080/00949655.2015.1060234 | ||
| using _Fty = conditional_t<numeric_limits<_Ty>::digits <= 24, float, double>; | ||
| constexpr const auto& _Tables = _Normal_distribution_tables<_Fty>::_Value; | ||
| using _Traits = remove_reference_t<decltype(_Tables)>; | ||
| _Rng_from_urng_v2<typename _Traits::_Uint_type, _Engine> _Generator(_Eng); | ||
|
|
||
| const auto _Xbits = static_cast<typename _Traits::_Xtype>(_Generator._Get_all_bits()); | ||
| const auto _Regular_layer = static_cast<uint8_t>(_Xbits); | ||
|
|
||
| if (_Regular_layer < _Tables._Layer_num - 1) { | ||
| // rectangular region of a middle layer | ||
| return static_cast<_Ty>(_Tables._Layer_widths[_Regular_layer + 1] * static_cast<_Fty>(_Xbits)); | ||
| } else { | ||
| const auto _Lbits = static_cast<typename _Traits::_Uint_type>(_Generator._Get_all_bits()); | ||
| const auto _Index = static_cast<uint8_t>(_Lbits); | ||
| const uint8_t _Irregular_layer = | ||
| _Lbits < _Tables._Alias_probabilities[_Index] ? _Index : _Tables._Alias_indices[_Index]; | ||
|
|
||
| if (_Irregular_layer < _Tables._Layer_num) { | ||
| // tail of a non-bottommost layer | ||
| const _Fty _Xx0 = _Tables._Layer_widths[_Irregular_layer] * _Tables._Width_scale; | ||
| const _Fty _Xx1 = _Tables._Layer_widths[_Irregular_layer + 1] * _Tables._Width_scale; | ||
| const _Fty _Yy0 = _Tables._Layer_heights[_Irregular_layer]; | ||
| const _Fty _Yy1 = _Tables._Layer_heights[_Irregular_layer + 1]; | ||
| const _Fty _Xdiff = _Xx1 - _Xx0; | ||
| const _Fty _Ydiff = _Yy1 - _Yy0; | ||
|
|
||
| for (;;) { | ||
| const _Fty _Dx = _Xdiff * _STD _Nrand_impl<_Fty>(_Eng); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The upper bits of |
||
| const _Fty _Dy = _Ydiff * _STD _Nrand_impl<_Fty>(_Eng); | ||
|
|
||
| // _Dexp = -(_Xval^2 - _Xx0^2) / 2 | ||
| // f(_Xval) = exp(-_Xval^2 / 2) = _Yy0 * exp(_Dexp) | ||
| // _Yy0 >= d(f(_Xval))/d(_Dexp) = f(_Xval) >= _Yy1 | ||
| // _Dexp * _Yy0 <= f(_Xval) - _Yy0 <= _Dexp * _Yy1 | ||
| const _Fty _Dexp = -_Dx * (_Xx0 + _Dx * _Fty{0.5}); | ||
| if (_Dy > _Dexp * _Yy1) { | ||
| // _Yval > _Yy0 + _Dexp * _Yy1 >= f(_Xval), reject | ||
| continue; | ||
| } | ||
|
|
||
| const _Fty _Xval = _Xx0 + _Dx; | ||
| if (_Dy < _Dexp * _Yy0) { | ||
| // _Yval < _Yy0 + _Dexp * _Yy0 <= f(_Xval), accept | ||
| return static_cast<_Ty>(_Xbits < 0 ? -_Xval : _Xval); | ||
| } | ||
|
|
||
| _Ty _LogSx; | ||
| if (_Sx > _Ty{1e-4}) { | ||
| _LogSx = _STD log(_Sx); | ||
| const _Fty _Yval = _Yy0 + _Dy; | ||
| if (_Yval < _STD exp(_Xval * _Xval * _Fty{-0.5})) { | ||
| return static_cast<_Ty>(_Xbits < 0 ? -_Xval : _Xval); | ||
| } | ||
| } | ||
| } else if (_Irregular_layer == _Tables._Layer_num) { | ||
| // tail of the bottommost layer (infinite width) | ||
| constexpr _Fty _Tail = _Tables._Layer_widths[_Tables._Layer_num] * _Tables._Width_scale; | ||
| constexpr _Fty _Tail_reciprocal = _Fty{1.0} / _Tail; | ||
| for (;;) { | ||
| const _Fty _Xval = -_STD log(_Fty{1.0} - _STD _Nrand_impl<_Fty>(_Eng)) * _Tail_reciprocal; | ||
| const _Fty _Yval = _STD _Nrand_impl<_Fty>(_Eng); | ||
| if (_Yval < _STD exp(_Xval * _Xval * _Fty{-0.5})) { | ||
| const _Fty _Xorig = _Tail + _Xval; | ||
| return static_cast<_Ty>(_Xbits < 0 ? -_Xorig : _Xorig); | ||
| } | ||
|
Comment on lines
+2709
to
+2714
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Marsaglia's tail method might be faster if we don't have a fast exponential generator, as it only needs |
||
| } | ||
| } else { | ||
| // Bad _Sx value! Very small values will overflow log(_Sx) / _Sx. | ||
| // Generate a new value based on scaling method. | ||
| const _Ty _Ln2{_Ty{0.69314718055994530941723212145818}}; | ||
| const _Ty _Maxabs{(_STD max) (_STD abs(_Vx1), _STD abs(_Vx2))}; | ||
| const int _ExpMax{_STD ilogb(_Maxabs)}; | ||
| _Vx1 = _STD scalbn(_Vx1, -_ExpMax); | ||
| _Vx2 = _STD scalbn(_Vx2, -_ExpMax); | ||
| _Sx = _Vx1 * _Vx1 + _Vx2 * _Vx2; | ||
| _LogSx = _STD log(_Sx) + static_cast<_Ty>(_ExpMax) * (_Ln2 * 2); | ||
| // rectangular region of the bottommost layer | ||
| return static_cast<_Ty>(_Tables._Layer_widths[_Tables._Layer_num] * static_cast<_Fty>(_Xbits)); | ||
| } | ||
|
|
||
| const _Ty _Fx{_STD sqrt(_Ty{-2} * _LogSx / _Sx)}; | ||
| _Xx2 = _Fx * _Vx2; // save second value for next call | ||
| _Valid = true; | ||
| _Res = _Fx * _Vx1; | ||
| } | ||
| return _Res * _Par0._Sigma + _Par0._Mean; | ||
| } | ||
|
|
||
| template <class _Engine> | ||
| result_type _Eval(_Engine& _Eng, const param_type& _Par0) { | ||
| return _Eval_std(_Eng) * _Par0._Sigma + _Par0._Mean; | ||
| } | ||
|
|
||
| param_type _Par; | ||
| // TRANSITION, ABI: _Valid must be initialized to false, and must be reset to false when reset() is called, but is | ||
| // unused by the current implementation | ||
| bool _Valid; | ||
| _Ty _Xx2; | ||
| _Ty _Xx2; // TRANSITION, ABI: unused by the current implementation | ||
| }; | ||
|
|
||
| _EXPORT_STD template <class _Ty = double> | ||
|
|
||
This file was deleted.
This file was deleted.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In common cases (e.g., CVTSI2SD with ties-to-even) this conversion to floating point is slightly biased toward values where the significand ends in 0. It's also correlated with `_Layer`, because a tie only happens when the lower eight or eleven bits of `_Xbits` are 1 followed by all 0s. Occurs below.
I'm unsure how much effect this has, but if it doesn't affect the performance much, maybe it would be cleaner to mask or shift out the lower bits as they're used.
[Edit: I shouldn't say that round-to-even bias happens only with those values of the lower bits. I think it's sufficient but not necessary.]