Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions stl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ set(HEADERS
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_minmax.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_ostream.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_print.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_random_ziggurat_tables.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_ranges_to.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_ranges_tuple_formatter.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_sanitizer_annotate_container.hpp
Expand Down
366 changes: 366 additions & 0 deletions stl/inc/__msvc_random_ziggurat_tables.hpp

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions stl/inc/header-units.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"__msvc_minmax.hpp",
"__msvc_ostream.hpp",
"__msvc_print.hpp",
"__msvc_random_ziggurat_tables.hpp",
"__msvc_ranges_to.hpp",
"__msvc_ranges_tuple_formatter.hpp",
"__msvc_sanitizer_annotate_container.hpp",
Expand Down
116 changes: 77 additions & 39 deletions stl/inc/random
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#if _STL_COMPILER_PREPROCESSOR
#include <__msvc_int128.hpp>
#include <__msvc_random_ziggurat_tables.hpp>
#include <algorithm>
#include <cmath>
#include <cstdint>
Expand Down Expand Up @@ -2645,53 +2646,90 @@ public:

private:
template <class _Engine>
result_type _Eval(_Engine& _Eng, const param_type& _Par0) {
// compute next value
// Knuth, vol. 2, p. 122, alg. P
_Ty _Res;
if (_Valid) {
_Res = _Xx2;
_Valid = false;
} else { // generate two values, store one, return one
_Ty _Vx1;
_Ty _Vx2;
_Ty _Sx;
for (;;) { // reject bad values to avoid generating NaN/Inf on the next calculations
_Vx1 = 2 * _Nrand_impl<_Ty>(_Eng) - 1;
_Vx2 = 2 * _Nrand_impl<_Ty>(_Eng) - 1;
_Sx = _Vx1 * _Vx1 + _Vx2 * _Vx2;
if (_Sx < _Ty{1} && _Vx1 != _Ty{0} && _Vx2 != _Ty{0}) {
// good values!
break;
}
}
_NODISCARD result_type _Eval_std(_Engine& _Eng) {
// McFarland, C. D. (2015). A modified ziggurat algorithm for generating exponentially and normally distributed
// pseudorandom numbers. Journal of Statistical Computation and Simulation, 86(7), 1281-1294.
// https://doi.org/10.1080/00949655.2015.1060234
using _Fty = conditional_t<numeric_limits<_Ty>::digits <= 24, float, double>;
constexpr const auto& _Tables = _Normal_distribution_tables<_Fty>::_Value;
using _Traits = remove_reference_t<decltype(_Tables)>;
_Rng_from_urng_v2<typename _Traits::_Uint_type, _Engine> _Generator(_Eng);

const auto _Xbits = static_cast<typename _Traits::_Xtype>(_Generator._Get_all_bits());
const auto _Regular_layer = static_cast<uint8_t>(_Xbits);

if (_Regular_layer < _Tables._Layer_num - 1) {
// rectangular region of a middle layer
return static_cast<_Ty>(_Tables._Layer_widths[_Regular_layer + 1] * static_cast<_Fty>(_Xbits));
Copy link
Contributor

@MattStephanson MattStephanson Feb 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In common cases (e.g., CVTSI2SD with ties-to-even) this conversion to floating point is slightly biased toward values where the significand ends in 0. It's also correlated with _Layer because a tie only happens when the lower eight or eleven bits of _Xbits are 1 followed by all 0s. Occurs below.

I'm unsure how much effect this has, but if it doesn't affect the performance much, maybe it would be cleaner to mask or shift out the lower bits as they're used.

[Edit: I shouldn't say that round-to-even bias happens only with those value of the lower bits. I think it's sufficient but not necessary.]

} else {
const auto _Lbits = static_cast<typename _Traits::_Uint_type>(_Generator._Get_all_bits());
const auto _Index = static_cast<uint8_t>(_Lbits);
const uint8_t _Irregular_layer =
_Lbits < _Tables._Alias_probabilities[_Index] ? _Index : _Tables._Alias_indices[_Index];

if (_Irregular_layer < _Tables._Layer_num) {
// tail of a non-bottommost layer
const _Fty _Xx0 = _Tables._Layer_widths[_Irregular_layer] * _Tables._Width_scale;
const _Fty _Xx1 = _Tables._Layer_widths[_Irregular_layer + 1] * _Tables._Width_scale;
const _Fty _Yy0 = _Tables._Layer_heights[_Irregular_layer];
const _Fty _Yy1 = _Tables._Layer_heights[_Irregular_layer + 1];
const _Fty _Xdiff = _Xx1 - _Xx0;
const _Fty _Ydiff = _Yy1 - _Yy0;

for (;;) {
const _Fty _Dx = _Xdiff * _STD _Nrand_impl<_Fty>(_Eng);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The upper bits of _Xbits could be used on the first iteration. Also for the tail, below.

const _Fty _Dy = _Ydiff * _STD _Nrand_impl<_Fty>(_Eng);

// _Dexp = -(_Xval^2 - _Xx0^2) / 2
// f(_Xval) = exp(-_Xval^2 / 2) = _Yy0 * exp(_Dexp)
// _Yy0 >= d(f(_Xval))/d(_Dexp) = f(_Xval) >= _Yy1
// _Dexp * _Yy0 <= f(_Xval) - _Yy0 <= _Dexp * _Yy1
const _Fty _Dexp = -_Dx * (_Xx0 + _Dx * _Fty{0.5});
if (_Dy > _Dexp * _Yy1) {
// _Yval > _Yy0 + _Dexp * _Yy1 >= f(_Xval), reject
continue;
}

const _Fty _Xval = _Xx0 + _Dx;
if (_Dy < _Dexp * _Yy0) {
// _Yval < _Yy0 + _Dexp * _Yy0 <= f(_Xval), accept
return static_cast<_Ty>(_Xbits < 0 ? -_Xval : _Xval);
}

_Ty _LogSx;
if (_Sx > _Ty{1e-4}) {
_LogSx = _STD log(_Sx);
const _Fty _Yval = _Yy0 + _Dy;
if (_Yval < _STD exp(_Xval * _Xval * _Fty{-0.5})) {
return static_cast<_Ty>(_Xbits < 0 ? -_Xval : _Xval);
}
}
} else if (_Irregular_layer == _Tables._Layer_num) {
// tail of the bottommost layer (infinite width)
constexpr _Fty _Tail = _Tables._Layer_widths[_Tables._Layer_num] * _Tables._Width_scale;
constexpr _Fty _Tail_reciprocal = _Fty{1.0} / _Tail;
for (;;) {
const _Fty _Xval = -_STD log(_Fty{1.0} - _STD _Nrand_impl<_Fty>(_Eng)) * _Tail_reciprocal;
const _Fty _Yval = _STD _Nrand_impl<_Fty>(_Eng);
if (_Yval < _STD exp(_Xval * _Xval * _Fty{-0.5})) {
const _Fty _Xorig = _Tail + _Xval;
return static_cast<_Ty>(_Xbits < 0 ? -_Xorig : _Xorig);
}
Comment on lines +2709 to +2714
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Marsaglia's tail method might be faster if we don't have a fast exponential generator, as it only needs sqrt when returning, instead of an additional log or exp. It can be found in Luc Devroye's book, section IX.1.2, or this DTIC report.

}
} else {
// Bad _Sx value! Very small values will overflow log(_Sx) / _Sx.
// Generate a new value based on scaling method.
const _Ty _Ln2{_Ty{0.69314718055994530941723212145818}};
const _Ty _Maxabs{(_STD max) (_STD abs(_Vx1), _STD abs(_Vx2))};
const int _ExpMax{_STD ilogb(_Maxabs)};
_Vx1 = _STD scalbn(_Vx1, -_ExpMax);
_Vx2 = _STD scalbn(_Vx2, -_ExpMax);
_Sx = _Vx1 * _Vx1 + _Vx2 * _Vx2;
_LogSx = _STD log(_Sx) + static_cast<_Ty>(_ExpMax) * (_Ln2 * 2);
// rectangular region of the bottommost layer
return static_cast<_Ty>(_Tables._Layer_widths[_Tables._Layer_num] * static_cast<_Fty>(_Xbits));
}

const _Ty _Fx{_STD sqrt(_Ty{-2} * _LogSx / _Sx)};
_Xx2 = _Fx * _Vx2; // save second value for next call
_Valid = true;
_Res = _Fx * _Vx1;
}
return _Res * _Par0._Sigma + _Par0._Mean;
}

template <class _Engine>
result_type _Eval(_Engine& _Eng, const param_type& _Par0) {
return _Eval_std(_Eng) * _Par0._Sigma + _Par0._Mean;
}

param_type _Par;
// TRANSITION, ABI: _Valid must be initialized to false, and must be reset to false when reset() is called, but is
// unused by the current implementation
bool _Valid;
_Ty _Xx2;
_Ty _Xx2; // TRANSITION, ABI: unused by the current implementation
};

_EXPORT_STD template <class _Ty = double>
Expand Down
1 change: 0 additions & 1 deletion tests/std/test.lst
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,6 @@ tests\GH_004477_mdspan_warning_5246
tests\GH_004597_self_swap
tests\GH_004609_heterogeneous_cmp_overloads
tests\GH_004618_mixed_operator_usage_keeps_statistical_properties
tests\GH_004618_normal_distribution_avoids_resets
tests\GH_004657_expected_constraints_permissive
tests\GH_004686_vectorization_on_trivial_assignability
tests\GH_004845_logical_operator_traits_with_non_bool_constant
Expand Down

This file was deleted.

This file was deleted.

Loading