Closed
Description
The following program fails on Mono and returns +∞
instead of NaN
as expected. This is likely an error somewhere in the branch handling edge case inputs at the end of the Invoke
function. It was caught as part of #97114
// This code is based on `vrd2_exp` from amd/aocl-libm-ose
// Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// Implementation Notes
// ----------------------
// 1. Argument Reduction:
// e^x = 2^(x/ln2) = 2^(x*(64/ln(2))/64) --- (1)
//
// Choose 'n' and 'f', such that
// x * 64/ln2 = n + f --- (2) | n is integer
// | |f| <= 0.5
// Choose 'm' and 'j' such that,
// n = (64 * m) + j --- (3)
//
// From (1), (2) and (3),
// e^x = 2^((64*m + j + f)/64)
// = (2^m) * (2^(j/64)) * 2^(f/64)
// = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64))
//
// 2. Table Lookup
// Values of (2^(j/64)) are precomputed, j = 0, 1, 2, 3 ... 63
//
// 3. Polynomial Evaluation
// From (2),
// f = x*(64/ln(2)) - n
// Let,
// r = f*(ln(2)/64) = x - n*(ln(2)/64)
//
// 4. Reconstruction
// Thus,
// e^x = (2^m) * (2^(j/64)) * e^r
using System.Runtime.Intrinsics;
var res = Invoke(Vector128.Create(double.NaN));
Console.WriteLine(res);
const ulong V_ARG_MAX = 0x40862000_00000000;
const ulong V_DP64_BIAS = 1023;
const double V_EXPF_MIN = -709.782712893384;
const double V_EXPF_MAX = +709.782712893384;
const double V_EXPF_HUGE = 6755399441055744;
const double V_TBL_LN2 = 1.4426950408889634;
const double V_LN2_HEAD = +0.693359375;
const double V_LN2_TAIL = -0.00021219444005469057;
const double C3 = 0.5000000000000018;
const double C4 = 0.1666666666666617;
const double C5 = 0.04166666666649277;
const double C6 = 0.008333333333559272;
const double C7 = 0.001388888895122404;
const double C8 = 0.00019841269432677495;
const double C9 = 2.4801486521374483E-05;
const double C10 = 2.7557622532543023E-06;
const double C11 = 2.7632293298250954E-07;
const double C12 = 2.499430431958571E-08;
static Vector128<double> Invoke(Vector128<double> x)
{
// x * (64.0 / ln(2))
Vector128<double> z = x * Vector128.Create(V_TBL_LN2);
Vector128<double> dn = z + Vector128.Create(V_EXPF_HUGE);
// n = (int)z
Vector128<ulong> n = dn.AsUInt64();
// dn = (double)n
dn -= Vector128.Create(V_EXPF_HUGE);
// r = x - (dn * (ln(2) / 64))
// where ln(2) / 64 is split into Head and Tail values
Vector128<double> r = x - (dn * Vector128.Create(V_LN2_HEAD)) - (dn * Vector128.Create(V_LN2_TAIL));
Vector128<double> r2 = r * r;
Vector128<double> r4 = r2 * r2;
Vector128<double> r8 = r4 * r4;
// Compute polynomial
Vector128<double> poly = ((Vector128.Create(C12) * r + Vector128.Create(C11)) * r2 +
Vector128.Create(C10) * r + Vector128.Create(C9)) * r8 +
((Vector128.Create(C8) * r + Vector128.Create(C7)) * r2 +
(Vector128.Create(C6) * r + Vector128.Create(C5))) * r4 +
((Vector128.Create(C4) * r + Vector128.Create(C3)) * r2 + (r + Vector128<double>.One));
// m = (n - j) / 64
// result = polynomial * 2^m
Vector128<double> ret = poly * ((n + Vector128.Create(V_DP64_BIAS)) << 52).AsDouble();
// Check if -709 < vx < 709
if (Vector128.GreaterThanAny(Vector128.Abs(x).AsUInt64(), Vector128.Create(V_ARG_MAX)))
{
// (x > V_EXPF_MAX) ? double.PositiveInfinity : x
Vector128<double> infinityMask = Vector128.GreaterThan(x, Vector128.Create(V_EXPF_MAX));
ret = Vector128.ConditionalSelect(
infinityMask,
Vector128.Create(double.PositiveInfinity),
ret
);
// (x < V_EXPF_MIN) ? 0 : x
ret = Vector128.AndNot(ret, Vector128.LessThan(x, Vector128.Create(V_EXPF_MIN)));
}
return ret;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment