Skip to content

Commit 4080f17

Browse files
authored
[libc][math] Implement double precision sincos correctly rounded to all rounding modes. (#96719)
Sharing the same algorithm as double precision sin: #95736 and cos: #96591
1 parent 4ed8796 commit 4080f17

File tree

13 files changed

+459
-1
lines changed

13 files changed

+459
-1
lines changed

libc/config/darwin/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ set(TARGET_LIBM_ENTRYPOINTS
225225
libc.src.math.scalbn
226226
libc.src.math.scalbnf
227227
libc.src.math.scalbnl
228+
libc.src.math.sincos
228229
libc.src.math.sincosf
229230
libc.src.math.sinhf
230231
libc.src.math.sin

libc/config/linux/aarch64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,7 @@ set(TARGET_LIBM_ENTRYPOINTS
480480
libc.src.math.scalbn
481481
libc.src.math.scalbnf
482482
libc.src.math.scalbnl
483+
libc.src.math.sincos
483484
libc.src.math.sincosf
484485
libc.src.math.sinhf
485486
libc.src.math.sin

libc/config/linux/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ set(TARGET_LIBM_ENTRYPOINTS
358358
libc.src.math.scalbn
359359
libc.src.math.scalbnf
360360
libc.src.math.scalbnl
361+
libc.src.math.sincos
361362
libc.src.math.sincosf
362363
libc.src.math.sin
363364
libc.src.math.sinf

libc/config/linux/riscv/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,7 @@ set(TARGET_LIBM_ENTRYPOINTS
488488
libc.src.math.scalbn
489489
libc.src.math.scalbnf
490490
libc.src.math.scalbnl
491+
libc.src.math.sincos
491492
libc.src.math.sincosf
492493
libc.src.math.sinhf
493494
libc.src.math.sin

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,7 @@ set(TARGET_LIBM_ENTRYPOINTS
510510
libc.src.math.scalbnf
511511
libc.src.math.scalbnl
512512
libc.src.math.sin
513+
libc.src.math.sincos
513514
libc.src.math.sincosf
514515
libc.src.math.sinhf
515516
libc.src.math.sinf

libc/config/windows/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ set(TARGET_LIBM_ENTRYPOINTS
251251
libc.src.math.scalbnl
252252
libc.src.math.sin
253253
libc.src.math.sincosf
254+
libc.src.math.sincos
254255
libc.src.math.sinf
255256
libc.src.math.sinhf
256257
libc.src.math.sqrt

libc/docs/math/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ Higher Math Functions
326326
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
327327
| sin | |check| | |check| | | | | 7.12.4.6 | F.10.1.6 |
328328
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
329-
| sincos | |check| | large | | | | | |
329+
| sincos | |check| | |check| | | | | | |
330330
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
331331
| sinh | |check| | | | | | 7.12.5.5 | F.10.2.5 |
332332
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+

libc/src/math/generic/CMakeLists.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,28 @@ add_entrypoint_object(
261261
-O3
262262
)
263263

264+
# Entrypoint object for the double-precision sincos: built from sincos.cpp,
# exposing ../sincos.h, and compiled with -O3.
add_entrypoint_object(
265+
sincos
266+
SRCS
267+
sincos.cpp
268+
HDRS
269+
../sincos.h
270+
DEPENDS
271+
.range_reduction_double
272+
.sincos_eval
273+
libc.hdr.errno_macros
274+
libc.src.errno.errno
275+
libc.src.__support.FPUtil.double_double
276+
libc.src.__support.FPUtil.dyadic_float
277+
libc.src.__support.FPUtil.except_value_utils
278+
libc.src.__support.FPUtil.fenv_impl
279+
libc.src.__support.FPUtil.fp_bits
280+
libc.src.__support.FPUtil.multiply_add
281+
libc.src.__support.macros.optimization
282+
COMPILE_OPTIONS
283+
-O3
284+
)
285+
264286
add_entrypoint_object(
265287
sincosf
266288
SRCS

libc/src/math/generic/sincos.cpp

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
//===-- Double-precision sincos function ----------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/math/sincos.h"
10+
#include "hdr/errno_macros.h"
11+
#include "src/__support/FPUtil/FEnvImpl.h"
12+
#include "src/__support/FPUtil/FPBits.h"
13+
#include "src/__support/FPUtil/double_double.h"
14+
#include "src/__support/FPUtil/dyadic_float.h"
15+
#include "src/__support/FPUtil/except_value_utils.h"
16+
#include "src/__support/FPUtil/multiply_add.h"
17+
#include "src/__support/FPUtil/rounding_mode.h"
18+
#include "src/__support/common.h"
19+
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
20+
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
21+
#include "src/math/generic/sincos_eval.h"
22+
23+
#ifdef LIBC_TARGET_CPU_HAS_FMA
24+
#include "range_reduction_double_fma.h"
25+
26+
using LIBC_NAMESPACE::fma::FAST_PASS_EXPONENT;
27+
using LIBC_NAMESPACE::fma::ONE_TWENTY_EIGHT_OVER_PI;
28+
using LIBC_NAMESPACE::fma::range_reduction_small;
29+
using LIBC_NAMESPACE::fma::SIN_K_PI_OVER_128;
30+
31+
LIBC_INLINE constexpr bool NO_FMA = false;
32+
#else
33+
#include "range_reduction_double_nofma.h"
34+
35+
using LIBC_NAMESPACE::nofma::FAST_PASS_EXPONENT;
36+
using LIBC_NAMESPACE::nofma::ONE_TWENTY_EIGHT_OVER_PI;
37+
using LIBC_NAMESPACE::nofma::range_reduction_small;
38+
using LIBC_NAMESPACE::nofma::SIN_K_PI_OVER_128;
39+
40+
LIBC_INLINE constexpr bool NO_FMA = true;
41+
#endif // LIBC_TARGET_CPU_HAS_FMA
42+
43+
// TODO: We might be able to improve the performance of large range reduction of
44+
// non-FMA targets further by operating directly on 25-bit chunks of 128/pi and
45+
// pre-split SIN_K_PI_OVER_128, but that might double the memory footprint of
46+
// those lookup tables.
47+
#include "range_reduction_double_common.h"
48+
49+
#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
50+
#define LIBC_MATH_SINCOS_SKIP_ACCURATE_PASS
51+
#endif
52+
53+
namespace LIBC_NAMESPACE {
54+
55+
using DoubleDouble = fputil::DoubleDouble;
56+
using Float128 = typename fputil::DyadicFloat<128>;
57+
58+
LLVM_LIBC_FUNCTION(void, sincos, (double x, double *sin_x, double *cos_x)) {
59+
using FPBits = typename fputil::FPBits<double>;
60+
FPBits xbits(x);
61+
62+
uint16_t x_e = xbits.get_biased_exponent();
63+
64+
DoubleDouble y;
65+
unsigned k;
66+
generic::LargeRangeReduction<NO_FMA> range_reduction_large;
67+
68+
// |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA)
69+
if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) {
70+
// |x| < 2^-27
71+
if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 27)) {
72+
// Signed zeros.
73+
if (LIBC_UNLIKELY(x == 0.0)) {
74+
*sin_x = x;
75+
*cos_x = 1.0;
76+
return;
77+
}
78+
79+
// For |x| < 2^-27, max(|sin(x) - x|, |cos(x) - 1|) < ulp(x)/2.
80+
#ifdef LIBC_TARGET_CPU_HAS_FMA
81+
*sin_x = fputil::multiply_add(x, -0x1.0p-54, x);
82+
*cos_x = fputil::multiply_add(x, -x, 1.0);
83+
#else
84+
*cos_x = fputil::round_result_slightly_down(1.0);
85+
86+
if (LIBC_UNLIKELY(x_e < 4)) {
87+
int rounding_mode = fputil::quick_get_round();
88+
if (rounding_mode == FE_TOWARDZERO ||
89+
(xbits.sign() == Sign::POS && rounding_mode == FE_DOWNWARD) ||
90+
(xbits.sign() == Sign::NEG && rounding_mode == FE_UPWARD))
91+
*sin_x = FPBits(xbits.uintval() - 1).get_val();
92+
}
93+
*sin_x = fputil::multiply_add(x, -0x1.0p-54, x);
94+
#endif // LIBC_TARGET_CPU_HAS_FMA
95+
return;
96+
}
97+
98+
// // Small range reduction.
99+
k = range_reduction_small(x, y);
100+
} else {
101+
// Inf or NaN
102+
if (LIBC_UNLIKELY(x_e > 2 * FPBits::EXP_BIAS)) {
103+
// sin(+-Inf) = NaN
104+
if (xbits.get_mantissa() == 0) {
105+
fputil::set_errno_if_required(EDOM);
106+
fputil::raise_except_if_required(FE_INVALID);
107+
}
108+
*sin_x = *cos_x = x + FPBits::quiet_nan().get_val();
109+
return;
110+
}
111+
112+
// Large range reduction.
113+
k = range_reduction_large.compute_high_part(x);
114+
y = range_reduction_large.fast();
115+
}
116+
117+
DoubleDouble sin_y, cos_y;
118+
119+
generic::sincos_eval(y, sin_y, cos_y);
120+
121+
// Look up sin(k * pi/128) and cos(k * pi/128)
122+
// Memory saving versions:
123+
124+
// Use 128-entry table instead:
125+
// DoubleDouble sin_k = SIN_K_PI_OVER_128[k & 127];
126+
// uint64_t sin_s = static_cast<uint64_t>(k & 128) << (63 - 7);
127+
// sin_k.hi = FPBits(FPBits(sin_k.hi).uintval() ^ sin_s).get_val();
128+
// sin_k.lo = FPBits(FPBits(sin_k.hi).uintval() ^ sin_s).get_val();
129+
// DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + 64) & 127];
130+
// uint64_t cos_s = static_cast<uint64_t>((k + 64) & 128) << (63 - 7);
131+
// cos_k.hi = FPBits(FPBits(cos_k.hi).uintval() ^ cos_s).get_val();
132+
// cos_k.lo = FPBits(FPBits(cos_k.hi).uintval() ^ cos_s).get_val();
133+
134+
// Use 64-entry table instead:
135+
// auto get_idx_dd = [](unsigned kk) -> DoubleDouble {
136+
// unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
137+
// DoubleDouble ans = SIN_K_PI_OVER_128[idx];
138+
// if (kk & 128) {
139+
// ans.hi = -ans.hi;
140+
// ans.lo = -ans.lo;
141+
// }
142+
// return ans;
143+
// };
144+
// DoubleDouble sin_k = get_idx_dd(k);
145+
// DoubleDouble cos_k = get_idx_dd(k + 64);
146+
147+
// Fast look up version, but needs 256-entry table.
148+
// cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
149+
DoubleDouble sin_k = SIN_K_PI_OVER_128[k & 255];
150+
DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + 64) & 255];
151+
DoubleDouble msin_k{-sin_k.lo, -sin_k.hi};
152+
153+
// After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128).
154+
// So k is an integer and -pi / 256 <= y <= pi / 256.
155+
// Then sin(x) = sin((k * pi/128 + y)
156+
// = sin(y) * cos(k*pi/128) + cos(y) * sin(k*pi/128)
157+
DoubleDouble sin_k_cos_y = fputil::quick_mult<NO_FMA>(cos_y, sin_k);
158+
DoubleDouble cos_k_sin_y = fputil::quick_mult<NO_FMA>(sin_y, cos_k);
159+
// cos(x) = cos((k * pi/128 + y)
160+
// = cos(y) * cos(k*pi/128) - sin(y) * sin(k*pi/128)
161+
DoubleDouble cos_k_cos_y = fputil::quick_mult<NO_FMA>(cos_y, cos_k);
162+
DoubleDouble msin_k_sin_y = fputil::quick_mult<NO_FMA>(sin_y, msin_k);
163+
164+
DoubleDouble sin_dd =
165+
fputil::exact_add<false>(sin_k_cos_y.hi, cos_k_sin_y.hi);
166+
DoubleDouble cos_dd =
167+
fputil::exact_add<false>(cos_k_cos_y.hi, msin_k_sin_y.hi);
168+
sin_dd.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
169+
cos_dd.lo += msin_k_sin_y.lo + cos_k_cos_y.lo;
170+
171+
#ifdef LIBC_MATH_SINCOS_SKIP_ACCURATE_PASS
172+
*sin_x = sin_dd.hi + sin_dd.lo;
173+
*cos_x = cos_dd.hi + cos_dd.lo;
174+
return;
175+
#else
176+
// Accurate test and pass for correctly rounded implementation.
177+
178+
#ifdef LIBC_TARGET_CPU_HAS_FMA
179+
constexpr double ERR = 0x1.0p-70;
180+
#else
181+
// TODO: Improve non-FMA fast pass accuracy.
182+
constexpr double ERR = 0x1.0p-66;
183+
#endif // LIBC_TARGET_CPU_HAS_FMA
184+
185+
double sin_lp = sin_dd.lo + ERR;
186+
double sin_lm = sin_dd.lo - ERR;
187+
double cos_lp = cos_dd.lo + ERR;
188+
double cos_lm = cos_dd.lo - ERR;
189+
190+
double sin_upper = sin_dd.hi + sin_lp;
191+
double sin_lower = sin_dd.hi + sin_lm;
192+
double cos_upper = cos_dd.hi + cos_lp;
193+
double cos_lower = cos_dd.hi + cos_lm;
194+
195+
// Ziv's rounding test.
196+
if (LIBC_LIKELY(sin_upper == sin_lower && cos_upper == cos_lower)) {
197+
*sin_x = sin_upper;
198+
*cos_x = cos_upper;
199+
return;
200+
}
201+
202+
Float128 u_f128, sin_u, cos_u;
203+
if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT))
204+
u_f128 = generic::range_reduction_small_f128(x);
205+
else
206+
u_f128 = range_reduction_large.accurate();
207+
208+
generic::sincos_eval(u_f128, sin_u, cos_u);
209+
210+
auto get_sin_k = [](unsigned kk) -> Float128 {
211+
unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
212+
Float128 ans = generic::SIN_K_PI_OVER_128_F128[idx];
213+
if (kk & 128)
214+
ans.sign = Sign::NEG;
215+
return ans;
216+
};
217+
218+
// cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
219+
Float128 sin_k_f128 = get_sin_k(k);
220+
Float128 cos_k_f128 = get_sin_k(k + 64);
221+
Float128 msin_k_f128 = get_sin_k(k + 128);
222+
223+
// TODO: Add assertion if Ziv's accuracy tests fail in debug mode.
224+
// https://github.com/llvm/llvm-project/issues/96452.
225+
226+
if (sin_upper == sin_lower)
227+
*sin_x = sin_upper;
228+
else
229+
// sin(x) = sin((k * pi/128 + u)
230+
// = sin(u) * cos(k*pi/128) + cos(u) * sin(k*pi/128)
231+
*sin_x = static_cast<double>(
232+
fputil::quick_add(fputil::quick_mul(sin_k_f128, cos_u),
233+
fputil::quick_mul(cos_k_f128, sin_u)));
234+
235+
if (cos_upper == cos_lower)
236+
*cos_x = cos_upper;
237+
else
238+
// cos(x) = cos((k * pi/128 + u)
239+
// = cos(u) * cos(k*pi/128) - sin(u) * sin(k*pi/128)
240+
*cos_x = static_cast<double>(
241+
fputil::quick_add(fputil::quick_mul(cos_k_f128, cos_u),
242+
fputil::quick_mul(msin_k_f128, sin_u)));
243+
244+
#endif // !LIBC_MATH_SINCOS_SKIP_ACCURATE_PASS
245+
}
246+
247+
} // namespace LIBC_NAMESPACE

libc/test/src/math/CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,18 @@ add_fp_unittest(
7272
libc.src.__support.FPUtil.fp_bits
7373
)
7474

75+
# Unit test target for the double-precision sincos entrypoint; part of the
# libc-math-unittests suite and flagged NEED_MPFR.
add_fp_unittest(
76+
sincos_test
77+
NEED_MPFR
78+
SUITE
79+
libc-math-unittests
80+
SRCS
81+
sincos_test.cpp
82+
DEPENDS
83+
libc.src.math.sincos
84+
libc.src.__support.FPUtil.fp_bits
85+
)
86+
7587
add_fp_unittest(
7688
tanf_test
7789
NEED_MPFR

0 commit comments

Comments
 (0)