Skip to content

Commit ed3d051

Browse files
authored
[libc][math][c23] Add sinhf16 and coshf16 C23 math functions (#105947)
Part of #95250.
1 parent ed7868d commit ed3d051

File tree

17 files changed

+762
-2
lines changed

17 files changed

+762
-2
lines changed

libc/config/gpu/entrypoints.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
521521
libc.src.math.canonicalizef16
522522
libc.src.math.ceilf16
523523
libc.src.math.copysignf16
524+
libc.src.math.coshf16
524525
libc.src.math.exp10f16
525526
libc.src.math.exp10m1f16
526527
libc.src.math.exp2f16
@@ -585,6 +586,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
585586
libc.src.math.scalbnf16
586587
libc.src.math.setpayloadf16
587588
libc.src.math.setpayloadsigf16
589+
libc.src.math.sinhf16
588590
libc.src.math.totalorderf16
589591
libc.src.math.totalordermagf16
590592
libc.src.math.truncf16

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
610610
libc.src.math.canonicalizef16
611611
libc.src.math.ceilf16
612612
libc.src.math.copysignf16
613+
libc.src.math.coshf16
613614
libc.src.math.exp10f16
614615
libc.src.math.exp10m1f16
615616
libc.src.math.exp2f16
@@ -678,6 +679,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
678679
libc.src.math.scalbnf16
679680
libc.src.math.setpayloadf16
680681
libc.src.math.setpayloadsigf16
682+
libc.src.math.sinhf16
681683
libc.src.math.sinpif16
682684
libc.src.math.totalorderf16
683685
libc.src.math.totalordermagf16

libc/docs/math/index.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ Higher Math Functions
278278
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
279279
| cos | |check| | |check| | | | | 7.12.4.5 | F.10.1.5 |
280280
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
281-
| cosh | |check| | | | | | 7.12.5.4 | F.10.2.4 |
281+
| cosh | |check| | | | |check| | | 7.12.5.4 | F.10.2.4 |
282282
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
283283
| cospi | |check| | | | | | 7.12.4.12 | F.10.1.12 |
284284
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
@@ -340,7 +340,7 @@ Higher Math Functions
340340
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
341341
| sincos | |check| | |check| | | | | | |
342342
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
343-
| sinh | |check| | | | | | 7.12.5.5 | F.10.2.5 |
343+
| sinh | |check| | | | |check| | | 7.12.5.5 | F.10.2.5 |
344344
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
345345
| sinpi | |check| | | | |check| | | 7.12.4.13 | F.10.1.13 |
346346
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+

libc/spec/stdc.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -792,7 +792,11 @@ def StdC : StandardSpec<"stdc"> {
792792
FunctionSpec<"pow", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
793793

794794
FunctionSpec<"coshf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
795+
GuardedFunctionSpec<"coshf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
796+
795797
FunctionSpec<"sinhf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
798+
GuardedFunctionSpec<"sinhf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
799+
796800
FunctionSpec<"tanhf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
797801

798802
FunctionSpec<"acosf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,

libc/src/math/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,11 @@ add_math_entrypoint_object(copysignf128)
8989

9090
add_math_entrypoint_object(cos)
9191
add_math_entrypoint_object(cosf)
92+
9293
add_math_entrypoint_object(cosh)
9394
add_math_entrypoint_object(coshf)
95+
add_math_entrypoint_object(coshf16)
96+
9497
add_math_entrypoint_object(cospif)
9598

9699
add_math_entrypoint_object(daddl)
@@ -481,6 +484,7 @@ add_math_entrypoint_object(sinpif16)
481484

482485
add_math_entrypoint_object(sinh)
483486
add_math_entrypoint_object(sinhf)
487+
add_math_entrypoint_object(sinhf16)
484488

485489
add_math_entrypoint_object(sqrt)
486490
add_math_entrypoint_object(sqrtf)

libc/src/math/coshf16.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//===-- Implementation header for coshf16 -----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_MATH_COSHF16_H
10+
#define LLVM_LIBC_SRC_MATH_COSHF16_H
11+
12+
#include "src/__support/macros/config.h"
13+
#include "src/__support/macros/properties/types.h"
14+
15+
namespace LIBC_NAMESPACE_DECL {
16+
17+
float16 coshf16(float16 x);
18+
19+
} // namespace LIBC_NAMESPACE_DECL
20+
21+
#endif // LLVM_LIBC_SRC_MATH_COSHF16_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4218,6 +4218,25 @@ add_entrypoint_object(
42184218
-O3
42194219
)
42204220

4221+
add_entrypoint_object(
4222+
coshf16
4223+
SRCS
4224+
coshf16.cpp
4225+
HDRS
4226+
../coshf16.h
4227+
DEPENDS
4228+
.expxf16
4229+
libc.hdr.errno_macros
4230+
libc.hdr.fenv_macros
4231+
libc.src.__support.FPUtil.except_value_utils
4232+
libc.src.__support.FPUtil.fenv_impl
4233+
libc.src.__support.FPUtil.fp_bits
4234+
libc.src.__support.FPUtil.rounding_mode
4235+
libc.src.__support.macros.optimization
4236+
COMPILE_OPTIONS
4237+
-O3
4238+
)
4239+
42214240
add_entrypoint_object(
42224241
sinhf
42234242
SRCS
@@ -4233,6 +4252,25 @@ add_entrypoint_object(
42334252
-O3
42344253
)
42354254

4255+
add_entrypoint_object(
4256+
sinhf16
4257+
SRCS
4258+
sinhf16.cpp
4259+
HDRS
4260+
../sinhf16.h
4261+
DEPENDS
4262+
.expxf16
4263+
libc.hdr.errno_macros
4264+
libc.hdr.fenv_macros
4265+
libc.src.__support.FPUtil.except_value_utils
4266+
libc.src.__support.FPUtil.fenv_impl
4267+
libc.src.__support.FPUtil.fp_bits
4268+
libc.src.__support.FPUtil.rounding_mode
4269+
libc.src.__support.macros.optimization
4270+
COMPILE_OPTIONS
4271+
-O3
4272+
)
4273+
42364274
add_entrypoint_object(
42374275
tanhf
42384276
SRCS
@@ -5297,6 +5335,7 @@ add_header_library(
52975335
expxf16.h
52985336
DEPENDS
52995337
libc.src.__support.CPP.array
5338+
libc.src.__support.FPUtil.cast
53005339
libc.src.__support.FPUtil.fp_bits
53015340
libc.src.__support.FPUtil.multiply_add
53025341
libc.src.__support.FPUtil.nearest_integer

libc/src/math/generic/coshf16.cpp

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
//===-- Half-precision cosh(x) function -----------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/math/coshf16.h"
10+
#include "expxf16.h"
11+
#include "hdr/errno_macros.h"
12+
#include "hdr/fenv_macros.h"
13+
#include "src/__support/FPUtil/FEnvImpl.h"
14+
#include "src/__support/FPUtil/FPBits.h"
15+
#include "src/__support/FPUtil/except_value_utils.h"
16+
#include "src/__support/FPUtil/rounding_mode.h"
17+
#include "src/__support/common.h"
18+
#include "src/__support/macros/config.h"
19+
#include "src/__support/macros/optimization.h"
20+
21+
namespace LIBC_NAMESPACE_DECL {
22+
23+
// Non-negative inputs for which coshf16 returns a table-provided result
// instead of the generic evaluation (looked up by the raw bits of x).
// Each entry starts with the bit pattern of x, followed by the bit pattern of
// the result rounded toward zero (RZ), as spelled out in the per-entry comment.
// NOTE(review): the three trailing fields are presumably per-rounding-mode
// adjustments applied to the RZ result -- confirm against the definition of
// fputil::ExceptValues.
static constexpr fputil::ExceptValues<float16, 9> COSHF16_EXCEPTS_POS = {{
24+
// x = 0x1.6ap-5, coshf16(x) = 0x1p+0 (RZ)
25+
{0x29a8U, 0x3c00U, 1U, 0U, 1U},
26+
// x = 0x1.8c4p+0, coshf16(x) = 0x1.3a8p+1 (RZ)
27+
{0x3e31U, 0x40eaU, 1U, 0U, 0U},
28+
// x = 0x1.994p+0, coshf16(x) = 0x1.498p+1 (RZ)
29+
{0x3e65U, 0x4126U, 1U, 0U, 0U},
30+
// x = 0x1.b6p+0, coshf16(x) = 0x1.6d8p+1 (RZ)
31+
{0x3ed8U, 0x41b6U, 1U, 0U, 1U},
32+
// x = 0x1.aap+1, coshf16(x) = 0x1.be8p+3 (RZ)
33+
{0x42a8U, 0x4afaU, 1U, 0U, 1U},
34+
// x = 0x1.cc4p+1, coshf16(x) = 0x1.23cp+4 (RZ)
35+
{0x4331U, 0x4c8fU, 1U, 0U, 0U},
36+
// x = 0x1.288p+2, coshf16(x) = 0x1.9b4p+5 (RZ)
37+
{0x44a2U, 0x526dU, 1U, 0U, 0U},
38+
// x = 0x1.958p+2, coshf16(x) = 0x1.1a4p+8 (RZ)
39+
{0x4656U, 0x5c69U, 1U, 0U, 0U},
40+
// x = 0x1.5fp+3, coshf16(x) = 0x1.c54p+14 (RZ)
41+
{0x497cU, 0x7715U, 1U, 0U, 1U},
42+
}};
43+
44+
// Negative inputs for which coshf16 returns a table-provided result instead of
// the generic evaluation (looked up by the raw bits of x, sign bit included).
// Every x listed here is the negation of an x that also appears in
// COSHF16_EXCEPTS_POS, with the same result fields.
// NOTE(review): the three trailing fields are presumably per-rounding-mode
// adjustments applied to the RZ result -- confirm against the definition of
// fputil::ExceptValues.
static constexpr fputil::ExceptValues<float16, 4> COSHF16_EXCEPTS_NEG = {{
45+
// x = -0x1.6ap-5, coshf16(x) = 0x1p+0 (RZ)
46+
{0xa9a8U, 0x3c00U, 1U, 0U, 1U},
47+
// x = -0x1.b6p+0, coshf16(x) = 0x1.6d8p+1 (RZ)
48+
{0xbed8U, 0x41b6U, 1U, 0U, 1U},
49+
// x = -0x1.288p+2, coshf16(x) = 0x1.9b4p+5 (RZ)
50+
{0xc4a2U, 0x526dU, 1U, 0U, 0U},
51+
// x = -0x1.5fp+3, coshf16(x) = 0x1.c54p+14 (RZ)
52+
{0xc97cU, 0x7715U, 1U, 0U, 1U},
53+
}};
54+
55+
// Half-precision hyperbolic cosine entry point.
// Order of checks:
//   1. NaN, infinity and inputs large enough to overflow are resolved first.
//   2. Otherwise the sign-specific exceptional-value table is consulted.
//   3. Everything else is forwarded to eval_sinh_or_cosh with IsSinh = false.
LLVM_LIBC_FUNCTION(float16, coshf16, (float16 x)) {
56+
using FPBits = fputil::FPBits<float16>;
57+
FPBits x_bits(x);
58+
59+
uint16_t x_u = x_bits.uintval();
60+
// Mask off the top bit so inputs of either sign share the magnitude tests.
uint16_t x_abs = x_u & 0x7fffU;
61+
62+
// When |x| >= acosh(2^16), or x is NaN.
63+
if (LIBC_UNLIKELY(x_abs >= 0x49e5U)) {
64+
// cosh(NaN) = NaN
65+
if (x_bits.is_nan()) {
66+
// A signaling NaN raises FE_INVALID and is replaced by a quiet NaN;
// a quiet NaN is returned unchanged.
if (x_bits.is_signaling_nan()) {
67+
fputil::raise_except_if_required(FE_INVALID);
68+
return FPBits::quiet_nan().get_val();
69+
}
70+
71+
return x;
72+
}
73+
74+
// When |x| >= acosh(2^16).
// NOTE(review): x_abs is unchanged since the enclosing test, so this
// condition is always true once the NaN case has returned above.
75+
if (x_abs >= 0x49e5U) {
76+
// cosh(+/-inf) = +inf
77+
if (x_bits.is_inf())
78+
return FPBits::inf().get_val();
79+
80+
// Finite overflow: the returned value depends on the current rounding mode.
switch (fputil::quick_get_round()) {
81+
case FE_TONEAREST:
82+
case FE_UPWARD:
83+
fputil::set_errno_if_required(ERANGE);
84+
fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
85+
return FPBits::inf().get_val();
86+
// NOTE(review): for the remaining rounding modes the largest finite value
// is returned, but no errno is set and no exception is raised on this
// path -- confirm this asymmetry with the branch above is intended.
default:
87+
return FPBits::max_normal().get_val();
88+
}
89+
}
90+
}
91+
92+
// Separate tables are kept per sign; the lookup key is the raw bit pattern.
if (x_bits.is_pos()) {
93+
if (auto r = COSHF16_EXCEPTS_POS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
94+
return r.value();
95+
} else {
96+
if (auto r = COSHF16_EXCEPTS_NEG.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
97+
return r.value();
98+
}
99+
100+
return eval_sinh_or_cosh</*IsSinh=*/false>(x);
101+
}
102+
103+
} // namespace LIBC_NAMESPACE_DECL

libc/src/math/generic/expxf16.h

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "src/__support/CPP/array.h"
1313
#include "src/__support/FPUtil/FPBits.h"
1414
#include "src/__support/FPUtil/PolyEval.h"
15+
#include "src/__support/FPUtil/cast.h"
1516
#include "src/__support/FPUtil/multiply_add.h"
1617
#include "src/__support/FPUtil/nearest_integer.h"
1718
#include "src/__support/macros/attributes.h"
@@ -174,6 +175,119 @@ LIBC_INLINE ExpRangeReduction exp10_range_reduction(float16 x) {
174175
return {exp2_hi_mid, exp10_lo};
175176
}
176177

178+
// Generated by Sollya with the following commands:
179+
// > display = hexadecimal;
180+
// > round(log2(exp(1)), SG, RN);
181+
static constexpr float LOG2F_E = 0x1.715476p+0f;
182+
183+
// Generated by Sollya with the following commands:
184+
// > display = hexadecimal;
185+
// > round(log(2), SG, RN);
186+
static constexpr float LOGF_2 = 0x1.62e43p-1f;
187+
188+
// Generated by Sollya with the following commands:
189+
// > display = hexadecimal;
190+
// > for i from 0 to 31 do printsingle(round(2^(i * 2^-5), SG, RN));
191+
static constexpr cpp::array<uint32_t, 32> EXP2_MID_5_BITS = {
192+
0x3f80'0000U, 0x3f82'cd87U, 0x3f85'aac3U, 0x3f88'980fU, 0x3f8b'95c2U,
193+
0x3f8e'a43aU, 0x3f91'c3d3U, 0x3f94'f4f0U, 0x3f98'37f0U, 0x3f9b'8d3aU,
194+
0x3f9e'f532U, 0x3fa2'7043U, 0x3fa5'fed7U, 0x3fa9'a15bU, 0x3fad'583fU,
195+
0x3fb1'23f6U, 0x3fb5'04f3U, 0x3fb8'fbafU, 0x3fbd'08a4U, 0x3fc1'2c4dU,
196+
0x3fc5'672aU, 0x3fc9'b9beU, 0x3fce'248cU, 0x3fd2'a81eU, 0x3fd7'44fdU,
197+
0x3fdb'fbb8U, 0x3fe0'ccdfU, 0x3fe5'b907U, 0x3fea'c0c7U, 0x3fef'e4baU,
198+
0x3ff5'257dU, 0x3ffa'83b3U,
199+
};
200+
201+
// This function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
202+
// and exp(-x) simultaneously.
203+
// To compute e^x, we perform the following range reduction:
204+
// find hi, mid, lo such that:
205+
// x = (hi + mid) * log(2) + lo, in which
206+
// hi is an integer,
207+
// 0 <= mid * 2^5 < 32 is an integer
208+
// -2^(-5) <= lo * log2(e) <= 2^-5.
209+
// In particular,
210+
// hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
211+
// Then,
212+
// e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
213+
// We store 2^mid in the lookup table EXP2_MID_5_BITS, and compute 2^hi * 2^mid
214+
// by adding hi to the exponent field of 2^mid.
215+
// e^lo is computed using a degree-3 minimax polynomial generated by Sollya:
216+
// e^lo ~ P(lo)
217+
// = 1 + lo + c2 * lo^2 + c3 * lo^3
218+
// = (1 + c2*lo^2) + lo * (1 + c3*lo^2)
219+
// = P_even + lo * P_odd
220+
// To compute e^(-x), notice that:
221+
// e^(-x) = 2^(-(hi + mid)) * e^(-lo)
222+
// ~ 2^(-(hi + mid)) * P(-lo)
223+
// = 2^(-(hi + mid)) * (P_even - lo * P_odd)
224+
// So:
225+
// sinh(x) = (e^x - e^(-x)) / 2
226+
// ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
227+
// 2^(-(hi + mid)) * (P_even - lo * P_odd))
228+
// = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
229+
// lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
230+
// And similarly:
231+
// cosh(x) = (e^x + e^(-x)) / 2
232+
// ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
233+
// lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
234+
// The main point of these formulas is that the expensive part of calculating
235+
// the polynomials approximating lower parts of e^x and e^(-x) is shared and
236+
// only done once.
237+
// Shared evaluation kernel for sinhf16 and coshf16.
// Template parameter IsSinh selects the sinh combination when true and the
// cosh combination when false. The input is widened to float, all arithmetic
// is carried out in float, and the result is converted back with
// fputil::cast<float16>. No special-case handling (NaN, infinity, overflow) is
// performed in this function.
template <bool IsSinh> LIBC_INLINE float16 eval_sinh_or_cosh(float16 x) {
238+
float xf = x;
239+
// kf = round(x * log2(e) * 2^5), i.e. (hi + mid) scaled by 2^5.
float kf = fputil::nearest_integer(xf * (LOG2F_E * 0x1.0p+5f));
240+
int x_hi_mid_p = static_cast<int>(kf);
241+
// Negated copy, used to build 2^(-(hi + mid)) for the e^(-x) half.
int x_hi_mid_m = -x_hi_mid_p;
242+
243+
// Split each value into its low 5 bits (table index, "mid") and the
// remaining upper bits ("hi"). The conversion to unsigned keeps the shift
// and mask well-defined when the value is negative.
unsigned x_hi_p = static_cast<unsigned>(x_hi_mid_p) >> 5;
244+
unsigned x_hi_m = static_cast<unsigned>(x_hi_mid_m) >> 5;
245+
unsigned x_mid_p = static_cast<unsigned>(x_hi_mid_p) & 0x1f;
246+
unsigned x_mid_m = static_cast<unsigned>(x_hi_mid_m) & 0x1f;
247+
248+
// Scale the tabulated 2^mid by 2^hi by adding hi at the position just above
// the fraction bits of the float bit pattern.
uint32_t exp2_hi_mid_bits_p =
249+
EXP2_MID_5_BITS[x_mid_p] +
250+
static_cast<uint32_t>(x_hi_p << fputil::FPBits<float>::FRACTION_LEN);
251+
uint32_t exp2_hi_mid_bits_m =
252+
EXP2_MID_5_BITS[x_mid_m] +
253+
static_cast<uint32_t>(x_hi_m << fputil::FPBits<float>::FRACTION_LEN);
254+
// exp2_hi_mid_p = 2^(hi + mid)
255+
float exp2_hi_mid_p = fputil::FPBits<float>(exp2_hi_mid_bits_p).get_val();
256+
// exp2_hi_mid_m = 2^(-(hi + mid))
257+
float exp2_hi_mid_m = fputil::FPBits<float>(exp2_hi_mid_bits_m).get_val();
258+
259+
// exp2_hi_mid_sum = 2^(hi + mid) + 2^(-(hi + mid))
260+
float exp2_hi_mid_sum = exp2_hi_mid_p + exp2_hi_mid_m;
261+
// exp2_hi_mid_diff = 2^(hi + mid) - 2^(-(hi + mid))
262+
float exp2_hi_mid_diff = exp2_hi_mid_p - exp2_hi_mid_m;
263+
264+
// lo = x - (hi + mid) * log(2)
//    = round(x * log2(e) * 2^5) * log(2) * (-2^(-5)) + x
265+
float lo = fputil::multiply_add(kf, LOGF_2 * -0x1.0p-5f, xf);
266+
float lo_sq = lo * lo;
267+
268+
// Degree-3 minimax polynomial generated by Sollya with the following
269+
// commands:
270+
// > display = hexadecimal;
271+
// > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
272+
// > 1 + x * P;
273+
constexpr cpp::array<float, 4> COEFFS = {0x1p+0f, 0x1p+0f, 0x1.0004p-1f,
274+
0x1.555778p-3f};
275+
// The odd-index and even-index coefficients are evaluated separately in
// lo^2, each pre-scaled by 0.5 so the final division by 2 is already folded in.
// NOTE(review): presumably polyeval takes coefficients in ascending order,
// giving 0.5 * (c1 + c3 * lo^2) and 0.5 * (c0 + c2 * lo^2) -- confirm
// against fputil::polyeval.
float half_p_odd =
276+
fputil::polyeval(lo_sq, COEFFS[1] * 0.5f, COEFFS[3] * 0.5f);
277+
float half_p_even =
278+
fputil::polyeval(lo_sq, COEFFS[0] * 0.5f, COEFFS[2] * 0.5f);
279+
280+
// sinh(x) = lo * (0.5 * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) +
281+
// (0.5 * P_even * (2^(hi + mid) - 2^(-(hi + mid))))
282+
if constexpr (IsSinh)
283+
return fputil::cast<float16>(fputil::multiply_add(
284+
lo, half_p_odd * exp2_hi_mid_sum, half_p_even * exp2_hi_mid_diff));
285+
// cosh(x) = lo * (0.5 * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) +
286+
// (0.5 * P_even * (2^(hi + mid) + 2^(-(hi + mid))))
287+
return fputil::cast<float16>(fputil::multiply_add(
288+
lo, half_p_odd * exp2_hi_mid_diff, half_p_even * exp2_hi_mid_sum));
289+
}
290+
177291
} // namespace LIBC_NAMESPACE_DECL
178292

179293
#endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H

0 commit comments

Comments
 (0)