Skip to content

Commit

Permalink
[libc][math] Implement powf function correctly rounded to all roundin…
Browse files Browse the repository at this point in the history
…g modes. (llvm#71188)

We compute `pow(x, y)` using the formula
```
  pow(x, y) = x^y = 2^(y * log2(x))
```
We follow similar steps as in `log2f(x)` and `exp2f(x)`, by breaking
down into `hi + mid + lo` parts, in which `hi` parts are computed using
the exponent field directly, `mid` parts will use look-up tables, and
`lo` parts are approximated by polynomials.

We add some speedup for common use-cases:
```
  pow(2, y) = exp2(y)
  pow(10, y) = exp10(y)
  pow(x, 2) = x * x
  pow(x, 1/2) = sqrt(x)
  pow(x, -1/2) = rsqrt(x) - to be added
```
  • Loading branch information
lntue authored Nov 6, 2023
1 parent 0156b6e commit bc7a3bd
Show file tree
Hide file tree
Showing 23 changed files with 1,665 additions and 325 deletions.
1 change: 1 addition & 0 deletions libc/config/darwin/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextafter
libc.src.math.nextafterf
libc.src.math.nextafterl
libc.src.math.powf
libc.src.math.remainderf
libc.src.math.remainder
libc.src.math.remainderl
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/aarch64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextafter
libc.src.math.nextafterf
libc.src.math.nextafterl
libc.src.math.powf
libc.src.math.remainderf
libc.src.math.remainder
libc.src.math.remainderl
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextafter
libc.src.math.nextafterf
libc.src.math.nextafterl
libc.src.math.powf
libc.src.math.remainderf
libc.src.math.remainder
libc.src.math.remainderl
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextafter
libc.src.math.nextafterf
libc.src.math.nextafterl
libc.src.math.powf
libc.src.math.remainderf
libc.src.math.remainder
libc.src.math.remainderl
Expand Down
1 change: 1 addition & 0 deletions libc/config/windows/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextafter
libc.src.math.nextafterf
libc.src.math.nextafterl
libc.src.math.powf
libc.src.math.remainderf
libc.src.math.remainder
libc.src.math.remainderl
Expand Down
3 changes: 2 additions & 1 deletion libc/docs/math/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ Higher Math Functions
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| pow | | | | | | | | | | | | |
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| powf | | | | | | | | | | | | |
| powf | |check| | |check| | | |check| | |check| | | | |check| | | | | |
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| powl | | | | | | | | | | | | |
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
Expand Down Expand Up @@ -493,6 +493,7 @@ log |check| |check|
log10 |check| |check|
log1p |check| |check|
log2 |check| |check|
pow |check|
sin |check| large
sincos |check| large
sinh |check|
Expand Down
1 change: 1 addition & 0 deletions libc/spec/stdc.td
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,7 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"nextafter", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"nextafterl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,

FunctionSpec<"powf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"pow", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,

FunctionSpec<"coshf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
Expand Down
70 changes: 60 additions & 10 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -625,12 +625,10 @@ add_entrypoint_object(
-O3
)

add_entrypoint_object(
exp2f
SRCS
exp2f.cpp
add_header_library(
exp2f_impl
HDRS
../exp2f.h
exp2f_impl.h
DEPENDS
.explogxf
libc.src.__support.FPUtil.except_value_utils
Expand All @@ -641,9 +639,20 @@ add_entrypoint_object(
libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
libc.src.__support.common
libc.include.errno
libc.src.errno.errno
libc.include.math
)

add_entrypoint_object(
exp2f
SRCS
exp2f.cpp
HDRS
../exp2f.h
DEPENDS
.exp2f_impl
COMPILE_OPTIONS
-O3
)
Expand Down Expand Up @@ -675,12 +684,10 @@ add_entrypoint_object(
-O3
)

add_entrypoint_object(
exp10f
SRCS
exp10f.cpp
add_header_library(
exp10f_impl
HDRS
../exp10f.h
exp10f_impl.h
DEPENDS
.explogxf
libc.src.__support.FPUtil.fenv_impl
Expand All @@ -690,13 +697,26 @@ add_entrypoint_object(
libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
libc.src.__support.common
libc.include.errno
libc.src.errno.errno
libc.include.math
COMPILE_OPTIONS
-O3
)

add_entrypoint_object(
exp10f
SRCS
exp10f.cpp
HDRS
../exp10f.h
DEPENDS
.exp10f_impl
COMPILE_OPTIONS
-O3
)

add_entrypoint_object(
expm1
SRCS
Expand Down Expand Up @@ -747,6 +767,36 @@ add_entrypoint_object(
-O3
)

add_entrypoint_object(
powf
SRCS
powf.cpp
HDRS
../powf.h
DEPENDS
.common_constants
.explogxf
.exp2f_impl
.exp10f_impl
libc.src.__support.builtin_wrappers
libc.src.__support.CPP.bit
libc.src.__support.CPP.optional
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.FPUtil.sqrt
libc.src.__support.FPUtil.triple_double
libc.src.__support.macros.optimization
libc.include.errno
libc.src.errno.errno
libc.include.math
COMPILE_OPTIONS
-O3
)

add_entrypoint_object(
copysign
SRCS
Expand Down
87 changes: 87 additions & 0 deletions libc/src/math/generic/common_constants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,51 @@ alignas(64) const double LOG_R[128] = {
0x1.5707a26bb8c66p-1, 0x1.5af405c3649ep-1, 0x1.5af405c3649ep-1,
0x1.5ee82aa24192p-1, 0x0.000000000000p0};

alignas(64) const double LOG2_R[128] = {
0x0.0000000000000p+0, 0x1.72c7ba20f7327p-7, 0x1.743ee861f3556p-6,
0x1.184b8e4c56af8p-5, 0x1.77394c9d958d5p-5, 0x1.d6ebd1f1febfep-5,
0x1.1bb32a600549dp-4, 0x1.4c560fe68af88p-4, 0x1.7d60496cfbb4cp-4,
0x1.960caf9abb7cap-4, 0x1.c7b528b70f1c5p-4, 0x1.f9c95dc1d1165p-4,
0x1.097e38ce60649p-3, 0x1.22dadc2ab3497p-3, 0x1.3c6fb650cde51p-3,
0x1.494f863b8df35p-3, 0x1.633a8bf437ce1p-3, 0x1.7046031c79f85p-3,
0x1.8a8980abfbd32p-3, 0x1.97c1cb13c7ec1p-3, 0x1.b2602497d5346p-3,
0x1.bfc67a7fff4ccp-3, 0x1.dac22d3e441d3p-3, 0x1.e857d3d361368p-3,
0x1.01d9bbcfa61d4p-2, 0x1.08bce0d95fa38p-2, 0x1.169c05363f158p-2,
0x1.1d982c9d52708p-2, 0x1.249cd2b13cd6cp-2, 0x1.32bfee370ee68p-2,
0x1.39de8e1559f6fp-2, 0x1.4106017c3eca3p-2, 0x1.4f6fbb2cec598p-2,
0x1.56b22e6b578e5p-2, 0x1.5dfdcf1eeae0ep-2, 0x1.6552b49986277p-2,
0x1.6cb0f6865c8eap-2, 0x1.7b89f02cf2aadp-2, 0x1.8304d90c11fd3p-2,
0x1.8a8980abfbd32p-2, 0x1.921800924dd3bp-2, 0x1.99b072a96c6b2p-2,
0x1.a8ff971810a5ep-2, 0x1.b0b67f4f4681p-2, 0x1.b877c57b1b07p-2,
0x1.c043859e2fdb3p-2, 0x1.c819dc2d45fe4p-2, 0x1.cffae611ad12bp-2,
0x1.d7e6c0abc3579p-2, 0x1.dfdd89d586e2bp-2, 0x1.e7df5fe538ab3p-2,
0x1.efec61b011f85p-2, 0x1.f804ae8d0cd02p-2, 0x1.0014332be0033p-1,
0x1.042bd4b9a7c99p-1, 0x1.08494c66b8efp-1, 0x1.0c6caaf0c5597p-1,
0x1.1096015dee4dap-1, 0x1.14c560fe68af9p-1, 0x1.18fadb6e2d3c2p-1,
0x1.1d368296b5255p-1, 0x1.217868b0c37e8p-1, 0x1.25c0a0463bebp-1,
0x1.2a0f3c340705cp-1, 0x1.2e644fac04fd8p-1, 0x1.2e644fac04fd8p-1,
0x1.32bfee370ee68p-1, 0x1.37222bb70747cp-1, 0x1.3b8b1c68fa6edp-1,
0x1.3ffad4e74f1d6p-1, 0x1.44716a2c08262p-1, 0x1.44716a2c08262p-1,
0x1.48eef19317991p-1, 0x1.4d7380dcc422dp-1, 0x1.51ff2e30214bcp-1,
0x1.5692101d9b4a6p-1, 0x1.5b2c3da19723bp-1, 0x1.5b2c3da19723bp-1,
0x1.5fcdce2727ddbp-1, 0x1.6476d98ad990ap-1, 0x1.6927781d932a8p-1,
0x1.6927781d932a8p-1, 0x1.6ddfc2a78fc63p-1, 0x1.729fd26b707c8p-1,
0x1.7767c12967a45p-1, 0x1.7767c12967a45p-1, 0x1.7c37a9227e7fbp-1,
0x1.810fa51bf65fdp-1, 0x1.810fa51bf65fdp-1, 0x1.85efd062c656dp-1,
0x1.8ad846cf369a4p-1, 0x1.8ad846cf369a4p-1, 0x1.8fc924c89ac84p-1,
0x1.94c287492c4dbp-1, 0x1.94c287492c4dbp-1, 0x1.99c48be2063c8p-1,
0x1.9ecf50bf43f13p-1, 0x1.9ecf50bf43f13p-1, 0x1.a3e2f4ac43f6p-1,
0x1.a8ff971810a5ep-1, 0x1.a8ff971810a5ep-1, 0x1.ae255819f022dp-1,
0x1.b35458761d479p-1, 0x1.b35458761d479p-1, 0x1.b88cb9a2ab521p-1,
0x1.b88cb9a2ab521p-1, 0x1.bdce9dcc96187p-1, 0x1.c31a27dd00b4ap-1,
0x1.c31a27dd00b4ap-1, 0x1.c86f7b7ea4a89p-1, 0x1.c86f7b7ea4a89p-1,
0x1.cdcebd2373995p-1, 0x1.d338120a6dd9dp-1, 0x1.d338120a6dd9dp-1,
0x1.d8aba045b01c8p-1, 0x1.d8aba045b01c8p-1, 0x1.de298ec0bac0dp-1,
0x1.de298ec0bac0dp-1, 0x1.e3b20546f554ap-1, 0x1.e3b20546f554ap-1,
0x1.e9452c8a71028p-1, 0x1.e9452c8a71028p-1, 0x1.eee32e2aeccbfp-1,
0x1.eee32e2aeccbfp-1, 0x1.f48c34bd1e96fp-1, 0x1.f48c34bd1e96fp-1,
0x1.fa406bd2443dfp-1, 0x1.0000000000000p0};

// Generated by Sollya with:
// for i from 0 to 127 do {
// r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^-7) );
Expand Down Expand Up @@ -395,6 +440,48 @@ alignas(64) const int S2[193] = {
-0x1cd, -0x1d1, -0x1d5, -0x1d9, -0x1dd, -0x1e0, -0x1e4, -0x1e8, -0x1ec,
-0x1f0, -0x1f4, -0x1f8, -0x1fc};

alignas(64) const double R2[193] = {
0x1.0101p0, 0x1.00fdp0, 0x1.00f9p0, 0x1.00f5p0, 0x1.00f1p0,
0x1.00edp0, 0x1.00e9p0, 0x1.00e5p0, 0x1.00e1p0, 0x1.00ddp0,
0x1.00d9p0, 0x1.00d5p0, 0x1.00d1p0, 0x1.00cdp0, 0x1.00c9p0,
0x1.00c5p0, 0x1.00c1p0, 0x1.00bdp0, 0x1.00b9p0, 0x1.00b4p0,
0x1.00bp0, 0x1.00acp0, 0x1.00a8p0, 0x1.00a4p0, 0x1.00ap0,
0x1.009cp0, 0x1.0098p0, 0x1.0094p0, 0x1.009p0, 0x1.008cp0,
0x1.0088p0, 0x1.0084p0, 0x1.008p0, 0x1.007cp0, 0x1.0078p0,
0x1.0074p0, 0x1.007p0, 0x1.006cp0, 0x1.0068p0, 0x1.0064p0,
0x1.006p0, 0x1.005cp0, 0x1.0058p0, 0x1.0054p0, 0x1.005p0,
0x1.004cp0, 0x1.0048p0, 0x1.0044p0, 0x1.004p0, 0x1.003cp0,
0x1.0038p0, 0x1.0034p0, 0x1.003p0, 0x1.002cp0, 0x1.0028p0,
0x1.0024p0, 0x1.002p0, 0x1.001cp0, 0x1.0018p0, 0x1.0014p0,
0x1.001p0, 0x1.000cp0, 0x1.0008p0, 0x1.0004p0, 0x1p0,
0x1.fff8p-1, 0x1.fffp-1, 0x1.ffe8p-1, 0x1.ffep-1, 0x1.ffd8p-1,
0x1.ffdp-1, 0x1.ffc8p-1, 0x1.ffcp-1, 0x1.ffb8p-1, 0x1.ffbp-1,
0x1.ffa8p-1, 0x1.ffap-1, 0x1.ff98p-1, 0x1.ff9p-1, 0x1.ff88p-1,
0x1.ff8p-1, 0x1.ff78p-1, 0x1.ff7p-1, 0x1.ff68p-1, 0x1.ff6p-1,
0x1.ff58p-1, 0x1.ff5p-1, 0x1.ff48p-1, 0x1.ff4p-1, 0x1.ff38p-1,
0x1.ff3p-1, 0x1.ff28p-1, 0x1.ff2p-1, 0x1.ff18p-1, 0x1.ff1p-1,
0x1.ff08p-1, 0x1.ffp-1, 0x1.fef8p-1, 0x1.fefp-1, 0x1.fee8p-1,
0x1.feep-1, 0x1.fed8p-1, 0x1.fedp-1, 0x1.fec8p-1, 0x1.fecp-1,
0x1.feb8p-1, 0x1.febp-1, 0x1.fea8p-1, 0x1.feap-1, 0x1.fe98p-1,
0x1.fe92p-1, 0x1.fe8ap-1, 0x1.fe82p-1, 0x1.fe7ap-1, 0x1.fe72p-1,
0x1.fe6ap-1, 0x1.fe62p-1, 0x1.fe5ap-1, 0x1.fe52p-1, 0x1.fe4ap-1,
0x1.fe42p-1, 0x1.fe3ap-1, 0x1.fe32p-1, 0x1.fe2ap-1, 0x1.fe22p-1,
0x1.fe1ap-1, 0x1.fe12p-1, 0x1.fe0ap-1, 0x1.fe02p-1, 0x1.fdfap-1,
0x1.fdf2p-1, 0x1.fdeap-1, 0x1.fde2p-1, 0x1.fddap-1, 0x1.fdd2p-1,
0x1.fdcap-1, 0x1.fdc2p-1, 0x1.fdbap-1, 0x1.fdb2p-1, 0x1.fdaap-1,
0x1.fda2p-1, 0x1.fd9ap-1, 0x1.fd92p-1, 0x1.fd8cp-1, 0x1.fd84p-1,
0x1.fd7cp-1, 0x1.fd74p-1, 0x1.fd6cp-1, 0x1.fd64p-1, 0x1.fd5cp-1,
0x1.fd54p-1, 0x1.fd4cp-1, 0x1.fd44p-1, 0x1.fd3cp-1, 0x1.fd34p-1,
0x1.fd2cp-1, 0x1.fd24p-1, 0x1.fd1cp-1, 0x1.fd14p-1, 0x1.fd0cp-1,
0x1.fd04p-1, 0x1.fcfcp-1, 0x1.fcf4p-1, 0x1.fcecp-1, 0x1.fce4p-1,
0x1.fcdcp-1, 0x1.fcd6p-1, 0x1.fccep-1, 0x1.fcc6p-1, 0x1.fcbep-1,
0x1.fcb6p-1, 0x1.fcaep-1, 0x1.fca6p-1, 0x1.fc9ep-1, 0x1.fc96p-1,
0x1.fc8ep-1, 0x1.fc86p-1, 0x1.fc7ep-1, 0x1.fc76p-1, 0x1.fc6ep-1,
0x1.fc66p-1, 0x1.fc5ep-1, 0x1.fc56p-1, 0x1.fc4ep-1, 0x1.fc46p-1,
0x1.fc4p-1, 0x1.fc38p-1, 0x1.fc3p-1, 0x1.fc28p-1, 0x1.fc2p-1,
0x1.fc18p-1, 0x1.fc1p-1, 0x1.fc08p-1,
};

// Logarithm range reduction - Step 3:
// r(k) = 2^-21 round(2^21 / (1 + k*2^-21)) for k = -80 .. 80.
// Output range:
Expand Down
5 changes: 5 additions & 0 deletions libc/src/math/generic/common_constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ extern const double CD[128];
extern const double LOG_R[128];
extern const NumberPair<double> LOG_R_DD[128];

// Lookup table for -log2(r)
extern const double LOG2_R[128];

// Minimax polynomial for (log(1 + x) - x)/x^2, generated by sollya with:
// > P = fpminimax((log(1 + x) - x)/x^2, 5, [|D...|], [-2^-8, 2^-7]);
constexpr double LOG_COEFFS[6] = {-0x1.fffffffffffffp-2, 0x1.5555555554a9bp-2,
Expand All @@ -45,6 +48,8 @@ extern const int S2[193];
extern const int S3[161];
extern const int S4[130];

extern const double R2[193];

// log(2) generated by Sollya with:
// > a = 2^-43 * nearestint(2^43*log(2));
// LSB = 2^-43 is chosen so that e_x * LOG_2_HI is exact for -1075 < e_x < 1024.
Expand Down
Loading

0 comments on commit bc7a3bd

Please sign in to comment.