|  | 
|  | 1 | +//===-- Implementation header for exp10m1f16 --------------------*- C++ -*-===// | 
|  | 2 | +// | 
|  | 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | 4 | +// See https://llvm.org/LICENSE.txt for license information. | 
|  | 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | 6 | +// | 
|  | 7 | +//===----------------------------------------------------------------------===// | 
|  | 8 | + | 
|  | 9 | +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F16_H | 
|  | 10 | +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F16_H | 
|  | 11 | + | 
|  | 12 | +#include "include/llvm-libc-macros/float16-macros.h" | 
|  | 13 | + | 
|  | 14 | +#ifdef LIBC_TYPES_HAS_FLOAT16 | 
|  | 15 | + | 
|  | 16 | +#include "exp10f16_utils.h" | 
|  | 17 | +#include "src/__support/FPUtil/FEnvImpl.h" | 
|  | 18 | +#include "src/__support/FPUtil/FPBits.h" | 
|  | 19 | +#include "src/__support/FPUtil/PolyEval.h" | 
|  | 20 | +#include "src/__support/FPUtil/cast.h" | 
|  | 21 | +#include "src/__support/FPUtil/except_value_utils.h" | 
|  | 22 | +#include "src/__support/FPUtil/multiply_add.h" | 
|  | 23 | +#include "src/__support/FPUtil/rounding_mode.h" | 
|  | 24 | +#include "src/__support/common.h" | 
|  | 25 | +#include "src/__support/macros/config.h" | 
|  | 26 | +#include "src/__support/macros/optimization.h" | 
|  | 27 | +#include "src/__support/macros/properties/cpu_features.h" | 
|  | 28 | + | 
|  | 29 | +namespace LIBC_NAMESPACE_DECL { | 
|  | 30 | + | 
|  | 31 | +namespace math { | 
|  | 32 | + | 
|  | 33 | +LIBC_INLINE static constexpr float16 exp10m1f16(float16 x) { | 
|  | 34 | + | 
|  | 35 | +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS | 
|  | 36 | +  constexpr fputil::ExceptValues<float16, 3> EXP10M1F16_EXCEPTS_LO = {{ | 
|  | 37 | +      // (input, RZ output, RU offset, RD offset, RN offset) | 
|  | 38 | +      // x = 0x1.5c4p-4, exp10m1f16(x) = 0x1.bacp-3 (RZ) | 
|  | 39 | +      {0x2d71U, 0x32ebU, 1U, 0U, 0U}, | 
|  | 40 | +      // x = -0x1.5ep-13, exp10m1f16(x) = -0x1.92cp-12 (RZ) | 
|  | 41 | +      {0x8978U, 0x8e4bU, 0U, 1U, 0U}, | 
|  | 42 | +      // x = -0x1.e2p-10, exp10m1f16(x) = -0x1.14cp-8 (RZ) | 
|  | 43 | +      {0x9788U, 0x9c53U, 0U, 1U, 0U}, | 
|  | 44 | +  }}; | 
|  | 45 | + | 
|  | 46 | +#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT | 
|  | 47 | +  constexpr size_t N_EXP10M1F16_EXCEPTS_HI = 3; | 
|  | 48 | +#else | 
|  | 49 | +  constexpr size_t N_EXP10M1F16_EXCEPTS_HI = 6; | 
|  | 50 | +#endif | 
|  | 51 | + | 
|  | 52 | +  constexpr fputil::ExceptValues<float16, N_EXP10M1F16_EXCEPTS_HI> | 
|  | 53 | +      EXP10M1F16_EXCEPTS_HI = {{ | 
|  | 54 | +          // (input, RZ output, RU offset, RD offset, RN offset) | 
|  | 55 | +          // x = 0x1.8f4p-2, exp10m1f16(x) = 0x1.744p+0 (RZ) | 
|  | 56 | +          {0x363dU, 0x3dd1U, 1U, 0U, 0U}, | 
|  | 57 | +          // x = 0x1.95cp-2, exp10m1f16(x) = 0x1.7d8p+0 (RZ) | 
|  | 58 | +          {0x3657U, 0x3df6U, 1U, 0U, 0U}, | 
|  | 59 | +          // x = 0x1.d04p-2, exp10m1f16(x) = 0x1.d7p+0 (RZ) | 
|  | 60 | +          {0x3741U, 0x3f5cU, 1U, 0U, 1U}, | 
|  | 61 | +#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT | 
|  | 62 | +          // x = 0x1.0cp+1, exp10m1f16(x) = 0x1.ec4p+6 (RZ) | 
|  | 63 | +          {0x4030U, 0x57b1U, 1U, 0U, 1U}, | 
|  | 64 | +          // x = 0x1.1b8p+1, exp10m1f16(x) = 0x1.45cp+7 (RZ) | 
|  | 65 | +          {0x406eU, 0x5917U, 1U, 0U, 1U}, | 
|  | 66 | +          // x = 0x1.2f4p+2, exp10m1f16(x) = 0x1.ab8p+15 (RZ) | 
|  | 67 | +          {0x44bdU, 0x7aaeU, 1U, 0U, 1U}, | 
|  | 68 | +#endif | 
|  | 69 | +      }}; | 
|  | 70 | +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS | 
|  | 71 | + | 
|  | 72 | +  using FPBits = fputil::FPBits<float16>; | 
|  | 73 | +  FPBits x_bits(x); | 
|  | 74 | + | 
|  | 75 | +  uint16_t x_u = x_bits.uintval(); | 
|  | 76 | +  uint16_t x_abs = x_u & 0x7fffU; | 
|  | 77 | + | 
|  | 78 | +  // When |x| <= 2^(-3), or |x| >= 11 * log10(2), or x is NaN. | 
|  | 79 | +  if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x429fU)) { | 
|  | 80 | +    // exp10m1(NaN) = NaN | 
|  | 81 | +    if (x_bits.is_nan()) { | 
|  | 82 | +      if (x_bits.is_signaling_nan()) { | 
|  | 83 | +        fputil::raise_except_if_required(FE_INVALID); | 
|  | 84 | +        return FPBits::quiet_nan().get_val(); | 
|  | 85 | +      } | 
|  | 86 | + | 
|  | 87 | +      return x; | 
|  | 88 | +    } | 
|  | 89 | + | 
|  | 90 | +    // When x >= 16 * log10(2). | 
|  | 91 | +    if (x_u >= 0x44d1U && x_bits.is_pos()) { | 
|  | 92 | +      // exp10m1(+inf) = +inf | 
|  | 93 | +      if (x_bits.is_inf()) | 
|  | 94 | +        return FPBits::inf().get_val(); | 
|  | 95 | + | 
|  | 96 | +      switch (fputil::quick_get_round()) { | 
|  | 97 | +      case FE_TONEAREST: | 
|  | 98 | +      case FE_UPWARD: | 
|  | 99 | +        fputil::set_errno_if_required(ERANGE); | 
|  | 100 | +        fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT); | 
|  | 101 | +        return FPBits::inf().get_val(); | 
|  | 102 | +      default: | 
|  | 103 | +        return FPBits::max_normal().get_val(); | 
|  | 104 | +      } | 
|  | 105 | +    } | 
|  | 106 | + | 
|  | 107 | +    // When x < -11 * log10(2). | 
|  | 108 | +    if (x_u > 0xc29fU) { | 
|  | 109 | +      // exp10m1(-inf) = -1 | 
|  | 110 | +      if (x_bits.is_inf()) | 
|  | 111 | +        return FPBits::one(Sign::NEG).get_val(); | 
|  | 112 | + | 
|  | 113 | +      // When x >= -0x1.ce4p+1, round(10^x - 1, HP, RN) = -0x1.ffcp-1. | 
|  | 114 | +      if (x_u <= 0xc339U) { | 
|  | 115 | +        return fputil::round_result_slightly_down( | 
|  | 116 | +            fputil::cast<float16>(-0x1.ffcp-1)); | 
|  | 117 | +      } | 
|  | 118 | + | 
|  | 119 | +      // When x < -0x1.ce4p+1, round(10^x - 1, HP, RN) = -1. | 
|  | 120 | +      switch (fputil::quick_get_round()) { | 
|  | 121 | +      case FE_TONEAREST: | 
|  | 122 | +      case FE_DOWNWARD: | 
|  | 123 | +        return FPBits::one(Sign::NEG).get_val(); | 
|  | 124 | +      default: | 
|  | 125 | +        return fputil::cast<float16>(-0x1.ffcp-1); | 
|  | 126 | +      } | 
|  | 127 | +    } | 
|  | 128 | + | 
|  | 129 | +    // When |x| <= 2^(-3). | 
|  | 130 | +    if (x_abs <= 0x3000U) { | 
|  | 131 | +      if (LIBC_UNLIKELY(x_abs == 0)) | 
|  | 132 | +        return x; | 
|  | 133 | + | 
|  | 134 | +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS | 
|  | 135 | +      if (auto r = EXP10M1F16_EXCEPTS_LO.lookup(x_u); | 
|  | 136 | +          LIBC_UNLIKELY(r.has_value())) | 
|  | 137 | +        return r.value(); | 
|  | 138 | +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS | 
|  | 139 | + | 
|  | 140 | +      float xf = x; | 
|  | 141 | +      // Degree-5 minimax polynomial generated by Sollya with the following | 
|  | 142 | +      // commands: | 
|  | 143 | +      //   > display = hexadecimal; | 
|  | 144 | +      //   > P = fpminimax((10^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]); | 
|  | 145 | +      //   > x * P; | 
|  | 146 | +      return fputil::cast<float16>( | 
|  | 147 | +          xf * fputil::polyeval(xf, 0x1.26bb1cp+1f, 0x1.5351c8p+1f, | 
|  | 148 | +                                0x1.04704p+1f, 0x1.2ce084p+0f, 0x1.14a6bep-1f)); | 
|  | 149 | +    } | 
|  | 150 | +  } | 
|  | 151 | + | 
|  | 152 | +  // When x is 1, 2, or 3. These are hard-to-round cases with exact results. | 
|  | 153 | +  // 10^4 - 1 = 9'999 is not exactly representable as a float16, but luckily the | 
|  | 154 | +  // polynomial approximation gives the correct result for x = 4 in all | 
|  | 155 | +  // rounding modes. | 
|  | 156 | +  if (LIBC_UNLIKELY((x_u & ~(0x3c00U | 0x4000U | 0x4200U | 0x4400U)) == 0)) { | 
|  | 157 | +    switch (x_u) { | 
|  | 158 | +    case 0x3c00U: // x = 1.0f16 | 
|  | 159 | +      return fputil::cast<float16>(9.0); | 
|  | 160 | +    case 0x4000U: // x = 2.0f16 | 
|  | 161 | +      return fputil::cast<float16>(99.0); | 
|  | 162 | +    case 0x4200U: // x = 3.0f16 | 
|  | 163 | +      return fputil::cast<float16>(999.0); | 
|  | 164 | +    } | 
|  | 165 | +  } | 
|  | 166 | + | 
|  | 167 | +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS | 
|  | 168 | +  if (auto r = EXP10M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) | 
|  | 169 | +    return r.value(); | 
|  | 170 | +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS | 
|  | 171 | + | 
|  | 172 | +  // exp10(x) = exp2((hi + mid) * log2(10)) * exp10(lo) | 
|  | 173 | +  auto [exp2_hi_mid, exp10_lo] = exp10_range_reduction(x); | 
|  | 174 | +  // exp10m1(x) = exp2((hi + mid) * log2(lo)) * exp10(lo) - 1 | 
|  | 175 | +  return fputil::cast<float16>( | 
|  | 176 | +      fputil::multiply_add(exp2_hi_mid, exp10_lo, -1.0f)); | 
|  | 177 | +} | 
|  | 178 | + | 
|  | 179 | +} // namespace math | 
|  | 180 | + | 
|  | 181 | +} // namespace LIBC_NAMESPACE_DECL | 
|  | 182 | + | 
|  | 183 | +#endif // LIBC_TYPES_HAS_FLOAT16 | 
|  | 184 | + | 
|  | 185 | +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F16_H | 
0 commit comments