Several improvements

mordante · mordante · commit 8041880b30c7 · 2024-08-12T20:13:40.000+02:00
- using availability macros
- handle leading whitespace
- implement inf and nan
- implement negative values
diff --git a/libcxx/include/__charconv/from_chars_floating_point.h b/libcxx/include/__charconv/from_chars_floating_point.h
@@ -31,10 +31,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 17
 
-_LIBCPP_EXPORTED_FROM_ABI from_chars_result
+_LIBCPP_AVAILABILITY_FROM_CHARS_FLOATING_POINT _LIBCPP_EXPORTED_FROM_ABI from_chars_result
 __from_chars_floating_point(const char* __first, const char* __last, float& __value, chars_format __fmt);
 
-_LIBCPP_EXPORTED_FROM_ABI from_chars_result
+_LIBCPP_AVAILABILITY_FROM_CHARS_FLOATING_POINT _LIBCPP_EXPORTED_FROM_ABI from_chars_result
 __from_chars_floating_point(const char* __first, const char* __last, double& __value, chars_format __fmt);
 
 _LIBCPP_HIDE_FROM_ABI inline from_chars_result
diff --git a/libcxx/include/__configuration/availability.h b/libcxx/include/__configuration/availability.h
@@ -87,6 +87,9 @@
 // in all versions of the library are available.
 #if defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS)
 
+#  define _LIBCPP_INTRODUCED_IN_LLVM_20 1
+#  define _LIBCPP_INTRODUCED_IN_LLVM_20_ATTRIBUTE /* nothing */
+
 #  define _LIBCPP_INTRODUCED_IN_LLVM_19 1
 #  define _LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE /* nothing */
 
@@ -132,6 +135,11 @@
 
 // clang-format off
 
+// LLVM 20
+// TODO: Fill this in
+#  define _LIBCPP_INTRODUCED_IN_LLVM_20 0
+#  define _LIBCPP_INTRODUCED_IN_LLVM_20_ATTRIBUTE __attribute__((unavailable))
+
 // LLVM 19
 // TODO: Fill this in
 #  define _LIBCPP_INTRODUCED_IN_LLVM_19 0
@@ -375,6 +383,11 @@
 #define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19
 #define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE
 
+// This controls the availability of floating-point std::from_chars functions.
+// These overloads were added later than the integer overloads.
+#define _LIBCPP_AVAILABILITY_HAS_FROM_CHARS_FLOATING_POINT _LIBCPP_INTRODUCED_IN_LLVM_20
+#define _LIBCPP_AVAILABILITY_FROM_CHARS_FLOATING_POINT _LIBCPP_INTRODUCED_IN_LLVM_20_ATTRIBUTE
+
 // Define availability attributes that depend on _LIBCPP_HAS_NO_EXCEPTIONS.
 // Those are defined in terms of the availability attributes above, and
 // should not be vendor-specific.
diff --git a/libcxx/src/charconv.cpp b/libcxx/src/charconv.cpp
@@ -77,12 +77,12 @@ to_chars_result to_chars(char* __first, char* __last, long double __value, chars
 
 from_chars_result
 __from_chars_floating_point(const char* __first, const char* __last, float& __value, chars_format __fmt) {
-  return from_chars_floating_point<float>(__first, __last, __value, __fmt);
+  return std::__from_chars_floating_point<float>(__first, __last, __value, __fmt);
 }
 
 from_chars_result
 __from_chars_floating_point(const char* __first, const char* __last, double& __value, chars_format __fmt) {
-  return from_chars_floating_point<double>(__first, __last, __value, __fmt);
+  return std::__from_chars_floating_point<double>(__first, __last, __value, __fmt);
 }
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/src/include/from_chars_floating_point.h b/libcxx/src/include/from_chars_floating_point.h
@@ -14,27 +14,124 @@
 
 #include <__assert>
 #include <__config>
+#include <cctype>
 #include <charconv>
+#include <concepts>
 #include <limits>
+#include <cstring>
 #include <type_traits>
 
 // Included for the _Floating_type_traits class
 #include "to_chars_floating_point.h"
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <typename _Tp, __enable_if_t<std::is_floating_point<_Tp>::value, int> = 0>
-from_chars_result from_chars_floating_point(const char* __first, const char* __last, _Tp& __value, chars_format __fmt) {
+// Parses an infinity string.
+// Valid strings are case insensitive and contain INF or INFINITY.
+//
+// - __first is the first argument to std::from_chars. When the string is invalid
+//   this value is returned as ptr in the result.
+// - __last is the last argument of std::from_chars.
+// - __value is the value argument of std::from_chars. It is only written when
+//   the string is valid and _Tp has an infinity.
+// - __ptr is the current position in the input string. This points beyond
+//   the initial I character.
+// - __negative whether a valid string represents -inf or +inf.
+//
+// Returns
+// - {__first, errc::invalid_argument} when the I is not followed by NF,
+// - {end of the consumed text, errc{}} when _Tp has an infinity,
+// - {end of the consumed text, errc::result_out_of_range} otherwise.
+template <floating_point _Tp>
+from_chars_result __from_chars_floating_point_inf(
+    const char* const __first, const char* __last, _Tp& __value, const char* __ptr, bool __negative) {
+  // std::from_chars is locale-independent, so std::tolower is not used here:
+  // it follows the global C locale and has undefined behaviour for negative
+  // char values. Setting bit 0x20 folds an ASCII upper case letter to lower
+  // case; the only characters that compare equal to a lower case letter after
+  // the fold are that letter in either case.
+  auto __matches = [](char __c, char __lower) { return (__c | 0x20) == __lower; };
+
+  if (__last - __ptr < 2) [[unlikely]]
+    return {__first, errc::invalid_argument};
+
+  if (!__matches(__ptr[0], 'n') || !__matches(__ptr[1], 'f')) [[unlikely]]
+    return {__first, errc::invalid_argument};
+
+  __ptr += 2;
+
+  // At this point the result is valid and contains INF.
+  // When the remaining part contains INITY this will be consumed. Otherwise
+  // only INF is consumed. For example INFINITZ will consume INF and ignore
+  // INITZ.
+
+  if (__last - __ptr >= 5           //
+      && __matches(__ptr[0], 'i')   //
+      && __matches(__ptr[1], 'n')   //
+      && __matches(__ptr[2], 'i')   //
+      && __matches(__ptr[3], 't')   //
+      && __matches(__ptr[4], 'y'))
+    __ptr += 5;
+
+  if constexpr (numeric_limits<_Tp>::has_infinity) {
+    if (__negative)
+      __value = -std::numeric_limits<_Tp>::infinity();
+    else
+      __value = std::numeric_limits<_Tp>::infinity();
+
+    return {__ptr, std::errc{}};
+  } else {
+    return {__ptr, errc::result_out_of_range};
+  }
+}
+
+// Parses a NaN string.
+// Valid strings are case insensitive and contain NAN, optionally followed by
+// ( n-char-sequence_opt ), where an n-char is a digit, a letter, or an
+// underscore.
+//
+// - __first is the first argument to std::from_chars. When the string is invalid
+//   this value is returned as ptr in the result.
+// - __last is the last argument of std::from_chars.
+// - __value is the value argument of std::from_chars. It is only written when
+//   the string is valid.
+// - __ptr is the current position in the input string. This points beyond
+//   the initial N character.
+// - __negative whether a valid string represents -nan or +nan.
+//
+// Returns {__first, errc::invalid_argument} when the N is not followed by AN,
+// otherwise {end of the consumed text, errc{}}. The contents of the
+// n-char-sequence are consumed but do not influence the stored value.
+template <floating_point _Tp>
+from_chars_result __from_chars_floating_point_nan(
+    const char* const __first, const char* __last, _Tp& __value, const char* __ptr, bool __negative) {
+  // std::from_chars is locale-independent, so the <cctype> functions are not
+  // used here: they follow the global C locale and have undefined behaviour
+  // for negative char values. Setting bit 0x20 folds an ASCII upper case
+  // letter to lower case.
+  auto __matches   = [](char __c, char __lower) { return (__c | 0x20) == __lower; };
+  auto __is_n_char = [](char __c) {
+    const int __folded = __c | 0x20;
+    return __c == '_' || ('0' <= __c && __c <= '9') || ('a' <= __folded && __folded <= 'z');
+  };
+
+  if (__last - __ptr < 2) [[unlikely]]
+    return {__first, errc::invalid_argument};
+
+  if (!__matches(__ptr[0], 'a') || !__matches(__ptr[1], 'n')) [[unlikely]]
+    return {__first, errc::invalid_argument};
+
+  __ptr += 2;
+
+  // At this point the result is valid and contains NAN. When the remaining
+  // part contains ( n-char-sequence_opt ) this will be consumed. Otherwise
+  // only NAN is consumed. For example NAN(abcd will consume NAN and ignore
+  // (abcd.
+  if (__last - __ptr >= 2 && __ptr[0] == '(') {
+    // The size check above guarantees __ptr[1] is readable, so the first
+    // iteration of the do-while is in bounds.
+    size_t __offset = 1;
+    do {
+      if (__ptr[__offset] == ')') {
+        __ptr += __offset + 1;
+        break;
+      }
+      if (!__is_n_char(__ptr[__offset]))
+        break;
+      ++__offset;
+    } while (__ptr + __offset != __last);
+  }
+
+  if (__negative)
+    __value = -std::numeric_limits<_Tp>::quiet_NaN();
+  else
+    __value = std::numeric_limits<_Tp>::quiet_NaN();
+
+  return {__ptr, std::errc{}};
+}
+
+template <floating_point _Tp>
+from_chars_result __from_chars_floating_point_decimal(
+    const char* const __first,
+    const char* __last,
+    _Tp& __value,
+    chars_format __fmt,
+    const char* __ptr,
+    bool __negative) {
   using _Traits    = _Floating_type_traits<_Tp>;
   using _Uint_type = typename _Traits::_Uint_type;
   ptrdiff_t length = __last - __first;
   _LIBCPP_ASSERT_INTERNAL(length > 0, "");
 
-  // hacky parsing code as example. Not intended for actual use. I'm just going to handle the base 10
-  // chars_format::general case. Also, no sign, inf, or nan handling.
-  _LIBCPP_ASSERT_INTERNAL(__fmt == std::chars_format::general, "");
-
-  const char* src = __first; // rename to match the libc code copied for this section.
+  const char* src = __ptr; // rename to match the libc code copied for this section.
 
   _Uint_type mantissa            = 0;
   int exponent                   = 0;
@@ -123,10 +220,67 @@ from_chars_result from_chars_floating_point(const char* __first, const char* __l
   auto result = LIBC_NAMESPACE::fputil::FPBits<_Tp>();
   result.set_mantissa(expanded_float.mantissa);
   result.set_biased_exponent(expanded_float.exponent);
-  __value = result.get_val();
+  if (__negative)
+    __value = -result.get_val();
+  else
+    __value = result.get_val();
   return {src + index, {}};
 }
 
+// Entry point of the floating-point parser: handles the prefix of the input
+// and dispatches to the INF, NAN, or decimal parser.
+//
+// - __first, __last, __value, and __fmt are the arguments of std::from_chars.
+//
+// Returns {__first, errc::invalid_argument} when the range is empty, contains
+// only whitespace, ends after the minus sign, or does not start with a digit,
+// I, or N (after the optional whitespace and sign). Otherwise returns the
+// result of the selected parser.
+template <floating_point _Tp>
+from_chars_result
+__from_chars_floating_point(const char* const __first, const char* __last, _Tp& __value, chars_format __fmt) {
+  // std::from_chars is locale-independent, so the <cctype> functions are not
+  // used here: they follow the global C locale and have undefined behaviour
+  // for negative char values. These helpers only accept ASCII characters.
+  auto __is_space = [](char __c) { return __c == ' ' || ('\t' <= __c && __c <= '\r'); };
+  auto __is_digit = [](char __c) { return '0' <= __c && __c <= '9'; };
+
+  if (__first == __last) [[unlikely]]
+    return {__first, errc::invalid_argument};
+
+  const char* __ptr = __first;
+
+  // skip whitespace
+  // NOTE(review): the pattern in [charconv.from.chars] is the strtod subject
+  // sequence, which does not include leading whitespace - confirm skipping it
+  // is intended.
+  while (__is_space(*__ptr)) {
+    ++__ptr;
+    if (__ptr == __last) [[unlikely]]
+      return {__first, errc::invalid_argument}; // NOTE(review): whitespace-only input; confirm this result.
+  }
+
+  bool __negative = *__ptr == '-';
+  if (__negative) {
+    ++__ptr;
+    if (__ptr == __last) [[unlikely]]
+      return {__first, errc::invalid_argument};
+  }
+
+  if (!__is_digit(*__ptr)) {
+    // NOTE(review): this branch also rejects input that starts with a decimal
+    // point, for example ".5" - confirm whether that is intended.
+
+    // TODO Evaluate the other implementations
+    // [charconv.from.chars]/6.2
+    //   if fmt has chars_format::scientific set but not chars_format::fixed,
+    //   the otherwise optional exponent part shall appear;
+    // Since INF/NAN do not have an exponent this value is not valid.
+    // See LWG3456
+    if (__fmt == chars_format::scientific)
+      return {__first, errc::invalid_argument};
+
+    // Setting bit 0x20 folds an ASCII upper case letter to lower case, so both
+    // cases of I and N are matched.
+    switch (*__ptr | 0x20) {
+    case 'i':
+      return std::__from_chars_floating_point_inf(__first, __last, __value, __ptr + 1, __negative);
+    case 'n':
+      if constexpr (numeric_limits<_Tp>::has_quiet_NaN)
+        return std::__from_chars_floating_point_nan(__first, __last, __value, __ptr + 1, __negative);
+      [[fallthrough]];
+    default:
+      return {__first, errc::invalid_argument};
+    }
+  }
+
+#if 1
+  _LIBCPP_ASSERT_INTERNAL(__fmt == std::chars_format::general, "");
+#else
+  if (__fmt == chars_format::hex)
+    return std::__from_chars_floating_point_hex(__first, __last, __value);
+#endif
+
+  return std::__from_chars_floating_point_decimal(__first, __last, __value, __fmt, __ptr, __negative);
+}
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif //_LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
diff --git a/libcxx/test/std/utilities/charconv/charconv.from.chars/float.pass.cpp b/libcxx/test/std/utilities/charconv/charconv.from.chars/float.pass.cpp

Original file line number	Diff line number	Diff line change
`@@ -77,12 +77,12 @@ to_chars_result to_chars(char* __first, char* __last, long double __value, chars`
`77`	`77`
`78`	`78`	`from_chars_result`
`79`	`79`	`__from_chars_floating_point(const char* __first, const char* __last, float& __value, chars_format __fmt) {`
`80`		`- return from_chars_floating_point<float>(__first, __last, __value, __fmt);`
	`80`	`+ return std::__from_chars_floating_point<float>(__first, __last, __value, __fmt);`
`81`	`81`	`}`
`82`	`82`
`83`	`83`	`from_chars_result`
`84`	`84`	`__from_chars_floating_point(const char* __first, const char* __last, double& __value, chars_format __fmt) {`
`85`		`- return from_chars_floating_point<double>(__first, __last, __value, __fmt);`
	`85`	`+ return std::__from_chars_floating_point<double>(__first, __last, __value, __fmt);`
`86`	`86`	`}`
`87`	`87`
`88`	`88`	`_LIBCPP_END_NAMESPACE_STD`