Skip to content

Commit f2072fc

Browse files
committed
Several improvements
- address review comment - CI fixes - implemented scientific - implemented fixed - adjust pattern to subject sequence instead of allowing whitespace - Implemented out of range issues - Partly enabled the MSVC tests for from_chars floating-point - Fixes exponent out of bounds reading
1 parent 8041880 commit f2072fc

File tree

7 files changed

+706
-79
lines changed

7 files changed

+706
-79
lines changed

libcxx/src/include/from_chars_floating_point.h

Lines changed: 101 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
#include <charconv>
1919
#include <concepts>
2020
#include <limits>
21-
#include <cstring>
2221
#include <type_traits>
2322

2423
// Included for the _Floating_type_traits class
@@ -36,9 +35,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
3635
// - __ptr is the current position is the input string. This is points beyond
3736
// the initial I character.
3837
// - __negative whether a valid string represents -inf or +inf.
39-
template <floating_point _Tp>
38+
template <floating_point _Fp>
4039
from_chars_result __from_chars_floating_point_inf(
41-
const char* const __first, const char* __last, _Tp& __value, const char* __ptr, bool __negative) {
40+
const char* const __first, const char* __last, _Fp& __value, const char* __ptr, bool __negative) {
4241
if (__last - __ptr < 2) [[unlikely]]
4342
return {__first, errc::invalid_argument};
4443

@@ -60,19 +59,19 @@ from_chars_result __from_chars_floating_point_inf(
6059
&& std::tolower(__ptr[4]) == 'y')
6160
__ptr += 5;
6261

63-
if constexpr (numeric_limits<_Tp>::has_infinity) {
62+
if constexpr (numeric_limits<_Fp>::has_infinity) {
6463
if (__negative)
65-
__value = -std::numeric_limits<_Tp>::infinity();
64+
__value = -std::numeric_limits<_Fp>::infinity();
6665
else
67-
__value = std::numeric_limits<_Tp>::infinity();
66+
__value = std::numeric_limits<_Fp>::infinity();
6867

6968
return {__ptr, std::errc{}};
7069
} else {
7170
return {__ptr, errc::result_out_of_range};
7271
}
7372
}
7473

75-
// Parses an infinita string.
74+
// Parses a nan string.
7675
// Valid strings are case insentitive and contain INF or INFINITY.
7776
//
7877
// - __first is the first argument to std::from_chars. When the string is invalid
@@ -82,9 +81,9 @@ from_chars_result __from_chars_floating_point_inf(
8281
// - __ptr is the current position is the input string. This is points beyond
8382
// the initial N character.
8483
// - __negative whether a valid string represents -nan or +nan.
85-
template <floating_point _Tp>
84+
template <floating_point _Fp>
8685
from_chars_result __from_chars_floating_point_nan(
87-
const char* const __first, const char* __last, _Tp& __value, const char* __ptr, bool __negative) {
86+
const char* const __first, const char* __last, _Fp& __value, const char* __ptr, bool __negative) {
8887
if (__last - __ptr < 2) [[unlikely]]
8988
return {__first, errc::invalid_argument};
9089

@@ -111,32 +110,33 @@ from_chars_result __from_chars_floating_point_nan(
111110
}
112111

113112
if (__negative)
114-
__value = -std::numeric_limits<_Tp>::quiet_NaN();
113+
__value = -std::numeric_limits<_Fp>::quiet_NaN();
115114
else
116-
__value = std::numeric_limits<_Tp>::quiet_NaN();
115+
__value = std::numeric_limits<_Fp>::quiet_NaN();
117116

118117
return {__ptr, std::errc{}};
119118
}
120119

121-
template <floating_point _Tp>
120+
template <floating_point _Fp>
122121
from_chars_result __from_chars_floating_point_decimal(
123122
const char* const __first,
124123
const char* __last,
125-
_Tp& __value,
124+
_Fp& __value,
126125
chars_format __fmt,
127126
const char* __ptr,
128127
bool __negative) {
129-
using _Traits = _Floating_type_traits<_Tp>;
128+
using _Traits = _Floating_type_traits<_Fp>;
130129
using _Uint_type = typename _Traits::_Uint_type;
131-
ptrdiff_t length = __last - __first;
132-
_LIBCPP_ASSERT_INTERNAL(length > 0, "");
133130

134-
const char* src = __ptr; // rename to match the libc code copied for this section.
131+
const char* src = __ptr; // rename to match the libc code copied for this section.
132+
ptrdiff_t length = __last - src;
133+
_LIBCPP_ASSERT_INTERNAL(length > 0, "");
135134

136135
_Uint_type mantissa = 0;
137136
int exponent = 0;
138137
bool truncated = false;
139138
bool seen_digit = false;
139+
bool has_valid_exponent = false;
140140
bool after_decimal = false;
141141
size_t index = 0;
142142
const size_t BASE = 10;
@@ -145,6 +145,7 @@ from_chars_result __from_chars_floating_point_decimal(
145145

146146
// The loop fills the mantissa with as many digits as it can hold
147147
const _Uint_type bitstype_max_div_by_base = numeric_limits<_Uint_type>::max() / BASE;
148+
148149
while (index < static_cast<size_t>(length)) {
149150
if (LIBC_NAMESPACE::internal::isdigit(src[index])) {
150151
uint32_t digit = src[index] - '0';
@@ -178,7 +179,7 @@ from_chars_result __from_chars_floating_point_decimal(
178179
}
179180

180181
if (!seen_digit)
181-
return {src + index, {}};
182+
return {__first, errc::invalid_argument};
182183

183184
if (index < static_cast<size_t>(length) && LIBC_NAMESPACE::internal::tolower(src[index]) == EXPONENT_MARKER) {
184185
bool has_sign = false;
@@ -187,8 +188,10 @@ from_chars_result __from_chars_floating_point_decimal(
187188
}
188189
if (index + 1 + static_cast<size_t>(has_sign) < static_cast<size_t>(length) &&
189190
LIBC_NAMESPACE::internal::isdigit(src[index + 1 + static_cast<size_t>(has_sign)])) {
191+
has_valid_exponent = true;
190192
++index;
191-
auto result = LIBC_NAMESPACE::internal::strtointeger<int32_t>(src + index, 10);
193+
auto result =
194+
LIBC_NAMESPACE::internal::strtointeger<int32_t>(src + index, 10, static_cast<size_t>(length) - index);
192195
// if (result.has_error())
193196
// output.error = result.error;
194197
int32_t add_to_exponent = result.value;
@@ -199,57 +202,102 @@ from_chars_result __from_chars_floating_point_decimal(
199202

200203
// If the result is in the valid range, then we use it. The valid range is
201204
// also within the int32 range, so this prevents overflow issues.
202-
if (temp_exponent > LIBC_NAMESPACE::fputil::FPBits<_Tp>::MAX_BIASED_EXPONENT) {
203-
exponent = LIBC_NAMESPACE::fputil::FPBits<_Tp>::MAX_BIASED_EXPONENT;
204-
} else if (temp_exponent < -LIBC_NAMESPACE::fputil::FPBits<_Tp>::MAX_BIASED_EXPONENT) {
205-
exponent = -LIBC_NAMESPACE::fputil::FPBits<_Tp>::MAX_BIASED_EXPONENT;
205+
if (temp_exponent > LIBC_NAMESPACE::fputil::FPBits<_Fp>::MAX_BIASED_EXPONENT) {
206+
exponent = LIBC_NAMESPACE::fputil::FPBits<_Fp>::MAX_BIASED_EXPONENT;
207+
} else if (temp_exponent < -LIBC_NAMESPACE::fputil::FPBits<_Fp>::MAX_BIASED_EXPONENT) {
208+
exponent = -LIBC_NAMESPACE::fputil::FPBits<_Fp>::MAX_BIASED_EXPONENT;
206209
} else {
207210
exponent = static_cast<int32_t>(temp_exponent);
208211
}
209212
}
210213
}
211214

212-
LIBC_NAMESPACE::internal::ExpandedFloat<_Tp> expanded_float = {0, 0};
215+
// [charconv.from.chars]
216+
switch (__fmt) {
217+
case chars_format::scientific:
218+
// 6.2 if fmt has chars_format::scientific set but not chars_format::fixed,
219+
// the otherwise optional exponent part shall appear;
220+
if (!has_valid_exponent)
221+
return {__first, errc::invalid_argument};
222+
break;
223+
case chars_format::fixed:
224+
// 6.3 if fmt has chars_format::fixed set but not chars_format::scientific,
225+
// the optional exponent part shall not appear;
226+
if (has_valid_exponent)
227+
return {__first, errc::invalid_argument};
228+
break;
229+
case chars_format::general:
230+
case chars_format::hex: // impossible but it silences the compiler
231+
break;
232+
}
233+
234+
LIBC_NAMESPACE::internal::ExpandedFloat<_Fp> expanded_float = {0, 0};
235+
errc status{};
213236
if (mantissa != 0) {
214-
auto temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Tp>(
237+
auto temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Fp>(
215238
{mantissa, exponent}, truncated, LIBC_NAMESPACE::internal::RoundDirection::Nearest, src, length);
216239
expanded_float = temp.num;
217-
// Note: there's also an error value in temp.error. I'm not doing that error handling right now though.
240+
if (temp.error == ERANGE) {
241+
status = errc::result_out_of_range;
242+
}
218243
}
219244

220-
auto result = LIBC_NAMESPACE::fputil::FPBits<_Tp>();
245+
auto result = LIBC_NAMESPACE::fputil::FPBits<_Fp>();
221246
result.set_mantissa(expanded_float.mantissa);
222247
result.set_biased_exponent(expanded_float.exponent);
248+
249+
// C17 7.12.1/6
250+
// The result underflows if the magnitude of the mathematical result is so
251+
// small that the mathematical re- sult cannot be represented, without
252+
// extraordinary roundoff error, in an object of the specified type.237) If
253+
// the result underflows, the function returns an implementation-defined
254+
// value whose magnitude is no greater than the smallest normalized positive
255+
// number in the specified type; if the integer expression math_errhandling
256+
// & MATH_ERRNO is nonzero, whether errno acquires the value ERANGE is
257+
// implementation-defined; if the integer expression math_errhandling &
258+
// MATH_ERREXCEPT is nonzero, whether the "underflow" floating-point
259+
// exception is raised is implementation-defined.
260+
//
261+
// LLLVM-LIBC sets ERAGNE for subnormal values
262+
//
263+
// [charconv.from.chars]/1
264+
// ... If the parsed value is not in the range representable by the type of
265+
// value, value is unmodified and the member ec of the return value is
266+
// equal to errc::result_out_of_range. ...
267+
//
268+
// Undo the ERANGE for subnormal values.
269+
if (status == errc::result_out_of_range && result.is_subnormal() && !result.is_zero())
270+
status = errc{};
271+
223272
if (__negative)
224273
__value = -result.get_val();
225274
else
226275
__value = result.get_val();
227-
return {src + index, {}};
276+
277+
return {src + index, status};
228278
}
229279

230-
template <floating_point _Tp>
280+
template <floating_point _Fp>
231281
from_chars_result
232-
__from_chars_floating_point(const char* const __first, const char* __last, _Tp& __value, chars_format __fmt) {
282+
__from_chars_floating_point(const char* const __first, const char* __last, _Fp& __value, chars_format __fmt) {
233283
if (__first == __last) [[unlikely]]
234284
return {__first, errc::invalid_argument};
235285

236286
const char* __ptr = __first;
237-
238-
// skip whitespace
239-
while (std::isspace(*__ptr)) {
240-
++__ptr;
241-
if (__ptr == __last) [[unlikely]]
242-
return {__first, errc::invalid_argument}; // is this valid??
243-
}
244-
245-
bool __negative = *__ptr == '-';
287+
bool __negative = *__ptr == '-';
246288
if (__negative) {
247289
++__ptr;
248290
if (__ptr == __last) [[unlikely]]
249291
return {__first, errc::invalid_argument};
250292
}
251293

252-
if (!std::isdigit(*__ptr)) {
294+
// [charconv.from.chars]
295+
// [Note 1: If the pattern allows for an optional sign, but the string has
296+
// no digit characters following the sign, no characters match the pattern.
297+
// — end note]
298+
// This is true for integrals, floating point allows -.0
299+
switch (std::tolower(*__ptr)) {
300+
case 'i':
253301
// TODO Evaluate the other implementations
254302
// [charconv.from.chars]/6.2
255303
// if fmt has chars_format::scientific set but not chars_format::fixed,
@@ -259,20 +307,23 @@ __from_chars_floating_point(const char* const __first, const char* __last, _Tp&
259307
if (__fmt == chars_format::scientific)
260308
return {__first, errc::invalid_argument};
261309

262-
switch (std::tolower(*__ptr)) {
263-
case 'i':
264-
return __from_chars_floating_point_inf(__first, __last, __value, __ptr + 1, __negative);
265-
case 'n':
266-
if constexpr (numeric_limits<_Tp>::has_quiet_NaN)
267-
return __from_chars_floating_point_nan(__first, __last, __value, __ptr + 1, __negative);
268-
[[fallthrough]];
269-
default:
310+
return __from_chars_floating_point_inf(__first, __last, __value, __ptr + 1, __negative);
311+
case 'n':
312+
// TODO Evaluate the other implementations
313+
// [charconv.from.chars]/6.2
314+
// if fmt has chars_format::scientific set but not chars_format::fixed,
315+
// the otherwise optional exponent part shall appear;
316+
// Since INF/NAN do not have an exponent this value is not valid.
317+
// See LWG3456
318+
if (__fmt == chars_format::scientific)
270319
return {__first, errc::invalid_argument};
271-
}
320+
if constexpr (numeric_limits<_Fp>::has_quiet_NaN)
321+
return __from_chars_floating_point_nan(__first, __last, __value, __ptr + 1, __negative);
322+
return {__first, errc::invalid_argument};
272323
}
273324

274325
#if 1
275-
_LIBCPP_ASSERT_INTERNAL(__fmt == std::chars_format::general, "");
326+
_LIBCPP_ASSERT_INTERNAL(__fmt != std::chars_format::hex, "");
276327
#else
277328
if (__fmt == chars_format::hex)
278329
return std::__from_chars_floating_point_hex(__first, __last, __value);

0 commit comments

Comments
 (0)