@@ -91,16 +91,20 @@ CXX20_CONSTEXPR fastfloat_really_inline bool is_made_of_eight_digits_fast(const
91
91
return is_made_of_eight_digits_fast (read_u64 (chars));
92
92
}
93
93
94
+ typedef span<const char > byte_span; // pointer-plus-length view over input characters; built from (ptr, size_t) and read back via .ptr / .len() in parse_number_string
95
+
94
96
struct parsed_number_string { // Result record that parse_number_string fills in and returns (as `answer`).
95
- int64_t exponent;
96
- uint64_t mantissa;
97
- const char *lastmatch;
98
- bool negative;
99
- bool valid;
100
- bool too_many_digits;
97
+ int64_t exponent{0 }; // NOTE(review): presumably the decimal exponent computed by parse_number_string; the assignment is outside this view — confirm
98
+ uint64_t mantissa{0 }; // NOTE(review): presumably the accumulated digit value `i`; the assignment is outside this view — confirm
99
+ const char *lastmatch{nullptr }; // NOTE(review): name suggests the position where parsing stopped — confirm against the full function
100
+ bool negative{false }; // NOTE(review): presumably set when the input carries a leading '-' — confirm
101
+ bool valid{false }; // defaults to false, so a default-constructed result reads as "not parsed"
102
+ bool too_many_digits{false }; // set to true by parse_number_string when digit_count exceeds 19
103
+ // The two spans below record where the significant digits sit in the input buffer, so later passes can re-read them without re-tokenizing.
104
+ byte_span integer{}; // non-nullable: digits before the decimal point, spanning start_digits for digit_count characters
105
+ byte_span fraction{}; // nullable: digits after the decimal point; only assigned when a decimal point was seen, otherwise left default-constructed
101
106
};
102
107
103
-
104
108
// Assuming that you use no more than 19 digits, this will
105
109
// parse an ASCII string.
106
110
CXX20_CONSTEXPR fastfloat_really_inline
@@ -125,6 +129,10 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
125
129
126
130
uint64_t i = 0 ; // an unsigned int avoids signed overflows (which are bad)
127
131
132
+ while ((std::distance (p, pend) >= 8 ) && is_made_of_eight_digits_fast (p)) {
133
+ i = i * 100000000 + parse_eight_digits_unrolled (p); // in rare cases, this will overflow, but that's ok
134
+ p += 8 ;
135
+ }
128
136
while ((p != pend) && is_integer (*p)) {
129
137
// a multiplication by 10 is cheaper than an arbitrary integer
130
138
// multiplication
@@ -134,24 +142,24 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
134
142
}
135
143
const char *const end_of_integer_part = p;
136
144
int64_t digit_count = int64_t (end_of_integer_part - start_digits);
145
+ answer.integer = byte_span (start_digits, size_t (digit_count));
137
146
int64_t exponent = 0 ;
138
147
if ((p != pend) && (*p == decimal_point)) {
139
148
++p;
140
- // Fast approach only tested under little endian systems
141
- if ((std::distance (p, pend) >= 8 ) && is_made_of_eight_digits_fast (p)) {
142
- i = i * 100000000 + parse_eight_digits_unrolled (p); // in rare cases, this will overflow, but that's ok
143
- p += 8 ;
144
- if ((std::distance (p, pend) >= 8 ) && is_made_of_eight_digits_fast (p)) {
149
+ const char * before = p;
150
+ // can occur at most twice without overflowing, but let it occur more, since
151
+ // for integers with many digits, digit parsing is the primary bottleneck.
152
+ while ((std::distance (p, pend) >= 8 ) && is_made_of_eight_digits_fast (p)) {
145
153
i = i * 100000000 + parse_eight_digits_unrolled (p); // in rare cases, this will overflow, but that's ok
146
154
p += 8 ;
147
155
}
148
- }
149
156
while ((p != pend) && is_integer (*p)) {
150
157
uint8_t digit = uint8_t (*p - ' 0' );
151
158
++p;
152
159
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
153
160
}
154
- exponent = end_of_integer_part + 1 - p;
161
+ exponent = before - p;
162
+ answer.fraction = byte_span (before, size_t (p - before));
155
163
digit_count -= exponent;
156
164
}
157
165
// we must have encountered at least one integer!
@@ -179,7 +187,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
179
187
} else {
180
188
while ((p != pend) && is_integer (*p)) {
181
189
uint8_t digit = uint8_t (*p - ' 0' );
182
- if (exp_number < 0x10000 ) {
190
+ if (exp_number < 0x10000000 ) {
183
191
exp_number = 10 * exp_number + digit;
184
192
}
185
193
++p;
@@ -212,23 +220,26 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
212
220
if (digit_count > 19 ) {
213
221
answer.too_many_digits = true ;
214
222
// Let us start again, this time, avoiding overflows.
223
+ // We don't need to check if is_integer, since we use the
224
+ // pre-tokenized spans from above.
215
225
i = 0 ;
216
- p = start_digits;
226
+ p = answer.integer .ptr ;
227
+ const char * int_end = p + answer.integer .len ();
217
228
const uint64_t minimal_nineteen_digit_integer{1000000000000000000 };
218
- while ((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer (*p )) {
229
+ while ((i < minimal_nineteen_digit_integer) && (p != int_end )) {
219
230
i = i * 10 + uint64_t (*p - ' 0' );
220
231
++p;
221
232
}
222
233
if (i >= minimal_nineteen_digit_integer) { // We have a big integers
223
234
exponent = end_of_integer_part - p + exp_number;
224
235
} else { // We have a value with a fractional component.
225
- p++; // skip the dot
226
- const char *first_after_period = p;
227
- while ((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer (*p )) {
236
+ p = answer. fraction . ptr ;
237
+ const char * frac_end = p + answer. fraction . len () ;
238
+ while ((i < minimal_nineteen_digit_integer) && (p != frac_end )) {
228
239
i = i * 10 + uint64_t (*p - ' 0' );
229
240
++p;
230
241
}
231
- exponent = first_after_period - p + exp_number;
242
+ exponent = answer. fraction . ptr - p + exp_number;
232
243
}
233
244
// We have now corrected both exponent and i, to a truncated value
234
245
}
@@ -238,108 +249,6 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
238
249
return answer;
239
250
}
240
251
241
-
242
- // This should always succeed since it follows a call to parse_number_string
243
- // This function could be optimized. In particular, we could stop after 19 digits
244
- // and try to bail out. Furthermore, we should be able to recover the computed
245
- // exponent from the pass in parse_number_string.
246
- CXX20_CONSTEXPR fastfloat_really_inline decimal parse_decimal (const char *p, const char *pend, parse_options options) noexcept {
247
- const char decimal_point = options.decimal_point ;
248
-
249
- decimal answer;
250
- answer.num_digits = 0 ;
251
- answer.decimal_point = 0 ;
252
- answer.truncated = false ;
253
- answer.negative = (*p == ' -' );
254
- if (*p == ' -' ) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
255
- ++p;
256
- }
257
- // skip leading zeroes
258
- while ((p != pend) && (*p == ' 0' )) {
259
- ++p;
260
- }
261
- while ((p != pend) && is_integer (*p)) {
262
- if (answer.num_digits < max_digits) {
263
- answer.digits [answer.num_digits ] = uint8_t (*p - ' 0' );
264
- }
265
- answer.num_digits ++;
266
- ++p;
267
- }
268
- if ((p != pend) && (*p == decimal_point)) {
269
- ++p;
270
- const char *first_after_period = p;
271
- // if we have not yet encountered a zero, we have to skip it as well
272
- if (answer.num_digits == 0 ) {
273
- // skip zeros
274
- while ((p != pend) && (*p == ' 0' )) {
275
- ++p;
276
- }
277
- }
278
- // We expect that this loop will often take the bulk of the running time
279
- // because when a value has lots of digits, these digits often
280
- while ((std::distance (p, pend) >= 8 ) && (answer.num_digits + 8 < max_digits)) {
281
- uint64_t val = read_u64 (p);
282
- if (! is_made_of_eight_digits_fast (val)) { break ; }
283
- // We have eight digits, process them in one go!
284
- val -= 0x3030303030303030 ;
285
- write_u64 (answer.digits + answer.num_digits , val);
286
- answer.num_digits += 8 ;
287
- p += 8 ;
288
- }
289
- while ((p != pend) && is_integer (*p)) {
290
- if (answer.num_digits < max_digits) {
291
- answer.digits [answer.num_digits ] = uint8_t (*p - ' 0' );
292
- }
293
- answer.num_digits ++;
294
- ++p;
295
- }
296
- answer.decimal_point = int32_t (first_after_period - p);
297
- }
298
- // We want num_digits to be the number of significant digits, excluding
299
- // leading *and* trailing zeros! Otherwise the truncated flag later is
300
- // going to be misleading.
301
- if (answer.num_digits > 0 ) {
302
- // We potentially need the answer.num_digits > 0 guard because we
303
- // prune leading zeros. So with answer.num_digits > 0, we know that
304
- // we have at least one non-zero digit.
305
- const char *preverse = p - 1 ;
306
- int32_t trailing_zeros = 0 ;
307
- while ((*preverse == ' 0' ) || (*preverse == decimal_point)) {
308
- if (*preverse == ' 0' ) { trailing_zeros++; };
309
- --preverse;
310
- }
311
- answer.decimal_point += int32_t (answer.num_digits );
312
- answer.num_digits -= uint32_t (trailing_zeros);
313
- }
314
- if (answer.num_digits > max_digits) {
315
- answer.truncated = true ;
316
- answer.num_digits = max_digits;
317
- }
318
- if ((p != pend) && ((' e' == *p) || (' E' == *p))) {
319
- ++p;
320
- bool neg_exp = false ;
321
- if ((p != pend) && (' -' == *p)) {
322
- neg_exp = true ;
323
- ++p;
324
- } else if ((p != pend) && (' +' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
325
- ++p;
326
- }
327
- int32_t exp_number = 0 ; // exponential part
328
- while ((p != pend) && is_integer (*p)) {
329
- uint8_t digit = uint8_t (*p - ' 0' );
330
- if (exp_number < 0x10000 ) {
331
- exp_number = 10 * exp_number + digit;
332
- }
333
- ++p;
334
- }
335
- answer.decimal_point += (neg_exp ? -exp_number : exp_number);
336
- }
337
- // In very rare cases, we may have fewer than 19 digits, we want to be able to reliably
338
- // assume that all digits up to max_digit_without_overflow have been initialized.
339
- for (uint32_t i = answer.num_digits ; i < max_digit_without_overflow; i++) { answer.digits [i] = 0 ; }
340
-
341
- return answer;
342
- }
343
252
} // namespace fast_float
344
253
345
254
#endif
0 commit comments