Skip to content

Commit 3f0ba09

Browse files
authored
Merge pull request #96 from Alexhuszagh/bigint
Implement the big-integer arithmetic algorithm.
2 parents 25b240a + fc0c868 commit 3f0ba09

File tree

8 files changed

+1140
-189
lines changed

8 files changed

+1140
-189
lines changed

include/fast_float/ascii_number.h

Lines changed: 32 additions & 123 deletions
Original file line number | Diff line number | Diff line change
@@ -91,16 +91,20 @@ CXX20_CONSTEXPR fastfloat_really_inline bool is_made_of_eight_digits_fast(const
9191
return is_made_of_eight_digits_fast(read_u64(chars));
9292
}
9393

94+
typedef span<const char> byte_span;
95+
9496
struct parsed_number_string {
95-
int64_t exponent;
96-
uint64_t mantissa;
97-
const char *lastmatch;
98-
bool negative;
99-
bool valid;
100-
bool too_many_digits;
97+
int64_t exponent{0};
98+
uint64_t mantissa{0};
99+
const char *lastmatch{nullptr};
100+
bool negative{false};
101+
bool valid{false};
102+
bool too_many_digits{false};
103+
// contains the range of the significant digits
104+
byte_span integer{}; // non-nullable
105+
byte_span fraction{}; // nullable
101106
};
102107

103-
104108
// Assuming that you use no more than 19 digits, this will
105109
// parse an ASCII string.
106110
CXX20_CONSTEXPR fastfloat_really_inline
@@ -125,6 +129,10 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
125129

126130
uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
127131

132+
while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
133+
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
134+
p += 8;
135+
}
128136
while ((p != pend) && is_integer(*p)) {
129137
// a multiplication by 10 is cheaper than an arbitrary integer
130138
// multiplication
@@ -134,24 +142,24 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
134142
}
135143
const char *const end_of_integer_part = p;
136144
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
145+
answer.integer = byte_span(start_digits, size_t(digit_count));
137146
int64_t exponent = 0;
138147
if ((p != pend) && (*p == decimal_point)) {
139148
++p;
140-
// Fast approach only tested under little endian systems
141-
if ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
142-
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
143-
p += 8;
144-
if ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
149+
const char* before = p;
150+
// can occur at most twice without overflowing, but let it occur more, since
151+
// for integers with many digits, digit parsing is the primary bottleneck.
152+
while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
145153
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
146154
p += 8;
147155
}
148-
}
149156
while ((p != pend) && is_integer(*p)) {
150157
uint8_t digit = uint8_t(*p - '0');
151158
++p;
152159
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
153160
}
154-
exponent = end_of_integer_part + 1 - p;
161+
exponent = before - p;
162+
answer.fraction = byte_span(before, size_t(p - before));
155163
digit_count -= exponent;
156164
}
157165
// we must have encountered at least one integer!
@@ -179,7 +187,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
179187
} else {
180188
while ((p != pend) && is_integer(*p)) {
181189
uint8_t digit = uint8_t(*p - '0');
182-
if (exp_number < 0x10000) {
190+
if (exp_number < 0x10000000) {
183191
exp_number = 10 * exp_number + digit;
184192
}
185193
++p;
@@ -212,23 +220,26 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
212220
if (digit_count > 19) {
213221
answer.too_many_digits = true;
214222
// Let us start again, this time, avoiding overflows.
223+
// We don't need to check if is_integer, since we use the
224+
// pre-tokenized spans from above.
215225
i = 0;
216-
p = start_digits;
226+
p = answer.integer.ptr;
227+
const char* int_end = p + answer.integer.len();
217228
const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
218-
while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
229+
while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
219230
i = i * 10 + uint64_t(*p - '0');
220231
++p;
221232
}
222233
if (i >= minimal_nineteen_digit_integer) { // We have a big integers
223234
exponent = end_of_integer_part - p + exp_number;
224235
} else { // We have a value with a fractional component.
225-
p++; // skip the dot
226-
const char *first_after_period = p;
227-
while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
236+
p = answer.fraction.ptr;
237+
const char* frac_end = p + answer.fraction.len();
238+
while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
228239
i = i * 10 + uint64_t(*p - '0');
229240
++p;
230241
}
231-
exponent = first_after_period - p + exp_number;
242+
exponent = answer.fraction.ptr - p + exp_number;
232243
}
233244
// We have now corrected both exponent and i, to a truncated value
234245
}
@@ -238,108 +249,6 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
238249
return answer;
239250
}
240251

241-
242-
// This should always succeed since it follows a call to parse_number_string
243-
// This function could be optimized. In particular, we could stop after 19 digits
244-
// and try to bail out. Furthermore, we should be able to recover the computed
245-
// exponent from the pass in parse_number_string.
246-
CXX20_CONSTEXPR fastfloat_really_inline decimal parse_decimal(const char *p, const char *pend, parse_options options) noexcept {
247-
const char decimal_point = options.decimal_point;
248-
249-
decimal answer;
250-
answer.num_digits = 0;
251-
answer.decimal_point = 0;
252-
answer.truncated = false;
253-
answer.negative = (*p == '-');
254-
if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
255-
++p;
256-
}
257-
// skip leading zeroes
258-
while ((p != pend) && (*p == '0')) {
259-
++p;
260-
}
261-
while ((p != pend) && is_integer(*p)) {
262-
if (answer.num_digits < max_digits) {
263-
answer.digits[answer.num_digits] = uint8_t(*p - '0');
264-
}
265-
answer.num_digits++;
266-
++p;
267-
}
268-
if ((p != pend) && (*p == decimal_point)) {
269-
++p;
270-
const char *first_after_period = p;
271-
// if we have not yet encountered a zero, we have to skip it as well
272-
if(answer.num_digits == 0) {
273-
// skip zeros
274-
while ((p != pend) && (*p == '0')) {
275-
++p;
276-
}
277-
}
278-
// We expect that this loop will often take the bulk of the running time
279-
// because when a value has lots of digits, these digits often
280-
while ((std::distance(p, pend) >= 8) && (answer.num_digits + 8 < max_digits)) {
281-
uint64_t val = read_u64(p);
282-
if(! is_made_of_eight_digits_fast(val)) { break; }
283-
// We have eight digits, process them in one go!
284-
val -= 0x3030303030303030;
285-
write_u64(answer.digits + answer.num_digits, val);
286-
answer.num_digits += 8;
287-
p += 8;
288-
}
289-
while ((p != pend) && is_integer(*p)) {
290-
if (answer.num_digits < max_digits) {
291-
answer.digits[answer.num_digits] = uint8_t(*p - '0');
292-
}
293-
answer.num_digits++;
294-
++p;
295-
}
296-
answer.decimal_point = int32_t(first_after_period - p);
297-
}
298-
// We want num_digits to be the number of significant digits, excluding
299-
// leading *and* trailing zeros! Otherwise the truncated flag later is
300-
// going to be misleading.
301-
if(answer.num_digits > 0) {
302-
// We potentially need the answer.num_digits > 0 guard because we
303-
// prune leading zeros. So with answer.num_digits > 0, we know that
304-
// we have at least one non-zero digit.
305-
const char *preverse = p - 1;
306-
int32_t trailing_zeros = 0;
307-
while ((*preverse == '0') || (*preverse == decimal_point)) {
308-
if(*preverse == '0') { trailing_zeros++; };
309-
--preverse;
310-
}
311-
answer.decimal_point += int32_t(answer.num_digits);
312-
answer.num_digits -= uint32_t(trailing_zeros);
313-
}
314-
if(answer.num_digits > max_digits) {
315-
answer.truncated = true;
316-
answer.num_digits = max_digits;
317-
}
318-
if ((p != pend) && (('e' == *p) || ('E' == *p))) {
319-
++p;
320-
bool neg_exp = false;
321-
if ((p != pend) && ('-' == *p)) {
322-
neg_exp = true;
323-
++p;
324-
} else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
325-
++p;
326-
}
327-
int32_t exp_number = 0; // exponential part
328-
while ((p != pend) && is_integer(*p)) {
329-
uint8_t digit = uint8_t(*p - '0');
330-
if (exp_number < 0x10000) {
331-
exp_number = 10 * exp_number + digit;
332-
}
333-
++p;
334-
}
335-
answer.decimal_point += (neg_exp ? -exp_number : exp_number);
336-
}
337-
// In very rare cases, we may have fewer than 19 digits, we want to be able to reliably
338-
// assume that all digits up to max_digit_without_overflow have been initialized.
339-
for(uint32_t i = answer.num_digits; i < max_digit_without_overflow; i++) { answer.digits[i] = 0; }
340-
341-
return answer;
342-
}
343252
} // namespace fast_float
344253

345254
#endif

0 commit comments

Comments
 (0)