Skip to content

Commit 1e237d4

Browse files
byrootsamyron
andcommitted
Use SWAR for parsing integers on little endian machines
Closes: #878 ``` == Parsing float parsing (2251051 bytes) ruby 3.4.6 (2025-09-16 revision dbd83256b1) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 23.000 i/100ms Calculating ------------------------------------- after 214.382 (± 0.5%) i/s (4.66 ms/i) - 1.081k in 5.042555s Comparison: before: 189.5 i/s after: 214.4 i/s - 1.13x faster ``` Co-Authored-By: Scott Myron <samyron@gmail.com>
1 parent 11f4e7b commit 1e237d4

File tree

1 file changed

+53
-1
lines changed

1 file changed

+53
-1
lines changed

ext/json/ext/parser/parser.c

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1039,11 +1039,63 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
10391039
return Qfalse;
10401040
}
10411041

1042+
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1043+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
1044+
// Additional References:
1045+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
1046+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
1047+
/*
 * Convert eight ASCII decimal digits packed into one 64-bit word into the
 * number they spell.
 *
 * The least significant byte of `val` is treated as the most significant
 * decimal digit, i.e. the layout obtained by loading 8 text bytes into a
 * uint64_t on a little-endian machine.
 *
 * The function does not validate its input: every byte of `val` is expected
 * to be in '0'..'9'. The result for any other input is meaningless.
 *
 * Technique (SWAR): adjacent digits are folded into two-digit values, then two
 * multiplications combine the four pairs with their decimal weights so that
 * the final value lands in the upper 32 bits.
 */
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
    const uint64_t ascii_zeros = 0x3030303030303030;
    const uint64_t pair_mask = 0x000000FF000000FF;
    const uint64_t even_weights = 0x000F424000000064; // 100 + (1000000ULL << 32)
    const uint64_t odd_weights = 0x0000271000000001;  // 1 + (10000ULL << 32)

    // Turn each ASCII byte into its digit value 0..9.
    const uint64_t digits = val - ascii_zeros;

    // Each byte k now holds 10 * digit[k] + digit[k + 1]; only the even bytes
    // (0, 2, 4, 6) are used below. No carry can occur since a pair is <= 99.
    const uint64_t pairs = (digits * 10) + (digits >> 8);

    // Pairs from bytes 0 and 4, and pairs from bytes 2 and 6, each aligned to
    // the low byte of a 32-bit lane.
    const uint64_t even_pairs = pairs & pair_mask;
    const uint64_t odd_pairs = (pairs >> 16) & pair_mask;

    // The weighted sum accumulates in the upper 32 bits of the products.
    return ((even_pairs * even_weights) + (odd_pairs * odd_weights)) >> 32;
}
1056+
1057+
/*
 * Convert four ASCII decimal digits held in the low 32 bits of `large_val`
 * into the number they spell. The upper 32 bits of `large_val` are ignored.
 *
 * The least significant byte is treated as the most significant decimal
 * digit, i.e. the layout obtained by loading text bytes into an integer on a
 * little-endian machine.
 *
 * The function does not validate its input: the four low bytes are expected
 * to be in '0'..'9'. The result for any other input is meaningless.
 */
static inline uint64_t decode_4digits_unrolled(uint64_t large_val) {
    const uint32_t ascii_zeros = 0x30303030;
    const uint32_t byte_mask = 0x000000FF;
    const uint32_t leading_weight = 100;

    // Keep only the four low bytes and turn each into its digit value 0..9.
    const uint32_t digits = (uint32_t)large_val - ascii_zeros;

    // Byte 0 becomes 10 * d0 + d1 and byte 2 becomes 10 * d2 + d3.
    const uint32_t pairs = (digits * 10) + (digits >> 8);

    const uint32_t leading_pair = pairs & byte_mask;
    const uint32_t trailing_pair = (pairs >> 16) & byte_mask;

    return (leading_pair * leading_weight) + trailing_pair;
}
1067+
#endif
1068+
10421069
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
10431070
{
10441071
const char *start = state->cursor;
1045-
char next_char;
10461072

1073+
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1074+
while (rest(state) >= 8) {
1075+
uint64_t next_8bytes;
1076+
memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
1077+
1078+
// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
1079+
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
1080+
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
1081+
1082+
if (match == 0x3333333333333333) { // 8 consecutive digits
1083+
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
1084+
state->cursor += 8;
1085+
continue;
1086+
}
1087+
1088+
if ((match & 0xFFFFFFFF) == 0x33333333) { // 4 consecutive digits
1089+
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled(next_8bytes);
1090+
state->cursor += 4;
1091+
break;
1092+
}
1093+
1094+
break;
1095+
}
1096+
#endif
1097+
1098+
char next_char;
10471099
while (rb_isdigit(next_char = peek(state))) {
10481100
*accumulator = *accumulator * 10 + (next_char - '0');
10491101
state->cursor++;

0 commit comments

Comments
 (0)