Skip to content

Commit acbf40b

Browse files
authored
Merge pull request #886 from byroot/parser-whitespace-switch
parser.c: Use SWAR to skip consecutive spaces
2 parents 6f1d3c3 + b3fd7b2 commit acbf40b

File tree

2 files changed

+33
-16
lines changed

2 files changed

+33
-16
lines changed

ext/json/ext/parser/parser.c

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -557,14 +557,6 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
557557

558558
static const rb_data_type_t JSON_ParserConfig_type;
559559

560-
static const bool whitespace[256] = {
561-
[' '] = 1,
562-
['\t'] = 1,
563-
['\n'] = 1,
564-
['\r'] = 1,
565-
['/'] = 1,
566-
};
567-
568560
static void
569561
json_eat_comments(JSON_ParserState *state)
570562
{
@@ -607,12 +599,38 @@ json_eat_comments(JSON_ParserState *state)
607599
static inline void
608600
json_eat_whitespace(JSON_ParserState *state)
609601
{
610-
unsigned char cursor;
611-
while (RB_UNLIKELY(whitespace[cursor = (unsigned char)peek(state)])) {
612-
if (RB_UNLIKELY(cursor == '/')) {
613-
json_eat_comments(state);
614-
} else {
615-
state->cursor++;
602+
while (true) {
603+
switch (peek(state)) {
604+
case ' ':
605+
state->cursor++;
606+
break;
607+
case '\n':
608+
state->cursor++;
609+
610+
// Heuristic: if we see a newline, there are likely consecutive spaces after it.
611+
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
612+
while (rest(state) > 8) {
613+
uint64_t chunk;
614+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
615+
size_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
616+
617+
state->cursor += consecutive_spaces;
618+
if (consecutive_spaces != 8) {
619+
break;
620+
}
621+
}
622+
#endif
623+
break;
624+
case '\t':
625+
case '\r':
626+
state->cursor++;
627+
break;
628+
case '/':
629+
json_eat_comments(state);
630+
break;
631+
632+
default:
633+
return;
616634
}
617635
}
618636
}

ext/json/ext/simd/simd.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ typedef enum {
44
SIMD_SSE2
55
} SIMD_Implementation;
66

7-
#ifdef JSON_ENABLE_SIMD
8-
97
#ifdef __clang__
108
# if __has_builtin(__builtin_ctzll)
119
# define HAVE_BUILTIN_CTZLL 1
@@ -54,6 +52,7 @@ static inline int trailing_zeros(int input)
5452
#define FORCE_INLINE
5553
#endif
5654

55+
#ifdef JSON_ENABLE_SIMD
5756

5857
#define SIMD_MINIMUM_THRESHOLD 6
5958

0 commit comments

Comments
 (0)