Skip to content

Commit c25313e

Browse files
authored
Merge pull request #884 from byroot/refactor-parser-peek
parser.c: Introduce `peek()` and `eos()` helpers
2 parents c009c85 + 8b39407 commit c25313e

File tree

1 file changed

+85
-69
lines changed

1 file changed

+85
-69
lines changed

ext/json/ext/parser/parser.c

Lines changed: 85 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,18 @@ typedef struct JSON_ParserStateStruct {
406406
int current_nesting;
407407
} JSON_ParserState;
408408

409+
/* Returns true when the cursor has reached or moved past the end of the input (cursor >= end). */
static inline bool eos(JSON_ParserState *state) {
410+
return state->cursor >= state->end;
411+
}
412+
413+
/*
 * Returns the byte at the cursor without advancing it, or 0 when eos(state)
 * reports that no input remains.
 *
 * Because 0 doubles as the end-of-input sentinel, the return value alone
 * cannot distinguish a literal NUL byte at the cursor from end of input.
 */
static inline char peek(JSON_ParserState *state)
414+
{
415+
/* Bounds check first so the cursor is never dereferenced at or past the end. */
if (RB_UNLIKELY(eos(state))) {
416+
return 0;
417+
}
418+
return *state->cursor;
419+
}
420+
409421
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
410422
{
411423
const char *cursor = state->cursor;
@@ -571,7 +583,7 @@ json_eat_comments(JSON_ParserState *state)
571583
raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
572584
} else {
573585
state->cursor++;
574-
if (state->cursor < state->end && *state->cursor == '/') {
586+
if (peek(state) == '/') {
575587
state->cursor++;
576588
break;
577589
}
@@ -591,11 +603,12 @@ json_eat_comments(JSON_ParserState *state)
591603
static inline void
592604
json_eat_whitespace(JSON_ParserState *state)
593605
{
594-
while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) {
595-
if (RB_LIKELY(*state->cursor != '/')) {
596-
state->cursor++;
597-
} else {
606+
unsigned char cursor;
607+
while (RB_UNLIKELY(whitespace[cursor = (unsigned char)peek(state)])) {
608+
if (RB_UNLIKELY(cursor == '/')) {
598609
json_eat_comments(state);
610+
} else {
611+
state->cursor++;
599612
}
600613
}
601614
}
@@ -980,7 +993,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
980993
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
981994
#endif /* HAVE_SIMD */
982995

983-
while (state->cursor < state->end) {
996+
while (!eos(state)) {
984997
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
985998
return 1;
986999
}
@@ -1025,8 +1038,10 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
10251038
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
10261039
{
10271040
const char *start = state->cursor;
1028-
while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) {
1029-
*accumulator = *accumulator * 10 + (*state->cursor - '0');
1041+
char next_char;
1042+
1043+
while (rb_isdigit(next_char = peek(state))) {
1044+
*accumulator = *accumulator * 10 + (next_char - '0');
10301045
state->cursor++;
10311046
}
10321047
return (int)(state->cursor - start);
@@ -1050,7 +1065,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
10501065
}
10511066

10521067
// Parse fractional part
1053-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
1068+
if (peek(state) == '.') {
10541069
integer = false;
10551070
decimal_point_pos = mantissa_digits; // Remember position of decimal point
10561071
state->cursor++;
@@ -1064,13 +1079,14 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
10641079
}
10651080

10661081
// Parse exponent
1067-
if ((state->cursor < state->end) && ((rb_tolower(*state->cursor) == 'e'))) {
1082+
if (rb_tolower(peek(state)) == 'e') {
10681083
integer = false;
10691084
state->cursor++;
10701085

10711086
bool negative_exponent = false;
1072-
if ((state->cursor < state->end) && ((*state->cursor == '-') || (*state->cursor == '+'))) {
1073-
negative_exponent = (*state->cursor == '-');
1087+
const char next_char = peek(state);
1088+
if (next_char == '-' || next_char == '+') {
1089+
negative_exponent = next_char == '-';
10741090
state->cursor++;
10751091
}
10761092

@@ -1111,11 +1127,8 @@ static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_Par
11111127
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
11121128
{
11131129
json_eat_whitespace(state);
1114-
if (state->cursor >= state->end) {
1115-
raise_parse_error("unexpected end of input", state);
1116-
}
11171130

1118-
switch (*state->cursor) {
1131+
switch (peek(state)) {
11191132
case 'n':
11201133
if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) {
11211134
state->cursor += 4;
@@ -1184,7 +1197,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
11841197
json_eat_whitespace(state);
11851198
long stack_head = state->stack->head;
11861199

1187-
if ((state->cursor < state->end) && (*state->cursor == ']')) {
1200+
if (peek(state) == ']') {
11881201
state->cursor++;
11891202
return json_push_value(state, config, json_decode_array(state, config, 0));
11901203
} else {
@@ -1199,26 +1212,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
11991212
while (true) {
12001213
json_eat_whitespace(state);
12011214

1202-
if (state->cursor < state->end) {
1203-
if (*state->cursor == ']') {
1204-
state->cursor++;
1205-
long count = state->stack->head - stack_head;
1206-
state->current_nesting--;
1207-
state->in_array--;
1208-
return json_push_value(state, config, json_decode_array(state, config, count));
1209-
}
1215+
const char next_char = peek(state);
12101216

1211-
if (*state->cursor == ',') {
1212-
state->cursor++;
1213-
if (config->allow_trailing_comma) {
1214-
json_eat_whitespace(state);
1215-
if ((state->cursor < state->end) && (*state->cursor == ']')) {
1216-
continue;
1217-
}
1217+
if (RB_LIKELY(next_char == ',')) {
1218+
state->cursor++;
1219+
if (config->allow_trailing_comma) {
1220+
json_eat_whitespace(state);
1221+
if (peek(state) == ']') {
1222+
continue;
12181223
}
1219-
json_parse_any(state, config);
1220-
continue;
12211224
}
1225+
json_parse_any(state, config);
1226+
continue;
1227+
}
1228+
1229+
if (next_char == ']') {
1230+
state->cursor++;
1231+
long count = state->stack->head - stack_head;
1232+
state->current_nesting--;
1233+
state->in_array--;
1234+
return json_push_value(state, config, json_decode_array(state, config, count));
12221235
}
12231236

12241237
raise_parse_error("expected ',' or ']' after array value", state);
@@ -1232,7 +1245,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
12321245
json_eat_whitespace(state);
12331246
long stack_head = state->stack->head;
12341247

1235-
if ((state->cursor < state->end) && (*state->cursor == '}')) {
1248+
if (peek(state) == '}') {
12361249
state->cursor++;
12371250
return json_push_value(state, config, json_decode_object(state, config, 0));
12381251
} else {
@@ -1241,13 +1254,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
12411254
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
12421255
}
12431256

1244-
if (*state->cursor != '"') {
1257+
if (peek(state) != '"') {
12451258
raise_parse_error("expected object key, got %s", state);
12461259
}
12471260
json_parse_string(state, config, true);
12481261

12491262
json_eat_whitespace(state);
1250-
if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1263+
if (peek(state) != ':') {
12511264
raise_parse_error("expected ':' after object key", state);
12521265
}
12531266
state->cursor++;
@@ -1258,53 +1271,56 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
12581271
while (true) {
12591272
json_eat_whitespace(state);
12601273

1261-
if (state->cursor < state->end) {
1262-
if (*state->cursor == '}') {
1263-
state->cursor++;
1264-
state->current_nesting--;
1265-
size_t count = state->stack->head - stack_head;
1274+
const char next_char = peek(state);
1275+
if (next_char == '}') {
1276+
state->cursor++;
1277+
state->current_nesting--;
1278+
size_t count = state->stack->head - stack_head;
12661279

1267-
// Temporary rewind cursor in case an error is raised
1268-
const char *final_cursor = state->cursor;
1269-
state->cursor = object_start_cursor;
1270-
VALUE object = json_decode_object(state, config, count);
1271-
state->cursor = final_cursor;
1280+
// Temporary rewind cursor in case an error is raised
1281+
const char *final_cursor = state->cursor;
1282+
state->cursor = object_start_cursor;
1283+
VALUE object = json_decode_object(state, config, count);
1284+
state->cursor = final_cursor;
12721285

1273-
return json_push_value(state, config, object);
1274-
}
1286+
return json_push_value(state, config, object);
1287+
}
12751288

1276-
if (*state->cursor == ',') {
1277-
state->cursor++;
1278-
json_eat_whitespace(state);
1289+
if (next_char == ',') {
1290+
state->cursor++;
1291+
json_eat_whitespace(state);
12791292

1280-
if (config->allow_trailing_comma) {
1281-
if ((state->cursor < state->end) && (*state->cursor == '}')) {
1282-
continue;
1283-
}
1293+
if (config->allow_trailing_comma) {
1294+
if (peek(state) == '}') {
1295+
continue;
12841296
}
1297+
}
12851298

1286-
if (*state->cursor != '"') {
1287-
raise_parse_error("expected object key, got: %s", state);
1288-
}
1289-
json_parse_string(state, config, true);
1299+
if (RB_UNLIKELY(peek(state) != '"')) {
1300+
raise_parse_error("expected object key, got: %s", state);
1301+
}
1302+
json_parse_string(state, config, true);
12901303

1291-
json_eat_whitespace(state);
1292-
if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1293-
raise_parse_error("expected ':' after object key, got: %s", state);
1294-
}
1295-
state->cursor++;
1304+
json_eat_whitespace(state);
1305+
if (RB_UNLIKELY(peek(state) != ':')) {
1306+
raise_parse_error("expected ':' after object key, got: %s", state);
1307+
}
1308+
state->cursor++;
12961309

1297-
json_parse_any(state, config);
1310+
json_parse_any(state, config);
12981311

1299-
continue;
1300-
}
1312+
continue;
13011313
}
13021314

13031315
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
13041316
}
13051317
break;
13061318
}
13071319

1320+
case 0:
1321+
raise_parse_error("unexpected end of input", state);
1322+
break;
1323+
13081324
default:
13091325
raise_parse_error("unexpected character: %s", state);
13101326
break;
@@ -1316,7 +1332,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
13161332
static void json_ensure_eof(JSON_ParserState *state)
13171333
{
13181334
json_eat_whitespace(state);
1319-
if (state->cursor != state->end) {
1335+
if (!eos(state)) {
13201336
raise_parse_error("unexpected token at end of stream %s", state);
13211337
}
13221338
}

0 commit comments

Comments
 (0)