|
39 | 39 | typedef struct {
|
40 | 40 | const uint8_t *text_start;
|
41 | 41 | const uint8_t *text_end;
|
42 |
| - /* text_end will be restricted in a (sub) rule, but |
| 42 | + /* text_end can be restricted by some rules, but |
43 | 43 | outer_text_end will always contain the real end of
|
44 | 44 | input, which we need to generate a line mapping */
|
45 | 45 | const uint8_t *outer_text_end;
|
@@ -510,6 +510,44 @@ static const uint8_t *peg_rule(
|
510 | 510 | return window_end;
|
511 | 511 | }
|
512 | 512 |
|
| 513 | + case RULE_SPLIT: { |
| 514 | + const uint8_t *saved_end = s->text_end; |
| 515 | + const uint32_t *rule_separator = s->bytecode + rule[1]; |
| 516 | + const uint32_t *rule_subpattern = s->bytecode + rule[2]; |
| 517 | + |
| 518 | + const uint8_t *separator_end = NULL; |
| 519 | + do { |
| 520 | + const uint8_t *text_start = text; |
| 521 | + CapState cs = cap_save(s); |
| 522 | + down1(s); |
| 523 | + while (text <= s->text_end) { |
| 524 | + separator_end = peg_rule(s, rule_separator, text); |
| 525 | + cap_load(s, cs); |
| 526 | + if (separator_end) { |
| 527 | + break; |
| 528 | + } |
| 529 | + text++; |
| 530 | + } |
| 531 | + up1(s); |
| 532 | + |
| 533 | + if (separator_end) { |
| 534 | + s->text_end = text; |
| 535 | + text = separator_end; |
| 536 | + } |
| 537 | + |
| 538 | + down1(s); |
| 539 | + const uint8_t *subpattern_end = peg_rule(s, rule_subpattern, text_start); |
| 540 | + up1(s); |
| 541 | + s->text_end = saved_end; |
| 542 | + |
| 543 | + if (!subpattern_end) { |
| 544 | + return NULL; |
| 545 | + } |
| 546 | + } while (separator_end); |
| 547 | + |
| 548 | + return text; |
| 549 | + } |
| 550 | + |
513 | 551 | case RULE_REPLACE:
|
514 | 552 | case RULE_MATCHTIME: {
|
515 | 553 | uint32_t tag = rule[3];
|
@@ -1143,6 +1181,14 @@ static void spec_sub(Builder *b, int32_t argc, const Janet *argv) {
|
1143 | 1181 | emit_2(r, RULE_SUB, subrule1, subrule2);
|
1144 | 1182 | }
|
1145 | 1183 |
|
| 1184 | +static void spec_split(Builder *b, int32_t argc, const Janet *argv) { |
| 1185 | + peg_fixarity(b, argc, 2); |
| 1186 | + Reserve r = reserve(b, 3); |
| 1187 | + uint32_t subrule1 = peg_compile1(b, argv[0]); |
| 1188 | + uint32_t subrule2 = peg_compile1(b, argv[1]); |
| 1189 | + emit_2(r, RULE_SPLIT, subrule1, subrule2); |
| 1190 | +} |
| 1191 | + |
1146 | 1192 | #ifdef JANET_INT_TYPES
|
1147 | 1193 | #define JANET_MAX_READINT_WIDTH 8
|
1148 | 1194 | #else
|
@@ -1226,6 +1272,7 @@ static const SpecialPair peg_specials[] = {
|
1226 | 1272 | {"sequence", spec_sequence},
|
1227 | 1273 | {"set", spec_set},
|
1228 | 1274 | {"some", spec_some},
|
| 1275 | + {"split", spec_split}, |
1229 | 1276 | {"sub", spec_sub},
|
1230 | 1277 | {"thru", spec_thru},
|
1231 | 1278 | {"to", spec_to},
|
@@ -1562,6 +1609,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
|
1562 | 1609 | i += 4;
|
1563 | 1610 | break;
|
1564 | 1611 | case RULE_SUB:
|
| 1612 | + case RULE_SPLIT: |
1565 | 1613 | /* [rule, rule] */
|
1566 | 1614 | if (rule[1] >= blen) goto bad;
|
1567 | 1615 | if (rule[2] >= blen) goto bad;
|
|
0 commit comments