Skip to content

Commit 1e365ee

Browse files
committed
add a new (til) PEG special
(til term patt) behaves like (sub (to term) patt), but on success it advances to the end of (thru term). It is similar to (sequence (sub (to term) patt) (drop term)), except that it does not evaluate the term pattern a second time after evaluating patt (which may be significant if term contains backreferenced matches).
1 parent b4db22f commit 1e365ee

File tree

3 files changed

+53
-1
lines changed

3 files changed

+53
-1
lines changed

src/core/peg.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,41 @@ static const uint8_t *peg_rule(
548548
return text;
549549
}
550550

551+
case RULE_TIL: {
552+
const uint32_t *rule_terminator = s->bytecode + rule[1];
553+
const uint32_t *rule_subpattern = s->bytecode + rule[2];
554+
555+
const uint8_t *text_start = text;
556+
const uint8_t *terminator_end = NULL;
557+
CapState cs = cap_save(s);
558+
down1(s);
559+
while (text <= s->text_end) {
560+
terminator_end = peg_rule(s, rule_terminator, text);
561+
cap_load(s, cs);
562+
if (terminator_end) {
563+
break;
564+
}
565+
text++;
566+
}
567+
up1(s);
568+
569+
if (!terminator_end) {
570+
return NULL;
571+
}
572+
const uint8_t *saved_end = s->text_end;
573+
s->text_end = text;
574+
down1(s);
575+
const uint8_t *subpattern_end = peg_rule(s, rule_subpattern, text_start);
576+
up1(s);
577+
s->text_end = saved_end;
578+
579+
if (!subpattern_end) {
580+
return NULL;
581+
}
582+
583+
return terminator_end;
584+
}
585+
551586
case RULE_REPLACE:
552587
case RULE_MATCHTIME: {
553588
uint32_t tag = rule[3];
@@ -1189,6 +1224,14 @@ static void spec_split(Builder *b, int32_t argc, const Janet *argv) {
11891224
emit_2(r, RULE_SPLIT, subrule1, subrule2);
11901225
}
11911226

1227+
/* Compile the (til term patt) PEG special.
 * Requires exactly two arguments: argv[0] is the terminator pattern and
 * argv[1] is the subpattern. Both are compiled to sub-rules, which are then
 * emitted as the two operands of a RULE_TIL instruction. */
static void spec_til(Builder *b, int32_t argc, const Janet *argv) {
1228+
peg_fixarity(b, argc, 2);
1229+
Reserve r = reserve(b, 3); /* presumably one slot for the opcode plus two rule operands -- confirm against reserve/emit_2 */
1230+
uint32_t subrule1 = peg_compile1(b, argv[0]);
1231+
uint32_t subrule2 = peg_compile1(b, argv[1]);
1232+
emit_2(r, RULE_TIL, subrule1, subrule2);
1233+
}
1234+
11921235
#ifdef JANET_INT_TYPES
11931236
#define JANET_MAX_READINT_WIDTH 8
11941237
#else
@@ -1275,6 +1318,7 @@ static const SpecialPair peg_specials[] = {
12751318
{"split", spec_split},
12761319
{"sub", spec_sub},
12771320
{"thru", spec_thru},
1321+
{"til", spec_til},
12781322
{"to", spec_to},
12791323
{"uint", spec_uint_le},
12801324
{"uint-be", spec_uint_be},
@@ -1610,6 +1654,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
16101654
break;
16111655
case RULE_SUB:
16121656
case RULE_SPLIT:
1657+
case RULE_TIL:
16131658
/* [rule, rule] */
16141659
if (rule[1] >= blen) goto bad;
16151660
if (rule[2] >= blen) goto bad;

src/include/janet.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2142,7 +2142,8 @@ typedef enum {
21422142
RULE_UNREF, /* [rule, tag] */
21432143
RULE_CAPTURE_NUM, /* [rule, tag] */
21442144
RULE_SUB, /* [rule, rule] */
2145-
RULE_SPLIT /* [rule, rule] */
2145+
RULE_SPLIT, /* [rule, rule] */
2146+
RULE_TIL /* [rule, rule] */
21462147
} JanetPegOpcod;
21472148

21482149
typedef struct {

test/suite-peg.janet

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@
266266
(marshpeg '(sub "abcdf" "abc"))
267267
(marshpeg '(* (sub 1 1)))
268268
(marshpeg '(split "," (+ "a" "b" "c")))
269+
(marshpeg '(til :s+ :w+))
269270

270271
# Peg swallowing errors
271272
# 159651117
@@ -751,5 +752,10 @@
751752
"a b c"
752753
@["a" "b" "c"])
753754

755+
(test "til: find a separator, match before it, then advance past it"
756+
~(* (til "=" '(to -1)) '(to -1))
757+
"word=something"
758+
@["word" "something"])
759+
754760
(end-suite)
755761

0 commit comments

Comments
 (0)