@@ -10994,7 +10994,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
10994
10994
STATIC regnode_offset
10995
10995
S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
10996
10996
I32 *flagp,
10997
- char * parse_start ,
10997
+ char * backref_parse_start ,
10998
10998
char ch
10999
10999
)
11000
11000
{
@@ -11013,7 +11013,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
11013
11013
}
11014
11014
if (RExC_parse == name_start || *RExC_parse != ch) {
11015
11015
/* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
11016
- vFAIL2("Sequence %.3s... not terminated", parse_start );
11016
+ vFAIL2("Sequence %.3s... not terminated", backref_parse_start );
11017
11017
}
11018
11018
11019
11019
if (sv_dat) {
@@ -11115,8 +11115,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
11115
11115
SV * max_open; /* Max number of unclosed parens */
11116
11116
I32 was_in_lookaround = RExC_in_lookaround;
11117
11117
11118
- char * parse_start = RExC_parse; /* MJD */
11119
- char * const oregcomp_parse = RExC_parse;
11118
+ /* The difference between the following variables can be seen with *
11119
+ * the broken pattern /(?:foo/ where segment_parse_start will point *
11120
+ * at the 'f', and reg_parse_start will point at the '(' */
11121
+
11122
+ /* the following is used for unmatched '(' errors */
11123
+ char * const reg_parse_start = RExC_parse;
11124
+
11125
+ /* the following is used to track where various segments of
11126
+ * the pattern that we parse out started. */
11127
+ char * segment_parse_start = RExC_parse;
11120
11128
11121
11129
DECLARE_AND_GET_RE_DEBUG_FLAGS;
11122
11130
@@ -11501,7 +11509,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
11501
11509
else if (paren == '=') { /* (?P=...) named backref */
11502
11510
RExC_parse++;
11503
11511
return handle_named_backref(pRExC_state, flagp,
11504
- parse_start , ')');
11512
+ segment_parse_start , ')');
11505
11513
}
11506
11514
RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
11507
11515
/* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
@@ -11652,7 +11660,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
11652
11660
/*notreached*/
11653
11661
/* named and numeric backreferences */
11654
11662
case '&': /* (?&NAME) */
11655
- parse_start = RExC_parse - 1;
11663
+ segment_parse_start = RExC_parse - 1;
11656
11664
named_recursion:
11657
11665
{
11658
11666
SV *sv_dat = reg_scan_name(pRExC_state,
@@ -11683,7 +11691,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
11683
11691
{
11684
11692
bool is_neg = FALSE;
11685
11693
UV unum;
11686
- parse_start = RExC_parse - 1; /* MJD */
11694
+ segment_parse_start = RExC_parse - 1;
11687
11695
if (*RExC_parse == '-') {
11688
11696
RExC_parse++;
11689
11697
is_neg = TRUE;
@@ -12181,7 +12189,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
12181
12189
12182
12190
parse_rest:
12183
12191
/* Pick up the branches, linking them together. */
12184
- parse_start = RExC_parse;
12192
+ segment_parse_start = RExC_parse;
12185
12193
br = regbranch(pRExC_state, &flags, 1, depth+1);
12186
12194
12187
12195
/* branch_len = (paren != 0); */
@@ -12406,7 +12414,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
12406
12414
set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
12407
12415
}
12408
12416
if (RExC_parse >= RExC_end || UCHARAT(RExC_parse) != ')') {
12409
- RExC_parse = oregcomp_parse ;
12417
+ RExC_parse = reg_parse_start ;
12410
12418
vFAIL("Unmatched (");
12411
12419
}
12412
12420
nextchar(pRExC_state);
@@ -13509,7 +13517,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
13509
13517
{
13510
13518
regnode_offset ret = 0;
13511
13519
I32 flags = 0;
13512
- char *parse_start ;
13520
+ char *atom_parse_start ;
13513
13521
U8 op;
13514
13522
int invert = 0;
13515
13523
@@ -13522,7 +13530,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
13522
13530
PERL_ARGS_ASSERT_REGATOM;
13523
13531
13524
13532
tryagain:
13525
- parse_start = RExC_parse;
13533
+ atom_parse_start = RExC_parse;
13526
13534
assert(RExC_parse < RExC_end);
13527
13535
switch ((U8)*RExC_parse) {
13528
13536
case '^':
@@ -13553,7 +13561,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
13553
13561
break;
13554
13562
case '[':
13555
13563
{
13556
- char * const oregcomp_parse = ++RExC_parse;
13564
+ char * const cc_parse_start = ++RExC_parse;
13557
13565
ret = regclass(pRExC_state, flagp, depth+1,
13558
13566
FALSE, /* means parse the whole char class */
13559
13567
TRUE, /* allow multi-char folds */
@@ -13567,7 +13575,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
13567
13575
(UV) *flagp);
13568
13576
}
13569
13577
if (*RExC_parse != ']') {
13570
- RExC_parse = oregcomp_parse ;
13578
+ RExC_parse = cc_parse_start ;
13571
13579
vFAIL("Unmatched [");
13572
13580
}
13573
13581
nextchar(pRExC_state);
@@ -13854,7 +13862,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
13854
13862
/* The escapes above that don't take a parameter can't be
13855
13863
* followed by a '{'. But 'pX', 'p{foo}' and
13856
13864
* correspondingly 'P' can be */
13857
- if ( RExC_parse - parse_start == 1
13865
+ if ( RExC_parse - atom_parse_start == 1
13858
13866
&& UCHARAT(RExC_parse + 1) == '{'
13859
13867
&& UNLIKELY(! regcurly(RExC_parse + 1, RExC_end, NULL)))
13860
13868
{
@@ -13892,7 +13900,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
13892
13900
RETURN_FAIL_ON_RESTART_FLAGP(flagp);
13893
13901
13894
13902
/* Here, evaluates to a single code point. Go get that */
13895
- RExC_parse = parse_start ;
13903
+ RExC_parse = atom_parse_start ;
13896
13904
goto defchar;
13897
13905
13898
13906
case 'k': /* Handle \k<NAME> and \k'NAME' and \k{NAME} */
@@ -13906,7 +13914,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
13906
13914
{
13907
13915
RExC_parse++;
13908
13916
/* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
13909
- vFAIL2("Sequence %.2s... not terminated", parse_start );
13917
+ vFAIL2("Sequence %.2s... not terminated", atom_parse_start );
13910
13918
} else {
13911
13919
RExC_parse += 2;
13912
13920
if (ch == '{') {
@@ -13916,7 +13924,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
13916
13924
}
13917
13925
ret = handle_named_backref(pRExC_state,
13918
13926
flagp,
13919
- parse_start ,
13927
+ atom_parse_start ,
13920
13928
(ch == '<')
13921
13929
? '>'
13922
13930
: (ch == '{')
@@ -14027,7 +14035,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
14027
14035
* to be an octal character escape, e.g. \35 or \777.
14028
14036
* The above logic should make it obvious why using
14029
14037
* octal escapes in patterns is problematic. - Yves */
14030
- RExC_parse = parse_start ;
14038
+ RExC_parse = atom_parse_start ;
14031
14039
goto defchar;
14032
14040
}
14033
14041
}
@@ -14089,7 +14097,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
14089
14097
default:
14090
14098
/* Do not generate "unrecognized" warnings here, we fall
14091
14099
back into the quick-grab loop below */
14092
- RExC_parse = parse_start ;
14100
+ RExC_parse = atom_parse_start ;
14093
14101
goto defchar;
14094
14102
} /* end of switch on a \foo sequence */
14095
14103
break;
@@ -14328,7 +14336,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
14328
14336
goto loopdone;
14329
14337
}
14330
14338
p = RExC_parse;
14331
- RExC_parse = parse_start ;
14339
+ RExC_parse = atom_parse_start ;
14332
14340
14333
14341
/* The \N{} means the pattern, if previously /d,
14334
14342
* becomes /u. That means it can't be an EXACTF node,
@@ -14518,7 +14526,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
14518
14526
* string of characters instead of a meta construct */
14519
14527
if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) {
14520
14528
if ( RExC_strict
14521
- || ( p > parse_start + 1
14529
+ || ( p > atom_parse_start + 1
14522
14530
&& isALPHA_A(*(p - 1))
14523
14531
&& *(p - 2) == '\\'))
14524
14532
{
0 commit comments