Skip to content

Commit 109abe1

Browse files
committed
regcomp.c: disambiguate "parse_start" and related var names
This was originally done to make the cleanup of the offsets debug logic easier to follow and understand. 'parse_start' was heavily used in multiple functions, and given the size of the functions in regcomp.c it was often not clear which parse_start was which. 'oregcomp_parse' was also used in a similar way. This patch disambiguates them all so that each is uniquely named, relevant to the code it operates on, and of the form "thing_parse_start" (or "thing_parse_start_const" where both were in use).
1 parent 52becc4 commit 109abe1

File tree

3 files changed

+32
-24
lines changed

3 files changed

+32
-24
lines changed

embed.fnc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2111,7 +2111,7 @@ ES |void|add_above_Latin1_folds|NN RExC_state_t *pRExC_state|const U8 cp \
21112111
|NN SV** invlist
21122112
ES |regnode_offset|handle_named_backref|NN RExC_state_t *pRExC_state \
21132113
|NN I32 *flagp \
2114-
|NN char * parse_start \
2114+
|NN char * backref_parse_start \
21152115
|char ch
21162116
ESTR |unsigned int|regex_set_precedence|const U8 my_operator
21172117
ES |regnode_offset|handle_regex_sets|NN RExC_state_t *pRExC_state \

proto.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5880,9 +5880,9 @@ STATIC U32 S_get_quantifier_value(pTHX_ RExC_state_t *pRExC_state, const char *
58805880
STATIC bool S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode_offset* nodep, UV *code_point_p, int* cp_count, I32 *flagp, const bool strict, const U32 depth);
58815881
#define PERL_ARGS_ASSERT_GROK_BSLASH_N \
58825882
assert(pRExC_state); assert(flagp)
5883-
STATIC regnode_offset S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * parse_start, char ch);
5883+
STATIC regnode_offset S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * backref_parse_start, char ch);
58845884
#define PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF \
5885-
assert(pRExC_state); assert(flagp); assert(parse_start)
5885+
assert(pRExC_state); assert(flagp); assert(backref_parse_start)
58865886
STATIC bool S_handle_names_wildcard(pTHX_ const char * wname, const STRLEN wname_len, SV ** prop_definition, AV ** strings);
58875887
#define PERL_ARGS_ASSERT_HANDLE_NAMES_WILDCARD \
58885888
assert(wname); assert(prop_definition); assert(strings)

regcomp.c

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10994,7 +10994,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
1099410994
STATIC regnode_offset
1099510995
S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
1099610996
I32 *flagp,
10997-
char * parse_start,
10997+
char * backref_parse_start,
1099810998
char ch
1099910999
)
1100011000
{
@@ -11013,7 +11013,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
1101311013
}
1101411014
if (RExC_parse == name_start || *RExC_parse != ch) {
1101511015
/* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
11016-
vFAIL2("Sequence %.3s... not terminated", parse_start);
11016+
vFAIL2("Sequence %.3s... not terminated", backref_parse_start);
1101711017
}
1101811018

1101911019
if (sv_dat) {
@@ -11115,8 +11115,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1111511115
SV * max_open; /* Max number of unclosed parens */
1111611116
I32 was_in_lookaround = RExC_in_lookaround;
1111711117

11118-
char * parse_start = RExC_parse; /* MJD */
11119-
char * const oregcomp_parse = RExC_parse;
11118+
/* The difference between the following variables can be seen with *
11119+
* the broken pattern /(?:foo/ where segment_parse_start will point *
11120+
* at the 'f', and reg_parse_start will point at the '(' */
11121+
11122+
/* the following is used for unmatched '(' errors */
11123+
char * const reg_parse_start = RExC_parse;
11124+
11125+
/* the following is used to track where various segments of
11126+
* the pattern that we parse out started. */
11127+
char * segment_parse_start = RExC_parse;
1112011128

1112111129
DECLARE_AND_GET_RE_DEBUG_FLAGS;
1112211130

@@ -11501,7 +11509,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1150111509
else if (paren == '=') { /* (?P=...) named backref */
1150211510
RExC_parse++;
1150311511
return handle_named_backref(pRExC_state, flagp,
11504-
parse_start, ')');
11512+
segment_parse_start, ')');
1150511513
}
1150611514
RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
1150711515
/* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
@@ -11652,7 +11660,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1165211660
/*notreached*/
1165311661
/* named and numeric backreferences */
1165411662
case '&': /* (?&NAME) */
11655-
parse_start = RExC_parse - 1;
11663+
segment_parse_start = RExC_parse - 1;
1165611664
named_recursion:
1165711665
{
1165811666
SV *sv_dat = reg_scan_name(pRExC_state,
@@ -11683,7 +11691,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1168311691
{
1168411692
bool is_neg = FALSE;
1168511693
UV unum;
11686-
parse_start = RExC_parse - 1; /* MJD */
11694+
segment_parse_start = RExC_parse - 1;
1168711695
if (*RExC_parse == '-') {
1168811696
RExC_parse++;
1168911697
is_neg = TRUE;
@@ -12181,7 +12189,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1218112189

1218212190
parse_rest:
1218312191
/* Pick up the branches, linking them together. */
12184-
parse_start = RExC_parse;
12192+
segment_parse_start = RExC_parse;
1218512193
br = regbranch(pRExC_state, &flags, 1, depth+1);
1218612194

1218712195
/* branch_len = (paren != 0); */
@@ -12406,7 +12414,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
1240612414
set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
1240712415
}
1240812416
if (RExC_parse >= RExC_end || UCHARAT(RExC_parse) != ')') {
12409-
RExC_parse = oregcomp_parse;
12417+
RExC_parse = reg_parse_start;
1241012418
vFAIL("Unmatched (");
1241112419
}
1241212420
nextchar(pRExC_state);
@@ -13509,7 +13517,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1350913517
{
1351013518
regnode_offset ret = 0;
1351113519
I32 flags = 0;
13512-
char *parse_start;
13520+
char *atom_parse_start;
1351313521
U8 op;
1351413522
int invert = 0;
1351513523

@@ -13522,7 +13530,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1352213530
PERL_ARGS_ASSERT_REGATOM;
1352313531

1352413532
tryagain:
13525-
parse_start = RExC_parse;
13533+
atom_parse_start = RExC_parse;
1352613534
assert(RExC_parse < RExC_end);
1352713535
switch ((U8)*RExC_parse) {
1352813536
case '^':
@@ -13553,7 +13561,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1355313561
break;
1355413562
case '[':
1355513563
{
13556-
char * const oregcomp_parse = ++RExC_parse;
13564+
char * const cc_parse_start = ++RExC_parse;
1355713565
ret = regclass(pRExC_state, flagp, depth+1,
1355813566
FALSE, /* means parse the whole char class */
1355913567
TRUE, /* allow multi-char folds */
@@ -13567,7 +13575,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1356713575
(UV) *flagp);
1356813576
}
1356913577
if (*RExC_parse != ']') {
13570-
RExC_parse = oregcomp_parse;
13578+
RExC_parse = cc_parse_start;
1357113579
vFAIL("Unmatched [");
1357213580
}
1357313581
nextchar(pRExC_state);
@@ -13854,7 +13862,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1385413862
/* The escapes above that don't take a parameter can't be
1385513863
* followed by a '{'. But 'pX', 'p{foo}' and
1385613864
* correspondingly 'P' can be */
13857-
if ( RExC_parse - parse_start == 1
13865+
if ( RExC_parse - atom_parse_start == 1
1385813866
&& UCHARAT(RExC_parse + 1) == '{'
1385913867
&& UNLIKELY(! regcurly(RExC_parse + 1, RExC_end, NULL)))
1386013868
{
@@ -13892,7 +13900,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1389213900
RETURN_FAIL_ON_RESTART_FLAGP(flagp);
1389313901

1389413902
/* Here, evaluates to a single code point. Go get that */
13895-
RExC_parse = parse_start;
13903+
RExC_parse = atom_parse_start;
1389613904
goto defchar;
1389713905

1389813906
case 'k': /* Handle \k<NAME> and \k'NAME' and \k{NAME} */
@@ -13906,7 +13914,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1390613914
{
1390713915
RExC_parse++;
1390813916
/* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
13909-
vFAIL2("Sequence %.2s... not terminated", parse_start);
13917+
vFAIL2("Sequence %.2s... not terminated", atom_parse_start);
1391013918
} else {
1391113919
RExC_parse += 2;
1391213920
if (ch == '{') {
@@ -13916,7 +13924,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1391613924
}
1391713925
ret = handle_named_backref(pRExC_state,
1391813926
flagp,
13919-
parse_start,
13927+
atom_parse_start,
1392013928
(ch == '<')
1392113929
? '>'
1392213930
: (ch == '{')
@@ -14027,7 +14035,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1402714035
* to be an octal character escape, e.g. \35 or \777.
1402814036
* The above logic should make it obvious why using
1402914037
* octal escapes in patterns is problematic. - Yves */
14030-
RExC_parse = parse_start;
14038+
RExC_parse = atom_parse_start;
1403114039
goto defchar;
1403214040
}
1403314041
}
@@ -14089,7 +14097,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1408914097
default:
1409014098
/* Do not generate "unrecognized" warnings here, we fall
1409114099
back into the quick-grab loop below */
14092-
RExC_parse = parse_start;
14100+
RExC_parse = atom_parse_start;
1409314101
goto defchar;
1409414102
} /* end of switch on a \foo sequence */
1409514103
break;
@@ -14328,7 +14336,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1432814336
goto loopdone;
1432914337
}
1433014338
p = RExC_parse;
14331-
RExC_parse = parse_start;
14339+
RExC_parse = atom_parse_start;
1433214340

1433314341
/* The \N{} means the pattern, if previously /d,
1433414342
* becomes /u. That means it can't be an EXACTF node,
@@ -14518,7 +14526,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1451814526
* string of characters instead of a meta construct */
1451914527
if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) {
1452014528
if ( RExC_strict
14521-
|| ( p > parse_start + 1
14529+
|| ( p > atom_parse_start + 1
1452214530
&& isALPHA_A(*(p - 1))
1452314531
&& *(p - 2) == '\\'))
1452414532
{

0 commit comments

Comments
 (0)