Skip to content

Commit 84349ec

Browse files
committed
Remove "offsets" debugging code from regcomp.c
This code was added by Mark Jason Dominus to aid a regex debugger he wrote. The basic premise is that every opcode in a regex can be attributed back to parts of the pattern. This assumption has not been true ever since the TRIE optimizations were added, and I believe that the debugger is no longer in use anyway. The regex compiler is complicated enough without having to maintain this logic. There are essentially no tests for it, and the few tests that do cover it do so as a byproduct of testing other things. Despite the offsets logic only being used in debug supporting it does have a cost to non-debug logic as various internal routines include parameters related to it that are otherwise unused. I spoke to him many years ago about whether it was ok to remove it from the regex engine and he said yes. As part of this patch I also changed the name of the "parse_start" and "oregcomp_parse" variables in certain contexts so that the code is a bit more clear, this was partly because the offsets logic used its own parse_start variable in certain contexts and changing the names of the others made it easier to clean up.
1 parent afded0c commit 84349ec

File tree

6 files changed

+56
-328
lines changed

6 files changed

+56
-328
lines changed

embed.fnc

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2029,9 +2029,7 @@ ERS |SV* |make_exactf_invlist |NN RExC_state_t *pRExC_state \
20292029
ES |regnode_offset|reg |NN RExC_state_t *pRExC_state \
20302030
|I32 paren|NN I32 *flagp|U32 depth
20312031
ES |regnode_offset|regnode_guts|NN RExC_state_t *pRExC_state \
2032-
|const U8 op \
2033-
|const STRLEN extra_len \
2034-
|NN const char* const name
2032+
|const STRLEN extra_len
20352033
ES |void |change_engine_size|NN RExC_state_t *pRExC_state|const Ptrdiff_t size
20362034
ES |regnode_offset|reganode|NN RExC_state_t *pRExC_state|U8 op \
20372035
|U32 arg
@@ -2108,13 +2106,12 @@ ES |void|add_above_Latin1_folds|NN RExC_state_t *pRExC_state|const U8 cp \
21082106
|NN SV** invlist
21092107
ES |regnode_offset|handle_named_backref|NN RExC_state_t *pRExC_state \
21102108
|NN I32 *flagp \
2111-
|NN char * parse_start \
2109+
|NN char * backref_parse_start \
21122110
|char ch
21132111
ESTR |unsigned int|regex_set_precedence|const U8 my_operator
21142112
ES |regnode_offset|handle_regex_sets|NN RExC_state_t *pRExC_state \
21152113
|NULLOK SV ** return_invlist \
2116-
|NN I32 *flagp|U32 depth \
2117-
|NN char * const oregcomp_parse
2114+
|NN I32 *flagp|U32 depth
21182115
ES |void |set_regex_pv |NN RExC_state_t *pRExC_state|NN REGEXP *Rx
21192116
# if defined(DEBUGGING) && defined(ENABLE_REGEX_SETS_DEBUGGING)
21202117
ES |void |dump_regex_sets_structures \

embed.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,7 +1077,7 @@
10771077
#define handle_named_backref(a,b,c,d) S_handle_named_backref(aTHX_ a,b,c,d)
10781078
#define handle_names_wildcard(a,b,c,d) S_handle_names_wildcard(aTHX_ a,b,c,d)
10791079
#define handle_possible_posix(a,b,c,d,e) S_handle_possible_posix(aTHX_ a,b,c,d,e)
1080-
#define handle_regex_sets(a,b,c,d,e) S_handle_regex_sets(aTHX_ a,b,c,d,e)
1080+
#define handle_regex_sets(a,b,c,d) S_handle_regex_sets(aTHX_ a,b,c,d)
10811081
#define handle_user_defined_property(a,b,c,d,e,f,g,h,i,j) S_handle_user_defined_property(aTHX_ a,b,c,d,e,f,g,h,i,j)
10821082
#define invlist_contents(a,b) S_invlist_contents(aTHX_ a,b)
10831083
#define invlist_is_iterating S_invlist_is_iterating
@@ -1104,7 +1104,7 @@
11041104
#define regclass(a,b,c,d,e,f,g,h,i) S_regclass(aTHX_ a,b,c,d,e,f,g,h,i)
11051105
#define regex_set_precedence S_regex_set_precedence
11061106
#define reginsert(a,b,c,d) S_reginsert(aTHX_ a,b,c,d)
1107-
#define regnode_guts(a,b,c,d) S_regnode_guts(aTHX_ a,b,c,d)
1107+
#define regnode_guts(a,b) S_regnode_guts(aTHX_ a,b)
11081108
#define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c)
11091109
#define regpnode(a,b,c) S_regpnode(aTHX_ a,b,c)
11101110
#define regtail(a,b,c,d) S_regtail(aTHX_ a,b,c,d)

ext/re/t/regop.t

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@ Freeing REx: "[f][o][o][b][a][r]"
140140
minlen 3
141141
---
142142
# Compiling REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
143-
# Got 164 bytes for offset annotations.
144143
# TRIE(NATIVE): W:6 C:24 Uq:7 Min:4 Max:4
145144
# Char : Match Base Ofs A B C P G E D
146145
# State|---------------------------------------------------
@@ -166,8 +165,6 @@ minlen 3
166165
# <D>
167166
# 20: END (0)
168167
# anchored "ABC" at 0 (checking anchored) minlen 4
169-
# Offsets: [20]
170-
# 1:4[3] 3:4[15] 19:32[0] 20:34[0]
171168
# Guessing start of match in sv for REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD"
172169
# Found anchored substr "ABC" at offset 0...
173170
# Guessed: match at offset 0
@@ -210,8 +207,6 @@ anchored "ABC" at 0
210207
# 47: EOL(48)
211208
# 48: END(0)
212209
#floating ""$ at 3..4 (checking floating) stclass "EXACTF <.>" minlen 3
213-
#Offsets: [48]
214-
# 1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0]
215210
#Guessing start of match, REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|..." against "D:dev/perl/ver/28321_/perl.exe"...
216211
#Found floating substr ""$ at offset 30...
217212
#Starting position does not contradict /^/m...
@@ -233,30 +228,23 @@ anchored "ABC" at 0
233228
#Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."......
234229
%MATCHED%
235230
floating ""$ at 3..4 (checking floating)
236-
#1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0]
237231
#stclass EXACTF <.> minlen 3
238232
#Found floating substr ""$ at offset 30...
239233
#Does not contradict STCLASS...
240234
#Guessed: match at offset 26
241235
#Matching stclass EXACTF <.> against ".exe"
242236
---
243237
#Compiling REx "[q]"
244-
#size 3 nodes Got 7 bytes for offset annotations.
245238
#first at 1
246239
#Final program:
247240
# 1: EXACT <q>(3)
248241
# 3: END(0)
249242
#anchored "q" at 0 (checking anchored isall) minlen 1
250-
#Offsets: [3]
251-
# 1:1[3] 3:4[0]
252243
#Guessing start of match, REx "[q]" against "q"...
253244
#Found anchored substr "q" at offset 0...
254245
#Guessed: match at offset 0
255246
#%MATCHED%
256247
#Freeing REx: "[q]"
257-
Got 7 bytes for offset annotations.
258-
Offsets: [3]
259-
1:1[3] 3:4[0]
260248
%MATCHED%
261249
Freeing REx: "[q]"
262250
---
@@ -281,7 +269,6 @@ Freeing REx: "[q]"
281269
Freeing REx: "^(\S{1,9}):\s*(\d+)$"
282270
---
283271
#Compiling REx "(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE"...
284-
#Got 532 bytes for offset annotations.
285272
study_chunk_recursed_count: 5
286273
#Final program:
287274
# 1: DEFINEP (3)
@@ -317,8 +304,6 @@ study_chunk_recursed_count: 5
317304
# 61: TAIL (62)
318305
# 62: END (0)
319306
minlen 0
320-
#Offsets: [66]
321-
# 1:3[0] 3:10[0] 5:17[1] 7:18[3] 9:21[1] 11:21[0] 13:22[0] 14:25[0] 16:32[0] 18:39[1] 20:41[3] 23:47[3] 25:50[1] 27:50[0] 29:51[0] 30:54[0] 32:61[0] 34:68[1] 36:70[3] 39:76[3] 41:79[1] 43:79[0] 45:80[0] 46:83[0] 48:90[0] 50:97[1] 52:99[3] 55:105[3] 57:108[1] 59:108[0] 61:109[0] 62:110[0]
322307
#Matching REx "(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE"... against ""
323308
# 0 <> <> | 1:DEFINEP(3)
324309
# 0 <> <> | 3:IFTHEN(14)

proto.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5877,18 +5877,18 @@ STATIC U32 S_get_quantifier_value(pTHX_ RExC_state_t *pRExC_state, const char *
58775877
STATIC bool S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode_offset* nodep, UV *code_point_p, int* cp_count, I32 *flagp, const bool strict, const U32 depth);
58785878
#define PERL_ARGS_ASSERT_GROK_BSLASH_N \
58795879
assert(pRExC_state); assert(flagp)
5880-
STATIC regnode_offset S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * parse_start, char ch);
5880+
STATIC regnode_offset S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * backref_parse_start, char ch);
58815881
#define PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF \
5882-
assert(pRExC_state); assert(flagp); assert(parse_start)
5882+
assert(pRExC_state); assert(flagp); assert(backref_parse_start)
58835883
STATIC bool S_handle_names_wildcard(pTHX_ const char * wname, const STRLEN wname_len, SV ** prop_definition, AV ** strings);
58845884
#define PERL_ARGS_ASSERT_HANDLE_NAMES_WILDCARD \
58855885
assert(wname); assert(prop_definition); assert(strings)
58865886
STATIC int S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, const char* const s, char ** updated_parse_ptr, AV** posix_warnings, const bool check_only);
58875887
#define PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX \
58885888
assert(pRExC_state); assert(s)
5889-
STATIC regnode_offset S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth, char * const oregcomp_parse);
5889+
STATIC regnode_offset S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth);
58905890
#define PERL_ARGS_ASSERT_HANDLE_REGEX_SETS \
5891-
assert(pRExC_state); assert(flagp); assert(oregcomp_parse)
5891+
assert(pRExC_state); assert(flagp)
58925892
STATIC SV * S_handle_user_defined_property(pTHX_ const char * name, const STRLEN name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, SV* contents, bool *user_defined_ptr, SV * msg, const STRLEN level);
58935893
#define PERL_ARGS_ASSERT_HANDLE_USER_DEFINED_PROPERTY \
58945894
assert(name); assert(contents); assert(user_defined_ptr); assert(msg)
@@ -5990,9 +5990,9 @@ STATIC unsigned int S_regex_set_precedence(const U8 my_operator)
59905990
STATIC void S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op, const regnode_offset operand, const U32 depth);
59915991
#define PERL_ARGS_ASSERT_REGINSERT \
59925992
assert(pRExC_state)
5993-
STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_len, const char* const name);
5993+
STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const STRLEN extra_len);
59945994
#define PERL_ARGS_ASSERT_REGNODE_GUTS \
5995-
assert(pRExC_state); assert(name)
5995+
assert(pRExC_state)
59965996
STATIC regnode_offset S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
59975997
#define PERL_ARGS_ASSERT_REGPIECE \
59985998
assert(pRExC_state); assert(flagp)

0 commit comments

Comments
 (0)