Skip to content

WIP: split out matched offset data from regexp structure; create SVt_RXMO for it. #20747

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 31 commits into
base: blead
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
5caabe9
regcomp.c - Resolve issues clearing buffers in CURLYX (MAJOR-CHANGE)
demerphq Jan 9, 2023
832a650
regexec.c - teach BRANCH and BRANCHJ nodes to reset capture buffers
demerphq Jan 9, 2023
ce6c76c
perldelta - add note about regex engine changes
demerphq Jan 10, 2023
f10b277
regexec.c - incredibly inefficient solution to backref problem
demerphq Jan 14, 2023
e633abf
regexec.c - make REF into a backtracking state
demerphq Jan 14, 2023
cce231f
regex engine - simplify regnode structures and make them consistent
demerphq Jan 15, 2023
f1583c5
regcomp.c - extend REF to hold the paren it needs to regcppush
demerphq Jan 15, 2023
a1fa545
regexec.c - minor cleanup of CAPTURE_xxx code
demerphq Jan 25, 2023
66e10a9
regcomp.c - Use RXp_OFFSp() to access offset data
demerphq Jan 25, 2023
5067a17
regexp.h - standardize macros, and parenthesize parameters
demerphq Jan 25, 2023
ebf4910
regexec.c - use RXp_LASTPAREN(rex) to access rex->lastparen
demerphq Jan 26, 2023
5025ef1
regexp.h - add missing defines
demerphq Jan 26, 2023
293ea33
dump.c - use RXp_ macros to access regexp struct members
demerphq Jan 26, 2023
d3ce0bb
regexec.c - use RXp_LASTCLOSEPAREN(r) to access r->lastcloseparen
demerphq Jan 26, 2023
75038a3
dump.c - fixup missing case
demerphq Jan 26, 2023
bdf4ee9
regexec.c - use macro to access rex->subbeg
demerphq Jan 26, 2023
a7ca58f
regexec.c - use RXp_SUBLEN(ret) for ret->sublen
demerphq Jan 26, 2023
53e1711
regexec.c - use RXp_SUBOFFSET(rx) instead of rx->suboffset
demerphq Jan 26, 2023
ac9345c
regexec.c - use RXp_SUBCOFFSET instead of rx->subcoffset
demerphq Jan 26, 2023
1d8e83a
regexec.c - use RXp_SAVED_COPY(rex) instead of rex->saved_copy
demerphq Jan 26, 2023
4948fe4
regcomp.c - use macro wrappers to minimize impact of struct split
demerphq Jan 26, 2023
ef17607
regexp.h - use RXp_SAVED_COPY(ret) to access ret->saved_copy
demerphq Jan 26, 2023
45d5d5b
regexp.h - fixup mistake in comment
demerphq Jan 26, 2023
831f563
WIP
demerphq Jan 27, 2023
d8af754
WIP - add SVt_RXMO to store regex match offsets
demerphq Jan 28, 2023
685b7f3
dump.c - add a function that returns the name for a SVt_ type
demerphq Jan 29, 2023
44675d0
sv.c - provide better error messages for sv_upgrade and newSV_type()
demerphq Jan 29, 2023
a9990f0
WIP - RXMO works-ish
demerphq Jan 29, 2023
ef26fa4
WIP
demerphq Jan 29, 2023
16779c6
WIP - _XPV_HEAD
demerphq Jan 30, 2023
f835956
WIP
demerphq Jan 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -4686,6 +4686,7 @@ ext/XS-APItest/t/push.t XS::APItest extension
ext/XS-APItest/t/refs.t Test typemap ref handling
ext/XS-APItest/t/rmagical.t XS::APItest extension
ext/XS-APItest/t/rv2cv_op_cv.t test rv2cv_op_cv() API
ext/XS-APItest/t/rxmo.t test SVt_RXMO
ext/XS-APItest/t/savehints.t test SAVEHINTS() API
ext/XS-APItest/t/savestack.t test savestack behavior, currently only in the regex engine
ext/XS-APItest/t/scopelessblock.t test recursive descent statement-sequence parsing
Expand Down
170 changes: 124 additions & 46 deletions dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ static const char* const svtypenames[SVt_LAST] = {
"PVFM",
"PVIO",
"PVOBJ",
"RXMO",
};


Expand All @@ -67,14 +68,30 @@ static const char* const svshorttypenames[SVt_LAST] = {
"CV",
"FM",
"IO",
<<<<<<< HEAD
"OBJ",
=======
"RXMO",
>>>>>>> WIP - RXMO works-ish
};

/* Fallback name that Perl_sv_type_name() returns when it is handed a type
 * number that fails its range check. */
static const char unknowntypename[] = "UNKNOWN";

/* One entry of a flag-description table: a flag value paired with the text
 * used to name it. S_append_flags() takes a start/end pair of pointers to
 * entries of this type. */
struct flag_to_name {
U32 flag;           /* the flag value this entry describes */
const char *name;   /* the name string associated with that flag */
};

/*
 * Perl_sv_type_name - map an SV type number to a printable name.
 *
 *   type       the SVt_* type number to look up.
 *   long_name  true selects the entry from svtypenames[]; false selects
 *              the entry from svshorttypenames[].
 *
 * Returns a pointer to a static string. A type number outside the tables
 * yields unknowntypename ("UNKNOWN") rather than an out-of-bounds read.
 */
const char *
Perl_sv_type_name(U8 type, bool long_name) {
    /* Both name tables are declared with exactly SVt_LAST elements, so the
     * valid indexes are 0 .. SVt_LAST-1. SVt_LAST itself must be rejected
     * as well, hence ">=" and not ">". */
    if (type >= SVt_LAST)
        return unknowntypename;
    if (long_name)
        return svtypenames[type];
    else
        return svshorttypenames[type];
}

static void
S_append_flags(pTHX_ SV *sv, U32 flags, const struct flag_to_name *start,
const struct flag_to_name *const end)
Expand Down Expand Up @@ -982,7 +999,7 @@ S_pm_description(pTHX_ const PMOP *pm)
if (pmflags & PMf_ONCE)
sv_catpvs(desc, ",ONCE");
#ifdef USE_ITHREADS
if (SvREADONLY(PL_regex_pad[pm->op_pmoffset]))
if (SvREADONLY(PL_regex_pad[pm->op_pmrxmo_offset]))
sv_catpvs(desc, ":USED");
#else
if (pmflags & PMf_USED)
Expand Down Expand Up @@ -2006,8 +2023,15 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo

/* Dump general SV fields */

<<<<<<< HEAD
if ((type >= SVt_PVIV && type <= SVt_PVLV
&& type != SVt_REGEXP && !isGV_with_GP(sv) && !SvVALID(sv))
=======
if ((type >= SVt_PVIV && type != SVt_PVAV && type != SVt_PVHV
&& type != SVt_PVCV && type != SVt_PVFM && type != SVt_PVIO
&& type != SVt_REGEXP && type != SVt_RXMO
&& !isGV_with_GP(sv) && !SvVALID(sv))
>>>>>>> WIP - RXMO works-ish
|| (type == SVt_IV && !SvROK(sv))) {
if (SvIsUV(sv)
)
Expand All @@ -2017,8 +2041,15 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
(void)PerlIO_putc(file, '\n');
}

<<<<<<< HEAD
if ((type >= SVt_PVNV && type <= SVt_PVLV
&& type != SVt_REGEXP && !isGV_with_GP(sv) && !SvVALID(sv))
=======
if ((type >= SVt_PVNV && type != SVt_PVAV && type != SVt_PVHV
&& type != SVt_PVCV && type != SVt_PVFM
&& type != SVt_REGEXP && type != SVt_RXMO
&& type != SVt_PVIO && !isGV_with_GP(sv) && !SvVALID(sv))
>>>>>>> WIP - RXMO works-ish
|| type == SVt_NV) {
DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
STORE_LC_NUMERIC_SET_STANDARD();
Expand Down Expand Up @@ -2588,29 +2619,29 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
SvPVX(d)[SvCUR(d)] = '\0'; \
} \
} STMT_END
SV_SET_STRINGIFY_REGEXP_FLAGS(d,r->compflags,regexp_extflags_names);
SV_SET_STRINGIFY_REGEXP_FLAGS(d,RXp_COMPFLAGS(r),regexp_extflags_names);
Perl_dump_indent(aTHX_ level, file, " COMPFLAGS = 0x%" UVxf " (%s)\n",
(UV)(r->compflags), SvPVX_const(d));
(UV)(RXp_COMPFLAGS(r)), SvPVX_const(d));

SV_SET_STRINGIFY_REGEXP_FLAGS(d,r->extflags,regexp_extflags_names);
SV_SET_STRINGIFY_REGEXP_FLAGS(d,RXp_EXTFLAGS(r),regexp_extflags_names);
Perl_dump_indent(aTHX_ level, file, " EXTFLAGS = 0x%" UVxf " (%s)\n",
(UV)(r->extflags), SvPVX_const(d));
(UV)(RXp_EXTFLAGS(r)), SvPVX_const(d));

Perl_dump_indent(aTHX_ level, file, " ENGINE = 0x%" UVxf " (%s)\n",
PTR2UV(r->engine), (r->engine == &PL_core_reg_engine) ? "STANDARD" : "PLUG-IN" );
if (r->engine == &PL_core_reg_engine) {
SV_SET_STRINGIFY_REGEXP_FLAGS(d,r->intflags,regexp_core_intflags_names);
PTR2UV(RXp_ENGINE(r)), (RXp_ENGINE(r) == &PL_core_reg_engine) ? "STANDARD" : "PLUG-IN" );
if (RXp_ENGINE(r) == &PL_core_reg_engine) {
SV_SET_STRINGIFY_REGEXP_FLAGS(d,RXp_INTFLAGS(r),regexp_core_intflags_names);
Perl_dump_indent(aTHX_ level, file, " INTFLAGS = 0x%" UVxf " (%s)\n",
(UV)(r->intflags), SvPVX_const(d));
(UV)(RXp_INTFLAGS(r)), SvPVX_const(d));
} else {
Perl_dump_indent(aTHX_ level, file, " INTFLAGS = 0x%" UVxf "(Plug in)\n",
(UV)(r->intflags));
(UV)(RXp_INTFLAGS(r)));
}
#undef SV_SET_STRINGIFY_REGEXP_FLAGS
Perl_dump_indent(aTHX_ level, file, " NPARENS = %" UVuf "\n",
(UV)(r->nparens));
(UV)(RXp_NPARENS(r)));
Perl_dump_indent(aTHX_ level, file, " LOGICAL_NPARENS = %" UVuf "\n",
(UV)(r->logical_nparens));
(UV)(RXp_LOGICAL_NPARENS(r)));

#define SV_SET_STRINGIFY_I32_PAREN_ARRAY(d,count,ary) \
STMT_START { \
Expand All @@ -2625,85 +2656,86 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
} STMT_END

Perl_dump_indent(aTHX_ level, file, " LOGICAL_TO_PARNO = 0x%" UVxf "\n",
PTR2UV(r->logical_to_parno));
if (r->logical_to_parno) {
SV_SET_STRINGIFY_I32_PAREN_ARRAY(d, r->logical_nparens, r->logical_to_parno);
PTR2UV(RXp_LOGICAL_TO_PARNO(r)));
if (RXp_LOGICAL_TO_PARNO(r)) {
SV_SET_STRINGIFY_I32_PAREN_ARRAY(d, RXp_LOGICAL_NPARENS(r), RXp_LOGICAL_TO_PARNO(r));
Perl_dump_indent(aTHX_ level, file, " %" SVf, d);
}
Perl_dump_indent(aTHX_ level, file, " PARNO_TO_LOGICAL = 0x%" UVxf "\n",
PTR2UV(r->parno_to_logical));
if (r->parno_to_logical) {
SV_SET_STRINGIFY_I32_PAREN_ARRAY(d, r->nparens, r->parno_to_logical);
PTR2UV(RXp_PARNO_TO_LOGICAL(r)));
if (RXp_PARNO_TO_LOGICAL(r)) {
SV_SET_STRINGIFY_I32_PAREN_ARRAY(d, RXp_NPARENS(r), RXp_PARNO_TO_LOGICAL(r));
Perl_dump_indent(aTHX_ level, file, " %" SVf, d);
}

Perl_dump_indent(aTHX_ level, file, " PARNO_TO_LOGICAL_NEXT = 0x%" UVxf "\n",
PTR2UV(r->parno_to_logical_next));
if (r->parno_to_logical_next) {
SV_SET_STRINGIFY_I32_PAREN_ARRAY(d, r->nparens, r->parno_to_logical_next);
PTR2UV(RXp_PARNO_TO_LOGICAL_NEXT(r)));
if (RXp_PARNO_TO_LOGICAL_NEXT(r)) {
SV_SET_STRINGIFY_I32_PAREN_ARRAY(d, RXp_NPARENS(r), RXp_PARNO_TO_LOGICAL_NEXT(r));
Perl_dump_indent(aTHX_ level, file, " %" SVf, d);
}
#undef SV_SET_STRINGIFY_I32_ARRAY

Perl_dump_indent(aTHX_ level, file, " LASTPAREN = %" UVuf "\n",
(UV)(r->lastparen));
(UV)(RXp_LASTPAREN(r)));
Perl_dump_indent(aTHX_ level, file, " LASTCLOSEPAREN = %" UVuf "\n",
(UV)(r->lastcloseparen));
(UV)(RXp_LASTCLOSEPAREN(r)));
Perl_dump_indent(aTHX_ level, file, " MINLEN = %" IVdf "\n",
(IV)(r->minlen));
(IV)(RXp_MINLEN(r)));
Perl_dump_indent(aTHX_ level, file, " MINLENRET = %" IVdf "\n",
(IV)(r->minlenret));
(IV)(RXp_MINLENRET(r)));
Perl_dump_indent(aTHX_ level, file, " GOFS = %" UVuf "\n",
(UV)(r->gofs));
(UV)(RXp_GOFS(r)));
Perl_dump_indent(aTHX_ level, file, " PRE_PREFIX = %" UVuf "\n",
(UV)(r->pre_prefix));
(UV)(RXp_PRE_PREFIX(r)));
Perl_dump_indent(aTHX_ level, file, " SUBLEN = %" IVdf "\n",
(IV)(r->sublen));
(IV)(RXp_SUBLEN(r)));
Perl_dump_indent(aTHX_ level, file, " SUBOFFSET = %" IVdf "\n",
(IV)(r->suboffset));
(IV)(RXp_SUBOFFSET(r)));
Perl_dump_indent(aTHX_ level, file, " SUBCOFFSET = %" IVdf "\n",
(IV)(r->subcoffset));
if (r->subbeg)
(IV)(RXp_SUBCOFFSET(r)));
if (RXp_SUBBEG(r))
Perl_dump_indent(aTHX_ level, file, " SUBBEG = 0x%" UVxf " %s\n",
PTR2UV(r->subbeg),
pv_display(d, r->subbeg, r->sublen, 50, pvlim));
PTR2UV(RXp_SUBBEG(r)),
pv_display(d, RXp_SUBBEG(r), RXp_SUBLEN(r), 50, pvlim));
else
Perl_dump_indent(aTHX_ level, file, " SUBBEG = 0x0\n");
Perl_dump_indent(aTHX_ level, file, " PAREN_NAMES = 0x%" UVxf "\n",
PTR2UV(r->paren_names));
PTR2UV(RXp_PAREN_NAMES(r)));
Perl_dump_indent(aTHX_ level, file, " SUBSTRS = 0x%" UVxf "\n",
PTR2UV(r->substrs));
PTR2UV(RXp_SUBSTRS(r)));
Perl_dump_indent(aTHX_ level, file, " PPRIVATE = 0x%" UVxf "\n",
PTR2UV(r->pprivate));
PTR2UV(RXp_PPRIVATE(r)));
Perl_dump_indent(aTHX_ level, file, " OFFS = 0x%" UVxf "\n",
PTR2UV(r->offs));
if (r->offs) {
PTR2UV(RXp_OFFSp(r)));
if (RXp_OFFSp(r)) {
U32 n;
sv_setpvs(d,"[ ");
/* note offs[0] is for the whole match, and
* the data for $1 is in offs[1]. Thus we have to
* show one more than we have nparens. */
for(n = 0; n <= r->nparens; n++) {
for(n = 0; n <= RXp_NPARENS(r); n++) {
sv_catpvf(d,"%" IVdf ":%" IVdf "%s",
r->offs[n].start, r->offs[n].end,
n+1 > r->nparens ? " ]\n" : ", ");
RXp_OFFSp(r)[n].start, RXp_OFFSp(r)[n].end,
n+1 > RXp_NPARENS(r) ? " ]\n" : ", ");
}
Perl_dump_indent(aTHX_ level, file, " %" SVf, d);
}
Perl_dump_indent(aTHX_ level, file, " QR_ANONCV = 0x%" UVxf "\n",
PTR2UV(r->qr_anoncv));
PTR2UV(RXp_QR_ANONCV(r)));
#ifdef PERL_ANY_COW
Perl_dump_indent(aTHX_ level, file, " SAVED_COPY = 0x%" UVxf "\n",
PTR2UV(r->saved_copy));
PTR2UV(RXp_SAVED_COPY(r)));
#endif
/* this should go LAST or the output gets really confusing */
Perl_dump_indent(aTHX_ level, file, " MOTHER_RE = 0x%" UVxf "\n",
PTR2UV(r->mother_re));
if (nest < maxnest && r->mother_re)
do_sv_dump(level+1, file, (SV *)r->mother_re, nest+1,
PTR2UV(RXp_MOTHER_RE(r)));
if (nest < maxnest && RXp_MOTHER_RE(r))
do_sv_dump(level+1, file, (SV *)RXp_MOTHER_RE(r), nest+1,
maxnest, dumpops, pvlim);
}
break;
<<<<<<< HEAD
case SVt_PVOBJ:
Perl_dump_indent(aTHX_ level, file, " MAXFIELD = %" IVdf "\n",
(IV)ObjectMAXFIELD(sv));
Expand All @@ -2727,6 +2759,52 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
}
}
break;
=======
case SVt_RXMO:
{
regexp_matched_offsets *rxmo = RxmoANY(sv);

Perl_dump_indent(aTHX_ level, file, " SUBLEN = %" IVdf "\n",
(IV)(RXMOp_SUBLEN(rxmo)));
Perl_dump_indent(aTHX_ level, file, " SUBOFFSET = %" IVdf "\n",
(IV)(RXMOp_SUBOFFSET(rxmo)));
Perl_dump_indent(aTHX_ level, file, " SUBCOFFSET = %" IVdf "\n",
(IV)(RXMOp_SUBCOFFSET(rxmo)));
if (RXMOp_SUBBEG(rxmo))
Perl_dump_indent(aTHX_ level, file, " SUBBEG = 0x%" UVxf " %s\n",
PTR2UV(RXMOp_SUBBEG(rxmo)),
pv_display(d, RXMOp_SUBBEG(rxmo), RXMOp_SUBLEN(rxmo), 50, pvlim));
else
Perl_dump_indent(aTHX_ level, file, " SUBBEG = 0x0\n");

Perl_dump_indent(aTHX_ level, file, " NPARENS = %" UVuf "\n",
(UV)(RXMOp_NPARENS(rxmo)));
Perl_dump_indent(aTHX_ level, file, " OFFS = 0x%" UVxf "\n",
PTR2UV(RXMOp_OFFSp(rxmo)));
if (RXMOp_OFFSp(rxmo)) {
U32 n;
sv_setpvs(d,"[ ");
/* note offs[0] is for the whole match, and
* the data for $1 is in offs[1]. Thus we have to
* show one more than we have nparens. */
for(n = 0; n <= RXMOp_NPARENS(rxmo); n++) {
sv_catpvf(d,"%" IVdf ":%" IVdf "%s",
RXMOp_OFFSp(rxmo)[n].start, RXMOp_OFFSp(rxmo)[n].end,
n+1 > RXMOp_NPARENS(rxmo) ? " ]\n" : ", ");
}
Perl_dump_indent(aTHX_ level, file, " %" SVf, d);
}
Perl_dump_indent(aTHX_ level, file, " OWNER_RXSV = 0x%" UVxf "\n",
PTR2UV(RXMOp_OWNER_RXSV(rxmo)));

if (nest < maxnest && RXMOp_OWNER_RXSV(rxmo)) {
do_sv_dump(level+1, file, (SV *)RXMOp_OWNER_RXSV(rxmo), nest+1,
maxnest, dumpops, pvlim);
}
}
break;

>>>>>>> WIP - RXMO works-ish
}
SvREFCNT_dec_NN(d);
}
Expand Down
24 changes: 16 additions & 8 deletions embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -2516,6 +2516,7 @@ Adhp |I32 |pregexec |NN REGEXP * const prog \
|U32 nosave
Cp |void |pregfree |NULLOK REGEXP *r
Cp |void |pregfree2 |NN REGEXP *rx

Adp |const char *|prescan_version \
|NN const char *s \
|bool strict \
Expand Down Expand Up @@ -2742,6 +2743,11 @@ Cdhp |int |runops_debug
Cdhp |int |runops_standard
Adp |CV * |rv2cv_op_cv |NN OP *cvop \
|U32 flags
Adp |void |rxmo_dup_guts |NN const RXMO *srxmo \
|NN RXMO *drxmo \
|NN CLONE_PARAMS *param
Cp |void |rxmo_free |NULLOK RXMO *rxmo
Cp |void |rxmo_free2 |NN RXMOG *rxmo
: Used in pp_hot.c
p |void |rxres_save |NN void **rsp \
|NN REGEXP *rx
Expand Down Expand Up @@ -3310,6 +3316,9 @@ Cip |bool |SvTRUE_common |NN SV *sv \
|const bool sv_2bool_is_fallback
Adip |bool |SvTRUE_NN |NN SV *sv
Adip |bool |SvTRUE_nomg |NULLOK SV *sv
ARTp |const char *|sv_type_name \
|U8 type \
|bool longname
ARdp |char * |sv_uni_display |NN SV *dsv \
|NN SV *ssv \
|STRLEN pvlim \
Expand Down Expand Up @@ -5136,9 +5145,6 @@ ES |regnode_offset|reg |NN RExC_state_t *pRExC_state \
|I32 paren \
|NN I32 *flagp \
|U32 depth
ES |regnode_offset|reganode|NN RExC_state_t *pRExC_state \
|U8 op \
|U32 arg
ES |regnode_offset|regatom |NN RExC_state_t *pRExC_state \
|NN I32 *flagp \
|U32 depth
Expand All @@ -5162,11 +5168,6 @@ ES |void |reginsert |NN RExC_state_t *pRExC_state \
|const U8 op \
|const regnode_offset operand \
|const U32 depth
ES |regnode_offset|reg2Lanode \
|NN RExC_state_t *pRExC_state \
|const U8 op \
|const U32 arg1 \
|const I32 arg2
ES |regnode_offset|reg_la_NOTHING \
|NN RExC_state_t *pRExC_state \
|U32 flags \
Expand All @@ -5175,6 +5176,13 @@ ES |regnode_offset|reg_la_OPFAIL \
|NN RExC_state_t *pRExC_state \
|U32 flags \
|NN const char *type
ES |regnode_offset|reg1node|NN RExC_state_t *pRExC_state \
|U8 op \
|U32 arg
ES |regnode_offset|reg2node|NN RExC_state_t *pRExC_state \
|const U8 op \
|const U32 arg1 \
|const I32 arg2
ES |regnode_offset|reg_node|NN RExC_state_t *pRExC_state \
|U8 op
ES |regnode_offset|regnode_guts \
Expand Down
Loading