Skip to content

Commit a963d6d

Browse files
demerphqkhwilliamson
authored andcommitted
Fixup Issue #19350 - Assert error under: use re Debug=>"ALL"
In 7c932d0 karl changed the regex parser to not do two passes always, and instead do one if it could. However in some edge cases it still must do a second past and some of the info needed for the Debug => "All" would not be available during the first pass. This was compounded by S_add_data() validly returning a 0 index for data that was stored in the data array, which meant that there was no way to tell the difference between "we dont have the data to call S_add_data() at all" and "we called S_add_data() and it returned 0", this in turn would cause the dumping logic to try to access data that was not there, and misinterpret it as a valid SV, with ensure assert fails or worse segfaults. This patch changes S_add_data() so it always returns a non-zero return, so that the regex debug logic can tell that it shouldnt do a lookup into the data array. This means create a new "what" type of "%", which is used SOLELY to populate the ->data[0] and ->what[0] with a dummy entry. With this done we can add guard statements to places that lookup things in the data array, if the index is 0 it can not be valid.
1 parent b4bc910 commit a963d6d

File tree

1 file changed

+59
-8
lines changed

1 file changed

+59
-8
lines changed

regcomp.c

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6617,22 +6617,52 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
66176617
return final_minlen;
66186618
}
66196619

6620+
/* add a data member to the struct reg_data attached to this regex, it should
6621+
* always return a non-zero return */
66206622
STATIC U32
66216623
S_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
66226624
{
6623-
U32 count = RExC_rxi->data ? RExC_rxi->data->count : 0;
6625+
U32 count = RExC_rxi->data ? RExC_rxi->data->count : 1;
66246626

66256627
PERL_ARGS_ASSERT_ADD_DATA;
66266628

6629+
/* in the below expression we have (count + n - 1), the minus one is there
6630+
* because the struct that we allocate already contains a slot for 1 data
6631+
* item, so we do not need to allocate it the first time. IOW, the
6632+
* sizeof(*RExC_rxi->data) already accounts for one of the elements we need
6633+
* to allocate. See struct reg_data in regcomp.h
6634+
*/
66276635
Renewc(RExC_rxi->data,
6628-
sizeof(*RExC_rxi->data) + sizeof(void*) * (count + n - 1),
6636+
sizeof(*RExC_rxi->data) + (sizeof(void*) * (count + n - 1)),
66296637
char, struct reg_data);
6630-
if(count)
6631-
Renew(RExC_rxi->data->what, count + n, U8);
6632-
else
6633-
Newx(RExC_rxi->data->what, n, U8);
6638+
/* however in the data->what expression we use (count + n) and do not
6639+
* subtract one from the result because the data structure contains a
6640+
* pointer to an array, and does not allocate the first element as part of
6641+
* the data struct. */
6642+
if (count > 1)
6643+
Renew(RExC_rxi->data->what, (count + n), U8);
6644+
else {
6645+
/* when count == 1 it means we have not initialized anything.
6646+
* we always fill the 0 slot of the data array with a '%' entry, which
6647+
* means "zero" (all the other types are letters) which exists purely
6648+
* so the return from add_data is ALWAYS true, so we can tell it apart
6649+
* from a "no value" idx=0 in places where we would return an index
6650+
* into add_data. This is particularly important with the new "single
6651+
* pass, usually, but not always" strategy that we use, where the code
6652+
* will use a 0 to represent "not able to compute this yet".
6653+
*/
6654+
Newx(RExC_rxi->data->what, n+1, U8);
6655+
/* fill in the placeholder slot of 0 with a what of '%', we use
6656+
* this because it sorta looks like a zero (0/0) and it is not a letter
6657+
* like any of the other "whats", this type should never be created
6658+
* any other way but here. '%' happens to also not appear in this
6659+
* file for any other reason (at the time of writing this comment)*/
6660+
RExC_rxi->data->what[0]= '%';
6661+
RExC_rxi->data->data[0]= NULL;
6662+
}
66346663
RExC_rxi->data->count = count + n;
66356664
Copy(s, RExC_rxi->data->what + count, n, U8);
6665+
assert(count>0);
66366666
return count;
66376667
}
66386668

@@ -21432,13 +21462,21 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
2143221462
} else if ( pRExC_state ) {
2143321463
name_list= RExC_paren_name_list;
2143421464
}
21435-
if (name_list) {
21465+
if ( name_list ) {
2143621466
if ( k != REF || (OP(o) < REFN)) {
2143721467
SV **name= av_fetch(name_list, parno, 0 );
2143821468
if (name)
2143921469
Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
2144021470
}
21441-
else {
21471+
else
21472+
if (parno > 0) {
21473+
/* parno must always be larger than 0 for this block
21474+
* as it represents a slot into the data array, which
21475+
* has the 0 slot reserved for a placeholder so any valid
21476+
* index into it is always true, eg non-zero
21477+
* see the '%' "what" type and the implementation of
21478+
* S_add_data()
21479+
*/
2144221480
SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]);
2144321481
I32 *nums=(I32*)SvPVX(sv_dat);
2144421482
SV **name= av_fetch(name_list, nums[0], 0 );
@@ -22101,6 +22139,12 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
2210122139
}
2210222140
}
2210322141
break;
22142+
case '%':
22143+
/* NO-OP a '%' data contains a null pointer, so that add_data
22144+
* always returns non-zero, this should only ever happen in the
22145+
* 0 index */
22146+
assert(n==0);
22147+
break;
2210422148
default:
2210522149
Perl_croak(aTHX_ "panic: regfree data code '%c'",
2210622150
ri->data->what[n]);
@@ -22329,6 +22373,13 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
2232922373
is not from another regexp */
2233022374
d->data[i] = ri->data->data[i];
2233122375
break;
22376+
case '%':
22377+
/* this is a placeholder type, it exists purely so that
22378+
* add_data always returns a non-zero value, this type of
22379+
* entry should ONLY be present in the 0 slot of the array */
22380+
assert(i == 0);
22381+
d->data[i]= ri->data->data[i];
22382+
break;
2233222383
default:
2233322384
Perl_croak(aTHX_ "panic: re_dup_guts unknown data code '%c'",
2233422385
ri->data->what[i]);

0 commit comments

Comments
 (0)