Skip to content

Commit ea3daa5

Browse files
author
Father Chrysostomos
committed
Use SSize_t/STRLEN in more places in regexp code
As part of getting the regexp engine to handle long strings, this commit changes any variables, parameters and struct members that hold lengths of the string being matched against (or parts thereof) to use SSize_t or STRLEN instead of [IU]32. To avoid having to change any logic, I kept the signedness the same. I did not change anything that affects the length of the regular expression itself, so regexps are still practically limited to I32_MAX. Changing that would involve changing the size of regnodes, which would be a lot more involved. These changes should fix bugs, but are very hard to test. In most cases, I don’t know the regexp engine well enough to come up with test cases that test the paths in question with long strings. In other cases I don’t have a box with enough memory to test the fix.
1 parent 49f5553 commit ea3daa5

File tree

6 files changed

+131
-114
lines changed

6 files changed

+131
-114
lines changed

embed.fnc

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,8 +1096,8 @@ EMsR |SV* |_new_invlist_C_array|NN const UV* const list
10961096
: Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|const bool complement_b
10971097
#endif
10981098
Ap |I32 |pregexec |NN REGEXP * const prog|NN char* stringarg \
1099-
|NN char* strend|NN char* strbeg|I32 minend \
1100-
|NN SV* screamer|U32 nosave
1099+
|NN char* strend|NN char* strbeg \
1100+
|SSize_t minend |NN SV* screamer|U32 nosave
11011101
Ap |void |pregfree |NULLOK REGEXP* r
11021102
Ap |void |pregfree2 |NN REGEXP *rx
11031103
: FIXME - is anything in re using this now?
@@ -1128,8 +1128,9 @@ EiPR |I32 |regcurly |NN const char *s \
11281128
|const bool rbrace_must_be_escaped
11291129
#endif
11301130
Ap |I32 |regexec_flags |NN REGEXP *const rx|NN char *stringarg \
1131-
|NN char *strend|NN char *strbeg|I32 minend \
1132-
|NN SV *sv|NULLOK void *data|U32 flags
1131+
|NN char *strend|NN char *strbeg \
1132+
|SSize_t minend|NN SV *sv \
1133+
|NULLOK void *data|U32 flags
11331134
ApR |regnode*|regnext |NULLOK regnode* p
11341135
EXp |SV*|reg_named_buff |NN REGEXP * const rx|NULLOK SV * const key \
11351136
|NULLOK SV * const value|const U32 flags
@@ -2033,8 +2034,8 @@ Ei |U8 |compute_EXACTish|NN struct RExC_state_t *pRExC_state
20332034
Es |char * |nextchar |NN struct RExC_state_t *pRExC_state
20342035
Es |bool |reg_skipcomment|NN struct RExC_state_t *pRExC_state
20352036
Es |void |scan_commit |NN const struct RExC_state_t *pRExC_state \
2036-
|NN struct scan_data_t *data|NN I32 *minlenp \
2037-
|int is_inf
2037+
|NN struct scan_data_t *data \
2038+
|NN SSize_t *minlenp|int is_inf
20382039
Esn |void |cl_anything |NN const struct RExC_state_t *pRExC_state \
20392040
|NN struct regnode_charclass_class *cl
20402041
EsRn |int |cl_is_anything |NN const struct regnode_charclass_class *cl
@@ -2046,7 +2047,7 @@ Esn |void |cl_or |NN const struct RExC_state_t *pRExC_state \
20462047
|NN struct regnode_charclass_class *cl \
20472048
|NN const struct regnode_charclass_class *or_with
20482049
Es |SSize_t|study_chunk |NN struct RExC_state_t *pRExC_state \
2049-
|NN regnode **scanp|NN I32 *minlenp \
2050+
|NN regnode **scanp|NN SSize_t *minlenp \
20502051
|NN SSize_t *deltap|NN regnode *last \
20512052
|NULLOK struct scan_data_t *data \
20522053
|I32 stopparen|NULLOK U8* recursed \
@@ -2104,15 +2105,15 @@ Es |CHECKPOINT|regcppush |NN const regexp *rex|I32 parenfloor\
21042105
|U32 maxopenparen
21052106
Es |void |regcppop |NN regexp *rex\
21062107
|NN U32 *maxopenparen_p
2107-
ERsn |U8* |reghop3 |NN U8 *s|I32 off|NN const U8 *lim
2108+
ERsn |U8* |reghop3 |NN U8 *s|SSize_t off|NN const U8 *lim
21082109
ERsM |SV* |core_regclass_swash|NULLOK const regexp *prog \
21092110
|NN const struct regnode *node|bool doinit \
21102111
|NULLOK SV **listsvp
21112112
#ifdef XXX_dmq
2112-
ERsn |U8* |reghop4 |NN U8 *s|I32 off|NN const U8 *llim \
2113+
ERsn |U8* |reghop4 |NN U8 *s|SSize_t off|NN const U8 *llim \
21132114
|NN const U8 *rlim
21142115
#endif
2115-
ERsn |U8* |reghopmaybe3 |NN U8 *s|I32 off|NN const U8 *lim
2116+
ERsn |U8* |reghopmaybe3 |NN U8 *s|SSize_t off|NN const U8 *lim
21162117
ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c \
21172118
|NN char *s|NN const char *strend \
21182119
|NULLOK regmatch_info *reginfo

pod/perlreapi.pod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ following format:
1717
REGEXP * const rx,
1818
char* stringarg,
1919
char* strend, char* strbeg,
20-
I32 minend, SV* sv,
20+
SSize_t minend, SV* sv,
2121
void* data, U32 flags);
2222
char* (*intuit) (pTHX_
2323
REGEXP * const rx, SV *sv,
@@ -238,7 +238,7 @@ certain optimisations when this is set.
238238

239239
I32 exec(pTHX_ REGEXP * const rx,
240240
char *stringarg, char* strend, char* strbeg,
241-
I32 minend, SV* sv,
241+
SSize_t minend, SV* sv,
242242
void* data, U32 flags);
243243

244244
Execute a regexp. The arguments are

proto.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3187,7 +3187,7 @@ PERL_CALLCONV REGEXP* Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
31873187
#define PERL_ARGS_ASSERT_PREGCOMP \
31883188
assert(pattern)
31893189

3190-
PERL_CALLCONV I32 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, U32 nosave)
3190+
PERL_CALLCONV I32 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, char* strend, char* strbeg, SSize_t minend, SV* screamer, U32 nosave)
31913191
__attribute__nonnull__(pTHX_1)
31923192
__attribute__nonnull__(pTHX_2)
31933193
__attribute__nonnull__(pTHX_3)
@@ -3409,7 +3409,7 @@ PERL_CALLCONV void Perl_regdump(pTHX_ const regexp* r)
34093409
#define PERL_ARGS_ASSERT_REGDUMP \
34103410
assert(r)
34113411

3412-
PERL_CALLCONV I32 Perl_regexec_flags(pTHX_ REGEXP *const rx, char *stringarg, char *strend, char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
3412+
PERL_CALLCONV I32 Perl_regexec_flags(pTHX_ REGEXP *const rx, char *stringarg, char *strend, char *strbeg, SSize_t minend, SV *sv, void *data, U32 flags)
34133413
__attribute__nonnull__(pTHX_1)
34143414
__attribute__nonnull__(pTHX_2)
34153415
__attribute__nonnull__(pTHX_3)
@@ -6766,14 +6766,14 @@ STATIC char * S_regwhite(struct RExC_state_t *pRExC_state, char *p)
67666766
#define PERL_ARGS_ASSERT_REGWHITE \
67676767
assert(pRExC_state); assert(p)
67686768

6769-
STATIC void S_scan_commit(pTHX_ const struct RExC_state_t *pRExC_state, struct scan_data_t *data, I32 *minlenp, int is_inf)
6769+
STATIC void S_scan_commit(pTHX_ const struct RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf)
67706770
__attribute__nonnull__(pTHX_1)
67716771
__attribute__nonnull__(pTHX_2)
67726772
__attribute__nonnull__(pTHX_3);
67736773
#define PERL_ARGS_ASSERT_SCAN_COMMIT \
67746774
assert(pRExC_state); assert(data); assert(minlenp)
67756775

6776-
STATIC SSize_t S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp, I32 *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, struct regnode_charclass_class *and_withp, U32 flags, U32 depth)
6776+
STATIC SSize_t S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, struct regnode_charclass_class *and_withp, U32 flags, U32 depth)
67776777
__attribute__nonnull__(pTHX_1)
67786778
__attribute__nonnull__(pTHX_2)
67796779
__attribute__nonnull__(pTHX_3)
@@ -6979,14 +6979,14 @@ STATIC CHECKPOINT S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxop
69796979
#define PERL_ARGS_ASSERT_REGCPPUSH \
69806980
assert(rex)
69816981

6982-
STATIC U8* S_reghop3(U8 *s, I32 off, const U8 *lim)
6982+
STATIC U8* S_reghop3(U8 *s, SSize_t off, const U8 *lim)
69836983
__attribute__warn_unused_result__
69846984
__attribute__nonnull__(1)
69856985
__attribute__nonnull__(3);
69866986
#define PERL_ARGS_ASSERT_REGHOP3 \
69876987
assert(s); assert(lim)
69886988

6989-
STATIC U8* S_reghopmaybe3(U8 *s, I32 off, const U8 *lim)
6989+
STATIC U8* S_reghopmaybe3(U8 *s, SSize_t off, const U8 *lim)
69906990
__attribute__warn_unused_result__
69916991
__attribute__nonnull__(1)
69926992
__attribute__nonnull__(3);
@@ -7035,7 +7035,7 @@ STATIC void S_to_utf8_substr(pTHX_ regexp * prog)
70357035
assert(prog)
70367036

70377037
# if defined(XXX_dmq)
7038-
STATIC U8* S_reghop4(U8 *s, I32 off, const U8 *llim, const U8 *rlim)
7038+
STATIC U8* S_reghop4(U8 *s, SSize_t off, const U8 *llim, const U8 *rlim)
70397039
__attribute__warn_unused_result__
70407040
__attribute__nonnull__(1)
70417041
__attribute__nonnull__(3)

0 commit comments

Comments
 (0)