Skip to content

Commit 616fc31

Browse files
committed
regcomp.c: Add wrappers for cmplng/xctng wildcard subpatterns
This is in preparation for being called from more than one place. It has the salubrious effect that the wrapping we do around the user's supplied pattern is no longer visible in the Debug output of that pattern.
1 parent 60d8e2b commit 616fc31

File tree

6 files changed

+63
-17
lines changed

6 files changed

+63
-17
lines changed

embed.fnc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1919,6 +1919,11 @@ EiR |SV* |invlist_contents|NN SV* const invlist \
19191919
|const bool traditional_style
19201920
EixRT |UV |invlist_lowest|NN SV* const invlist
19211921
#ifndef PERL_EXT_RE_BUILD
1922+
ERS |REGEXP*|compile_wildcard|NN const char * name|const STRLEN len \
1923+
|const bool ignore_case
1924+
ES |I32 |execute_wildcard|NN REGEXP * const prog|NN char* stringarg \
1925+
|NN char* strend|NN char* strbeg \
1926+
|SSize_t minend |NN SV* screamer|U32 nosave
19221927
EiRT |UV* |_invlist_array_init |NN SV* const invlist|const bool will_have_0
19231928
EiRT |UV |invlist_max |NN SV* const invlist
19241929
EiRT |IV* |get_invlist_previous_index_addr|NN SV* invlist

embed.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,8 @@
959959
# if defined(PERL_IN_REGCOMP_C)
960960
#define _append_range_to_invlist(a,b,c) S__append_range_to_invlist(aTHX_ a,b,c)
961961
#define _invlist_array_init S__invlist_array_init
962+
#define compile_wildcard(a,b,c) S_compile_wildcard(aTHX_ a,b,c)
963+
#define execute_wildcard(a,b,c,d,e,f,g) S_execute_wildcard(aTHX_ a,b,c,d,e,f,g)
962964
#define get_invlist_previous_index_addr S_get_invlist_previous_index_addr
963965
#define invlist_clear(a) S_invlist_clear(aTHX_ a)
964966
#define invlist_max S_invlist_max

pod/perldelta.pod

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,11 @@ interpolated into another. The heuristics previously used have been
551551
replaced by a reliable method, and hence the diagnostics generated have
552552
changed. See L</Diagnostics>.
553553

554+
=item *
555+
The debug display (say by specifying C<-Dr> or S<C<use re>> (with
556+
appropriate options) of compiled Unicode propery wildcard subpatterns no
557+
longer has extraneous output.
558+
554559
=back
555560

556561
=head1 Known Problems

pod/perlunicode.pod

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1032,12 +1032,8 @@ This feature is not available when the left-hand side is prefixed by
10321032
C<Is_>, nor for any form that is marked as "Discouraged" in
10331033
L<perluniprops/Discouraged>.
10341034

1035-
Perl wraps your pattern with C<(?iaa: ... )>. This is because nothing
1036-
outside ASCII can match the Unicode property values available in this
1037-
release, and they should match caselessly. If your pattern has a syntax
1038-
error, this wrapping will be shown in the error message, even though you
1039-
didn't specify it yourself. This could be confusing if you don't know
1040-
about this.
1035+
By default, your pattern is matched case-insensitively, as if C</i> had
1036+
been specified. You can change this by saying C<(?-i)> in your pattern.
10411037

10421038
This experimental feature has been added to begin to implement
10431039
L<https://www.unicode.org/reports/tr18/#Wildcard_Properties>. Using it

proto.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4256,6 +4256,14 @@ PERL_STATIC_INLINE UV* S__invlist_array_init(SV* const invlist, const bool will_
42564256
assert(invlist)
42574257
#endif
42584258

4259+
STATIC REGEXP* S_compile_wildcard(pTHX_ const char * name, const STRLEN len, const bool ignore_case)
4260+
__attribute__warn_unused_result__;
4261+
#define PERL_ARGS_ASSERT_COMPILE_WILDCARD \
4262+
assert(name)
4263+
4264+
STATIC I32 S_execute_wildcard(pTHX_ REGEXP * const prog, char* stringarg, char* strend, char* strbeg, SSize_t minend, SV* screamer, U32 nosave);
4265+
#define PERL_ARGS_ASSERT_EXECUTE_WILDCARD \
4266+
assert(prog); assert(stringarg); assert(strend); assert(strbeg); assert(screamer)
42594267
#ifndef PERL_NO_INLINE_FUNCTIONS
42604268
PERL_STATIC_INLINE IV* S_get_invlist_previous_index_addr(SV* invlist)
42614269
__attribute__warn_unused_result__;

regcomp.c

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22814,6 +22814,42 @@ S_get_extended_utf8_msg(pTHX_ const UV cp)
2281422814

2281522815
# endif
2281622816

22817+
STATIC REGEXP *
22818+
S_compile_wildcard(pTHX_ const char * name, const STRLEN len,
22819+
const bool ignore_case)
22820+
{
22821+
U32 flags = PMf_MULTILINE;
22822+
REGEXP * subpattern_re;
22823+
22824+
PERL_ARGS_ASSERT_COMPILE_WILDCARD;
22825+
22826+
if (ignore_case) {
22827+
flags |= PMf_FOLD;
22828+
}
22829+
set_regex_charset(&flags, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
22830+
22831+
subpattern_re = re_op_compile_wrapper(sv_2mortal(newSVpvn(name, len)),
22832+
/* Like in op.c, we copy the compile
22833+
* time pm flags to the rx ones */
22834+
(flags & RXf_PMf_COMPILETIME), flags);
22835+
22836+
assert(subpattern_re); /* Should have died if didn't compile successfully */
22837+
return subpattern_re;
22838+
}
22839+
22840+
STATIC I32
22841+
S_execute_wildcard(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
22842+
char *strbeg, SSize_t minend, SV *screamer, U32 nosave)
22843+
{
22844+
I32 result;
22845+
22846+
PERL_ARGS_ASSERT_EXECUTE_WILDCARD;
22847+
22848+
result = pregexec(prog, stringarg, strend, strbeg, minend, screamer, nosave);
22849+
22850+
return result;
22851+
}
22852+
2281722853
SV *
2281822854
Perl_handle_user_defined_property(pTHX_
2281922855

@@ -23410,8 +23446,6 @@ Perl_parse_uniprop_string(pTHX_
2341023446
if (table_index) {
2341123447
const char * const * prop_values
2341223448
= UNI_prop_value_ptrs[table_index];
23413-
SV * subpattern;
23414-
Size_t subpattern_len;
2341523449
REGEXP * subpattern_re;
2341623450
char open = name[i++];
2341723451
char close;
@@ -23455,14 +23489,10 @@ Perl_parse_uniprop_string(pTHX_
2345523489
* pattern fails to compile, our added text to the user's
2345623490
* pattern will be displayed to the user, which is not so
2345723491
* desirable. */
23458-
subpattern_len = name_len - i - 1 - escaped;
23459-
subpattern = Perl_newSVpvf(aTHX_ "(?iaa:%.*s)",
23460-
(unsigned) subpattern_len,
23461-
name + i);
23462-
subpattern = sv_2mortal(subpattern);
23463-
subpattern_re = re_compile(subpattern, 0);
23464-
assert(subpattern_re); /* Should have died if didn't compile
23465-
successfully */
23492+
subpattern_re = compile_wildcard(name + i,
23493+
name_len - i - 1 - escaped,
23494+
TRUE /* /i */
23495+
);
2346623496

2346723497
/* For each legal property value, see if the supplied pattern
2346823498
* matches it. */
@@ -23471,7 +23501,7 @@ Perl_parse_uniprop_string(pTHX_
2347123501
const Size_t len = strlen(entry);
2347223502
SV* entry_sv = newSVpvn_flags(entry, len, SVs_TEMP);
2347323503

23474-
if (pregexec(subpattern_re,
23504+
if (execute_wildcard(subpattern_re,
2347523505
(char *) entry,
2347623506
(char *) entry + len,
2347723507
(char *) entry, 0,

0 commit comments

Comments
 (0)