@@ -7399,7 +7399,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
73997399 *
74007400 * pm_flags contains the PMf_* flags, typically based on those from the
74017401 * pm_flags field of the related PMOP. Currently we're only interested in
7402- * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL.
7402+ * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL, PMf_WILDCARD.
74037403 *
74047404 * For many years this code had an initial sizing pass that calculated
74057405 * (sometimes incorrectly, leading to security holes) the size needed for the
@@ -10765,6 +10765,24 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
1076510765 /* && memCHRs("iogcmsx", *RExC_parse) */
1076610766 /* (?g), (?gc) and (?o) are useless here
1076710767 and must be globally applied -- japhy */
10768+ if ((RExC_pm_flags & PMf_WILDCARD)) {
10769+ if (flagsp == & negflags) {
10770+ if (*RExC_parse == 'm') {
10771+ RExC_parse++;
10772+ /* diag_listed_as: Use of %s is not allowed in Unicode
10773+ property wildcard subpatterns in regex; marked by <--
10774+ HERE in m/%s/ */
10775+ vFAIL("Use of modifier '-m' is not allowed in Unicode"
10776+ " property wildcard subpatterns");
10777+ }
10778+ }
10779+ else {
10780+ if (*RExC_parse == 's') {
10781+ goto modifier_illegal_in_wildcard;
10782+ }
10783+ }
10784+ }
10785+
1076810786 switch (*RExC_parse) {
1076910787
1077010788 /* Code for the imsxn flags */
@@ -10845,6 +10863,10 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
1084510863 *(RExC_parse - 1));
1084610864 NOT_REACHED; /*NOTREACHED*/
1084710865 case GLOBAL_PAT_MOD: /* 'g' */
10866+ if (RExC_pm_flags & PMf_WILDCARD) {
10867+ goto modifier_illegal_in_wildcard;
10868+ }
10869+ /*FALLTHROUGH*/
1084810870 case ONCE_PAT_MOD: /* 'o' */
1084910871 if (ckWARN(WARN_REGEXP)) {
1085010872 const I32 wflagbit = *RExC_parse == 'o'
@@ -10866,6 +10888,9 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
1086610888 break;
1086710889
1086810890 case CONTINUE_PAT_MOD: /* 'c' */
10891+ if (RExC_pm_flags & PMf_WILDCARD) {
10892+ goto modifier_illegal_in_wildcard;
10893+ }
1086910894 if (ckWARN(WARN_REGEXP)) {
1087010895 if (! (wastedflags & WASTED_C) ) {
1087110896 wastedflags |= WASTED_GC;
@@ -10880,6 +10905,9 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
1088010905 }
1088110906 break;
1088210907 case KEEPCOPY_PAT_MOD: /* 'p' */
10908+ if (RExC_pm_flags & PMf_WILDCARD) {
10909+ goto modifier_illegal_in_wildcard;
10910+ }
1088310911 if (flagsp == &negflags) {
1088410912 ckWARNreg(RExC_parse + 1,"Useless use of (?-p)");
1088510913 } else {
@@ -10900,6 +10928,18 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
1090010928 case ':':
1090110929 case ')':
1090210930
10931+ if ( (RExC_pm_flags & PMf_WILDCARD)
10932+ && cs != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
10933+ {
10934+ RExC_parse++;
10935+ /* diag_listed_as: Use of %s is not allowed in Unicode
10936+ property wildcard subpatterns in regex; marked by <--
10937+ HERE in m/%s/ */
10938+ vFAIL2("Use of modifier '%c' is not allowed in Unicode"
10939+ " property wildcard subpatterns",
10940+ has_charset_modifier);
10941+ }
10942+
1090310943 if ((posflags & (RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE)) == RXf_PMf_EXTENDED) {
1090410944 negflags |= RXf_PMf_EXTENDED_MORE;
1090510945 }
@@ -10925,6 +10965,13 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
1092510965 }
1092610966
1092710967 vFAIL("Sequence (?... not terminated");
10968+
10969+ modifier_illegal_in_wildcard:
10970+ RExC_parse++;
10971+ /* diag_listed_as: Use of %s is not allowed in Unicode property wildcard
10972+ subpatterns in regex; marked by <-- HERE in m/%s/ */
10973+ vFAIL2("Use of modifier '%c' is not allowed in Unicode property wildcard"
10974+ " subpatterns", *(RExC_parse - 1));
1092810975}
1092910976
1093010977/*
@@ -12533,6 +12580,23 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1253312580 do_curly:
1253412581 if ((flags&SIMPLE)) {
1253512582 if (min == 0 && max == REG_INFTY) {
12583+
12584+ /* Going from 0..inf is currently forbidden in wildcard
12585+ * subpatterns. The only reason is to make it harder to
12586+ * write patterns that take a long long time to halt, and
12587+ * because the use of this construct isn't necessary in
12588+ * matching Unicode property values */
12589+ if (RExC_pm_flags & PMf_WILDCARD) {
12590+ RExC_parse++;
12591+ /* diag_listed_as: Use of %s is not allowed in Unicode
12592+ property wildcard subpatterns in regex; marked by
12593+ <-- HERE in m/%s/ */
12594+ vFAIL("Use of quantifier '*' is not allowed in"
12595+ " Unicode property wildcard subpatterns");
12596+ /* Note, don't need to worry about {0,}, as a '}' isn't
12597+ * legal at all in wildcards, so wouldn't get this far
12598+ * */
12599+ }
1253612600 reginsert(pRExC_state, STAR, ret, depth+1);
1253712601 MARK_NAUGHTY(4);
1253812602 RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
@@ -13404,13 +13468,26 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1340413468 /* Special Escapes */
1340513469 case 'A':
1340613470 RExC_seen_zerolen++;
13407- ret = reg_node(pRExC_state, SBOL);
13408- /* SBOL is shared with /^/ so we set the flags so we can tell
13409- * /\A/ from /^/ in split. */
13410- FLAGS(REGNODE_p(ret)) = 1;
13471+ if (RExC_pm_flags & PMf_WILDCARD) {
13472+ ret = reg_node(pRExC_state, MBOL);
13473+ }
13474+ else {
13475+ ret = reg_node(pRExC_state, SBOL);
13476+ /* SBOL is shared with /^/ so we set the flags so we can tell
13477+ * /\A/ from /^/ in split. */
13478+ FLAGS(REGNODE_p(ret)) = 1;
13479+ }
1341113480 *flagp |= SIMPLE;
1341213481 goto finish_meta_pat;
1341313482 case 'G':
13483+ if (RExC_pm_flags & PMf_WILDCARD) {
13484+ RExC_parse++;
13485+ /* diag_listed_as: Use of %s is not allowed in Unicode property
13486+ wildcard subpatterns in regex; marked by <-- HERE in m/%s/
13487+ */
13488+ vFAIL("Use of '\\G' is not allowed in Unicode property"
13489+ " wildcard subpatterns");
13490+ }
1341413491 ret = reg_node(pRExC_state, GPOS);
1341513492 RExC_seen |= REG_GPOS_SEEN;
1341613493 *flagp |= SIMPLE;
@@ -13432,12 +13509,22 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1343213509 vFAIL("\\K not permitted in lookahead/lookbehind");
1343313510 }
1343413511 case 'Z':
13435- ret = reg_node(pRExC_state, SEOL);
13512+ if (RExC_pm_flags & PMf_WILDCARD) {
13513+ ret = reg_node(pRExC_state, MEOL);
13514+ }
13515+ else {
13516+ ret = reg_node(pRExC_state, SEOL);
13517+ }
1343613518 *flagp |= SIMPLE;
1343713519 RExC_seen_zerolen++; /* Do not optimize RE away */
1343813520 goto finish_meta_pat;
1343913521 case 'z':
13440- ret = reg_node(pRExC_state, EOS);
13522+ if (RExC_pm_flags & PMf_WILDCARD) {
13523+ ret = reg_node(pRExC_state, MEOL);
13524+ }
13525+ else {
13526+ ret = reg_node(pRExC_state, EOS);
13527+ }
1344113528 *flagp |= SIMPLE;
1344213529 RExC_seen_zerolen++; /* Do not optimize RE away */
1344313530 goto finish_meta_pat;
@@ -17457,6 +17544,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
1745717544 {
1745817545 char *e;
1745917546
17547+ if (RExC_pm_flags & PMf_WILDCARD) {
17548+ RExC_parse++;
17549+ /* diag_listed_as: Use of %s is not allowed in Unicode
17550+ property wildcard subpatterns in regex; marked by <--
17551+ HERE in m/%s/ */
17552+ vFAIL3("Use of '\\%c%c' is not allowed in Unicode property"
17553+ " wildcard subpatterns", value, *(RExC_parse - 1));
17554+ }
17555+
1746017556 /* \p means they want Unicode semantics */
1746117557 REQUIRE_UNI_RULES(flagp, 0);
1746217558
@@ -22818,7 +22914,7 @@ STATIC REGEXP *
2281822914S_compile_wildcard(pTHX_ const char * name, const STRLEN len,
2281922915 const bool ignore_case)
2282022916{
22821- U32 flags = PMf_MULTILINE;
22917+ U32 flags = PMf_MULTILINE|PMf_WILDCARD ;
2282222918 REGEXP * subpattern_re;
2282322919
2282422920 PERL_ARGS_ASSERT_COMPILE_WILDCARD;
0 commit comments