@@ -177,6 +177,7 @@ static const char ident_var_zero_multi_digit[] = "Numeric variables with more th
/* Single-bit flag values; each occupies a distinct bit so they can be OR'ed
 * together and tested individually with '&'.
 * NOTE(review): presumably all of these are passed in the 'flags' argument
 * of the identifier-parsing code; only the use of IDFIRST_ONLY is visible
 * here -- confirm the others against their callers. */
177177#define CHECK_KEYWORD (1 << 0)
178178#define ALLOW_PACKAGE (1 << 1)
179179#define CHECK_DOLLAR (1 << 2)
/* IDFIRST_ONLY: when set in the flags given to S_parse_ident(), only an
 * identifier whose first character matches IDFIRST is accepted; the
 * alternative form that begins with an ASCII digit followed by ASCII \w
 * characters is not considered an identifier. */
180+ #define IDFIRST_ONLY (1 << 3)
180181
181182#ifdef DEBUGGING
182183static const char * const lex_state_names [] = {
@@ -10552,9 +10553,12 @@ S_parse_ident(pTHX_ const char *s, const char * const s_end,
1055210553 * 1) A normal identifier whose first character matches IDFIRST followed
1055310554 * by any number of characters which match IDCONT.
1055410555 * 2) An identifier that begins with an ASCII digit followed by any number
10555- * of ASCII \w characters
10556- *
10557- * The function copies the identifier into the destination starting at *d
10556+ * of ASCII \w characters. This type can be prohibited, so that
10557+ * anything that doesn't match type 1) is not considered an identifier.
10558+ */
10559+ const bool idfirst_only = flags & IDFIRST_ONLY ;
10560+
10561+ /* The function copies the identifier into the destination starting at *d
1055810562 * (whose upper bound is 'e') and advances *d to point to just beyond the
1055910563 * end of the identifier, setting **d to a NUL character. The reason it
1056010564 * needs to copy is that it may convert apostrophe package separators into
@@ -10585,15 +10589,18 @@ S_parse_ident(pTHX_ const char *s, const char * const s_end,
1058510589 * Unicode definition only when UTF-8 is in effect. We have to check
1058610590 * for the subset before checking for the superset. */
1058710591 Size_t advance ;
10588- if (is_utf8 && (advance = isIDFIRST_utf8_safe (s , s_end ))) {
10592+ if ( (advance = isIDFIRST_lazy_if_safe (s , s_end , is_utf8 ))
10593+ && (is_utf8 || idfirst_only ))
10594+ {
1058910595 const char * this_start = s ;
1059010596 s += advance ;
1059110597
1059210598 /* Find the end of the identifier by accumulating characters until
1059310599 * a non-identifier character is found */
1059410600 while (s < s_end ) {
10595- advance = isIDCONT_utf8_safe ((const U8 * ) s ,
10596- (const U8 * ) s_end );
10601+ advance = isIDCONT_lazy_if_safe ((const U8 * ) s ,
10602+ (const U8 * ) s_end ,
10603+ is_utf8 );
1059710604 if (advance == 0 ) { /* Not an identifier character */
1059810605 break ;
1059910606 }
@@ -10612,7 +10619,7 @@ S_parse_ident(pTHX_ const char *s, const char * const s_end,
1061210619 Copy (this_start , * d , this_length , char );
1061310620 * d += this_length ;
1061410621 }
10615- else if ( isWORDCHAR_A (* s ) ) {
10622+ else if (! idfirst_only && isWORDCHAR_A (* s ) ) {
1061610623
1061710624 /* This is the superset; it accepts \w+, including an initial
1061810625 * digit */
0 commit comments