@@ -178,6 +178,7 @@ static const char ident_var_zero_multi_digit[] = "Numeric variables with more th
178178#define ALLOW_PACKAGE (1 << 1) /* parse_ident(): presumably enables parsing a full package variable path -- confirm against its 'allow_package' handling */
179179#define CHECK_DOLLAR (1 << 2) /* NOTE(review): use is not visible in this excerpt -- confirm at the call sites */
180180#define IDFIRST_ONLY (1 << 3) /* parse_ident(): accept only identifiers that start with an IDFIRST character; digit-led names are not identifiers */
181+ #define STOP_AT_FIRST_NON_DIGIT (1 << 4) /* parse_ident(): a digit-led identifier ends at the first non-digit; must not be combined with IDFIRST_ONLY */
181182
182183#ifdef DEBUGGING
183184static const char * const lex_state_names [] = {
@@ -10553,10 +10554,16 @@ S_parse_ident(pTHX_ const char *s, const char * const s_end,
1055310554 * 1) A normal identifier whose first character matches IDFIRST followed
1055410555 * by any number of characters which match IDCONT.
1055510556 * 2) An identifier that begins with an ASCII digit followed by any number
10556- * of ASCII \w characters. This type can be prohibited, so that
10557- * anything that doesn't match type 1) is not considered an identifier.
10558- */
10557+ * of ASCII \w characters. As a special case of this, it can
10558+ * optionally stop parsing at the first non-digit, returning just the
10559+ * initial digits. */
10560+ const bool stop_at_first_non_digit = flags & STOP_AT_FIRST_NON_DIGIT ;
10561+
10562+ /* This type of identifier can be completely prohibited, so that
10563+ * anything that doesn't match type 1) is not considered to be an
10564+ * identifier. */
1055910565 const bool idfirst_only = flags & IDFIRST_ONLY ;
10566+ assert ((stop_at_first_non_digit && idfirst_only ) == 0 );
1056010567
1056110568 /* The function copies the identifier into the destination starting at *d
1056210569 * (whose upper bound is 'e') and advances *d to point to just beyond the
@@ -10568,8 +10575,12 @@ S_parse_ident(pTHX_ const char *s, const char * const s_end,
1056810575 * identifier ends in the input. If no identifier was found, the return
1056910576 * will be the input 's' unchanged.
1057010577 *
10571- * The function croaks if there is not enough room for the entire source
10572- * identifier to be copied.
10578+ * If the identifier is illegal, the function croaks.
10579+ * The possible reasons for failure are:
10580+ * 1) There is not enough room for the entire source identifier to be
10581+ * copied
10582+ * 2) 'stop_at_first_non_digit' is in effect and the identifier name has
10583+ * leading zeros
1057310584 *
1057410585 * When 'allow_package' is non-zero, the function parses a full package
1057510586 * variable path. Each iteration of the loop below picks up one segment
@@ -10619,6 +10630,26 @@ S_parse_ident(pTHX_ const char *s, const char * const s_end,
1061910630 Copy (this_start , * d , this_length , char );
1062010631 * d += this_length ;
1062110632 }
10633+ else if (stop_at_first_non_digit && isDIGIT_A (* s )) {
10634+ bool is_zero = * s == '0' ;
10635+ char * digit_start = * d ;
10636+ * (* d )++ = * s ++ ;
10637+
10638+ /* Stop at the first non-digit */
10639+ while (s < s_end && isDIGIT (* s )) {
10640+ if (* d >= e ) {
10641+ goto too_long ;
10642+ }
10643+
10644+ * (* d )++ = * s ++ ;
10645+ }
10646+
10647+ /* Leading zeros are not permitted */
10648+ if (is_zero && * d - digit_start > 1 )
10649+ croak (ident_var_zero_multi_digit );
10650+
10651+ break ;
10652+ }
1062210653 else if (! idfirst_only && isWORDCHAR_A (* s ) ) {
1062310654
1062410655 /* This is the superset; it accepts \w+, including an initial
@@ -10704,26 +10735,14 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
1070410735
1070510736 if (isSPACE (* s ) || !* s )
1070610737 s = skipspace (s );
10707- if (isDIGIT (* s )) { /* handle $0 and $1 $2 and $10 and etc */
10708- bool is_zero = * s == '0' ;
10709- char * digit_start = d ;
10710- * d ++ = * s ++ ;
10711- while (s < PL_bufend && isDIGIT (* s )) {
10712- if (d >= e )
10713- croak ("%s" , ident_too_long );
10714- * d ++ = * s ++ ;
10715- }
10716- if (is_zero && d - digit_start > 1 )
10717- croak (ident_var_zero_multi_digit );
10718- * d = '\0' ;
10719- }
10720- else { /* See if it is a "normal" identifier */
10721- s = parse_ident (s , PL_bufend , & d , e , is_utf8 , ALLOW_PACKAGE );
10722- }
10738+
10739+ /* See if it is a digit variable ($0, $1, $10, etc.) or a "normal" identifier */
10740+ s = parse_ident (s , PL_bufend , & d , e , is_utf8 ,
10741+ (ALLOW_PACKAGE | STOP_AT_FIRST_NON_DIGIT ));
1072310742 d = dest ;
1072410743
1072510744 if (* d ) {
10726- /* Either a digit variable, or parse_ident() found an identifier
10745+ /* Here parse_ident() found a digit variable or an identifier
1072710746 (anything valid as a bareword), so job done and return. */
1072810747 if (PL_lex_state != LEX_NORMAL )
1072910748 PL_lex_state = LEX_INTERPENDMAYBE ;
0 commit comments