Skip to content

Commit d2a0fb8

Browse files
committed
toke.c: Move some code into a called function
S_scan_ident accepts another type of identifier: all digits. This moves the code that looks for those into S_parse_ident, adding a flag to accept them in that function. This adds a bit of complexity to S_parse_ident, while removing equivalent complexity, and a bit more, from S_scan_ident. Future commits will remove more complexity. This commit does just a bit beyond the bare minimum to move the code. The next commit will simplify the moved code a bit.
1 parent bd3f5d9 commit d2a0fb8

File tree

1 file changed

+41
-22
lines changed

1 file changed

+41
-22
lines changed

toke.c

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ static const char ident_var_zero_multi_digit[] = "Numeric variables with more th
178178
#define ALLOW_PACKAGE (1 << 1)
179179
#define CHECK_DOLLAR (1 << 2)
180180
#define IDFIRST_ONLY (1 << 3)
181+
#define STOP_AT_FIRST_NON_DIGIT (1 << 4)
181182

182183
#ifdef DEBUGGING
183184
static const char* const lex_state_names[] = {
@@ -10553,10 +10554,16 @@ S_parse_ident(pTHX_ const char *s, const char * const s_end,
1055310554
* 1) A normal identifier whose first character matches IDFIRST followed
1055410555
* by any number of characters which match IDCONT.
1055510556
* 2) An identifier that begins with an ASCII digit followed by any number
10556-
* of ASCII \w characters. This type can be prohibited, so that
10557-
* anything that doesn't match type 1) is not considered an identifier.
10558-
*/
10557+
* of ASCII \w characters. As a special case of this, it can
10558+
* optionally stop parsing at the first non-digit, returning just the
10559+
* initial digits. */
10560+
const bool stop_at_first_non_digit = flags & STOP_AT_FIRST_NON_DIGIT;
10561+
10562+
/* This type of identifier can be completely prohibited, so that
10563+
* anything that doesn't match type 1) is not considered to be an
10564+
* identifier. */
1055910565
const bool idfirst_only = flags & IDFIRST_ONLY;
10566+
assert((stop_at_first_non_digit && idfirst_only) == 0);
1056010567

1056110568
/* The function copies the identifier into the destination starting at *d
1056210569
* (whose upper bound is 'e') and advances *d to point to just beyond the
@@ -10568,8 +10575,12 @@ S_parse_ident(pTHX_ const char *s, const char * const s_end,
1056810575
* identifier ends in the input. If no identifier was found, the return
1056910576
* will be the input 's' unchanged.
1057010577
*
10571-
* The function croaks if there is not enough room for the entire source
10572-
* identifier to be copied.
10578+
* If the identifier is illegal, the function croaks.
10579+
* The possible reasons for failure are:
10580+
* 1) There is not enough room for the entire source identifier to be
10581+
* copied
10582+
* 2) 'stop_at_first_non_digit' is in effect and the identifier name has
10583+
* leading zeros
1057310584
*
1057410585
* When 'allow_package' is non-zero, the function parses a full package
1057510586
* variable path. Each iteration of the loop below picks up one segment
@@ -10619,6 +10630,26 @@ S_parse_ident(pTHX_ const char *s, const char * const s_end,
1061910630
Copy(this_start, *d, this_length, char);
1062010631
*d += this_length;
1062110632
}
10633+
else if (stop_at_first_non_digit && isDIGIT_A(*s)) {
10634+
bool is_zero = *s == '0';
10635+
char *digit_start= *d;
10636+
*(*d)++ = *s++;
10637+
10638+
/* Stop at the first non-digit */
10639+
while (s < s_end && isDIGIT(*s)) {
10640+
if (*d >= e) {
10641+
goto too_long;
10642+
}
10643+
10644+
*(*d)++ = *s++;
10645+
}
10646+
10647+
/* Leading zeros are not permitted */
10648+
if (is_zero && *d - digit_start > 1)
10649+
croak(ident_var_zero_multi_digit);
10650+
10651+
break;
10652+
}
1062210653
else if (! idfirst_only && isWORDCHAR_A(*s) ) {
1062310654

1062410655
/* This is the superset; it accepts \w+, including an initial
@@ -10704,26 +10735,14 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
1070410735

1070510736
if (isSPACE(*s) || !*s)
1070610737
s = skipspace(s);
10707-
if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10708-
bool is_zero = *s == '0';
10709-
char *digit_start= d;
10710-
*d++ = *s++;
10711-
while (s < PL_bufend && isDIGIT(*s)) {
10712-
if (d >= e)
10713-
croak("%s", ident_too_long);
10714-
*d++ = *s++;
10715-
}
10716-
if (is_zero && d - digit_start > 1)
10717-
croak(ident_var_zero_multi_digit);
10718-
*d = '\0';
10719-
}
10720-
else { /* See if it is a "normal" identifier */
10721-
s = parse_ident(s, PL_bufend, &d, e, is_utf8, ALLOW_PACKAGE);
10722-
}
10738+
10739+
/* See if it is a digit variable or a "normal" identifier */
10740+
s = parse_ident(s, PL_bufend, &d, e, is_utf8,
10741+
(ALLOW_PACKAGE | STOP_AT_FIRST_NON_DIGIT));
1072310742
d = dest;
1072410743

1072510744
if (*d) {
10726-
/* Either a digit variable, or parse_ident() found an identifier
10745+
/* Here parse_ident() found a digit variable or an identifier
1072710746
(anything valid as a bareword), so job done and return. */
1072810747
if (PL_lex_state != LEX_NORMAL)
1072910748
PL_lex_state = LEX_INTERPENDMAYBE;

0 commit comments

Comments
 (0)