@@ -1049,31 +1049,39 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
10491049}
10501050
10511051/*
1052- grok_atoUV
1052+ =for apidoc grok_atoUV
10531053
1054- grok_atoUV parses a C-style zero-byte terminated string, looking for
1055- a decimal unsigned integer.
1054+ parse a string, looking for a decimal unsigned integer.
10561055
1057- Returns the unsigned integer, if a valid value can be parsed
1058- from the beginning of the string.
1056+ On entry, C<pv> points to the beginning of the string;
1057+ C<valptr> points to a UV that will receive the converted value, if found;
1058+ C<endptr> is either NULL or points to a variable that points to one byte
1059+ beyond the point in C<pv> that this routine should examine.
1060+ If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated.
10591061
1060- Accepts only the decimal digits '0'..'9'.
1062+ Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with
1063+ no leading zeros). Otherwise it returns TRUE, and sets C<*valptr> to that
1064+ value.
10611065
1062- As opposed to atoi or strtol, grok_atoUV does NOT allow optional
1063- leading whitespace, or negative inputs. If such features are
1064- required, the calling code needs to explicitly implement those.
1066+ If you constrain the portion of C<pv> that is looked at by this function (by
1067+ passing a non-NULL C<endptr>), and if the initial bytes of that portion form a
1068+ valid value, it will return TRUE, setting C<*endptr> to the byte following the
1069+ final digit of the value. But if there is no constraint on what's looked at,
1070+ all of C<pv> must be valid in order for TRUE to be returned.
10651071
1066- Returns true if a valid value could be parsed. In that case, valptr
1067- is set to the parsed value, and endptr (if provided) is set to point
1068- to the character after the last digit.
1072+ The only characters this accepts are the decimal digits '0'..'9'.
10691073
1070- Returns false otherwise. This can happen if a) there is a leading zero
1071- followed by another digit; b) the digits would overflow a UV; or c)
1072- there are trailing non-digits AND endptr is not provided .
1074+ As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional
1075+ leading whitespace, nor negative inputs. If such features are required, the
1076+ calling code needs to explicitly implement those.
10731077
1074- Background: atoi has severe problems with illegal inputs, it cannot be
1078+ Note that this function returns FALSE for inputs that would overflow a UV,
1079+ or have leading zeros. Thus a single C<0> is accepted, but not C<00> nor
1080+ C<01>, C<002>, I<etc>.
1081+
1082+ Background: C<atoi> has severe problems with illegal inputs, it cannot be
10751083used for incremental parsing, and therefore should be avoided.
1076- atoi and strtol are also affected by locale settings, which can also be
1084+ C<atoi> and C<strtol> are also affected by locale settings, which can also be
10771085seen as a bug (global state controlled by user environment).
10781086
10791087*/
@@ -1088,15 +1096,27 @@ Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
10881096
10891097 PERL_ARGS_ASSERT_GROK_ATOUV ;
10901098
1091- eptr = endptr ? endptr : & end2 ;
1092- if (isDIGIT (* s )) {
1099+ if (endptr ) {
1100+ eptr = endptr ;
1101+ }
1102+ else {
1103+ end2 = s + strlen (s );
1104+ eptr = & end2 ;
1105+ }
1106+
1107+ if ( * eptr <= s
1108+ || ! isDIGIT (* s ))
1109+ {
1110+ return FALSE;
1111+ }
1112+
10931113 /* Single-digit inputs are quite common. */
10941114 val = * s ++ - '0' ;
1095- if (isDIGIT (* s )) {
1115+ if (s < * eptr && isDIGIT (* s )) {
10961116 /* Fail on extra leading zeros. */
10971117 if (val == 0 )
10981118 return FALSE;
1099- while (isDIGIT (* s )) {
1119+ while (s < * eptr && isDIGIT (* s )) {
11001120 /* This could be unrolled like in grok_number(), but
11011121 * the expected uses of this are not speed-needy, and
11021122 * unlikely to need full 64-bitness. */
@@ -1109,12 +1129,14 @@ Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
11091129 }
11101130 }
11111131 }
1132+ if (endptr == NULL ) {
1133+ if (* s ) {
1134+ return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
1135+ }
1136+ }
1137+ else {
1138+ * endptr = s ;
11121139 }
1113- if (s == pv )
1114- return FALSE;
1115- if (endptr == NULL && * s )
1116- return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
1117- * eptr = s ;
11181140 * valptr = val ;
11191141 return TRUE;
11201142}
0 commit comments