Skip to content

Commit f394a63

Browse files
committed
utf8.c: Use \p{nv=float}
Now that the float data is available to us (in the previous commit), we can take advantage of it, and avoid swash creation. We just use the perl atof() to convert the input string to an NV, and then convert back to a string, but in guaranteed canonical form. Then we look that up.
1 parent 2709761 commit f394a63

File tree

1 file changed

+56
-11
lines changed

1 file changed

+56
-11
lines changed

utf8.c

Lines changed: 56 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6029,11 +6029,15 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t len, const
60296029
* willy-nilly, as those could be a minus sign. Other stricter
60306030
* rules also apply. However, these properties all can have the
60316031
* rhs not be a number, in which case they contain at least one
6032-
* alphabetic. In those cases, the stricter rules don't apply. We
6033-
* first parse to look for alphas */
6032+
* alphabetic. In those cases, the stricter rules don't apply.
6033+
* But the numeric value property can have the alphas [Ee] to
6034+
* signify an exponent, and it is still a number with stricter
6035+
* rules. So look for an alpha that signifies not-strict */
60346036
stricter = TRUE;
60356037
for (k = i; k < len; k++) {
6036-
if (isALPHA(name[k])) {
6038+
if ( isALPHA(name[k])
6039+
&& (! is_nv || ! isALPHA_FOLD_EQ(name[k], 'E')))
6040+
{
60376041
stricter = FALSE;
60386042
break;
60396043
}
@@ -6186,17 +6190,58 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t len, const
61866190

61876191
/* If didn't find the property, we try again stripping off any initial
61886192
* 'In' or 'Is' */
6189-
if (! starts_with_In_or_Is) {
6190-
return NULL;
6191-
}
6193+
if (starts_with_In_or_Is) {
6194+
lookup_name += 2;
6195+
lookup_len -= 2;
6196+
equals_pos -= 2;
61926197

6193-
lookup_name += 2;
6194-
lookup_len -= 2;
6198+
table_index = match_uniprop((U8 *) lookup_name, lookup_len);
6199+
}
61956200

6196-
/* If still didn't find it, give up */
6197-
table_index = match_uniprop((U8 *) lookup_name, lookup_len);
61986201
if (table_index == 0) {
6199-
return NULL;
6202+
char * canonical;
6203+
6204+
/* If not found, and not the numeric value property, this isn't a legal
6205+
* property */
6206+
if (! is_nv) {
6207+
return NULL;
6208+
}
6209+
6210+
/* But the numeric value property needs more work to decide. What
6211+
* we do is make sure we have the number in canonical form and look
6212+
* that up. */
6213+
6214+
{
6215+
6216+
/* Take the input, convert it to an
6217+
* NV, then create a canonical string representation of that
6218+
* NV. */
6219+
6220+
NV value;
6221+
6222+
/* Get the value */
6223+
if (my_atof3(lookup_name + equals_pos, &value,
6224+
lookup_len - equals_pos)
6225+
!= lookup_name + lookup_len)
6226+
{
6227+
return NULL;
6228+
}
6229+
6230+
/* If the value is an integer, the canonical value is integral */
6231+
if (Perl_ceil(value) == value) {
6232+
canonical = Perl_form(aTHX_ "nv=%.0" NVff, value);
6233+
}
6234+
else { /* Otherwise, it is %e with a known precision */
6235+
canonical = Perl_form(aTHX_ "nv=%.*" NVef,
6236+
PL_E_FORMAT_PRECISION, value);
6237+
}
6238+
}
6239+
6240+
/* Here, we have the number in canonical form. Try that */
6241+
table_index = match_uniprop((U8 *) canonical, strlen(canonical));
6242+
if (table_index == 0) {
6243+
return NULL;
6244+
}
62006245
}
62016246
}
62026247

0 commit comments

Comments
 (0)