Skip to content

Handle Unicode characters in parseFloat() #481

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
268 changes: 175 additions & 93 deletions jerry-core/ecma/builtin-objects/ecma-builtin-global.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,147 +424,229 @@ ecma_builtin_global_object_parse_float (ecma_value_t this_arg __attr_unused___,
ecma_string_t *number_str_p = ecma_get_string_from_value (string_var);
lit_utf8_size_t str_size = ecma_string_get_size (number_str_p);

MEM_DEFINE_LOCAL_ARRAY (utf8_string_buff, str_size + 1, lit_utf8_byte_t);

ssize_t bytes_copied = ecma_string_to_utf8_string (number_str_p,
utf8_string_buff,
(ssize_t) str_size);
JERRY_ASSERT (bytes_copied >= 0);
utf8_string_buff[str_size] = LIT_BYTE_NULL;

/* 2. Find first non whitespace char. */
lit_utf8_size_t start = 0;
for (lit_utf8_size_t i = 0; i < str_size; i++)
if (str_size > 0)
{
if (!lit_char_is_white_space (utf8_string_buff[i])
&& !lit_char_is_line_terminator (utf8_string_buff[i]))
{
start = i;
break;
}
}
MEM_DEFINE_LOCAL_ARRAY (utf8_string_buff, str_size, lit_utf8_byte_t);

bool sign = false;
ssize_t bytes_copied = ecma_string_to_utf8_string (number_str_p,
utf8_string_buff,
(ssize_t) str_size);
JERRY_ASSERT (bytes_copied >= 0);
lit_utf8_iterator_t iter = lit_utf8_iterator_create (utf8_string_buff, str_size);

/* Check if sign is present. */
if (utf8_string_buff[start] == '-')
{
sign = true;
start++;
}
else if (utf8_string_buff[start] == '+')
{
start++;
}
lit_utf8_iterator_seek_eos (&iter);

lit_utf8_iterator_pos_t start = lit_utf8_iterator_get_pos (&iter);
lit_utf8_iterator_pos_t end = lit_utf8_iterator_get_pos (&iter);

ecma_number_t *ret_num_p = ecma_alloc_number ();
lit_utf8_iterator_seek_bos (&iter);

/* Check if string is equal to "Infinity". */
const lit_utf8_byte_t *infinity_utf8_str_p = lit_get_magic_string_utf8 (LIT_MAGIC_STRING_INFINITY_UL);

for (lit_utf8_size_t i = 0; infinity_utf8_str_p[i] == utf8_string_buff[start + i]; i++)
{
if (infinity_utf8_str_p[i + 1] == 0)
/* 2. Find first non whitespace char and set starting position. */
while (!lit_utf8_iterator_is_eos (&iter))
{
*ret_num_p = ecma_number_make_infinity (sign);
ret_value = ecma_make_normal_completion_value (ecma_make_number_value (ret_num_p));
break;
ecma_char_t current_char = lit_utf8_iterator_read_next (&iter);

if (!lit_char_is_white_space (current_char)
&& !lit_char_is_line_terminator (current_char))
{
lit_utf8_iterator_decr (&iter);
start = lit_utf8_iterator_get_pos (&iter);
break;
}
}
}

if (ecma_is_completion_value_empty (ret_value))
{
lit_utf8_size_t current = start;
lit_utf8_size_t end = str_size;
bool has_whole_part = false;
bool has_fraction_part = false;
bool sign = false;
ecma_char_t current;

if (lit_char_is_decimal_digit (utf8_string_buff[current]))
if (!lit_utf8_iterator_is_eos (&iter))
{
has_whole_part = true;
/* Check if sign is present. */
current = lit_utf8_iterator_read_next (&iter);
if (current == LIT_CHAR_MINUS)
{
sign = true;
}

/* Check digits of whole part. */
for (lit_utf8_size_t i = current; i < str_size; i++, current++)
if (current == LIT_CHAR_MINUS || current == LIT_CHAR_PLUS)
{
if (!lit_char_is_decimal_digit (utf8_string_buff[current]))
{
break;
}
/* Set starting position to be after the sign character. */
start = lit_utf8_iterator_get_pos (&iter);
}
else
{
lit_utf8_iterator_decr (&iter);
}
}

ecma_number_t *ret_num_p = ecma_alloc_number ();

const lit_utf8_byte_t *infinity_utf8_str_p = lit_get_magic_string_utf8 (LIT_MAGIC_STRING_INFINITY_UL);
lit_utf8_iterator_t infinity_iter = lit_utf8_iterator_create (infinity_utf8_str_p,
sizeof (*infinity_utf8_str_p));

JERRY_ASSERT (!lit_utf8_iterator_is_eos (&infinity_iter));

/* Check if string is equal to "Infinity". */
while (!lit_utf8_iterator_is_eos (&iter)
&& (lit_utf8_iterator_read_next (&iter) == lit_utf8_iterator_read_next (&infinity_iter)))
{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to use lit_compare_utf8_string_and_magic_string to compare with "Infinity"; the code would be more compact:

if (lit_compare_utf8_string_and_magic_string (iter.buf_p + start.offset,
                                              iter.buf_size - start.offset,
                                              LIT_MAGIC_STRING_INFINITY_UL))
{
  ...
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't use lit_compare_utf8_string_and_magic_string here, because we have to check whether the string starts with "Infinity", not whether it is equal to it. For example, "Infinityfoo" should also be valid.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I see.

if (lit_utf8_iterator_is_eos (&infinity_iter))
{
/* String matched Infinity. */
*ret_num_p = ecma_number_make_infinity (sign);
ret_value = ecma_make_normal_completion_value (ecma_make_number_value (ret_num_p));
break;
}
}

end = current;
/* Reset to starting position. */
lit_utf8_iterator_seek (&iter, start);

/* Check decimal point. */
if (utf8_string_buff[current] == '.')
if (ecma_is_completion_value_empty (ret_value) && !lit_utf8_iterator_is_eos (&iter))
{
current++;
current = lit_utf8_iterator_read_next (&iter);

if (lit_char_is_decimal_digit (utf8_string_buff[current]))
bool has_whole_part = false;
bool has_fraction_part = false;

/* Check digits of whole part. */
if (lit_char_is_decimal_digit (current))
{
has_fraction_part = true;
has_whole_part = true;

/* Check digits of fractional part. */
for (lit_utf8_size_t i = current; i < str_size; i++, current++)
while (!lit_utf8_iterator_is_eos (&iter))
{
if (!lit_char_is_decimal_digit (utf8_string_buff[current]))
current = lit_utf8_iterator_read_next (&iter);
if (!lit_char_is_decimal_digit (current))
{
lit_utf8_iterator_decr (&iter);
break;
}
}

end = current;
}
}

/* Check exponent. */
if ((utf8_string_buff[current] == 'e' || utf8_string_buff[current] == 'E')
&& (has_whole_part || has_fraction_part))
{
current++;
else
{
lit_utf8_iterator_decr (&iter);
}

/* Check sign of exponent. */
if (utf8_string_buff[current] == '-' || utf8_string_buff[current] == '+')
/* Set end position to the end of whole part. */
end = lit_utf8_iterator_get_pos (&iter);
if (!lit_utf8_iterator_is_eos (&iter))
{
current++;
current = lit_utf8_iterator_read_next (&iter);
}

if (lit_char_is_decimal_digit (utf8_string_buff[current]))
/* Check decimal point. */
if (current == LIT_CHAR_DOT && !lit_utf8_iterator_is_eos (&iter))
{
current = lit_utf8_iterator_read_next (&iter);

/* Check digits of exponent part. */
for (lit_utf8_size_t i = current; i < str_size; i++, current++)
if (lit_char_is_decimal_digit (current))
{
if (!lit_char_is_decimal_digit (utf8_string_buff[current]))
has_fraction_part = true;

/* Check digits of fractional part. */
while (!lit_utf8_iterator_is_eos (&iter))
{
break;
current = lit_utf8_iterator_read_next (&iter);
if (!lit_char_is_decimal_digit (current))
{
lit_utf8_iterator_decr (&iter);
break;
}
}

/* Set end position to end of fraction part. */
end = lit_utf8_iterator_get_pos (&iter);
}
else
{
lit_utf8_iterator_decr (&iter);
}
}
else
{
lit_utf8_iterator_decr (&iter);
}

end = current;
if (!lit_utf8_iterator_is_eos (&iter))
{
current = lit_utf8_iterator_read_next (&iter);
}
}

if (start == end)
{
*ret_num_p = ecma_number_make_nan ();
ret_value = ecma_make_normal_completion_value (ecma_make_number_value (ret_num_p));
}
else
{
/* 5. */
*ret_num_p = ecma_utf8_string_to_number (utf8_string_buff + start, end - start);
/* Check exponent. */
if ((current == LIT_CHAR_LOWERCASE_E || current == LIT_CHAR_UPPERCASE_E)
&& (has_whole_part || has_fraction_part)
&& !lit_utf8_iterator_is_eos (&iter))
{
current = lit_utf8_iterator_read_next (&iter);

if (sign)
/* Check sign of exponent. */
if ((current == LIT_CHAR_PLUS || current == LIT_CHAR_MINUS)
&& !lit_utf8_iterator_is_eos (&iter))
{
current = lit_utf8_iterator_read_next (&iter);
}

if (lit_char_is_decimal_digit (current))
{
/* Check digits of exponent part. */
while (!lit_utf8_iterator_is_eos (&iter))
{
current = lit_utf8_iterator_read_next (&iter);
if (!lit_char_is_decimal_digit (current))
{
lit_utf8_iterator_decr (&iter);
break;
}
}

/* Set end position to end of exponent part. */
end = lit_utf8_iterator_get_pos (&iter);
}
}
else
{
lit_utf8_iterator_decr (&iter);
}

/* String did not contain a valid number. */
if (start.offset == end.offset)
{
*ret_num_p *= -1;
*ret_num_p = ecma_number_make_nan ();
ret_value = ecma_make_normal_completion_value (ecma_make_number_value (ret_num_p));
}
else
{
/* 5. */
*ret_num_p = ecma_utf8_string_to_number (utf8_string_buff + start.offset,
(lit_utf8_size_t) (end.offset - start.offset));

if (sign)
{
*ret_num_p *= -1;
}

ret_value = ecma_make_normal_completion_value (ecma_make_number_value (ret_num_p));
}
}
/* String ended after sign character, or was empty after removing leading whitespace. */
else if (ecma_is_completion_value_empty (ret_value))
{
*ret_num_p = ecma_number_make_nan ();
ret_value = ecma_make_normal_completion_value (ecma_make_number_value (ret_num_p));
}
MEM_FINALIZE_LOCAL_ARRAY (utf8_string_buff);
}
/* String length is zero. */
else
{
ecma_number_t *ret_num_p = ecma_alloc_number ();
*ret_num_p = ecma_number_make_nan ();
ret_value = ecma_make_normal_completion_value (ecma_make_number_value (ret_num_p));
}

MEM_FINALIZE_LOCAL_ARRAY (utf8_string_buff);
ECMA_FINALIZE (string_var);

return ret_value;
Expand Down
19 changes: 19 additions & 0 deletions tests/jerry/global-parsefloat.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,31 @@ assert(parseFloat("Infinityfoo") === Infinity);
assert(parseFloat("-Infinityfoo") === -Infinity);
assert(isNaN(parseFloat("")));
assert(isNaN(parseFloat(".")));
assert(isNaN(parseFloat("..")));
assert(isNaN(parseFloat("+")));
assert(isNaN(parseFloat("-")));
assert(isNaN(parseFloat("e")));
assert(isNaN(parseFloat("a")));
assert(isNaN(parseFloat("e+")));
assert(isNaN(parseFloat("+e-")));
assert(isNaN(parseFloat(".e")));
assert(isNaN(parseFloat(".a")));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should also add test cases like these:

parseFloat("1.1")
parseFloat("1..1")
parseFloat("1.1.2")

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

assert(isNaN(parseFloat("e3")));
assert(isNaN(parseFloat(".e3")));
assert(parseFloat("1..2") === 1);
assert(parseFloat("1.2.3") === 1.2);
assert(parseFloat("1.2ee3") === 1.2);
assert(parseFloat("0") === 0);
assert(parseFloat(".0") === 0);
assert(parseFloat("0.e3") === 0);
assert(parseFloat("0.0e3") === 0);
assert(parseFloat("1.2eA") === 1.2);
assert(parseFloat("1.ae3") === 1);
assert(parseFloat("\u00a0\u00a01.2e3") === 1200);
assert(parseFloat("\u2029\u2029\u00a01.2e\u00D0") === 1.2);
assert(isNaN(parseFloat("\u2029\u2029\u00a0\u00D01.2e3")));
assert(parseFloat("\u2029\u2029\u00a01.\u20292e\u00D0") === 1);
assert(isNaN(parseFloat("\u2029\u2029")));

var obj = new Object();
var arr = [3,4,5];
Expand Down