Skip to content

Fix invalid control escapes during RegExp parsing. #552

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 12, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 32 additions & 21 deletions jerry-core/parser/regexp/re-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ re_count_num_of_groups (re_parser_ctx_t *parser_ctx_p) /**< RegExp parser contex
}
case LIT_CHAR_RIGHT_SQUARE:
{
if (!char_class_in)
if (char_class_in)
{
char_class_in--;
}
Expand Down Expand Up @@ -378,18 +378,22 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
}
else if (ch == LIT_CHAR_LOWERCASE_C)
{
if (lit_utf8_iterator_is_eos (iter_p))
if (!lit_utf8_iterator_is_eos (iter_p))
{
return ecma_raise_syntax_error ("invalid character class, end of string after '\\c'");
}

ch = lit_utf8_iterator_read_next (iter_p);
ch = lit_utf8_iterator_peek_next (iter_p);

if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
|| (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END))
{
/* See ECMA-262 v5, 15.10.2.10 (Point 3) */
ch = (ch % 32);
if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
|| (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
|| (ch >= LIT_CHAR_0 && ch <= LIT_CHAR_9))
{
/* See ECMA-262 v5, 15.10.2.10 (Point 3) */
ch = (ch % 32);
lit_utf8_iterator_incr (iter_p);
}
else
{
ch = LIT_CHAR_LOWERCASE_C;
}
}
}
else if (ch == LIT_CHAR_LOWERCASE_X)
Expand Down Expand Up @@ -627,19 +631,26 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
}
else if (ch == LIT_CHAR_LOWERCASE_C)
{
if (lit_utf8_iterator_is_eos (iter_p))
if (!lit_utf8_iterator_is_eos (iter_p))
{
out_token_p->value = ch;
break;
}

ch = lit_utf8_iterator_peek_next (iter_p);
ch = lit_utf8_iterator_peek_next (iter_p);

if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
|| (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END))
if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
|| (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END))
{
out_token_p->value = (ch % 32);
lit_utf8_iterator_incr (iter_p);
}
else
{
out_token_p->value = LIT_CHAR_BACKSLASH;
lit_utf8_iterator_decr (iter_p);
}
}
else
{
out_token_p->value = (ch % 32);
lit_utf8_iterator_advance (iter_p, 1);
out_token_p->value = LIT_CHAR_BACKSLASH;
lit_utf8_iterator_decr (iter_p);
}
}
else if (ch == LIT_CHAR_LOWERCASE_X
Expand Down
15 changes: 15 additions & 0 deletions tests/jerry/regexp-simple-atom-and-iterations.js
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,18 @@ assert (/([abc]+)\40([d-f]+)\12\1/.exec("abc def\nabc") == "abc def\nabc,abc,def

// Back-reference test with nine capture groups. The first \9 appears before
// group 9 has captured anything, so it matches the empty string; the second
// \9 must repeat the digit captured by group 9 ("1").
// Note: exec() returns an array; the loose == comparison against a string
// relies on the array being converted to its comma-joined string form.
var expected = "8765432911,8,7,6,5,4,3,2,9,1";
assert (/(\d)(\d)(\d)(\d)(\d)(\d)(\d)(\d)\9(\d)\9/.exec("8765432911") == expected);

// Control-escape (\c) edge cases.
// NOTE(review): `r` is not declared in the visible part of this file; it is
// presumably declared earlier -- confirm before moving these statements.

// \c with nothing after it is not a valid control escape; the pattern is
// expected to match the two literal characters backslash and 'c'.
r = /\c/;
assert (r.exec ("\\c") == "\\c");

// Inside a character class, a \c without a control character following it
// is expected to leave 'c' as a member of the class.
r = /[\c]/;
assert (r.exec ("c") == "c");

// Inside a character class a digit is accepted after \c:
// '1' is 0x31, and 0x31 % 32 == 0x11, so the class matches U+0011.
r = /[\c1]/;
assert (r.exec ("\u0011") == "\u0011");

// Outside a character class a digit is NOT a control letter, so the pattern
// is expected to match the literal text backslash, 'c', '3'.
r = /\c3/;
assert (r.exec ("\\c3") == "\\c3");

// Valid control escape followed by ordinary atoms:
// 'I' is 0x49, and 0x49 % 32 == 9, i.e. a horizontal tab.
r = /\cIasd/;
assert (r.exec ("\tasd") == "\tasd");