Skip to content

Commit fe63e79

Browse files
committed
Global object unescape routine
JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com
1 parent 5a09ff2 commit fe63e79

File tree

4 files changed

+148
-1
lines changed

4 files changed

+148
-1
lines changed

jerry-core/ecma/builtin-objects/ecma-builtin-global.cpp

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,6 +1329,122 @@ ecma_builtin_global_object_escape (ecma_value_t this_arg __attr_unused___, /**<
13291329
return ret_value;
13301330
} /* ecma_builtin_global_object_escape */
13311331

1332+
/**
1333+
* The Global object's 'unescape' routine
1334+
*
1335+
* See also:
1336+
* ECMA-262 v5, B.2.2
1337+
*
1338+
* @return completion value
1339+
* Returned value must be freed with ecma_free_completion_value.
1340+
*/
1341+
static ecma_completion_value_t
1342+
ecma_builtin_global_object_unescape (ecma_value_t this_arg __attr_unused___, /**< this argument */
1343+
ecma_value_t arg) /**< routine's first argument */
1344+
{
1345+
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
1346+
1347+
/* 1. */
1348+
ECMA_TRY_CATCH (string, ecma_op_to_string (arg), ret_value);
1349+
ecma_string_t *input_string_p = ecma_get_string_from_value (string);
1350+
/* 2. */
1351+
lit_utf8_size_t input_size = ecma_string_get_size (input_string_p);
1352+
1353+
/* 3. */
1354+
MEM_DEFINE_LOCAL_ARRAY (input_start_p, input_size, lit_utf8_byte_t);
1355+
ssize_t sz = ecma_string_to_utf8_string (input_string_p, input_start_p, (ssize_t) (input_size));
1356+
JERRY_ASSERT (sz >= 0);
1357+
1358+
lit_utf8_iterator_t iterator = lit_utf8_iterator_create (input_start_p, input_size);
1359+
/* 4. */
1360+
/* The length of input string is always greater than output string
1361+
* so we re-use the input string buffer.
1362+
* E.g. %xx is 3 byte long, and the maximum is 0xff, which encoded
1363+
* as 2 bytes in UTF8. Etc. */
1364+
lit_utf8_byte_t *output_char_p = input_start_p;
1365+
1366+
/* The state of parsing that tells us where we are in an escape pattern.
1367+
* 0 we are outside of pattern,
1368+
* 1 found '%', start of pattern,
1369+
* 2 found first hex digit of '%xy' pattern
1370+
* 3 found valid '%xy' pattern
1371+
* 4 found 'u', start of '%uwxyz' pattern
1372+
* 5-7 found hex digits of '%uwxyz' pattern
1373+
* 8 found valid '%uwxyz' pattern
1374+
*/
1375+
uint8_t status = 0;
1376+
lit_code_point_t high_surrogate = 0;
1377+
lit_code_point_t hex_digits = 0;
1378+
/* 5. */
1379+
while (!lit_utf8_iterator_is_eos (&iterator))
1380+
{
1381+
/* 6. */
1382+
lit_code_point_t code_point = lit_utf8_iterator_read_next (&iterator);
1383+
1384+
/* 7-8. */
1385+
if (status == 0 && code_point == LIT_CHAR_PERCENT)
1386+
{
1387+
/* Found '%' char, start of escape sequence. */
1388+
status = 1;
1389+
}
1390+
/* 9-10. */
1391+
else if (status == 1 && code_point == LIT_CHAR_LOWERCASE_U)
1392+
{
1393+
/* Found 'u' char after '%'. */
1394+
status = 4;
1395+
}
1396+
else if (status > 0 && lit_char_is_hex_digit ((ecma_char_t) code_point))
1397+
{
1398+
/* Found hexadecimal digit in escape sequence. */
1399+
hex_digits = hex_digits * 16 + lit_char_hex_to_int ((ecma_char_t) code_point);
1400+
status++;
1401+
}
1402+
1403+
/* 11-17. Found valid '%uwxyz' or '%xy' escape. */
1404+
if (status == 8 || status == 3)
1405+
{
1406+
output_char_p -= (status == 3) ? 2 : 5;
1407+
status = 0;
1408+
code_point = (ecma_char_t) hex_digits;
1409+
hex_digits = 0;
1410+
}
1411+
1412+
/* Handle surrogate pairs. */
1413+
bool is_non_bmp_middle = iterator.buf_pos.is_non_bmp_middle;
1414+
if (!high_surrogate && lit_is_code_unit_high_surrogate ((ecma_char_t) code_point))
1415+
{
1416+
high_surrogate = code_point;
1417+
1418+
if (is_non_bmp_middle)
1419+
{
1420+
code_point = lit_utf8_iterator_read_next (&iterator);
1421+
}
1422+
}
1423+
1424+
if (high_surrogate && lit_is_code_unit_low_surrogate ((ecma_char_t) code_point))
1425+
{
1426+
output_char_p -= is_non_bmp_middle ? 0 : 3;
1427+
code_point = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) high_surrogate,
1428+
(ecma_char_t) code_point);
1429+
high_surrogate = 0;
1430+
}
1431+
1432+
/* Copying character. */
1433+
lit_utf8_size_t lit_size = lit_code_point_to_utf8 (code_point, output_char_p);
1434+
output_char_p += lit_size;
1435+
JERRY_ASSERT (output_char_p - input_start_p <= iterator.buf_pos.offset);
1436+
}
1437+
1438+
lit_utf8_size_t output_length = (lit_utf8_size_t) (output_char_p - input_start_p);
1439+
ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (input_start_p, output_length);
1440+
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));
1441+
1442+
MEM_FINALIZE_LOCAL_ARRAY (input_start_p);
1443+
1444+
ECMA_FINALIZE (string);
1445+
return ret_value;
1446+
} /* ecma_builtin_global_object_unescape */
1447+
13321448
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN */
13331449

13341450
/**

jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ ROUTINE (LIT_MAGIC_STRING_PARSE_INT, ecma_builtin_global_object_parse_int, 2, 2)
237237

238238
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN
239239
ROUTINE (LIT_MAGIC_STRING_ESCAPE, ecma_builtin_global_object_escape, 1, 1)
240+
ROUTINE (LIT_MAGIC_STRING_UNESCAPE, ecma_builtin_global_object_unescape, 1, 1)
240241
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN */
241242

242243
#undef OBJECT_ID

jerry-core/lit/lit-magic-strings.inc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DECODE_URI_COMPONENT, "decodeURIComponent
7373
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ENCODE_URI, "encodeURI")
7474
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ENCODE_URI_COMPONENT, "encodeURIComponent")
7575
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ESCAPE, "escape")
76+
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UNESCAPE, "unescape")
7677
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_PROTOTYPE_OF_UL, "getPrototypeOf")
7778
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_OWN_PROPERTY_DESCRIPTOR_UL, "getOwnPropertyDescriptor")
7879
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_OWN_PROPERTY_NAMES_UL, "getOwnPropertyNames")

tests/jerry/global-escaping.js

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,33 @@ assert (escape("\x80\x95\xaf\xfe\xff") === "%80%95%AF%FE%FF");
2828
assert (escape("\u0100\ud800\udc00") === "%u0100%uD800%uDC00");
2929

3030
assert (escape({}) === "%5Bobject%20Object%5D");
31-
assert (escape(true) === "true")
31+
assert (escape(true) === "true");
32+
33+
// Unescaping

// Every '%xy' sequence in the ASCII range is decoded to the matching character.
assert (unescape ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F") ===
        "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f");
assert (unescape("%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F") ===
        "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f");
assert (unescape("%20%21%22%23%24%25%26%27%28%29*+%2C-./0123456789%3A%3B%3C%3D%3E%3F@ABCDEFGHIJKLMN") ===
        " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN");
assert (unescape("OPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7F") ===
        "OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}\x7F");
assert (unescape("%80%95%AF%FE%FF") === "\x80\x95\xaf\xfe\xff");

// Surrogates: lone, literal pair, escaped pair and mixed literal/escaped pairs.
assert (unescape("%ud800") === "\ud800");
assert (unescape("\ud800") === "\ud800");
assert (unescape("\ud800\udc00") === "\ud800\udc00");
assert (unescape("%ud800%udc00") === "\ud800\udc00");
assert (unescape("\ud800%udc00") === "\ud800\udc00");
assert (unescape("%ud800\udc00") === "\ud800\udc00");

// Non-string arguments are converted to string first.
assert (unescape({}) === "[object Object]");
assert (unescape(true) === "true");
assert (unescape() === "undefined");
assert (unescape(1985) === "1985");

// Malformed escape sequences are left untouched.
assert (unescape("%5#%uu") === "%5#%uu");

// Inversion

var str = "\u0001\u0000\uFFFF";
assert (unescape(escape(str)) === str);

0 commit comments

Comments
 (0)