Skip to content

Commit cfb4635

Browse files
committed
Implement String.prototype.search, and some minor regexp refactors.
JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg@inf.u-szeged.hu
1 parent c715a7c commit cfb4635

File tree

6 files changed

+211
-65
lines changed

6 files changed

+211
-65
lines changed

jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.cpp

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -67,30 +67,13 @@ ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
6767
{
6868
ECMA_TRY_CATCH (obj_this, ecma_op_to_object (this_arg), ret_value);
6969

70-
ecma_object_t *obj_p = ecma_get_object_from_value (obj_this);
71-
ecma_property_t *bytecode_prop_p = ecma_get_internal_property (obj_p, ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE);
72-
re_bytecode_t *bytecode_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value);
73-
7470
ECMA_TRY_CATCH (input_str_value,
7571
ecma_op_to_string (arg),
7672
ret_value);
7773

78-
ecma_string_t *input_str_p = ecma_get_string_from_value (input_str_value);
79-
80-
/* Convert ecma_String_t *to regexp_bytecode_t* */
81-
lit_utf8_size_t input_str_size = ecma_string_get_size (input_str_p);
82-
83-
MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_str_size, lit_utf8_byte_t);
84-
85-
ecma_string_to_utf8_string (input_str_p, input_utf8_buffer_p, (ssize_t) input_str_size);
86-
lit_utf8_iterator_t iter = lit_utf8_iterator_create (input_utf8_buffer_p, input_str_size);
87-
88-
ret_value = ecma_regexp_exec_helper (obj_p, bytecode_p, &iter);
89-
90-
MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p);
74+
ret_value = ecma_regexp_exec_helper (obj_this, input_str_value, false);
9175

9276
ECMA_FINALIZE (input_str_value);
93-
9477
ECMA_FINALIZE (obj_this);
9578
}
9679

jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp

Lines changed: 91 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@
3131
#include "jrt-libc-includes.h"
3232
#include "lit-char-helpers.h"
3333

34+
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
35+
#include "ecma-regexp-object.h"
36+
#endif
37+
3438
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_STRING_BUILTIN
3539

3640
#define ECMA_BUILTINS_INTERNAL
@@ -560,15 +564,10 @@ ecma_builtin_string_prototype_object_match (ecma_value_t this_arg, /**< this arg
560564

561565
JERRY_ASSERT (ecma_is_value_boolean (global_value));
562566

563-
ecma_value_t exec_arguments[1] = { this_to_string_value };
564-
565567
if (!ecma_is_value_true (global_value))
566568
{
567569
/* 7. */
568-
ret_value = ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC,
569-
regexp_value,
570-
exec_arguments,
571-
1);
570+
ret_value = ecma_regexp_exec_helper (regexp_value, this_to_string_value, false);
572571
}
573572
else
574573
{
@@ -608,10 +607,7 @@ ecma_builtin_string_prototype_object_match (ecma_value_t this_arg, /**< this arg
608607
{
609608
/* 8.f.i. */
610609
ECMA_TRY_CATCH (exec_value,
611-
ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC,
612-
regexp_value,
613-
exec_arguments,
614-
1),
610+
ecma_regexp_exec_helper (regexp_value, this_to_string_value, false),
615611
ret_value);
616612

617613
if (ecma_is_value_null (exec_value))
@@ -829,13 +825,10 @@ ecma_builtin_string_prototype_object_replace_match (ecma_builtin_replace_search_
829825

830826
if (context_p->is_regexp)
831827
{
832-
ecma_value_t exec_arguments[1] = { context_p->input_string };
833-
834828
ECMA_TRY_CATCH (match_value,
835-
ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC,
836-
context_p->regexp_or_search_string,
837-
exec_arguments,
838-
1),
829+
ecma_regexp_exec_helper (context_p->regexp_or_search_string,
830+
context_p->input_string,
831+
false),
839832
ret_value);
840833

841834
if (!ecma_is_value_null (match_value))
@@ -1504,7 +1497,6 @@ ecma_builtin_string_prototype_object_replace (ecma_value_t this_arg, /**< this a
15041497

15051498
return ret_value;
15061499
} /* ecma_builtin_string_prototype_object_replace */
1507-
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
15081500

15091501
/**
15101502
* The String.prototype object's 'search' routine
@@ -1517,11 +1509,91 @@ ecma_builtin_string_prototype_object_replace (ecma_value_t this_arg, /**< this a
15171509
*/
15181510
static ecma_completion_value_t
15191511
ecma_builtin_string_prototype_object_search (ecma_value_t this_arg, /**< this argument */
1520-
ecma_value_t arg) /**< routine's argument */
1512+
ecma_value_t regexp_arg) /**< routine's argument */
15211513
{
1522-
ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg, arg);
1514+
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
1515+
1516+
/* 1. */
1517+
ECMA_TRY_CATCH (check_coercible_value,
1518+
ecma_op_check_object_coercible (this_arg),
1519+
ret_value);
1520+
1521+
/* 2. */
1522+
ECMA_TRY_CATCH (to_string_value,
1523+
ecma_op_to_string (this_arg),
1524+
ret_value);
1525+
1526+
ecma_value_t regexp_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_EMPTY);
1527+
1528+
/* 3. */
1529+
if (ecma_is_value_object (regexp_arg)
1530+
&& ecma_object_get_class_name (ecma_get_object_from_value (regexp_arg)) == LIT_MAGIC_STRING_REGEXP_UL)
1531+
{
1532+
regexp_value = ecma_copy_value (regexp_arg, true);
1533+
}
1534+
else
1535+
{
1536+
/* 4. */
1537+
ecma_value_t regexp_arguments[1] = { regexp_arg };
1538+
1539+
ECMA_TRY_CATCH (new_regexp_value,
1540+
ecma_builtin_regexp_dispatch_construct (regexp_arguments, 1),
1541+
ret_value);
1542+
1543+
regexp_value = ecma_copy_value (new_regexp_value, true);
1544+
1545+
ECMA_FINALIZE (new_regexp_value);
1546+
}
1547+
1548+
/* 5. */
1549+
if (ecma_is_completion_value_empty (ret_value))
1550+
{
1551+
ECMA_TRY_CATCH (match_result,
1552+
ecma_regexp_exec_helper (regexp_value, to_string_value, true),
1553+
ret_value);
1554+
1555+
ecma_number_t offset = -1;
1556+
1557+
if (!ecma_is_value_null (match_result))
1558+
{
1559+
JERRY_ASSERT (ecma_is_value_object (match_result));
1560+
1561+
ecma_object_t *match_object_p = ecma_get_object_from_value (match_result);
1562+
ecma_string_t *index_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX);
1563+
1564+
ECMA_TRY_CATCH (index_value,
1565+
ecma_op_object_get (match_object_p, index_string_p),
1566+
ret_value);
1567+
1568+
JERRY_ASSERT (ecma_is_value_number (index_value));
1569+
1570+
offset = *ecma_get_number_from_value (index_value);
1571+
1572+
ECMA_FINALIZE (index_value);
1573+
ecma_deref_ecma_string (index_string_p);
1574+
}
1575+
1576+
if (ecma_is_completion_value_empty (ret_value))
1577+
{
1578+
ecma_number_t *offset_number_p = ecma_alloc_number ();
1579+
*offset_number_p = offset;
1580+
1581+
ret_value = ecma_make_normal_completion_value (ecma_make_number_value (offset_number_p));
1582+
}
1583+
1584+
ECMA_FINALIZE (match_result);
1585+
ecma_free_value (regexp_value, true);
1586+
}
1587+
1588+
ECMA_FINALIZE (to_string_value);
1589+
ECMA_FINALIZE (check_coercible_value);
1590+
1591+
/* 6. */
1592+
return ret_value;
15231593
} /* ecma_builtin_string_prototype_object_search */
15241594

1595+
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
1596+
15251597
/**
15261598
* The String.prototype object's 'slice' routine
15271599
*

jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.inc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,9 @@ ROUTINE (LIT_MAGIC_STRING_LOCALE_COMPARE_UL, ecma_builtin_string_prototype_objec
7171
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
7272
ROUTINE (LIT_MAGIC_STRING_MATCH, ecma_builtin_string_prototype_object_match, 1, 1)
7373
ROUTINE (LIT_MAGIC_STRING_REPLACE, ecma_builtin_string_prototype_object_replace, 2, 2)
74+
ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1)
7475
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
7576

76-
ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1)
7777
ROUTINE (LIT_MAGIC_STRING_SPLIT, ecma_builtin_string_prototype_object_split, 2, 2)
7878
ROUTINE (LIT_MAGIC_STRING_SUBSTRING, ecma_builtin_string_prototype_object_substring, 2, 2)
7979
ROUTINE (LIT_MAGIC_STRING_TO_LOWER_CASE_UL, ecma_builtin_string_prototype_object_to_lower_case, 0, 0)

jerry-core/ecma/operations/ecma-regexp-object.cpp

Lines changed: 55 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,19 +1176,45 @@ re_set_result_array_properties (ecma_object_t *array_obj_p, /**< result array */
11761176
* Returned value must be freed with ecma_free_completion_value
11771177
*/
11781178
ecma_completion_value_t
1179-
ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
1180-
re_bytecode_t *bc_p, /**< start of the RegExp bytecode */
1181-
lit_utf8_iterator_t *iter_p) /**< input string iterator */
1179+
ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
1180+
ecma_value_t input_string, /**< input string */
1181+
bool ignore_global) /**< ignore global flag */
11821182
{
11831183
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
1184+
1185+
JERRY_ASSERT (ecma_is_value_object (regexp_value));
1186+
JERRY_ASSERT (ecma_is_value_string (input_string));
1187+
1188+
ecma_object_t *regexp_object_p = ecma_get_object_from_value (regexp_value);
1189+
1190+
JERRY_ASSERT (ecma_object_get_class_name (regexp_object_p) == LIT_MAGIC_STRING_REGEXP_UL);
1191+
1192+
ecma_property_t *bytecode_prop_p = ecma_get_internal_property (regexp_object_p,
1193+
ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE);
1194+
re_bytecode_t *bc_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value);
1195+
1196+
ecma_string_t *input_string_p = ecma_get_string_from_value (input_string);
1197+
lit_utf8_size_t input_string_size = ecma_string_get_size (input_string_p);
1198+
1199+
MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_string_size, lit_utf8_byte_t);
1200+
1201+
ecma_string_to_utf8_string (input_string_p, input_utf8_buffer_p, (ssize_t) input_string_size);
1202+
lit_utf8_iterator_t iterator = lit_utf8_iterator_create (input_utf8_buffer_p, input_string_size);
1203+
11841204
re_matcher_ctx_t re_ctx;
1185-
re_ctx.input_start_p = iter_p->buf_p;
1186-
re_ctx.input_end_p = iter_p->buf_p + iter_p->buf_size;
1205+
re_ctx.input_start_p = iterator.buf_p;
1206+
re_ctx.input_end_p = iterator.buf_p + iterator.buf_size;
11871207
re_ctx.match_limit = 0;
11881208
re_ctx.recursion_depth = 0;
11891209

11901210
/* 1. Read bytecode header and init regexp matcher context. */
11911211
re_ctx.flags = (uint8_t) re_get_value (&bc_p);
1212+
1213+
if (ignore_global)
1214+
{
1215+
re_ctx.flags &= (uint8_t) ~RE_FLAG_GLOBAL;
1216+
}
1217+
11921218
JERRY_DDLOG ("Exec with flags [global: %d, ignoreCase: %d, multiline: %d]\n",
11931219
re_ctx.flags & RE_FLAG_GLOBAL,
11941220
re_ctx.flags & RE_FLAG_IGNORE_CASE,
@@ -1217,22 +1243,22 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
12171243
bool is_match = false;
12181244
re_ctx.num_of_iterations_p = num_of_iter_p;
12191245
int32_t index = 0;
1220-
ecma_length_t input_str_len = lit_utf8_string_length (iter_p->buf_p, iter_p->buf_size);
1246+
ecma_length_t input_str_len = lit_utf8_string_length (iterator.buf_p, iterator.buf_size);
12211247

1222-
if (iter_p->buf_p && re_ctx.flags & RE_FLAG_GLOBAL)
1248+
if (iterator.buf_p && (re_ctx.flags & RE_FLAG_GLOBAL))
12231249
{
12241250
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
1225-
ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (obj_p, magic_str_p);
1251+
ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (regexp_object_p, magic_str_p);
12261252

12271253
ECMA_OP_TO_NUMBER_TRY_CATCH (lastindex_num, lastindex_prop_p->u.named_data_property.value, ret_value)
12281254
index = ecma_number_to_int32 (lastindex_num);
12291255

1230-
JERRY_ASSERT (iter_p->buf_pos.offset == 0 && !iter_p->buf_pos.is_non_bmp_middle);
1231-
if (!lit_utf8_iterator_is_eos (iter_p)
1256+
JERRY_ASSERT (iterator.buf_pos.offset == 0 && !iterator.buf_pos.is_non_bmp_middle);
1257+
if (!lit_utf8_iterator_is_eos (&iterator)
12321258
&& index <= (int32_t) input_str_len
12331259
&& index > 0)
12341260
{
1235-
lit_utf8_iterator_advance (iter_p, (ecma_length_t) index);
1261+
lit_utf8_iterator_advance (&iterator, (ecma_length_t) index);
12361262
}
12371263
ECMA_OP_TO_NUMBER_FINALIZE (lastindex_num);
12381264
ecma_deref_ecma_string (magic_str_p);
@@ -1245,42 +1271,45 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
12451271
{
12461272
if (index < 0 || index > (int32_t) input_str_len)
12471273
{
1248-
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
1249-
ecma_number_t *lastindex_num_p = ecma_alloc_number ();
1250-
*lastindex_num_p = ECMA_NUMBER_ZERO;
1251-
ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
1252-
ecma_dealloc_number (lastindex_num_p);
1253-
ecma_deref_ecma_string (magic_str_p);
1274+
if (re_ctx.flags & RE_FLAG_GLOBAL)
1275+
{
1276+
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
1277+
ecma_number_t *lastindex_num_p = ecma_alloc_number ();
1278+
*lastindex_num_p = ECMA_NUMBER_ZERO;
1279+
ecma_op_object_put (regexp_object_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
1280+
ecma_dealloc_number (lastindex_num_p);
1281+
ecma_deref_ecma_string (magic_str_p);
1282+
}
12541283

12551284
is_match = false;
12561285
break;
12571286
}
12581287
else
12591288
{
1260-
ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, *iter_p, &sub_iter), ret_value);
1289+
ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, iterator, &sub_iter), ret_value);
12611290

12621291
if (ecma_is_value_true (match_value))
12631292
{
12641293
is_match = true;
12651294
break;
12661295
}
12671296

1268-
if (!lit_utf8_iterator_is_eos (iter_p))
1297+
if (!lit_utf8_iterator_is_eos (&iterator))
12691298
{
1270-
lit_utf8_iterator_advance (iter_p, 1);
1299+
lit_utf8_iterator_advance (&iterator, 1);
12711300
}
12721301
index++;
12731302

12741303
ECMA_FINALIZE (match_value);
12751304
}
12761305
}
12771306

1278-
if (iter_p->buf_p && re_ctx.flags & RE_FLAG_GLOBAL)
1307+
if (iterator.buf_p && (re_ctx.flags & RE_FLAG_GLOBAL))
12791308
{
12801309
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
12811310
ecma_number_t *lastindex_num_p = ecma_alloc_number ();
12821311
*lastindex_num_p = sub_iter.buf_pos.offset;
1283-
ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
1312+
ecma_op_object_put (regexp_object_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
12841313
ecma_dealloc_number (lastindex_num_p);
12851314
ecma_deref_ecma_string (magic_str_p);
12861315
}
@@ -1299,9 +1328,9 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
12991328
{
13001329
ecma_string_t *index_str_p = ecma_new_ecma_string_from_uint32 (i / 2);
13011330

1302-
/* Note: 'iter_p->buf_p == NULL' means the input is empty string */
1331+
/* Note: 'iterator.buf_p == NULL' means the input is an empty string */
13031332
if (((re_ctx.saved_p[i].buf_p && re_ctx.saved_p[i + 1].buf_p)
1304-
|| (!iter_p->buf_p && !re_ctx.saved_p[i].buf_p && !re_ctx.saved_p[i + 1].buf_p))
1333+
|| (!iterator.buf_p && !re_ctx.saved_p[i].buf_p && !re_ctx.saved_p[i + 1].buf_p))
13051334
&& re_ctx.saved_p[i + 1].buf_pos.offset >= re_ctx.saved_p[i].buf_pos.offset)
13061335
{
13071336
ecma_length_t capture_str_len;
@@ -1336,8 +1365,10 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
13361365
ret_value = ecma_make_normal_completion_value (ecma_make_simple_value (ECMA_SIMPLE_VALUE_NULL));
13371366
}
13381367
}
1368+
13391369
MEM_FINALIZE_LOCAL_ARRAY (num_of_iter_p);
13401370
MEM_FINALIZE_LOCAL_ARRAY (saved_p);
1371+
MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p);
13411372

13421373
return ret_value;
13431374
} /* ecma_regexp_exec_helper */

jerry-core/ecma/operations/ecma-regexp-object.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,7 @@ extern ecma_completion_value_t
5959
ecma_op_create_regexp_object (ecma_string_t *pattern_p, ecma_string_t *flags_str_p);
6060

6161
extern ecma_completion_value_t
62-
ecma_regexp_exec_helper (ecma_object_t *obj_p,
63-
re_bytecode_t *bc_p,
64-
lit_utf8_iterator_t *iter_p);
62+
ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);
6563

6664
/**
6765
* @}

0 commit comments

Comments
 (0)