Skip to content

Regexp refactoring and optimizations #910

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions jerry-core/ecma/base/ecma-gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "jrt.h"
#include "jrt-libc-includes.h"
#include "jrt-bit-fields.h"
#include "re-compiler.h"
#include "vm-defines.h"
#include "vm-stack.h"

Expand Down Expand Up @@ -549,6 +550,11 @@ ecma_gc_run (void)
ecma_gc_objects_lists[ECMA_GC_COLOR_BLACK] = NULL;

ecma_gc_visited_flip_flag = !ecma_gc_visited_flip_flag;

#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
/* Free RegExp bytecodes stored in cache */
re_cache_gc_run ();
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
} /* ecma_gc_run */

/**
Expand Down
18 changes: 12 additions & 6 deletions jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@
* See also:
* ECMA-262 v5, B.2.5.1
*
* @return ecma value
* @return undefined - if compiled successfully
* error ecma value - otherwise
*
* Returned value must be freed with ecma_free_value.
*/
static ecma_value_t
Expand Down Expand Up @@ -240,8 +242,10 @@ ecma_builtin_regexp_prototype_compile (ecma_value_t this_arg, /**< this argument
* See also:
* ECMA-262 v5, 15.10.6.2
*
* @return ecma value
* Returned value must be freed with ecma_free_value.
* @return array object containing the results - if matched
* null - otherwise
*
* May raise error, so returned value must be freed with ecma_free_value.
*/
static ecma_value_t
ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
Expand Down Expand Up @@ -314,8 +318,10 @@ ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
* See also:
* ECMA-262 v5, 15.10.6.3
*
* @return ecma value
* Returned value must be freed with ecma_free_value.
* @return true - if match is not null
* false - otherwise
*
* May raise error, so returned value must be freed with ecma_free_value.
*/
static ecma_value_t
ecma_builtin_regexp_prototype_test (ecma_value_t this_arg, /**< this argument */
Expand Down Expand Up @@ -439,4 +445,4 @@ ecma_builtin_regexp_prototype_to_string (ecma_value_t this_arg) /**< this argume
* @}
*/

#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
4 changes: 3 additions & 1 deletion jerry-core/ecma/builtin-objects/ecma-builtin-regexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
* Handle calling [[Call]] of built-in RegExp object
*
* @return ecma value
* Returned value must be freed with ecma_free_value.
*/
ecma_value_t
ecma_builtin_regexp_dispatch_call (const ecma_value_t *arguments_list_p, /**< arguments list */
Expand All @@ -58,6 +59,7 @@ ecma_builtin_regexp_dispatch_call (const ecma_value_t *arguments_list_p, /**< ar
* Handle calling [[Construct]] of built-in RegExp object
*
* @return ecma value
* Returned value must be freed with ecma_free_value.
*/
ecma_value_t
ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /**< arguments list */
Expand Down Expand Up @@ -152,4 +154,4 @@ ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /*
* @}
*/

#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ ROUTINE (LIT_MAGIC_STRING_MATCH, ecma_builtin_string_prototype_object_match, 1,
ROUTINE (LIT_MAGIC_STRING_REPLACE, ecma_builtin_string_prototype_object_replace, 2, 2)
ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1)
ROUTINE (LIT_MAGIC_STRING_SPLIT, ecma_builtin_string_prototype_object_split, 2, 2)
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

ROUTINE (LIT_MAGIC_STRING_SUBSTRING, ecma_builtin_string_prototype_object_substring, 2, 2)
ROUTINE (LIT_MAGIC_STRING_TO_LOWER_CASE_UL, ecma_builtin_string_prototype_object_to_lower_case, 0, 0)
Expand All @@ -84,7 +84,7 @@ ROUTINE (LIT_MAGIC_STRING_TRIM, ecma_builtin_string_prototype_object_trim, 0, 0)

#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN
ROUTINE (LIT_MAGIC_STRING_SUBSTR, ecma_builtin_string_prototype_object_substr, 2, 2)
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN */

#undef OBJECT_ID
#undef SIMPLE_VALUE
Expand Down
4 changes: 2 additions & 2 deletions jerry-core/ecma/operations/ecma-objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -958,13 +958,13 @@ ecma_object_get_class_name (ecma_object_t *obj_p) /**< object */
{
return LIT_MAGIC_STRING_DATE_UL;
}
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_DATE_BUILTIN */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_DATE_BUILTIN */
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
case ECMA_BUILTIN_ID_REGEXP_PROTOTYPE:
{
return LIT_MAGIC_STRING_REGEXP_UL;
}
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
default:
{
JERRY_ASSERT (ecma_builtin_is (obj_p, ECMA_BUILTIN_ID_GLOBAL));
Expand Down
46 changes: 34 additions & 12 deletions jerry-core/ecma/operations/ecma-regexp-object.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,19 @@
#define RE_GLOBAL_START_IDX 0
#define RE_GLOBAL_END_IDX 1

/**
 * Tell whether a RegExp opcode denotes a capture group.
 *
 * Evaluates to 1 when the opcode value is below RE_OP_NON_CAPTURE_GROUP_START,
 * and to 0 otherwise.
 */
#define RE_IS_CAPTURE_GROUP(x) ((x) < RE_OP_NON_CAPTURE_GROUP_START)

/**
* Parse RegExp flags (global, ignoreCase, multiline)
*
* See also: ECMA-262 v5, 15.10.4.1
*
* @return ecma value
* @return empty ecma value - if parsed successfully
* error ecma value - otherwise
*
* Returned value must be freed with ecma_free_value
*/
ecma_value_t
Expand Down Expand Up @@ -123,7 +130,7 @@ re_parse_regexp_flags (ecma_string_t *flags_str_p, /**< Input string with flags
return ret_value;
} /* re_parse_regexp_flags */

/*
/**
* Initializes the source, global, ignoreCase, multiline, and lastIndex properties of RegExp instance.
*/
void
Expand Down Expand Up @@ -223,11 +230,11 @@ re_initialize_props (ecma_object_t *re_obj_p, /**< RegExp object */
*
* See also: ECMA-262 v5, 15.10.4.1
*
* @return ecma value
* @return constructed RegExp object
* Returned value must be freed with ecma_free_value
*/
ecma_value_t
ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p) /**< input pattern */
ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p) /**< RegExp bytecode */
{
JERRY_ASSERT (bytecode_p != NULL);

Expand Down Expand Up @@ -259,7 +266,9 @@ ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p) /**<
*
* See also: ECMA-262 v5, 15.10.4.1
*
* @return ecma value
* @return constructed RegExp object - if pattern and flags were parsed successfully
* error ecma value - otherwise
*
* Returned value must be freed with ecma_free_value
*/
ecma_value_t
Expand Down Expand Up @@ -367,8 +376,10 @@ re_canonicalize (ecma_char_t ch, /**< character */
* See also:
* ECMA-262 v5, 15.10.2.1
*
* @return ecma value
* Returned value must be freed with ecma_free_value
* @return true - if matched
* false - otherwise
*
* May raise error, so returned value must be freed with ecma_free_value
*/
static ecma_value_t
re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
Expand Down Expand Up @@ -400,7 +411,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
}

bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p); /* Already canonicalized. */
ecma_char_t ch1 = (ecma_char_t) re_get_char (&bc_p); /* Already canonicalized. */
ecma_char_t ch2 = re_canonicalize (lit_utf8_read_next (&str_curr_p), is_ignorecase);
JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2);

Expand Down Expand Up @@ -613,8 +624,8 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */

while (num_of_ranges)
{
ecma_char_t ch1 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
ecma_char_t ch2 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
ecma_char_t ch1 = re_canonicalize (re_get_char (&bc_p), is_ignorecase);
ecma_char_t ch2 = re_canonicalize (re_get_char (&bc_p), is_ignorecase);
JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ",
num_of_ranges, ch1, ch2, curr_ch);

Expand Down Expand Up @@ -698,6 +709,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
uint32_t offset = re_get_value (&bc_p);
lit_utf8_byte_t *sub_str_p = NULL;
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);

if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
Expand All @@ -707,6 +719,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
{
return match_value;
}

bc_p += offset;
old_bc_p = bc_p;
}
Expand Down Expand Up @@ -839,6 +852,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
{
offset = re_get_value (&bc_p);
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);

if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
Expand All @@ -848,6 +862,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
{
return match_value;
}

bc_p += offset;
old_bc_p = bc_p;
}
Expand Down Expand Up @@ -915,6 +930,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */

lit_utf8_byte_t *sub_str_p = NULL;
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);

if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
Expand Down Expand Up @@ -1225,7 +1241,13 @@ re_set_result_array_properties (ecma_object_t *array_obj_p, /**< result array */
* RegExp helper function to start the recursive matching algorithm
* and create the result Array object
*
* @return ecma value
* See also:
* ECMA-262 v5, 15.10.6.2
*
* @return array object - if matched
* null - otherwise
*
* May raise error.
* Returned value must be freed with ecma_free_value
*/
ecma_value_t
Expand Down Expand Up @@ -1475,4 +1497,4 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
* @}
*/

#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
49 changes: 20 additions & 29 deletions jerry-core/ecma/operations/ecma-regexp-object.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,48 +32,39 @@
/**
* RegExp flags
*/
#define RE_FLAG_GLOBAL (1u << 1) /* ECMA-262 v5, 15.10.7.2 */
#define RE_FLAG_IGNORE_CASE (1u << 2) /* ECMA-262 v5, 15.10.7.3 */
#define RE_FLAG_MULTILINE (1u << 3) /* ECMA-262 v5, 15.10.7.4 */
typedef enum
{
RE_FLAG_GLOBAL = (1u << 1), /**< ECMA-262 v5, 15.10.7.2 */
RE_FLAG_IGNORE_CASE = (1u << 2), /**< ECMA-262 v5, 15.10.7.3 */
RE_FLAG_MULTILINE = (1u << 3) /**< ECMA-262 v5, 15.10.7.4 */
} re_flags_t;

/**
* RegExp executor context
*/
typedef struct
{
lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
uint32_t num_of_captures; /**< number of capture groups */
uint32_t num_of_non_captures; /**< number of non-capture groups */
uint32_t *num_of_iterations_p; /**< number of iterations */
uint16_t flags; /**< RegExp flags */
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
uint32_t num_of_captures; /**< number of capture groups */
uint32_t num_of_non_captures; /**< number of non-capture groups */
uint32_t *num_of_iterations_p; /**< number of iterations */
uint16_t flags; /**< RegExp flags */
} re_matcher_ctx_t;

extern ecma_value_t
ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *);

extern ecma_value_t
ecma_op_create_regexp_object (ecma_string_t *, ecma_string_t *);

extern ecma_value_t
ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);

extern ecma_char_t
re_canonicalize (ecma_char_t, bool);
extern void
re_set_result_array_properties (ecma_object_t *, ecma_string_t *, uint32_t, int32_t);

extern ecma_value_t
re_parse_regexp_flags (ecma_string_t *, uint16_t *);

extern void
re_initialize_props (ecma_object_t *, ecma_string_t *, uint16_t);
ecma_value_t ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *);
ecma_value_t ecma_op_create_regexp_object (ecma_string_t *, ecma_string_t *);
ecma_value_t ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);
ecma_char_t re_canonicalize (ecma_char_t, bool);
void re_set_result_array_properties (ecma_object_t *, ecma_string_t *, uint32_t, int32_t);
ecma_value_t re_parse_regexp_flags (ecma_string_t *, uint16_t *);
void re_initialize_props (ecma_object_t *, ecma_string_t *, uint16_t);

/**
* @}
* @}
*/

#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
#endif /* !ECMA_REGEXP_OBJECT_H */
2 changes: 1 addition & 1 deletion jerry-core/jerry.c
Original file line number Diff line number Diff line change
Expand Up @@ -1655,9 +1655,9 @@ jerry_cleanup (void)

bool is_show_mem_stats = ((jerry_flags & JERRY_FLAG_MEM_STATS) != 0);

vm_finalize ();
ecma_finalize ();
lit_finalize ();
vm_finalize ();
mem_finalize (is_show_mem_stats);
} /* jerry_cleanup */

Expand Down
1 change: 0 additions & 1 deletion jerry-core/parser/js/js-lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1812,7 +1812,6 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
ecma_value_t completion_value;

ecma_string_t *pattern_str_p = ecma_new_ecma_string_from_utf8 (regex_start_p, length);
// FIXME: check return value of 're_compile_bytecode' and throw an error
completion_value = re_compile_bytecode (&re_bytecode_p,
pattern_str_p,
current_flags);
Expand Down
Loading