Skip to content

Commit 1c5ca4a

Browse files
author
Istvan Miklos
committed
Add RegExp recursion depth limit
The regexp engine does not have any recursion depth check, so it can cause problems with various regexps. Added a new build option `--regexp-recursion-limit N`, whose default value is 1000; use 0 for unlimited recursion depth. Also added a build-option test for the unlimited recursion depth.

Fixes #2448

JerryScript-DCO-1.0-Signed-off-by: Istvan Miklos imiklos2@inf.u-szeged.hu
1 parent 685d8b3 commit 1c5ca4a

File tree

7 files changed

+93
-1
lines changed

7 files changed

+93
-1
lines changed

jerry-core/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ message(STATUS "FEATURE_SYSTEM_ALLOCATOR " ${FEATURE_SYSTEM_ALLOCATOR})
9494
message(STATUS "FEATURE_VALGRIND " ${FEATURE_VALGRIND})
9595
message(STATUS "FEATURE_VM_EXEC_STOP " ${FEATURE_VM_EXEC_STOP})
9696
message(STATUS "MEM_HEAP_SIZE_KB " ${MEM_HEAP_SIZE_KB})
97+
message(STATUS "REGEXP_RECURSION_LIMIT " ${REGEXP_RECURSION_LIMIT_N})
9798

9899
# Include directories
99100
set(INCLUDE_CORE_PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
@@ -242,6 +243,11 @@ if(FEATURE_REGEXP_STRICT_MODE)
242243
set(DEFINES_JERRY ${DEFINES_JERRY} ENABLE_REGEXP_STRICT_MODE)
243244
endif()
244245

246+
# RegExp recursion depth limit (an unset or zero REGEXP_RECURSION_LIMIT_N leaves REGEXP_RECURSION_LIMIT undefined, i.e. no limit)
247+
if(REGEXP_RECURSION_LIMIT_N)
248+
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_RECURSION_LIMIT=${REGEXP_RECURSION_LIMIT_N})
249+
endif()
250+
245251
# RegExp byte-code dumps
246252
if(FEATURE_REGEXP_DUMP)
247253
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_DUMP_BYTE_CODE)

jerry-core/ecma/operations/ecma-regexp-object.c

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,14 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
364364
const lit_utf8_byte_t *str_p, /**< input string pointer */
365365
const lit_utf8_byte_t **out_str_p) /**< [out] matching substring iterator */
366366
{
367+
#ifdef REGEXP_RECURSION_LIMIT
368+
if (re_ctx_p->recursion_depth >= REGEXP_RECURSION_LIMIT)
369+
{
370+
ecma_value_t ret_value = ecma_raise_range_error ("RegExp executor recursion limit is exceeded.");
371+
return ret_value;
372+
}
373+
++re_ctx_p->recursion_depth;
374+
#endif /* REGEXP_RECURSION_LIMIT */
367375
const lit_utf8_byte_t *str_curr_p = str_p;
368376

369377
while (true)
@@ -535,6 +543,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
535543
if (!sub_str_p)
536544
{
537545
match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
546+
#ifdef REGEXP_RECURSION_LIMIT
547+
--re_ctx_p->recursion_depth;
548+
#endif /* REGEXP_RECURSION_LIMIT */
538549
if (ECMA_IS_VALUE_ERROR (match_value))
539550
{
540551
break;
@@ -553,6 +564,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
553564
{
554565
JERRY_TRACE_MSG ("match\n");
555566
match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
567+
#ifdef REGEXP_RECURSION_LIMIT
568+
--re_ctx_p->recursion_depth;
569+
#endif /* REGEXP_RECURSION_LIMIT */
556570
}
557571
else
558572
{
@@ -685,6 +699,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
685699
uint32_t offset = re_get_value (&bc_p);
686700
const lit_utf8_byte_t *sub_str_p = NULL;
687701
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
702+
#ifdef REGEXP_RECURSION_LIMIT
703+
--re_ctx_p->recursion_depth;
704+
#endif /* REGEXP_RECURSION_LIMIT */
688705

689706
if (ecma_is_value_true (match_value))
690707
{
@@ -770,6 +787,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
770787

771788
/* Try to match after the close paren if zero is allowed */
772789
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
790+
#ifdef REGEXP_RECURSION_LIMIT
791+
--re_ctx_p->recursion_depth;
792+
#endif /* REGEXP_RECURSION_LIMIT */
773793

774794
if (ecma_is_value_true (match_value))
775795
{
@@ -828,6 +848,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
828848
{
829849
offset = re_get_value (&bc_p);
830850
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
851+
#ifdef REGEXP_RECURSION_LIMIT
852+
--re_ctx_p->recursion_depth;
853+
#endif /* REGEXP_RECURSION_LIMIT */
831854

832855
if (ecma_is_value_true (match_value))
833856
{
@@ -852,6 +875,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
852875
{
853876
JERRY_ASSERT (end_bc_p);
854877
ecma_value_t match_value = re_match_regexp (re_ctx_p, end_bc_p, str_curr_p, &sub_str_p);
878+
#ifdef REGEXP_RECURSION_LIMIT
879+
--re_ctx_p->recursion_depth;
880+
#endif /* REGEXP_RECURSION_LIMIT */
855881

856882
if (ecma_is_value_true (match_value))
857883
{
@@ -906,6 +932,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
906932

907933
const lit_utf8_byte_t *sub_str_p = NULL;
908934
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
935+
#ifdef REGEXP_RECURSION_LIMIT
936+
--re_ctx_p->recursion_depth;
937+
#endif /* REGEXP_RECURSION_LIMIT */
909938

910939
if (ecma_is_value_true (match_value))
911940
{
@@ -975,6 +1004,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
9751004
old_start_p = re_ctx_p->saved_p[start_idx];
9761005
re_ctx_p->saved_p[start_idx] = str_curr_p;
9771006
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
1007+
#ifdef REGEXP_RECURSION_LIMIT
1008+
--re_ctx_p->recursion_depth;
1009+
#endif /* REGEXP_RECURSION_LIMIT */
9781010

9791011
if (ecma_is_value_true (match_value))
9801012
{
@@ -999,6 +1031,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
9991031
re_ctx_p->saved_p[start_idx] = str_curr_p;
10001032

10011033
match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
1034+
#ifdef REGEXP_RECURSION_LIMIT
1035+
--re_ctx_p->recursion_depth;
1036+
#endif /* REGEXP_RECURSION_LIMIT */
10021037

10031038
if (ecma_is_value_true (match_value))
10041039
{
@@ -1020,6 +1055,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10201055
{
10211056
/* Try to match the rest of the bytecode. */
10221057
ecma_value_t match_value = re_match_regexp (re_ctx_p, old_bc_p, str_curr_p, &sub_str_p);
1058+
#ifdef REGEXP_RECURSION_LIMIT
1059+
--re_ctx_p->recursion_depth;
1060+
#endif /* REGEXP_RECURSION_LIMIT */
10231061

10241062
if (ecma_is_value_true (match_value))
10251063
{
@@ -1055,6 +1093,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10551093
if (num_of_iter >= min)
10561094
{
10571095
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, str_curr_p, &sub_str_p);
1096+
#ifdef REGEXP_RECURSION_LIMIT
1097+
--re_ctx_p->recursion_depth;
1098+
#endif /* REGEXP_RECURSION_LIMIT */
10581099

10591100
if (ecma_is_value_true (match_value))
10601101
{
@@ -1068,6 +1109,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10681109
}
10691110

10701111
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
1112+
#ifdef REGEXP_RECURSION_LIMIT
1113+
--re_ctx_p->recursion_depth;
1114+
#endif /* REGEXP_RECURSION_LIMIT */
10711115

10721116
if (!ecma_is_value_true (match_value))
10731117
{
@@ -1103,6 +1147,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11031147
while (num_of_iter < max)
11041148
{
11051149
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_curr_p, &sub_str_p);
1150+
#ifdef REGEXP_RECURSION_LIMIT
1151+
--re_ctx_p->recursion_depth;
1152+
#endif /* REGEXP_RECURSION_LIMIT */
11061153

11071154
if (!ecma_is_value_true (match_value))
11081155
{
@@ -1121,6 +1168,9 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11211168
while (num_of_iter >= min)
11221169
{
11231170
ecma_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, str_curr_p, &sub_str_p);
1171+
#ifdef REGEXP_RECURSION_LIMIT
1172+
--re_ctx_p->recursion_depth;
1173+
#endif /* REGEXP_RECURSION_LIMIT */
11241174

11251175
if (ecma_is_value_true (match_value))
11261176
{
@@ -1232,6 +1282,9 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
12321282
re_ctx.input_start_p = input_curr_p;
12331283
const lit_utf8_byte_t *input_end_p = re_ctx.input_start_p + input_buffer_size;
12341284
re_ctx.input_end_p = input_end_p;
1285+
#ifdef REGEXP_RECURSION_LIMIT
1286+
re_ctx.recursion_depth = 0;
1287+
#endif /* REGEXP_RECURSION_LIMIT */
12351288

12361289
/* 1. Read bytecode header and init regexp matcher context. */
12371290
re_ctx.flags = bc_p->header.status_flags;

jerry-core/ecma/operations/ecma-regexp-object.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ typedef struct
4646
const lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
4747
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
4848
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
49+
#ifdef REGEXP_RECURSION_LIMIT
50+
uint32_t recursion_depth; /**< current recursion depth of the matcher, checked against REGEXP_RECURSION_LIMIT */
51+
#endif /* REGEXP_RECURSION_LIMIT */
4952
uint32_t num_of_captures; /**< number of capture groups */
5053
uint32_t num_of_non_captures; /**< number of non-capture groups */
5154
uint32_t *num_of_iterations_p; /**< number of iterations */

jerry-core/parser/regexp/re-compiler.c

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,14 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
249249
uint32_t idx;
250250
re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
251251
ecma_value_t ret_value = ECMA_VALUE_EMPTY;
252+
#ifdef REGEXP_RECURSION_LIMIT
253+
if (re_ctx_p->recursion_depth >= REGEXP_RECURSION_LIMIT)
254+
{
255+
ret_value = ecma_raise_range_error ("RegExp executor recursion limit is exceeded.");
256+
return ret_value;
257+
}
258+
++re_ctx_p->recursion_depth;
259+
#endif /* REGEXP_RECURSION_LIMIT */
252260

253261
uint32_t alterantive_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
254262
bool should_loop = true;
@@ -274,6 +282,9 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
274282
JERRY_TRACE_MSG ("Compile a capture group start (idx: %u)\n", (unsigned int) idx);
275283

276284
ret_value = re_parse_alternative (re_ctx_p, false);
285+
#ifdef REGEXP_RECURSION_LIMIT
286+
--re_ctx_p->recursion_depth;
287+
#endif /* REGEXP_RECURSION_LIMIT */
277288

278289
if (ecma_is_value_empty (ret_value))
279290
{
@@ -288,6 +299,9 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
288299
JERRY_TRACE_MSG ("Compile a non-capture group start (idx: %u)\n", (unsigned int) idx);
289300

290301
ret_value = re_parse_alternative (re_ctx_p, false);
302+
#ifdef REGEXP_RECURSION_LIMIT
303+
--re_ctx_p->recursion_depth;
304+
#endif /* REGEXP_RECURSION_LIMIT */
291305

292306
if (ecma_is_value_empty (ret_value))
293307
{
@@ -356,6 +370,9 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
356370
re_append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_POS);
357371

358372
ret_value = re_parse_alternative (re_ctx_p, false);
373+
#ifdef REGEXP_RECURSION_LIMIT
374+
--re_ctx_p->recursion_depth;
375+
#endif /* REGEXP_RECURSION_LIMIT */
359376

360377
if (ecma_is_value_empty (ret_value))
361378
{
@@ -373,6 +390,9 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
373390
re_append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_NEG);
374391

375392
ret_value = re_parse_alternative (re_ctx_p, false);
393+
#ifdef REGEXP_RECURSION_LIMIT
394+
--re_ctx_p->recursion_depth;
395+
#endif /* REGEXP_RECURSION_LIMIT */
376396

377397
if (ecma_is_value_empty (ret_value))
378398
{
@@ -559,7 +579,9 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
559579
re_ctx.flags = flags;
560580
re_ctx.highest_backref = 0;
561581
re_ctx.num_of_non_captures = 0;
562-
582+
#ifdef REGEXP_RECURSION_LIMIT
583+
re_ctx.recursion_depth = 0;
584+
#endif /* REGEXP_RECURSION_LIMIT */
563585
re_bytecode_ctx_t bc_ctx;
564586
bc_ctx.block_start_p = NULL;
565587
bc_ctx.block_end_p = NULL;

jerry-core/parser/regexp/re-compiler.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ typedef struct
4141
uint32_t num_of_captures; /**< number of capture groups */
4242
uint32_t num_of_non_captures; /**< number of non-capture groups */
4343
uint32_t highest_backref; /**< highest backreference */
44+
#ifdef REGEXP_RECURSION_LIMIT
45+
uint32_t recursion_depth; /**< current recursion depth of the compiler, checked against REGEXP_RECURSION_LIMIT */
46+
#endif /* REGEXP_RECURSION_LIMIT */
4447
re_bytecode_ctx_t *bytecode_ctx_p; /**< pointer of RegExp bytecode context */
4548
re_token_t current_token; /**< current token */
4649
re_parser_ctx_t *parser_ctx_p; /**< pointer of RegExp parser context */

tools/build.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ def devhelp(helpstring):
126126
help='specify profile file')
127127
coregrp.add_argument('--regexp-strict-mode', metavar='X', choices=['ON', 'OFF'], type=str.upper,
128128
help=devhelp('enable regexp strict mode (%(choices)s)'))
129+
coregrp.add_argument('--regexp-recursion-limit', metavar='N', type=int, default=1000,
130+
help='RegExp recursion depth limit (0 for unlimited, default=1000)')
129131
coregrp.add_argument('--show-opcodes', metavar='X', choices=['ON', 'OFF'], type=str.upper,
130132
help=devhelp('enable parser byte-code dumps (%(choices)s)'))
131133
coregrp.add_argument('--show-regexp-opcodes', metavar='X', choices=['ON', 'OFF'], type=str.upper,
@@ -194,6 +196,7 @@ def build_options_append(cmakeopt, cliarg):
194196
build_options_append('FEATURE_MEM_STRESS_TEST', arguments.mem_stress_test)
195197
build_options_append('FEATURE_PROFILE', arguments.profile)
196198
build_options_append('FEATURE_REGEXP_STRICT_MODE', arguments.regexp_strict_mode)
199+
build_options_append('REGEXP_RECURSION_LIMIT_N', arguments.regexp_recursion_limit)
197200
build_options_append('FEATURE_PARSER_DUMP', arguments.show_opcodes)
198201
build_options_append('FEATURE_REGEXP_DUMP', arguments.show_regexp_opcodes)
199202
build_options_append('FEATURE_SNAPSHOT_EXEC', arguments.snapshot_exec)

tools/run-tests.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@
142142
['--jerry-cmdline-test=on']),
143143
Options('buildoption_test-cmdline_snapshot',
144144
['--jerry-cmdline-snapshot=on']),
145+
Options('buildoption_test-regexp_recursion_limit',
146+
['--regexp-recursion-limit=0']),
145147
]
146148

147149
def get_arguments():

0 commit comments

Comments
 (0)