Skip to content

Commit 59659c0

Browse files
author
Istvan Miklos
committed
Add RegExp recursion depth limit
The regexp engine does not have any recursion depth check, thus it can cause problems with various regexps. Added a new build option `--regexp-recursion-limit N` whose default value is 1000. For unlimited recursion depth use 0. Also added a build-option-test for the unlimited recursion depth. Fixes #2448 JerryScript-DCO-1.0-Signed-off-by: Istvan Miklos imiklos2@inf.u-szeged.hu
1 parent 9ab33e8 commit 59659c0

File tree

8 files changed

+79
-1
lines changed

8 files changed

+79
-1
lines changed

jerry-core/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ set(FEATURE_SYSTEM_ALLOCATOR OFF CACHE BOOL "Enable system allocator?")
3939
set(FEATURE_VALGRIND OFF CACHE BOOL "Enable Valgrind support?")
4040
set(FEATURE_VM_EXEC_STOP OFF CACHE BOOL "Enable VM execution stopping?")
4141
set(MEM_HEAP_SIZE_KB "512" CACHE STRING "Size of memory heap, in kilobytes")
42+
# Maximum RegExp recursion depth; "0" leaves the REGEXP_RECURSION_LIMIT define unset (no limit).
set(REGEXP_RECURSION_LIMIT "0" CACHE STRING "Limit of regexp recursion depth")
4243

4344
# Option overrides
4445
if(USING_MSVC)
@@ -94,6 +95,7 @@ message(STATUS "FEATURE_SYSTEM_ALLOCATOR " ${FEATURE_SYSTEM_ALLOCATOR})
9495
message(STATUS "FEATURE_VALGRIND " ${FEATURE_VALGRIND})
9596
message(STATUS "FEATURE_VM_EXEC_STOP " ${FEATURE_VM_EXEC_STOP})
9697
message(STATUS "MEM_HEAP_SIZE_KB " ${MEM_HEAP_SIZE_KB})
98+
message(STATUS "REGEXP_RECURSION_LIMIT " ${REGEXP_RECURSION_LIMIT})
9799

98100
# Include directories
99101
set(INCLUDE_CORE_PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
@@ -242,6 +244,11 @@ if(FEATURE_REGEXP_STRICT_MODE)
242244
set(DEFINES_JERRY ${DEFINES_JERRY} ENABLE_REGEXP_STRICT_MODE)
243245
endif()
244246

247+
# RegExp recursion depth limit
248+
if(REGEXP_RECURSION_LIMIT)
249+
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_RECURSION_LIMIT=${REGEXP_RECURSION_LIMIT})
250+
endif()
251+
245252
# RegExp byte-code dumps
246253
if(FEATURE_REGEXP_DUMP)
247254
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_DUMP_BYTE_CODE)

jerry-core/ecma/operations/ecma-regexp-object.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@
6363
*/
6464
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)
6565

66+
/**
67+
* Check RegExp recursion depth limit
68+
*/
69+
#ifdef REGEXP_RECURSION_LIMIT
70+
JERRY_STATIC_ASSERT (REGEXP_RECURSION_LIMIT > 0, regexp_recursion_limit_must_be_greater_than_zero);
71+
#endif /* REGEXP_RECURSION_LIMIT */
72+
6673
/**
6774
* Parse RegExp flags (global, ignoreCase, multiline)
6875
*
@@ -364,6 +371,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
364371
const lit_utf8_byte_t *str_p, /**< input string pointer */
365372
const lit_utf8_byte_t **out_str_p) /**< [out] matching substring iterator */
366373
{
374+
REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST ();
367375
const lit_utf8_byte_t *str_curr_p = str_p;
368376

369377
while (true)
@@ -376,12 +384,14 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
376384
{
377385
JERRY_TRACE_MSG ("Execute RE_OP_MATCH: match\n");
378386
*out_str_p = str_curr_p;
387+
REGEXP_RECURSION_COUNTER_INCREASE ();
379388
return ECMA_VALUE_TRUE; /* match */
380389
}
381390
case RE_OP_CHAR:
382391
{
383392
if (str_curr_p >= re_ctx_p->input_end_p)
384393
{
394+
REGEXP_RECURSION_COUNTER_INCREASE ();
385395
return ECMA_VALUE_FALSE; /* fail */
386396
}
387397

@@ -393,6 +403,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
393403
if (ch1 != ch2)
394404
{
395405
JERRY_TRACE_MSG ("fail\n");
406+
REGEXP_RECURSION_COUNTER_INCREASE ();
396407
return ECMA_VALUE_FALSE; /* fail */
397408
}
398409

@@ -404,6 +415,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
404415
{
405416
if (str_curr_p >= re_ctx_p->input_end_p)
406417
{
418+
REGEXP_RECURSION_COUNTER_INCREASE ();
407419
return ECMA_VALUE_FALSE; /* fail */
408420
}
409421

@@ -413,6 +425,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
413425
if (lit_char_is_line_terminator (ch))
414426
{
415427
JERRY_TRACE_MSG ("fail\n");
428+
REGEXP_RECURSION_COUNTER_INCREASE ();
416429
return ECMA_VALUE_FALSE; /* fail */
417430
}
418431

@@ -432,6 +445,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
432445
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
433446
{
434447
JERRY_TRACE_MSG ("fail\n");
448+
REGEXP_RECURSION_COUNTER_INCREASE ();
435449
return ECMA_VALUE_FALSE; /* fail */
436450
}
437451

@@ -442,6 +456,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
442456
}
443457

444458
JERRY_TRACE_MSG ("fail\n");
459+
REGEXP_RECURSION_COUNTER_INCREASE ();
445460
return ECMA_VALUE_FALSE; /* fail */
446461
}
447462
case RE_OP_ASSERT_END:
@@ -457,6 +472,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
457472
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
458473
{
459474
JERRY_TRACE_MSG ("fail\n");
475+
REGEXP_RECURSION_COUNTER_INCREASE ();
460476
return ECMA_VALUE_FALSE; /* fail */
461477
}
462478

@@ -467,6 +483,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
467483
}
468484

469485
JERRY_TRACE_MSG ("fail\n");
486+
REGEXP_RECURSION_COUNTER_INCREASE ();
470487
return ECMA_VALUE_FALSE; /* fail */
471488
}
472489
case RE_OP_ASSERT_WORD_BOUNDARY:
@@ -498,6 +515,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
498515
if (is_wordchar_left == is_wordchar_right)
499516
{
500517
JERRY_TRACE_MSG ("fail\n");
518+
REGEXP_RECURSION_COUNTER_INCREASE ();
501519
return ECMA_VALUE_FALSE; /* fail */
502520
}
503521
}
@@ -509,6 +527,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
509527
if (is_wordchar_left != is_wordchar_right)
510528
{
511529
JERRY_TRACE_MSG ("fail\n");
530+
REGEXP_RECURSION_COUNTER_INCREASE ();
512531
return ECMA_VALUE_FALSE; /* fail */
513532
}
514533
}
@@ -563,6 +582,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
563582

564583
if (!ECMA_IS_VALUE_ERROR (match_value))
565584
{
585+
REGEXP_RECURSION_COUNTER_INCREASE ();
566586
if (ecma_is_value_true (match_value))
567587
{
568588
*out_str_p = sub_str_p;
@@ -588,6 +608,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
588608
if (str_curr_p >= re_ctx_p->input_end_p)
589609
{
590610
JERRY_TRACE_MSG ("fail\n");
611+
REGEXP_RECURSION_COUNTER_INCREASE ();
591612
return ECMA_VALUE_FALSE; /* fail */
592613
}
593614

@@ -618,6 +639,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
618639
if (!is_match)
619640
{
620641
JERRY_TRACE_MSG ("fail\n");
642+
REGEXP_RECURSION_COUNTER_INCREASE ();
621643
return ECMA_VALUE_FALSE; /* fail */
622644
}
623645
}
@@ -627,6 +649,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
627649
if (is_match)
628650
{
629651
JERRY_TRACE_MSG ("fail\n");
652+
REGEXP_RECURSION_COUNTER_INCREASE ();
630653
return ECMA_VALUE_FALSE; /* fail */
631654
}
632655
}
@@ -657,6 +680,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
657680
if (str_curr_p >= re_ctx_p->input_end_p)
658681
{
659682
JERRY_TRACE_MSG ("fail\n");
683+
REGEXP_RECURSION_COUNTER_INCREASE ();
660684
return ECMA_VALUE_FALSE; /* fail */
661685
}
662686

@@ -666,6 +690,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
666690
if (ch1 != ch2)
667691
{
668692
JERRY_TRACE_MSG ("fail\n");
693+
REGEXP_RECURSION_COUNTER_INCREASE ();
669694
return ECMA_VALUE_FALSE; /* fail */
670695
}
671696
}
@@ -689,6 +714,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
689714
if (ecma_is_value_true (match_value))
690715
{
691716
*out_str_p = sub_str_p;
717+
REGEXP_RECURSION_COUNTER_INCREASE ();
692718
return match_value; /* match */
693719
}
694720
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -703,13 +729,15 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
703729
bc_p = old_bc_p;
704730

705731
re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p;
732+
REGEXP_RECURSION_COUNTER_INCREASE ();
706733
return ECMA_VALUE_FALSE; /* fail */
707734
}
708735
case RE_OP_SAVE_AND_MATCH:
709736
{
710737
JERRY_TRACE_MSG ("End of pattern is reached: match\n");
711738
re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = str_curr_p;
712739
*out_str_p = str_curr_p;
740+
REGEXP_RECURSION_COUNTER_INCREASE ();
713741
return ECMA_VALUE_TRUE; /* match */
714742
}
715743
case RE_OP_ALTERNATIVE:
@@ -774,6 +802,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
774802
if (ecma_is_value_true (match_value))
775803
{
776804
*out_str_p = sub_str_p;
805+
REGEXP_RECURSION_COUNTER_INCREASE ();
777806
return match_value; /* match */
778807
}
779808
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -832,6 +861,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
832861
if (ecma_is_value_true (match_value))
833862
{
834863
*out_str_p = sub_str_p;
864+
REGEXP_RECURSION_COUNTER_INCREASE ();
835865
return match_value; /* match */
836866
}
837867
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -856,6 +886,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
856886
if (ecma_is_value_true (match_value))
857887
{
858888
*out_str_p = sub_str_p;
889+
REGEXP_RECURSION_COUNTER_INCREASE ();
859890
return match_value; /* match */
860891
}
861892
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -865,6 +896,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
865896
}
866897

867898
re_ctx_p->saved_p[start_idx] = old_start_p;
899+
REGEXP_RECURSION_COUNTER_INCREASE ();
868900
return ECMA_VALUE_FALSE; /* fail */
869901
}
870902
case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
@@ -910,6 +942,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
910942
if (ecma_is_value_true (match_value))
911943
{
912944
*out_str_p = sub_str_p;
945+
REGEXP_RECURSION_COUNTER_INCREASE ();
913946
return match_value; /* match */
914947
}
915948
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -958,6 +991,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
958991
if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
959992
&& str_curr_p== re_ctx_p->saved_p[start_idx])
960993
{
994+
REGEXP_RECURSION_COUNTER_INCREASE ();
961995
return ECMA_VALUE_FALSE; /* fail */
962996
}
963997

@@ -979,6 +1013,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
9791013
if (ecma_is_value_true (match_value))
9801014
{
9811015
*out_str_p = sub_str_p;
1016+
REGEXP_RECURSION_COUNTER_INCREASE ();
9821017
return match_value; /* match */
9831018
}
9841019
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1003,6 +1038,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10031038
if (ecma_is_value_true (match_value))
10041039
{
10051040
*out_str_p = sub_str_p;
1041+
REGEXP_RECURSION_COUNTER_INCREASE ();
10061042
return match_value; /* match */
10071043
}
10081044
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1024,6 +1060,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10241060
if (ecma_is_value_true (match_value))
10251061
{
10261062
*out_str_p = sub_str_p;
1063+
REGEXP_RECURSION_COUNTER_INCREASE ();
10271064
return match_value; /* match */
10281065
}
10291066
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1035,6 +1072,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10351072
/* restore if fails */
10361073
re_ctx_p->saved_p[end_idx] = old_end_p;
10371074
re_ctx_p->num_of_iterations_p[iter_idx]--;
1075+
REGEXP_RECURSION_COUNTER_INCREASE ();
10381076
return ECMA_VALUE_FALSE; /* fail */
10391077
}
10401078
case RE_OP_NON_GREEDY_ITERATOR:
@@ -1059,6 +1097,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10591097
if (ecma_is_value_true (match_value))
10601098
{
10611099
*out_str_p = sub_str_p;
1100+
REGEXP_RECURSION_COUNTER_INCREASE ();
10621101
return match_value; /* match */
10631102
}
10641103
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1082,6 +1121,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10821121
str_curr_p = sub_str_p;
10831122
num_of_iter++;
10841123
}
1124+
REGEXP_RECURSION_COUNTER_INCREASE ();
10851125
return ECMA_VALUE_FALSE; /* fail */
10861126
}
10871127
default:
@@ -1125,6 +1165,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11251165
if (ecma_is_value_true (match_value))
11261166
{
11271167
*out_str_p = sub_str_p;
1168+
REGEXP_RECURSION_COUNTER_INCREASE ();
11281169
return match_value; /* match */
11291170
}
11301171
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1140,6 +1181,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11401181
lit_utf8_read_prev (&str_curr_p);
11411182
num_of_iter--;
11421183
}
1184+
REGEXP_RECURSION_COUNTER_INCREASE ();
11431185
return ECMA_VALUE_FALSE; /* fail */
11441186
}
11451187
}
@@ -1232,6 +1274,7 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
12321274
re_ctx.input_start_p = input_curr_p;
12331275
const lit_utf8_byte_t *input_end_p = re_ctx.input_start_p + input_buffer_size;
12341276
re_ctx.input_end_p = input_end_p;
1277+
REGEXP_RECURSION_COUNTER_INIT ();
12351278

12361279
/* 1. Read bytecode header and init regexp matcher context. */
12371280
re_ctx.flags = bc_p->header.status_flags;

jerry-core/ecma/operations/ecma-regexp-object.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,19 @@
1818

1919
#ifndef CONFIG_DISABLE_REGEXP_BUILTIN
2020

21+
#ifdef REGEXP_RECURSION_LIMIT
22+
#define REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST() if (--re_ctx_p->recursion_counter == 0) \
23+
{ \
24+
return ecma_raise_range_error (ECMA_ERR_MSG ("RegExp executor recursion limit is exceeded.")); \
25+
}
26+
#define REGEXP_RECURSION_COUNTER_INCREASE() (++re_ctx_p->recursion_counter)
27+
#define REGEXP_RECURSION_COUNTER_INIT() (re_ctx.recursion_counter = REGEXP_RECURSION_LIMIT)
28+
#else
29+
#define REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST()
30+
#define REGEXP_RECURSION_COUNTER_INCREASE()
31+
#define REGEXP_RECURSION_COUNTER_INIT()
32+
#endif /* REGEXP_RECURSION_LIMIT */
33+
2134
#include "ecma-globals.h"
2235
#include "re-compiler.h"
2336

@@ -46,6 +59,9 @@ typedef struct
4659
const lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
4760
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
4861
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
62+
#ifdef REGEXP_RECURSION_LIMIT
63+
uint32_t recursion_counter; /**< RegExp recursion counter */
64+
#endif /* REGEXP_RECURSION_LIMIT */
4965
uint32_t num_of_captures; /**< number of capture groups */
5066
uint32_t num_of_non_captures; /**< number of non-capture groups */
5167
uint32_t *num_of_iterations_p; /**< number of iterations */

jerry-core/parser/regexp/re-compiler.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ static ecma_value_t
246246
re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
247247
bool expect_eof) /**< expect end of file */
248248
{
249+
REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST ();
249250
uint32_t idx;
250251
re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
251252
ecma_value_t ret_value = ECMA_VALUE_EMPTY;
@@ -440,6 +441,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
440441
else
441442
{
442443
re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
444+
REGEXP_RECURSION_COUNTER_INCREASE ();
443445
should_loop = false;
444446
}
445447
break;
@@ -453,6 +455,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
453455
else
454456
{
455457
re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
458+
REGEXP_RECURSION_COUNTER_INCREASE ();
456459
should_loop = false;
457460
}
458461

@@ -559,7 +562,7 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
559562
re_ctx.flags = flags;
560563
re_ctx.highest_backref = 0;
561564
re_ctx.num_of_non_captures = 0;
562-
565+
REGEXP_RECURSION_COUNTER_INIT ();
563566
re_bytecode_ctx_t bc_ctx;
564567
bc_ctx.block_start_p = NULL;
565568
bc_ctx.block_end_p = NULL;

0 commit comments

Comments
 (0)