Skip to content

Commit f3a3d35

Browse files
author
Istvan Miklos
committed
Add RegExp recursion depth limit
The regexp engine does not perform any recursion depth check, which can cause problems with various regexps. Add a new build option, `--regexp-recursion-limit N`, whose default value is 0, meaning unlimited recursion depth. Also add a build-option test.
Fixes #2448
JerryScript-DCO-1.0-Signed-off-by: Istvan Miklos imiklos2@inf.u-szeged.hu
1 parent a1595fa commit f3a3d35

File tree

8 files changed

+98
-1
lines changed

8 files changed

+98
-1
lines changed

jerry-core/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ set(FEATURE_SYSTEM_ALLOCATOR OFF CACHE BOOL "Enable system allocator?")
3939
set(FEATURE_VALGRIND OFF CACHE BOOL "Enable Valgrind support?")
4040
set(FEATURE_VM_EXEC_STOP OFF CACHE BOOL "Enable VM execution stopping?")
4141
set(MEM_HEAP_SIZE_KB "512" CACHE STRING "Size of memory heap, in kilobytes")
42+
set(REGEXP_RECURSION_LIMIT "0" CACHE STRING "Limit of regexp recursion depth")
4243

4344
# Option overrides
4445
if(USING_MSVC)
@@ -94,6 +95,7 @@ message(STATUS "FEATURE_SYSTEM_ALLOCATOR " ${FEATURE_SYSTEM_ALLOCATOR})
9495
message(STATUS "FEATURE_VALGRIND " ${FEATURE_VALGRIND})
9596
message(STATUS "FEATURE_VM_EXEC_STOP " ${FEATURE_VM_EXEC_STOP})
9697
message(STATUS "MEM_HEAP_SIZE_KB " ${MEM_HEAP_SIZE_KB})
98+
message(STATUS "REGEXP_RECURSION_LIMIT " ${REGEXP_RECURSION_LIMIT})
9799

98100
# Include directories
99101
set(INCLUDE_CORE_PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
@@ -242,6 +244,11 @@ if(FEATURE_REGEXP_STRICT_MODE)
242244
set(DEFINES_JERRY ${DEFINES_JERRY} ENABLE_REGEXP_STRICT_MODE)
243245
endif()
244246

247+
# RegExp recursion depth limit
# REGEXP_RECURSION_LIMIT must be a non-negative decimal integer without leading
# zeros (a leading zero would be read as an octal literal by the C compiler).
# "0" leaves the recursion depth unlimited and adds no define; any other value
# is forwarded to the compiler as REGEXP_RECURSION_LIMIT=<value>.
# Validating here reports a bad value at configure time instead of letting it
# be silently ignored or fail later inside the C sources.
if(NOT "${REGEXP_RECURSION_LIMIT}" MATCHES "^(0|[1-9][0-9]*)$")
  message(FATAL_ERROR "REGEXP_RECURSION_LIMIT must be a non-negative integer, got '${REGEXP_RECURSION_LIMIT}'")
endif()
if(NOT "${REGEXP_RECURSION_LIMIT}" STREQUAL "0")
  set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_RECURSION_LIMIT=${REGEXP_RECURSION_LIMIT})
endif()
251+
245252
# RegExp byte-code dumps
246253
if(FEATURE_REGEXP_DUMP)
247254
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_DUMP_BYTE_CODE)

jerry-core/ecma/operations/ecma-regexp-object.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@
6363
*/
6464
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)
6565

66+
/**
67+
* Check the RegExp recursion depth limit:
* if REGEXP_RECURSION_LIMIT is defined, statically assert (via JERRY_STATIC_ASSERT)
* that its value is greater than zero.
68+
*/
69+
#ifdef REGEXP_RECURSION_LIMIT
70+
JERRY_STATIC_ASSERT (REGEXP_RECURSION_LIMIT > 0, regexp_recursion_limit_must_be_greater_than_zero);
71+
#endif /* REGEXP_RECURSION_LIMIT */
72+
6673
/**
6774
* Parse RegExp flags (global, ignoreCase, multiline)
6875
*
@@ -347,6 +354,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
347354
const lit_utf8_byte_t *str_p, /**< input string pointer */
348355
const lit_utf8_byte_t **out_str_p) /**< [out] matching substring iterator */
349356
{
357+
REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST ();
350358
const lit_utf8_byte_t *str_curr_p = str_p;
351359

352360
while (true)
@@ -359,12 +367,14 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
359367
{
360368
JERRY_TRACE_MSG ("Execute RE_OP_MATCH: match\n");
361369
*out_str_p = str_curr_p;
370+
REGEXP_RECURSION_COUNTER_INCREASE ();
362371
return ECMA_VALUE_TRUE; /* match */
363372
}
364373
case RE_OP_CHAR:
365374
{
366375
if (str_curr_p >= re_ctx_p->input_end_p)
367376
{
377+
REGEXP_RECURSION_COUNTER_INCREASE ();
368378
return ECMA_VALUE_FALSE; /* fail */
369379
}
370380

@@ -376,6 +386,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
376386
if (ch1 != ch2)
377387
{
378388
JERRY_TRACE_MSG ("fail\n");
389+
REGEXP_RECURSION_COUNTER_INCREASE ();
379390
return ECMA_VALUE_FALSE; /* fail */
380391
}
381392

@@ -387,6 +398,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
387398
{
388399
if (str_curr_p >= re_ctx_p->input_end_p)
389400
{
401+
REGEXP_RECURSION_COUNTER_INCREASE ();
390402
return ECMA_VALUE_FALSE; /* fail */
391403
}
392404

@@ -396,6 +408,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
396408
if (lit_char_is_line_terminator (ch))
397409
{
398410
JERRY_TRACE_MSG ("fail\n");
411+
REGEXP_RECURSION_COUNTER_INCREASE ();
399412
return ECMA_VALUE_FALSE; /* fail */
400413
}
401414

@@ -415,6 +428,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
415428
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
416429
{
417430
JERRY_TRACE_MSG ("fail\n");
431+
REGEXP_RECURSION_COUNTER_INCREASE ();
418432
return ECMA_VALUE_FALSE; /* fail */
419433
}
420434

@@ -425,6 +439,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
425439
}
426440

427441
JERRY_TRACE_MSG ("fail\n");
442+
REGEXP_RECURSION_COUNTER_INCREASE ();
428443
return ECMA_VALUE_FALSE; /* fail */
429444
}
430445
case RE_OP_ASSERT_END:
@@ -440,6 +455,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
440455
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
441456
{
442457
JERRY_TRACE_MSG ("fail\n");
458+
REGEXP_RECURSION_COUNTER_INCREASE ();
443459
return ECMA_VALUE_FALSE; /* fail */
444460
}
445461

@@ -450,6 +466,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
450466
}
451467

452468
JERRY_TRACE_MSG ("fail\n");
469+
REGEXP_RECURSION_COUNTER_INCREASE ();
453470
return ECMA_VALUE_FALSE; /* fail */
454471
}
455472
case RE_OP_ASSERT_WORD_BOUNDARY:
@@ -481,6 +498,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
481498
if (is_wordchar_left == is_wordchar_right)
482499
{
483500
JERRY_TRACE_MSG ("fail\n");
501+
REGEXP_RECURSION_COUNTER_INCREASE ();
484502
return ECMA_VALUE_FALSE; /* fail */
485503
}
486504
}
@@ -492,6 +510,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
492510
if (is_wordchar_left != is_wordchar_right)
493511
{
494512
JERRY_TRACE_MSG ("fail\n");
513+
REGEXP_RECURSION_COUNTER_INCREASE ();
495514
return ECMA_VALUE_FALSE; /* fail */
496515
}
497516
}
@@ -546,6 +565,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
546565

547566
if (!ECMA_IS_VALUE_ERROR (match_value))
548567
{
568+
REGEXP_RECURSION_COUNTER_INCREASE ();
549569
if (ecma_is_value_true (match_value))
550570
{
551571
*out_str_p = sub_str_p;
@@ -571,6 +591,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
571591
if (str_curr_p >= re_ctx_p->input_end_p)
572592
{
573593
JERRY_TRACE_MSG ("fail\n");
594+
REGEXP_RECURSION_COUNTER_INCREASE ();
574595
return ECMA_VALUE_FALSE; /* fail */
575596
}
576597

@@ -601,6 +622,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
601622
if (!is_match)
602623
{
603624
JERRY_TRACE_MSG ("fail\n");
625+
REGEXP_RECURSION_COUNTER_INCREASE ();
604626
return ECMA_VALUE_FALSE; /* fail */
605627
}
606628
}
@@ -610,6 +632,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
610632
if (is_match)
611633
{
612634
JERRY_TRACE_MSG ("fail\n");
635+
REGEXP_RECURSION_COUNTER_INCREASE ();
613636
return ECMA_VALUE_FALSE; /* fail */
614637
}
615638
}
@@ -640,6 +663,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
640663
if (str_curr_p >= re_ctx_p->input_end_p)
641664
{
642665
JERRY_TRACE_MSG ("fail\n");
666+
REGEXP_RECURSION_COUNTER_INCREASE ();
643667
return ECMA_VALUE_FALSE; /* fail */
644668
}
645669

@@ -649,6 +673,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
649673
if (ch1 != ch2)
650674
{
651675
JERRY_TRACE_MSG ("fail\n");
676+
REGEXP_RECURSION_COUNTER_INCREASE ();
652677
return ECMA_VALUE_FALSE; /* fail */
653678
}
654679
}
@@ -672,6 +697,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
672697
if (ecma_is_value_true (match_value))
673698
{
674699
*out_str_p = sub_str_p;
700+
REGEXP_RECURSION_COUNTER_INCREASE ();
675701
return match_value; /* match */
676702
}
677703
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -686,13 +712,15 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
686712
bc_p = old_bc_p;
687713

688714
re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p;
715+
REGEXP_RECURSION_COUNTER_INCREASE ();
689716
return ECMA_VALUE_FALSE; /* fail */
690717
}
691718
case RE_OP_SAVE_AND_MATCH:
692719
{
693720
JERRY_TRACE_MSG ("End of pattern is reached: match\n");
694721
re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = str_curr_p;
695722
*out_str_p = str_curr_p;
723+
REGEXP_RECURSION_COUNTER_INCREASE ();
696724
return ECMA_VALUE_TRUE; /* match */
697725
}
698726
case RE_OP_ALTERNATIVE:
@@ -757,6 +785,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
757785
if (ecma_is_value_true (match_value))
758786
{
759787
*out_str_p = sub_str_p;
788+
REGEXP_RECURSION_COUNTER_INCREASE ();
760789
return match_value; /* match */
761790
}
762791
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -815,6 +844,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
815844
if (ecma_is_value_true (match_value))
816845
{
817846
*out_str_p = sub_str_p;
847+
REGEXP_RECURSION_COUNTER_INCREASE ();
818848
return match_value; /* match */
819849
}
820850
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -839,6 +869,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
839869
if (ecma_is_value_true (match_value))
840870
{
841871
*out_str_p = sub_str_p;
872+
REGEXP_RECURSION_COUNTER_INCREASE ();
842873
return match_value; /* match */
843874
}
844875
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -848,6 +879,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
848879
}
849880

850881
re_ctx_p->saved_p[start_idx] = old_start_p;
882+
REGEXP_RECURSION_COUNTER_INCREASE ();
851883
return ECMA_VALUE_FALSE; /* fail */
852884
}
853885
case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
@@ -893,6 +925,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
893925
if (ecma_is_value_true (match_value))
894926
{
895927
*out_str_p = sub_str_p;
928+
REGEXP_RECURSION_COUNTER_INCREASE ();
896929
return match_value; /* match */
897930
}
898931
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -941,6 +974,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
941974
if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
942975
&& str_curr_p== re_ctx_p->saved_p[start_idx])
943976
{
977+
REGEXP_RECURSION_COUNTER_INCREASE ();
944978
return ECMA_VALUE_FALSE; /* fail */
945979
}
946980

@@ -962,6 +996,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
962996
if (ecma_is_value_true (match_value))
963997
{
964998
*out_str_p = sub_str_p;
999+
REGEXP_RECURSION_COUNTER_INCREASE ();
9651000
return match_value; /* match */
9661001
}
9671002
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -986,6 +1021,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
9861021
if (ecma_is_value_true (match_value))
9871022
{
9881023
*out_str_p = sub_str_p;
1024+
REGEXP_RECURSION_COUNTER_INCREASE ();
9891025
return match_value; /* match */
9901026
}
9911027
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1007,6 +1043,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10071043
if (ecma_is_value_true (match_value))
10081044
{
10091045
*out_str_p = sub_str_p;
1046+
REGEXP_RECURSION_COUNTER_INCREASE ();
10101047
return match_value; /* match */
10111048
}
10121049
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1018,6 +1055,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10181055
/* restore if fails */
10191056
re_ctx_p->saved_p[end_idx] = old_end_p;
10201057
re_ctx_p->num_of_iterations_p[iter_idx]--;
1058+
REGEXP_RECURSION_COUNTER_INCREASE ();
10211059
return ECMA_VALUE_FALSE; /* fail */
10221060
}
10231061
case RE_OP_NON_GREEDY_ITERATOR:
@@ -1042,6 +1080,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10421080
if (ecma_is_value_true (match_value))
10431081
{
10441082
*out_str_p = sub_str_p;
1083+
REGEXP_RECURSION_COUNTER_INCREASE ();
10451084
return match_value; /* match */
10461085
}
10471086
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1065,6 +1104,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10651104
str_curr_p = sub_str_p;
10661105
num_of_iter++;
10671106
}
1107+
REGEXP_RECURSION_COUNTER_INCREASE ();
10681108
return ECMA_VALUE_FALSE; /* fail */
10691109
}
10701110
default:
@@ -1108,6 +1148,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11081148
if (ecma_is_value_true (match_value))
11091149
{
11101150
*out_str_p = sub_str_p;
1151+
REGEXP_RECURSION_COUNTER_INCREASE ();
11111152
return match_value; /* match */
11121153
}
11131154
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1123,6 +1164,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11231164
lit_utf8_read_prev (&str_curr_p);
11241165
num_of_iter--;
11251166
}
1167+
REGEXP_RECURSION_COUNTER_INCREASE ();
11261168
return ECMA_VALUE_FALSE; /* fail */
11271169
}
11281170
}
@@ -1211,6 +1253,7 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
12111253
re_ctx.input_start_p = input_curr_p;
12121254
const lit_utf8_byte_t *input_end_p = re_ctx.input_start_p + input_buffer_size;
12131255
re_ctx.input_end_p = input_end_p;
1256+
REGEXP_RECURSION_COUNTER_INIT ();
12141257

12151258
/* 1. Read bytecode header and init regexp matcher context. */
12161259
re_ctx.flags = bc_p->header.status_flags;

jerry-core/ecma/operations/ecma-regexp-object.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,38 @@
1818

1919
#ifndef CONFIG_DISABLE_REGEXP_BUILTIN
2020

21+
#ifdef REGEXP_RECURSION_LIMIT
22+
23+
/**
24+
* Decrease the recursion counter and test it.
25+
* If the counter reaches zero, i.e. the recursion depth limit is hit,
26+
* the enclosing function returns with a range error.
27+
*
* Note: this macro expands to a `return` statement and expects a pointer
* named `re_ctx_p` with a `recursion_counter` member to be in scope.
*/
28+
#define REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST() \
29+
do \
30+
{ \
31+
if (--re_ctx_p->recursion_counter == 0) \
32+
{ \
33+
return ecma_raise_range_error (ECMA_ERR_MSG ("RegExp executor recursion limit is exceeded.")); \
34+
} \
35+
} \
36+
while (0)
37+
38+
/**
39+
* Increase the recursion counter.
* This undoes the decrement done by REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST.
* Expects a pointer named `re_ctx_p` to be in scope.
40+
*/
41+
#define REGEXP_RECURSION_COUNTER_INCREASE() (++re_ctx_p->recursion_counter)
42+
43+
/**
44+
* Set the recursion counter to the max depth of the recursion
* (REGEXP_RECURSION_LIMIT).
* Expects a matcher context object (not a pointer) named `re_ctx` to be in scope.
45+
*/
46+
#define REGEXP_RECURSION_COUNTER_INIT() (re_ctx.recursion_counter = REGEXP_RECURSION_LIMIT)
47+
#else /* !REGEXP_RECURSION_LIMIT */
48+
/* No recursion limit is configured: the counter macros expand to nothing. */
#define REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST()
49+
#define REGEXP_RECURSION_COUNTER_INCREASE()
50+
#define REGEXP_RECURSION_COUNTER_INIT()
51+
#endif /* REGEXP_RECURSION_LIMIT */
52+
2153
#include "ecma-globals.h"
2254
#include "re-compiler.h"
2355

@@ -48,6 +80,9 @@ typedef struct
4880
const lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
4981
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
5082
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
83+
#ifdef REGEXP_RECURSION_LIMIT
84+
uint32_t recursion_counter; /**< RegExp recursion counter */
85+
#endif /* REGEXP_RECURSION_LIMIT */
5186
uint32_t num_of_captures; /**< number of capture groups */
5287
uint32_t num_of_non_captures; /**< number of non-capture groups */
5388
uint32_t *num_of_iterations_p; /**< number of iterations */

0 commit comments

Comments
 (0)