Skip to content

Commit ebf605f

Browse files
author
Istvan Miklos
committed
Add RegExp recursion depth limit
The regexp engine does not have any recursion depth check, which can cause problems with various regexps. Added a new build option `--regexp-recursion-limit N` whose default value is 0, meaning unlimited recursion depth. Also added a build-option-test. Fixes #2448 Fixes #2190 JerryScript-DCO-1.0-Signed-off-by: Istvan Miklos imiklos2@inf.u-szeged.hu
1 parent 0b3d5ed commit ebf605f

File tree

7 files changed

+105
-0
lines changed

7 files changed

+105
-0
lines changed

jerry-core/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ set(FEATURE_SYSTEM_ALLOCATOR OFF CACHE BOOL "Enable system allocator?")
3939
set(FEATURE_VALGRIND OFF CACHE BOOL "Enable Valgrind support?")
4040
set(FEATURE_VM_EXEC_STOP OFF CACHE BOOL "Enable VM execution stopping?")
4141
set(MEM_HEAP_SIZE_KB "512" CACHE STRING "Size of memory heap, in kilobytes")
42+
set(REGEXP_RECURSION_LIMIT "0" CACHE STRING "Limit of regexp recursion depth")
4243

4344
# Option overrides
4445
if(USING_MSVC)
@@ -94,6 +95,7 @@ message(STATUS "FEATURE_SYSTEM_ALLOCATOR " ${FEATURE_SYSTEM_ALLOCATOR})
9495
message(STATUS "FEATURE_VALGRIND " ${FEATURE_VALGRIND})
9596
message(STATUS "FEATURE_VM_EXEC_STOP " ${FEATURE_VM_EXEC_STOP})
9697
message(STATUS "MEM_HEAP_SIZE_KB " ${MEM_HEAP_SIZE_KB})
98+
message(STATUS "REGEXP_RECURSION_LIMIT " ${REGEXP_RECURSION_LIMIT})
9799

98100
# Include directories
99101
set(INCLUDE_CORE_PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
@@ -228,6 +230,11 @@ if(FEATURE_REGEXP_STRICT_MODE)
228230
set(DEFINES_JERRY ${DEFINES_JERRY} ENABLE_REGEXP_STRICT_MODE)
229231
endif()
230232

233+
# RegExp recursion depth limit
# The REGEXP_RECURSION_LIMIT=<value> compile definition is appended to
# DEFINES_JERRY only when the cache variable evaluates true in if(); the
# default cache value "0" is a false constant, so no definition is added.
234+
if(REGEXP_RECURSION_LIMIT)
235+
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_RECURSION_LIMIT=${REGEXP_RECURSION_LIMIT})
236+
endif()
237+
231238
# RegExp byte-code dumps
232239
if(FEATURE_REGEXP_DUMP)
233240
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_DUMP_BYTE_CODE)

jerry-core/ecma/operations/ecma-regexp-object.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@
6363
*/
6464
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)
6565

66+
/*
67+
* Compile-time check of the RegExp recursion depth limit:
* when REGEXP_RECURSION_LIMIT is defined, it must be greater than zero.
68+
*/
69+
#ifdef REGEXP_RECURSION_LIMIT
70+
JERRY_STATIC_ASSERT (REGEXP_RECURSION_LIMIT > 0, regexp_recursion_limit_must_be_greater_than_zero);
71+
#endif /* REGEXP_RECURSION_LIMIT */
72+
6673
/**
6774
* Parse RegExp flags (global, ignoreCase, multiline)
6875
*
@@ -344,6 +351,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
344351
const lit_utf8_byte_t *str_p, /**< input string pointer */
345352
const lit_utf8_byte_t **out_str_p) /**< [out] matching substring iterator */
346353
{
354+
REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST ();
347355
const lit_utf8_byte_t *str_curr_p = str_p;
348356

349357
while (true)
@@ -356,12 +364,14 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
356364
{
357365
JERRY_TRACE_MSG ("Execute RE_OP_MATCH: match\n");
358366
*out_str_p = str_curr_p;
367+
REGEXP_RECURSION_COUNTER_INCREASE ();
359368
return ECMA_VALUE_TRUE; /* match */
360369
}
361370
case RE_OP_CHAR:
362371
{
363372
if (str_curr_p >= re_ctx_p->input_end_p)
364373
{
374+
REGEXP_RECURSION_COUNTER_INCREASE ();
365375
return ECMA_VALUE_FALSE; /* fail */
366376
}
367377

@@ -373,6 +383,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
373383
if (ch1 != ch2)
374384
{
375385
JERRY_TRACE_MSG ("fail\n");
386+
REGEXP_RECURSION_COUNTER_INCREASE ();
376387
return ECMA_VALUE_FALSE; /* fail */
377388
}
378389

@@ -384,6 +395,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
384395
{
385396
if (str_curr_p >= re_ctx_p->input_end_p)
386397
{
398+
REGEXP_RECURSION_COUNTER_INCREASE ();
387399
return ECMA_VALUE_FALSE; /* fail */
388400
}
389401

@@ -393,6 +405,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
393405
if (lit_char_is_line_terminator (ch))
394406
{
395407
JERRY_TRACE_MSG ("fail\n");
408+
REGEXP_RECURSION_COUNTER_INCREASE ();
396409
return ECMA_VALUE_FALSE; /* fail */
397410
}
398411

@@ -412,6 +425,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
412425
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
413426
{
414427
JERRY_TRACE_MSG ("fail\n");
428+
REGEXP_RECURSION_COUNTER_INCREASE ();
415429
return ECMA_VALUE_FALSE; /* fail */
416430
}
417431

@@ -422,6 +436,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
422436
}
423437

424438
JERRY_TRACE_MSG ("fail\n");
439+
REGEXP_RECURSION_COUNTER_INCREASE ();
425440
return ECMA_VALUE_FALSE; /* fail */
426441
}
427442
case RE_OP_ASSERT_END:
@@ -437,6 +452,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
437452
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
438453
{
439454
JERRY_TRACE_MSG ("fail\n");
455+
REGEXP_RECURSION_COUNTER_INCREASE ();
440456
return ECMA_VALUE_FALSE; /* fail */
441457
}
442458

@@ -447,6 +463,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
447463
}
448464

449465
JERRY_TRACE_MSG ("fail\n");
466+
REGEXP_RECURSION_COUNTER_INCREASE ();
450467
return ECMA_VALUE_FALSE; /* fail */
451468
}
452469
case RE_OP_ASSERT_WORD_BOUNDARY:
@@ -478,6 +495,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
478495
if (is_wordchar_left == is_wordchar_right)
479496
{
480497
JERRY_TRACE_MSG ("fail\n");
498+
REGEXP_RECURSION_COUNTER_INCREASE ();
481499
return ECMA_VALUE_FALSE; /* fail */
482500
}
483501
}
@@ -489,6 +507,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
489507
if (is_wordchar_left != is_wordchar_right)
490508
{
491509
JERRY_TRACE_MSG ("fail\n");
510+
REGEXP_RECURSION_COUNTER_INCREASE ();
492511
return ECMA_VALUE_FALSE; /* fail */
493512
}
494513
}
@@ -556,6 +575,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
556575
}
557576

558577
JMEM_FINALIZE_LOCAL_ARRAY (saved_bck_p);
578+
REGEXP_RECURSION_COUNTER_INCREASE ();
559579
return match_value;
560580
}
561581
case RE_OP_CHAR_CLASS:
@@ -568,6 +588,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
568588
if (str_curr_p >= re_ctx_p->input_end_p)
569589
{
570590
JERRY_TRACE_MSG ("fail\n");
591+
REGEXP_RECURSION_COUNTER_INCREASE ();
571592
return ECMA_VALUE_FALSE; /* fail */
572593
}
573594

@@ -598,6 +619,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
598619
if (!is_match)
599620
{
600621
JERRY_TRACE_MSG ("fail\n");
622+
REGEXP_RECURSION_COUNTER_INCREASE ();
601623
return ECMA_VALUE_FALSE; /* fail */
602624
}
603625
}
@@ -607,6 +629,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
607629
if (is_match)
608630
{
609631
JERRY_TRACE_MSG ("fail\n");
632+
REGEXP_RECURSION_COUNTER_INCREASE ();
610633
return ECMA_VALUE_FALSE; /* fail */
611634
}
612635
}
@@ -637,6 +660,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
637660
if (str_curr_p >= re_ctx_p->input_end_p)
638661
{
639662
JERRY_TRACE_MSG ("fail\n");
663+
REGEXP_RECURSION_COUNTER_INCREASE ();
640664
return ECMA_VALUE_FALSE; /* fail */
641665
}
642666

@@ -646,6 +670,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
646670
if (ch1 != ch2)
647671
{
648672
JERRY_TRACE_MSG ("fail\n");
673+
REGEXP_RECURSION_COUNTER_INCREASE ();
649674
return ECMA_VALUE_FALSE; /* fail */
650675
}
651676
}
@@ -669,6 +694,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
669694
if (ecma_is_value_true (match_value))
670695
{
671696
*out_str_p = sub_str_p;
697+
REGEXP_RECURSION_COUNTER_INCREASE ();
672698
return match_value; /* match */
673699
}
674700
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -683,13 +709,15 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
683709
bc_p = old_bc_p;
684710

685711
re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p;
712+
REGEXP_RECURSION_COUNTER_INCREASE ();
686713
return ECMA_VALUE_FALSE; /* fail */
687714
}
688715
case RE_OP_SAVE_AND_MATCH:
689716
{
690717
JERRY_TRACE_MSG ("End of pattern is reached: match\n");
691718
re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = str_curr_p;
692719
*out_str_p = str_curr_p;
720+
REGEXP_RECURSION_COUNTER_INCREASE ();
693721
return ECMA_VALUE_TRUE; /* match */
694722
}
695723
case RE_OP_ALTERNATIVE:
@@ -754,6 +782,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
754782
if (ecma_is_value_true (match_value))
755783
{
756784
*out_str_p = sub_str_p;
785+
REGEXP_RECURSION_COUNTER_INCREASE ();
757786
return match_value; /* match */
758787
}
759788
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -812,6 +841,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
812841
if (ecma_is_value_true (match_value))
813842
{
814843
*out_str_p = sub_str_p;
844+
REGEXP_RECURSION_COUNTER_INCREASE ();
815845
return match_value; /* match */
816846
}
817847
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -836,6 +866,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
836866
if (ecma_is_value_true (match_value))
837867
{
838868
*out_str_p = sub_str_p;
869+
REGEXP_RECURSION_COUNTER_INCREASE ();
839870
return match_value; /* match */
840871
}
841872
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -845,6 +876,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
845876
}
846877

847878
re_ctx_p->saved_p[start_idx] = old_start_p;
879+
REGEXP_RECURSION_COUNTER_INCREASE ();
848880
return ECMA_VALUE_FALSE; /* fail */
849881
}
850882
case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
@@ -890,6 +922,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
890922
if (ecma_is_value_true (match_value))
891923
{
892924
*out_str_p = sub_str_p;
925+
REGEXP_RECURSION_COUNTER_INCREASE ();
893926
return match_value; /* match */
894927
}
895928
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -938,6 +971,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
938971
if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
939972
&& str_curr_p== re_ctx_p->saved_p[start_idx])
940973
{
974+
REGEXP_RECURSION_COUNTER_INCREASE ();
941975
return ECMA_VALUE_FALSE; /* fail */
942976
}
943977

@@ -959,6 +993,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
959993
if (ecma_is_value_true (match_value))
960994
{
961995
*out_str_p = sub_str_p;
996+
REGEXP_RECURSION_COUNTER_INCREASE ();
962997
return match_value; /* match */
963998
}
964999
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -983,6 +1018,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
9831018
if (ecma_is_value_true (match_value))
9841019
{
9851020
*out_str_p = sub_str_p;
1021+
REGEXP_RECURSION_COUNTER_INCREASE ();
9861022
return match_value; /* match */
9871023
}
9881024
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1004,6 +1040,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10041040
if (ecma_is_value_true (match_value))
10051041
{
10061042
*out_str_p = sub_str_p;
1043+
REGEXP_RECURSION_COUNTER_INCREASE ();
10071044
return match_value; /* match */
10081045
}
10091046
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1015,6 +1052,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10151052
/* restore if fails */
10161053
re_ctx_p->saved_p[end_idx] = old_end_p;
10171054
re_ctx_p->num_of_iterations_p[iter_idx]--;
1055+
REGEXP_RECURSION_COUNTER_INCREASE ();
10181056
return ECMA_VALUE_FALSE; /* fail */
10191057
}
10201058
case RE_OP_NON_GREEDY_ITERATOR:
@@ -1039,6 +1077,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10391077
if (ecma_is_value_true (match_value))
10401078
{
10411079
*out_str_p = sub_str_p;
1080+
REGEXP_RECURSION_COUNTER_INCREASE ();
10421081
return match_value; /* match */
10431082
}
10441083
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1062,6 +1101,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
10621101
str_curr_p = sub_str_p;
10631102
num_of_iter++;
10641103
}
1104+
REGEXP_RECURSION_COUNTER_INCREASE ();
10651105
return ECMA_VALUE_FALSE; /* fail */
10661106
}
10671107
default:
@@ -1105,6 +1145,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11051145
if (ecma_is_value_true (match_value))
11061146
{
11071147
*out_str_p = sub_str_p;
1148+
REGEXP_RECURSION_COUNTER_INCREASE ();
11081149
return match_value; /* match */
11091150
}
11101151
else if (ECMA_IS_VALUE_ERROR (match_value))
@@ -1120,6 +1161,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
11201161
lit_utf8_read_prev (&str_curr_p);
11211162
num_of_iter--;
11221163
}
1164+
REGEXP_RECURSION_COUNTER_INCREASE ();
11231165
return ECMA_VALUE_FALSE; /* fail */
11241166
}
11251167
}
@@ -1208,6 +1250,7 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
12081250
re_ctx.input_start_p = input_curr_p;
12091251
const lit_utf8_byte_t *input_end_p = re_ctx.input_start_p + input_buffer_size;
12101252
re_ctx.input_end_p = input_end_p;
1253+
REGEXP_RECURSION_COUNTER_INIT ();
12111254

12121255
/* 1. Read bytecode header and init regexp matcher context. */
12131256
re_ctx.flags = bc_p->header.status_flags;

jerry-core/ecma/operations/ecma-regexp-object.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,46 @@
2828
* @{
2929
*/
3030

31+
#ifdef REGEXP_RECURSION_LIMIT
32+
/**
33+
* Decrease the recursion counter and test it.
34+
* If the decremented counter equals zero, the enclosing function
35+
* returns the range error raised by ecma_raise_range_error.
*
* Note: expands to code that uses `re_ctx_p` (pointer to the matcher
*       context) from the calling scope and contains a `return` statement.
* NOTE(review): the test is done after the pre-decrement, so with the counter
*       initialized to N the error fires on the N-th nested entry - confirm
*       that this is the intended meaning of the limit.
36+
*/
37+
#define REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST() \
38+
do \
39+
{ \
40+
if (--re_ctx_p->recursion_counter == 0) \
41+
{ \
42+
return ecma_raise_range_error (ECMA_ERR_MSG ("RegExp recursion limit is exceeded.")); \
43+
} \
44+
} \
45+
while (0)
46+
/**
47+
* Increase the recursion counter.
*
* Note: uses `re_ctx_p` (pointer to the matcher context) from the calling scope.
48+
*/
49+
#define REGEXP_RECURSION_COUNTER_INCREASE() (++re_ctx_p->recursion_counter)
50+
/**
51+
* Set the recursion counter to REGEXP_RECURSION_LIMIT.
*
* Note: uses `re_ctx` (the matcher context itself, not a pointer) from the calling scope.
52+
*/
53+
#define REGEXP_RECURSION_COUNTER_INIT() (re_ctx.recursion_counter = REGEXP_RECURSION_LIMIT)
54+
#else /* !REGEXP_RECURSION_LIMIT */
55+
/**
56+
* No-op variant of the decrease-and-test step:
57+
* expands to nothing when REGEXP_RECURSION_LIMIT is not defined,
58+
* so no counter is touched and no error is returned.
59+
*/
60+
#define REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST()
61+
/**
62+
* No-op variant of the counter increase: expands to nothing.
63+
*/
64+
#define REGEXP_RECURSION_COUNTER_INCREASE()
65+
/**
66+
* No-op variant of the counter initialization: expands to nothing.
67+
*/
68+
#define REGEXP_RECURSION_COUNTER_INIT()
69+
#endif /* REGEXP_RECURSION_LIMIT */
70+
3171
/**
3272
* RegExp flags
3373
* Note:
@@ -48,6 +88,9 @@ typedef struct
4888
const lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
4989
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
5090
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
91+
#ifdef REGEXP_RECURSION_LIMIT
92+
uint32_t recursion_counter; /**< RegExp recursion counter */
93+
#endif /* REGEXP_RECURSION_LIMIT */
5194
uint32_t num_of_captures; /**< number of capture groups */
5295
uint32_t num_of_non_captures; /**< number of non-capture groups */
5396
uint32_t *num_of_iterations_p; /**< number of iterations */

0 commit comments

Comments
 (0)