Skip to content

Commit

Permalink
<regex>: add checks for integer overflow (#2169)
Browse files Browse the repository at this point in the history
Co-authored-by: Alex Guteniev <gutenev@gmail.com>
Co-authored-by: Nicole Mazzuca <mazzucan@outlook.com>
Co-authored-by: Stephan T. Lavavej <stl@nuwen.net>
  • Loading branch information
4 people authored Aug 27, 2022
1 parent fdb9c99 commit f241c79
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 13 deletions.
27 changes: 16 additions & 11 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1678,8 +1678,8 @@ private:
void _Expect(_Meta_type, regex_constants::error_type);

// parsing
int _Do_digits(int _Base, int _Count);
bool _DecimalDigits();
int _Do_digits(int _Base, int _Count, regex_constants::error_type _Error_type);
bool _DecimalDigits(regex_constants::error_type _Error_type);
void _HexDigits(int);
bool _OctalDigits();
void _Do_ex_class(_Meta_type);
Expand Down Expand Up @@ -3899,10 +3899,14 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Expect(_Meta_type _St, regex_constants:
}

template <class _FwdIt, class _Elem, class _RxTraits>
int _Parser<_FwdIt, _Elem, _RxTraits>::_Do_digits(int _Base, int _Count) { // translate digits to numeric value
int _Parser<_FwdIt, _Elem, _RxTraits>::_Do_digits(
int _Base, int _Count, regex_constants::error_type _Error_type) { // translate digits to numeric value
int _Chv;
_Val = 0;
while (_Count != 0 && (_Chv = _Traits.value(_Char, _Base)) != -1) { // append next digit
if (_Val > (INT_MAX - _Chv) / _Base) {
_Error(_Error_type);
}
--_Count;
_Val *= _Base;
_Val += _Chv;
Expand All @@ -3912,20 +3916,21 @@ int _Parser<_FwdIt, _Elem, _RxTraits>::_Do_digits(int _Base, int _Count) { // tr
}

template <class _FwdIt, class _Elem, class _RxTraits>
bool _Parser<_FwdIt, _Elem, _RxTraits>::_DecimalDigits() { // check for decimal value
return _Do_digits(10, INT_MAX) != INT_MAX;
bool _Parser<_FwdIt, _Elem, _RxTraits>::_DecimalDigits(
regex_constants::error_type _Error_type) { // check for decimal value
return _Do_digits(10, INT_MAX, _Error_type) != INT_MAX;
}

template <class _FwdIt, class _Elem, class _RxTraits>
void _Parser<_FwdIt, _Elem, _RxTraits>::_HexDigits(int _Count) { // check for _Count hex digits
if (_Do_digits(16, _Count) != 0) {
if (_Do_digits(16, _Count, regex_constants::error_escape) != 0) {
_Error(regex_constants::error_escape);
}
}

template <class _FwdIt, class _Elem, class _RxTraits>
bool _Parser<_FwdIt, _Elem, _RxTraits>::_OctalDigits() { // check for up to 3 octal digits
return _Do_digits(8, 3) != 3;
return _Do_digits(8, 3, regex_constants::error_escape) != 3;
}

template <class _FwdIt, class _Elem, class _RxTraits>
Expand Down Expand Up @@ -3995,7 +4000,7 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape(bool _Addit) { // check
return _Prs_chr;
} else if ((_L_flags & _L_esc_wsd) && _CharacterClassEscape(_Addit)) {
return _Prs_set;
} else if (_DecimalDigits()) { // check for invalid value
} else if (_DecimalDigits(regex_constants::error_escape)) { // check for invalid value
if (_Val != 0) {
_Error(regex_constants::error_escape);
}
Expand Down Expand Up @@ -4282,7 +4287,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterEscape() { // check for valid

template <class _FwdIt, class _Elem, class _RxTraits>
void _Parser<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom escape
if ((_L_flags & _L_bckr) && _DecimalDigits()) { // check for valid back reference
if ((_L_flags & _L_bckr) && _DecimalDigits(regex_constants::error_backref)) { // check for valid back reference
if (_Val == 0) { // handle \0
if (!(_L_flags & _L_bzr_chr)) {
_Error(regex_constants::error_escape);
Expand Down Expand Up @@ -4314,7 +4319,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier
_Max = 1;
} else if (_Mchar == _Meta_lbr) { // check for valid bracketed value
_Next();
if (!_DecimalDigits()) {
if (!_DecimalDigits(regex_constants::error_badbrace)) {
_Error(regex_constants::error_badbrace);
}

Expand All @@ -4324,7 +4329,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier
} else { // check for decimal constant following comma
_Next();
if (_Mchar != _Meta_rbr) {
if (!_DecimalDigits()) {
if (!_DecimalDigits(regex_constants::error_badbrace)) {
_Error(regex_constants::error_badbrace);
}

Expand Down
1 change: 0 additions & 1 deletion tests/libcxx/expected_results.txt
Original file line number Diff line number Diff line change
Expand Up @@ -697,7 +697,6 @@ std/re/re.alg/re.alg.search/extended.pass.cpp FAIL
std/re/re.alg/re.alg.search/no_update_pos.pass.cpp FAIL
std/re/re.badexp/regex_error.pass.cpp FAIL
std/re/re.const/re.synopt/syntax_option_type.pass.cpp FAIL
std/re/re.grammar/excessive_brace_count.pass.cpp FAIL
std/re/re.regex/re.regex.construct/bad_backref.pass.cpp FAIL
std/re/re.regex/re.regex.construct/bad_escape.pass.cpp FAIL
std/re/re.regex/re.regex.construct/bad_range.pass.cpp FAIL
Expand Down
1 change: 0 additions & 1 deletion tests/libcxx/skipped_tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -693,7 +693,6 @@ re\re.alg\re.alg.search\extended.pass.cpp
re\re.alg\re.alg.search\no_update_pos.pass.cpp
re\re.badexp\regex_error.pass.cpp
re\re.const\re.synopt\syntax_option_type.pass.cpp
re\re.grammar\excessive_brace_count.pass.cpp
re\re.regex\re.regex.construct\bad_backref.pass.cpp
re\re.regex\re.regex.construct\bad_escape.pass.cpp
re\re.regex\re.regex.construct\bad_range.pass.cpp
Expand Down
1 change: 1 addition & 0 deletions tests/std/test.lst
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ tests\GH_002039_byte_is_not_trivially_swappable
tests\GH_002045_put_time_changes_errno
tests\GH_002058_debug_iterator_race
tests\GH_002120_streambuf_seekpos_and_seekoff
tests\GH_002168_regex_overflow
tests\GH_002299_implicit_sfinae_constraints
tests\GH_002307_usual_scope_guard
tests\GH_002334_branchless_clamp
Expand Down
4 changes: 4 additions & 0 deletions tests/std/tests/GH_002168_regex_overflow/env.lst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

RUNALL_INCLUDE ..\usual_matrix.lst
70 changes: 70 additions & 0 deletions tests/std/tests/GH_002168_regex_overflow/test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <cassert>
#include <regex>

using namespace std;

// GH-2168 <regex>: integer overflow on large backreference value
//
// Each pattern below contains a decimal number that does not fit in an int, placed
// where the parser reads a number: a backreference, a brace quantifier bound, or a
// decimal escape inside a character class. Constructing the regex must throw
// regex_error with the error code listed for that pattern.
int main() {
    struct overflow_case {
        const char* pattern; // malformed regular expression
        std::regex_constants::error_type expected_code; // code the regex_error must carry
    };

    const overflow_case cases[] = {
        // 4294967297 = 1 mod 2^32, so this will succeed if we don't check for overflow.
        {R"((a)\4294967297)", std::regex_constants::error_backref},
        {"a{100000000000000000}", std::regex_constants::error_badbrace},
        {"a{100,10000000000000000}", std::regex_constants::error_badbrace},
        // 4294967296 = 0 mod 2^32, so this will succeed if we don't check for overflow.
        {R"([\4294967296-1])", std::regex_constants::error_escape},

        // Also test 2147483648 = 2^31, the first value that overflows for int:
        {R"((a)\2147483648)", std::regex_constants::error_backref},
        {"a{2147483648}", std::regex_constants::error_badbrace},
        {"a{100,2147483648}", std::regex_constants::error_badbrace},
        {R"([\2147483648-1])", std::regex_constants::error_escape},
    };

    for (const overflow_case& current : cases) {
        try {
            std::regex testRegex{current.pattern, std::regex_constants::ECMAScript};
            assert(false); // construction must not succeed
        } catch (const std::regex_error& e) {
            assert(e.code() == current.expected_code);
        }
    }
}

0 comments on commit f241c79

Please sign in to comment.