Skip to content

Commit 22bbb0c

Browse files
[3.10] gh-98740: Fix validation of conditional expressions in RE (GH-98764) (GH-99046)
In very rare circumstances the JUMP opcode could be confused with the argument of an opcode in the "then" part when that part does not end with a JUMP opcode. This led to incorrect detection of the final JUMP opcode and incorrect calculation of the size of the subexpression.

NOTE: Changed return value of functions _validate_inner() and _validate_charset() in Modules/_sre/sre.c (Modules/_sre.c on the 3.10 branch). Now they return 0 on success, -1 on failure, and 1 if the last op is JUMP (which usually is a failure). Previously they returned 1 on success and 0 on failure.

(cherry picked from commit e9ac890)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 27dc6db commit 22bbb0c

File tree

4 files changed

+39
-26
lines changed

4 files changed

+39
-26
lines changed

Doc/library/re.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,9 @@ The special characters are:
421421
some fixed length. Patterns which start with negative lookbehind assertions may
422422
match at the beginning of the string being searched.
423423

424+
.. _re-conditional-expression:
425+
.. index:: single: (?(; in regular expressions
426+
424427
``(?(id/name)yes-pattern|no-pattern)``
425428
Will try to match with ``yes-pattern`` if the group with given *id* or
426429
*name* exists, and with ``no-pattern`` if it doesn't. ``no-pattern`` is

Lib/test/test_re.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,11 @@ def test_re_groupref_exists_errors(self):
578578
self.checkPatternError(r'()(?(2)a)',
579579
"invalid group reference 2", 5)
580580

581+
def test_re_groupref_exists_validation_bug(self):
582+
for i in range(256):
583+
with self.subTest(code=i):
584+
re.compile(r'()(?(1)\x%02x?)' % i)
585+
581586
def test_re_groupref_overflow(self):
582587
from sre_constants import MAXGROUPS
583588
self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix internal error in the :mod:`re` module which in very rare circumstances
2+
prevented compilation of a regular expression containing a :ref:`conditional
3+
expression <re-conditional-expression>` without the "else" branch.

Modules/_sre.c

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1519,7 +1519,7 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
15191519
#endif
15201520

15211521
/* Report failure */
1522-
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
1522+
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)
15231523

15241524
/* Extract opcode, argument, or skip count from code array */
15251525
#define GET_OP \
@@ -1543,7 +1543,7 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
15431543
skip = *code; \
15441544
VTRACE(("%lu (skip to %p)\n", \
15451545
(unsigned long)skip, code+skip)); \
1546-
if (skip-adj > (uintptr_t)(end - code)) \
1546+
if (skip-adj > (uintptr_t)(end - code)) \
15471547
FAIL; \
15481548
code++; \
15491549
} while (0)
@@ -1632,9 +1632,10 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
16321632
}
16331633
}
16341634

1635-
return 1;
1635+
return 0;
16361636
}
16371637

1638+
/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
16381639
static int
16391640
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
16401641
{
@@ -1712,7 +1713,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
17121713
case SRE_OP_IN_LOC_IGNORE:
17131714
GET_SKIP;
17141715
/* Stop 1 before the end; we check the FAILURE below */
1715-
if (!_validate_charset(code, code+skip-2))
1716+
if (_validate_charset(code, code+skip-2))
17161717
FAIL;
17171718
if (code[skip-2] != SRE_OP_FAILURE)
17181719
FAIL;
@@ -1766,7 +1767,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
17661767
}
17671768
/* Validate the charset */
17681769
if (flags & SRE_INFO_CHARSET) {
1769-
if (!_validate_charset(code, newcode-1))
1770+
if (_validate_charset(code, newcode-1))
17701771
FAIL;
17711772
if (newcode[-1] != SRE_OP_FAILURE)
17721773
FAIL;
@@ -1787,7 +1788,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
17871788
if (skip == 0)
17881789
break;
17891790
/* Stop 2 before the end; we check the JUMP below */
1790-
if (!_validate_inner(code, code+skip-3, groups))
1791+
if (_validate_inner(code, code+skip-3, groups))
17911792
FAIL;
17921793
code += skip-3;
17931794
/* Check that it ends with a JUMP, and that each JUMP
@@ -1801,6 +1802,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
18011802
else if (code+skip-1 != target)
18021803
FAIL;
18031804
}
1805+
if (code != target)
1806+
FAIL;
18041807
}
18051808
break;
18061809

@@ -1815,7 +1818,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
18151818
FAIL;
18161819
if (max > SRE_MAXREPEAT)
18171820
FAIL;
1818-
if (!_validate_inner(code, code+skip-4, groups))
1821+
if (_validate_inner(code, code+skip-4, groups))
18191822
FAIL;
18201823
code += skip-4;
18211824
GET_OP;
@@ -1834,7 +1837,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
18341837
FAIL;
18351838
if (max > SRE_MAXREPEAT)
18361839
FAIL;
1837-
if (!_validate_inner(code, code+skip-3, groups))
1840+
if (_validate_inner(code, code+skip-3, groups))
18381841
FAIL;
18391842
code += skip-3;
18401843
GET_OP;
@@ -1886,24 +1889,17 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
18861889
to allow arbitrary jumps anywhere in the code; so we just look
18871890
for a JUMP opcode preceding our skip target.
18881891
*/
1889-
if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
1890-
code[skip-3] == SRE_OP_JUMP)
1891-
{
1892-
VTRACE(("both then and else parts present\n"));
1893-
if (!_validate_inner(code+1, code+skip-3, groups))
1894-
FAIL;
1892+
VTRACE(("then part:\n"));
1893+
int rc = _validate_inner(code+1, code+skip-1, groups);
1894+
if (rc == 1) {
1895+
VTRACE(("else part:\n"));
18951896
code += skip-2; /* Position after JUMP, at <skipno> */
18961897
GET_SKIP;
1897-
if (!_validate_inner(code, code+skip-1, groups))
1898-
FAIL;
1899-
code += skip-1;
1900-
}
1901-
else {
1902-
VTRACE(("only a then part present\n"));
1903-
if (!_validate_inner(code+1, code+skip-1, groups))
1904-
FAIL;
1905-
code += skip-1;
1898+
rc = _validate_inner(code, code+skip-1, groups);
19061899
}
1900+
if (rc)
1901+
FAIL;
1902+
code += skip-1;
19071903
break;
19081904

19091905
case SRE_OP_ASSERT:
@@ -1914,22 +1910,28 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
19141910
if (arg & 0x80000000)
19151911
FAIL; /* Width too large */
19161912
/* Stop 1 before the end; we check the SUCCESS below */
1917-
if (!_validate_inner(code+1, code+skip-2, groups))
1913+
if (_validate_inner(code+1, code+skip-2, groups))
19181914
FAIL;
19191915
code += skip-2;
19201916
GET_OP;
19211917
if (op != SRE_OP_SUCCESS)
19221918
FAIL;
19231919
break;
19241920

1921+
case SRE_OP_JUMP:
1922+
if (code + 1 != end)
1923+
FAIL;
1924+
VTRACE(("JUMP: %d\n", __LINE__));
1925+
return 1;
1926+
19251927
default:
19261928
FAIL;
19271929

19281930
}
19291931
}
19301932

19311933
VTRACE(("okay\n"));
1932-
return 1;
1934+
return 0;
19331935
}
19341936

19351937
static int
@@ -1944,7 +1946,7 @@ _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
19441946
static int
19451947
_validate(PatternObject *self)
19461948
{
1947-
if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1949+
if (_validate_outer(self->code, self->code+self->codesize, self->groups))
19481950
{
19491951
PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
19501952
return 0;

0 commit comments

Comments
 (0)