Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 5908300

Browse files
authored Apr 13, 2017
bpo-29995: re.escape() now escapes only special characters. (#1007)
1 parent a6e395d commit 5908300

File tree

6 files changed

+40
-51
lines changed

6 files changed

+40
-51
lines changed
 

‎Doc/library/re.rst

+7 -3
Original file line number | Diff line number | Diff line change
@@ -786,7 +786,7 @@ form.
786786

787787
.. function:: escape(pattern)
788788

789-
Escape all the characters in *pattern* except ASCII letters, numbers and ``'_'``.
789+
Escape special characters in *pattern*.
790790
This is useful if you want to match an arbitrary literal string that may
791791
have regular expression metacharacters in it. For example::
792792

@@ -795,15 +795,19 @@ form.
795795

796796
>>> legal_chars = string.ascii_lowercase + string.digits + "!#$%&'*+-.^_`|~:"
797797
>>> print('[%s]+' % re.escape(legal_chars))
798-
[abcdefghijklmnopqrstuvwxyz0123456789\!\#\$\%\&\'\*\+\-\.\^_\`\|\~\:]+
798+
[abcdefghijklmnopqrstuvwxyz0123456789!\#\$%&'\*\+\-\.\^_`\|~:]+
799799

800800
>>> operators = ['+', '-', '*', '/', '**']
801801
>>> print('|'.join(map(re.escape, sorted(operators, reverse=True))))
802-
\/|\-|\+|\*\*|\*
802+
/|\-|\+|\*\*|\*
803803

804804
.. versionchanged:: 3.3
805805
The ``'_'`` character is no longer escaped.
806806

807+
.. versionchanged:: 3.7
808+
Only characters that can have special meaning in a regular expression
809+
are escaped.
810+
807811

808812
.. function:: purge()
809813

‎Doc/tools/susp-ignored.csv

+1 -1
Original file line number | Diff line number | Diff line change
@@ -303,7 +303,7 @@ whatsnew/3.2,,:gz,">>> with tarfile.open(name='myarchive.tar.gz', mode='w:gz') a
303303
whatsnew/3.2,,:location,zope9-location = ${zope9:location}
304304
whatsnew/3.2,,:prefix,zope-conf = ${custom:prefix}/etc/zope.conf
305305
library/re,,`,!#$%&'*+-.^_`|~:
306-
library/re,,`,\!\#\$\%\&\'\*\+\-\.\^_\`\|\~\:
306+
library/re,,`,!\#\$%&'\*\+\-\.\^_`\|~:
307307
library/tarfile,,:xz,'x:xz'
308308
library/xml.etree.elementtree,,:sometag,prefix:sometag
309309
library/xml.etree.elementtree,,:fictional,"<actors xmlns:fictional=""http://characters.example.com"""

‎Lib/idlelib/idle_test/test_replace.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -221,8 +221,8 @@ def test_replace_regex(self):
221221
self.assertIn('Invalid Replace Expression', showerror.message)
222222

223223
# test access method
224-
self.engine.setcookedpat("\'")
225-
equal(pv.get(), "\\'")
224+
self.engine.setcookedpat("?")
225+
equal(pv.get(), "\\?")
226226

227227
def test_replace_backwards(self):
228228
equal = self.assertEqual

‎Lib/re.py

+9 -27
Original file line number | Diff line number | Diff line change
@@ -241,39 +241,21 @@ def template(pattern, flags=0):
241241
"Compile a template pattern, returning a pattern object"
242242
return _compile(pattern, flags|T)
243243

244-
_alphanum_str = frozenset(
245-
"_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
246-
_alphanum_bytes = frozenset(
247-
b"_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
244+
# SPECIAL_CHARS
245+
# closing ')', '}' and ']'
246+
# '-' (a range in character set)
247+
# '#' (comment) and WHITESPACE (ignored) in verbose mode
248+
_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.# \t\n\r\v\f'}
248249

249250
def escape(pattern):
250251
"""
251-
Escape all the characters in pattern except ASCII letters, numbers and '_'.
252+
Escape special characters in a string.
252253
"""
253254
if isinstance(pattern, str):
254-
alphanum = _alphanum_str
255-
s = list(pattern)
256-
for i, c in enumerate(pattern):
257-
if c not in alphanum:
258-
if c == "\000":
259-
s[i] = "\\000"
260-
else:
261-
s[i] = "\\" + c
262-
return "".join(s)
255+
return pattern.translate(_special_chars_map)
263256
else:
264-
alphanum = _alphanum_bytes
265-
s = []
266-
esc = ord(b"\\")
267-
for c in pattern:
268-
if c in alphanum:
269-
s.append(c)
270-
else:
271-
if c == 0:
272-
s.extend(b"\\000")
273-
else:
274-
s.append(esc)
275-
s.append(c)
276-
return bytes(s)
257+
pattern = str(pattern, 'latin1')
258+
return pattern.translate(_special_chars_map).encode('latin1')
277259

278260
# --------------------------------------------------------------------
279261
# internals

‎Lib/test/test_re.py

+19 -18
Original file line number | Diff line number | Diff line change
@@ -904,7 +904,7 @@ def test_search_coverage(self):
904904
self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
905905

906906
def assertMatch(self, pattern, text, match=None, span=None,
907-
matcher=re.match):
907+
matcher=re.fullmatch):
908908
if match is None and span is None:
909909
# the pattern matches the whole text
910910
match = text
@@ -917,45 +917,46 @@ def assertMatch(self, pattern, text, match=None, span=None,
917917
self.assertEqual(m.group(), match)
918918
self.assertEqual(m.span(), span)
919919

920+
LITERAL_CHARS = string.ascii_letters + string.digits + '!"%&\',/:;<=>@_`~'
921+
920922
def test_re_escape(self):
921-
alnum_chars = string.ascii_letters + string.digits + '_'
922923
p = ''.join(chr(i) for i in range(256))
923924
for c in p:
924-
if c in alnum_chars:
925-
self.assertEqual(re.escape(c), c)
926-
elif c == '\x00':
927-
self.assertEqual(re.escape(c), '\\000')
928-
else:
929-
self.assertEqual(re.escape(c), '\\' + c)
930925
self.assertMatch(re.escape(c), c)
926+
self.assertMatch('[' + re.escape(c) + ']', c)
927+
self.assertMatch('(?x)' + re.escape(c), c)
931928
self.assertMatch(re.escape(p), p)
929+
for c in '-.]{}':
930+
self.assertEqual(re.escape(c)[:1], '\\')
931+
literal_chars = self.LITERAL_CHARS
932+
self.assertEqual(re.escape(literal_chars), literal_chars)
932933

933-
def test_re_escape_byte(self):
934-
alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
934+
def test_re_escape_bytes(self):
935935
p = bytes(range(256))
936936
for i in p:
937937
b = bytes([i])
938-
if b in alnum_chars:
939-
self.assertEqual(re.escape(b), b)
940-
elif i == 0:
941-
self.assertEqual(re.escape(b), b'\\000')
942-
else:
943-
self.assertEqual(re.escape(b), b'\\' + b)
944938
self.assertMatch(re.escape(b), b)
939+
self.assertMatch(b'[' + re.escape(b) + b']', b)
940+
self.assertMatch(b'(?x)' + re.escape(b), b)
945941
self.assertMatch(re.escape(p), p)
942+
for i in b'-.]{}':
943+
b = bytes([i])
944+
self.assertEqual(re.escape(b)[:1], b'\\')
945+
literal_chars = self.LITERAL_CHARS.encode('ascii')
946+
self.assertEqual(re.escape(literal_chars), literal_chars)
946947

947948
def test_re_escape_non_ascii(self):
948949
s = 'xxx\u2620\u2620\u2620xxx'
949950
s_escaped = re.escape(s)
950-
self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
951+
self.assertEqual(s_escaped, s)
951952
self.assertMatch(s_escaped, s)
952953
self.assertMatch('.%s+.' % re.escape('\u2620'), s,
953954
'x\u2620\u2620\u2620x', (2, 7), re.search)
954955

955956
def test_re_escape_non_ascii_bytes(self):
956957
b = 'y\u2620y\u2620y'.encode('utf-8')
957958
b_escaped = re.escape(b)
958-
self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
959+
self.assertEqual(b_escaped, b)
959960
self.assertMatch(b_escaped, b)
960961
res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
961962
self.assertEqual(len(res), 2)

‎Misc/NEWS

+2
Original file line number | Diff line number | Diff line change
@@ -320,6 +320,8 @@ Library
320320
- bpo-29998: Pickling and copying ImportError now preserves name and path
321321
attributes.
322322

323+
- bpo-29995: re.escape() now escapes only regex special characters.
324+
323325
- bpo-29962: Add math.remainder operation, implementing remainder
324326
as specified in IEEE 754.
325327

0 commit comments

Comments
 (0)
Please sign in to comment.