Skip to content

Commit cdcbf36

Browse files
author
Matthew Barnett
committed
The escape function no longer escapes \x00. It's not necessary.
Inline flags can now be turned off and apply to what follows. Added \R to match line endings.
1 parent 34333d5 commit cdcbf36

File tree

7 files changed

+54
-58
lines changed

7 files changed

+54
-58
lines changed

.github/workflows/main.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ jobs:
4545

4646
env:
4747
# macOS archs
48-
CIBW_ARCHS_MACOS: "x86_64 arm64"
48+
CIBW_ARCHS_MACOS: "x86_64 arm64 universal2"
4949

5050
steps:
5151
- uses: actions/checkout@v3
@@ -64,6 +64,14 @@ jobs:
6464
name: regex-files
6565
path: wheelhouse/*.whl
6666

67+
- name: Create GitHub release
68+
uses: actions/create-release@v1
69+
env:
70+
GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }}
71+
with:
72+
tag_name: ${{ github.ref }}
73+
title: Release ${{ github.ref }}
74+
6775
# Build source distribution & manylinux1_x86_64 wheels
6876
# These two jobs build:
6977
# 1, build_wheels (above): manylinux1_i686 / manylinux2014_x86_64

changelog.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
Version: 2023.12.23
2+
3+
The escape function no longer escapes \x00. It's not necessary.
4+
5+
Inline flags can now be turned off and apply to what follows.
6+
7+
Added \R to match line endings.
8+
19
Version: 2023.10.3
210

311
Updated to Unicode 15.1.0.

regex_3/_regex_core.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,22 +1150,7 @@ def parse_flags_subpattern(source, info):
11501150

11511151
def parse_positional_flags(source, info, flags_on, flags_off):
11521152
"Parses positional flags."
1153-
version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION
1154-
if version == VERSION0:
1155-
# Positional flags are global and can only be turned on.
1156-
if flags_off:
1157-
raise error("bad inline flags: cannot turn flags off",
1158-
source.string, source.pos)
1159-
1160-
new_global_flags = flags_on & ~info.global_flags
1161-
if new_global_flags:
1162-
info.global_flags |= new_global_flags
1163-
1164-
# A global has been turned on, so reparse the pattern.
1165-
raise _UnscopedFlagSet(info.global_flags)
1166-
else:
1167-
info.flags = (info.flags | flags_on) & ~flags_off
1168-
1153+
info.flags = (info.flags | flags_on) & ~flags_off
11691154
source.ignore_space = bool(info.flags & VERBOSE)
11701155

11711156
def parse_name(source, allow_numeric=False, allow_group_0=False):
@@ -1233,6 +1218,14 @@ def parse_escape(source, info, in_set):
12331218
elif ch in "pP":
12341219
# A Unicode property, positive or negative.
12351220
return parse_property(source, info, ch == "p", in_set)
1221+
elif ch == "R" and not in_set:
1222+
# A line ending.
1223+
charset = [0x0A, 0x0B, 0x0C, 0x0D]
1224+
if info.guess_encoding == UNICODE:
1225+
charset.extend([0x85, 0x2028, 0x2029])
1226+
1227+
return Atomic(Branch([String([0x0D, 0x0A]), SetUnion(info, [Character(c)
1228+
for c in charset])]))
12361229
elif ch == "X" and not in_set:
12371230
# A grapheme cluster.
12381231
return Grapheme()

regex_3/regex.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@
241241
"VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
242242
"__doc__", "RegexFlag"]
243243

244-
__version__ = "2.5.135"
244+
__version__ = "2.5.136"
245245

246246
# --------------------------------------------------------------------
247247
# Public interface.
@@ -392,8 +392,6 @@ def escape(pattern, special_only=True, literal_spaces=False):
392392
elif c in _METACHARS or c.isspace():
393393
s.append("\\")
394394
s.append(c)
395-
elif c == "\x00":
396-
s.append("\\000")
397395
else:
398396
s.append(c)
399397
else:
@@ -402,8 +400,6 @@ def escape(pattern, special_only=True, literal_spaces=False):
402400
s.append(c)
403401
elif c in _ALNUM:
404402
s.append(c)
405-
elif c == "\x00":
406-
s.append("\\000")
407403
else:
408404
s.append("\\")
409405
s.append(c)

regex_3/test_regex.py

Lines changed: 25 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -911,10 +911,9 @@ def test_inline_flags(self):
911911
p = regex.compile('(?iu)' + lower_char)
912912
self.assertEqual(bool(p.match(upper_char)), True)
913913

914+
# Changed to positional flags in regex 2023.12.23.
914915
self.assertEqual(bool(regex.match(r"(?i)a", "A")), True)
915-
self.assertEqual(bool(regex.match(r"a(?i)", "A")), True)
916-
self.assertEqual(bool(regex.match(r"(?iV1)a", "A")), True)
917-
self.assertEqual(regex.match(r"a(?iV1)", "A"), None)
916+
self.assertEqual(regex.match(r"a(?i)", "A"), None)
918917

919918
def test_dollar_matches_twice(self):
920919
# $ matches the end of string, and just before the terminating \n.
@@ -1396,18 +1395,15 @@ def test_scoped_and_inline_flags(self):
13961395
# Issues 433028, 433024, 433027.
13971396
self.assertEqual(regex.search(r"(?i)Ab", "ab").span(), (0, 2))
13981397
self.assertEqual(regex.search(r"(?i:A)b", "ab").span(), (0, 2))
1399-
self.assertEqual(regex.search(r"A(?i)b", "ab").span(), (0, 2))
1400-
self.assertEqual(regex.search(r"A(?iV1)b", "ab"), None)
1401-
1402-
self.assertRaisesRegex(regex.error, self.CANT_TURN_OFF, lambda:
1403-
regex.search(r"(?V0-i)Ab", "ab", flags=regex.I))
1398+
# Changed to positional flags in regex 2023.12.23.
1399+
self.assertEqual(regex.search(r"A(?i)b", "ab"), None)
14041400

14051401
self.assertEqual(regex.search(r"(?V0)Ab", "ab"), None)
14061402
self.assertEqual(regex.search(r"(?V1)Ab", "ab"), None)
1407-
self.assertEqual(regex.search(r"(?V1-i)Ab", "ab", flags=regex.I), None)
1403+
self.assertEqual(regex.search(r"(?-i)Ab", "ab", flags=regex.I), None)
14081404
self.assertEqual(regex.search(r"(?-i:A)b", "ab", flags=regex.I), None)
1409-
self.assertEqual(regex.search(r"A(?V1-i)b", "ab",
1410-
flags=regex.I).span(), (0, 2))
1405+
self.assertEqual(regex.search(r"A(?-i)b", "ab", flags=regex.I).span(),
1406+
(0, 2))
14111407

14121408
def test_repeated_repeats(self):
14131409
# Issue 2537.
@@ -1820,12 +1816,10 @@ def test_various(self):
18201816
('a.*b', 'acc\nccb', '', ascii(None)),
18211817
('a.{4,5}b', 'acc\nccb', '', ascii(None)),
18221818
('a.b', 'a\rb', '0', ascii('a\rb')),
1823-
# The new behaviour is that the inline flag affects only what follows.
1824-
('a.b(?s)', 'a\nb', '0', ascii('a\nb')),
1825-
('a.b(?sV1)', 'a\nb', '', ascii(None)),
1819+
# Changed to positional flags in regex 2023.12.23.
1820+
('a.b(?s)', 'a\nb', '', ascii(None)),
18261821
('(?s)a.b', 'a\nb', '0', ascii('a\nb')),
1827-
('a.*(?s)b', 'acc\nccb', '0', ascii('acc\nccb')),
1828-
('a.*(?sV1)b', 'acc\nccb', '', ascii(None)),
1822+
('a.*(?s)b', 'acc\nccb', '', ascii(None)),
18291823
('(?s)a.*b', 'acc\nccb', '0', ascii('acc\nccb')),
18301824
('(?s)a.{4,5}b', 'acc\nccb', '0', ascii('acc\nccb')),
18311825

@@ -2345,12 +2339,9 @@ def test_various(self):
23452339
# Not an error under PCRE/PRE:
23462340
# Positional inline flags now always affect only what follows
23472341
# (changed in regex 2023.12.23).
2348-
('w(?i)', 'W', '0', ascii('W')),
2349-
('w(?iV1)', 'W', '0', ascii(None)),
2342+
('w(?i)', 'W', '0', ascii(None)),
23502343
('w(?i)', 'w', '0', ascii('w')),
2351-
('w(?iV1)', 'w', '0', ascii('w')),
23522344
('(?i)w', 'W', '0', ascii('W')),
2353-
('(?iV1)w', 'W', '0', ascii('W')),
23542345

23552346
# Comments using the x embedded pattern modifier.
23562347
("""(?x)w# comment 1
@@ -2403,14 +2394,10 @@ def test_various(self):
24032394
# Bug 114033: nothing to repeat.
24042395
(r'(x?)?', 'x', '0', ascii('x')),
24052396
# Bug 115040: rescan if flags are modified inside pattern.
2406-
# If the new behaviour is turned on then positional inline flags
2407-
# affect only what follows.
2408-
(r' (?x)foo ', 'foo', '0', ascii('foo')),
2409-
(r' (?V1x)foo ', 'foo', '0', ascii(None)),
2397+
# Changed to positional flags in regex 2023.12.23.
2398+
(r' (?x)foo ', 'foo', '0', ascii(None)),
24102399
(r'(?x) foo ', 'foo', '0', ascii('foo')),
2411-
(r'(?V1x) foo ', 'foo', '0', ascii('foo')),
24122400
(r'(?x)foo ', 'foo', '0', ascii('foo')),
2413-
(r'(?V1x)foo ', 'foo', '0', ascii('foo')),
24142401
# Bug 115618: negative lookahead.
24152402
(r'(?<!abc)(d.f)', 'abcdefdof', '0', ascii('dof')),
24162403
# Bug 116251: character class bug.
@@ -3154,10 +3141,8 @@ def test_hg_bugs(self):
31543141

31553142
# Hg issue 39: regex.search("((?i)blah)\\s+\\1", "blah BLAH") doesn't
31563143
# return None
3157-
self.assertEqual(regex.search(r"(?V0)((?i)blah)\s+\1",
3158-
"blah BLAH").group(0, 1), ("blah BLAH", "blah"))
3159-
self.assertEqual(regex.search(r"(?V1)((?i)blah)\s+\1", "blah BLAH"),
3160-
None)
3144+
# Changed to positional flags in regex 2023.12.23.
3145+
self.assertEqual(regex.search(r"((?i)blah)\s+\1", "blah BLAH"), None)
31613146

31623147
# Hg issue 40: regex.search("(\()?[^()]+(?(1)\)|)", "(abcd").group(0)
31633148
# returns "bcd" instead of "abcd"
@@ -4336,10 +4321,10 @@ def test_hg_bugs(self):
43364321
self.assertEqual(regex.search(r"^a?(a?)b?c\1$", "abca").span(), (0, 4))
43374322

43384323
# Git issue 498: Conditional negative lookahead inside positive lookahead fails to match
4339-
self.assertEqual(regex.match(r"(?(?=a).|..)", "ab").span(), (0, 1))
4340-
self.assertEqual(regex.match(r"(?(?=b).|..)", "ab").span(), (0, 2))
4341-
self.assertEqual(regex.match(r"(?(?!a).|..)", "ab").span(), (0, 2))
4342-
self.assertEqual(regex.match(r"(?(?!b).|..)", "ab").span(), (0, 1))
4324+
self.assertEqual(regex.match(r'(?(?=a).|..)', 'ab').span(), (0, 1))
4325+
self.assertEqual(regex.match(r'(?(?=b).|..)', 'ab').span(), (0, 2))
4326+
self.assertEqual(regex.match(r'(?(?!a).|..)', 'ab').span(), (0, 2))
4327+
self.assertEqual(regex.match(r'(?(?!b).|..)', 'ab').span(), (0, 1))
43434328

43444329
def test_fuzzy_ext(self):
43454330
self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', 'e')),
@@ -4460,6 +4445,12 @@ def test_more_zerowidth(self):
44604445
self.assertEqual([m.span() for m in regex.finditer(r'(?m)^\s*?$',
44614446
'foo\n\n\nbar')], [(4, 4), (4, 5), (5, 5)])
44624447

4448+
def test_line_ending(self):
4449+
self.assertEqual(regex.findall(r'\R', '\r\n\n\x0B\f\r\x85\u2028\u2029'),
4450+
['\r\n', '\n', '\x0B', '\f', '\r', '\x85', '\u2028', '\u2029'])
4451+
self.assertEqual(regex.findall(br'\R', b'\r\n\n\x0B\f\r\x85'), [b'\r\n',
4452+
b'\n', b'\x0B', b'\f', b'\r'])
4453+
44634454
def test_main():
44644455
unittest.main(verbosity=2)
44654456

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
setup(
99
name='regex',
10-
version='2023.10.3',
10+
version='2023.12.23',
1111
description='Alternative regular expression module, to replace re.',
1212
long_description=long_description,
1313
long_description_content_type='text/x-rst',

tools/build_regex_unicode.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1781,4 +1781,4 @@ def make_key(names):
17811781

17821782
generate_code(unicode_data, UNICODE_VERSION, this_folder)
17831783

1784-
print('\nSuccessfully generated _reges_unicode.h and _reges_unicode.c in %s' % tools_folder)
1784+
print('\nSuccessfully generated _regex_unicode.h and _regex_unicode.c in %s' % tools_folder)

0 commit comments

Comments
 (0)