Skip to content

Commit ac95ba2

Browse files
committed
Make re.Match a well-rounded Sequence type
1 parent 3dfed23 commit ac95ba2

File tree

4 files changed

+102
-10
lines changed

4 files changed

+102
-10
lines changed

Doc/library/re.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1378,6 +1378,27 @@ when there is no match, you can test whether there was a match with a simple
13781378
if match:
13791379
process(match)
13801380

1381+
Match objects are proper :class:`~collections.abc.Sequence` types. You can access
1382+
match groups via subscripting ``match[...]`` and use familiar
1383+
:class:`~collections.abc.Sequence` idioms to iterate over and extract match groups::
1384+
1385+
>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
1386+
>>> m[1]
1387+
'Isaac'
1388+
>>> list(m)
1389+
['Isaac Newton', 'Isaac', 'Newton']
1390+
>>> _, first_name, last_name = m
1391+
>>> last_name
1392+
'Newton'
1393+
1394+
You can also destructure match objects with Python's :keyword:`match` statement::
1395+
1396+
>>> match re.match(r"(\d+)-(\d+)-(\d+)", "2000-10-16"):
1397+
... case [_, year, month, day]:
1398+
... year
1399+
...
1400+
'2000'
1401+
13811402
.. class:: Match
13821403

13831404
Match object returned by successful ``match``\ es and ``search``\ es.
@@ -1474,6 +1495,18 @@ when there is no match, you can test whether there was a match with a simple
14741495
.. versionadded:: 3.6
14751496

14761497

1498+
.. method:: Match.__len__()
1499+
1500+
Return the number of groups accessible through the subscript syntax provided by
1501+
:meth:`~Match.__getitem__`. This includes group ``0`` representing the entire match::
1502+
1503+
>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
1504+
>>> len(m)
1505+
3
1506+
1507+
.. versionadded:: 3.14
1508+
1509+
14771510
.. method:: Match.groups(default=None)
14781511

14791512
Return a tuple containing all the subgroups of the match, from 1 up to however

Lib/re/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@
125125
import enum
126126
from . import _compiler, _parser
127127
import functools
128+
import _collections_abc
128129
import _sre
129130

130131

@@ -315,6 +316,8 @@ def escape(pattern):
315316
Pattern = type(_compiler.compile('', 0))
316317
Match = type(_compiler.compile('', 0).match(''))
317318

319+
# Declare Match a Sequence for isinstance()/issubclass() checks.
# NOTE(review): ABC registration only creates a virtual subclass; it does not
# add Sequence's mixin methods (index(), count(), __contains__, __reversed__)
# to the C type. Confirm Match supplies whatever callers of Sequence expect.
_collections_abc.Sequence.register(Match)
320+
318321
# --------------------------------------------------------------------
319322
# internals
320323

Lib/test/test_re.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -599,8 +599,41 @@ def test_match_getitem(self):
599599
with self.assertRaises(TypeError):
600600
m[0] = 1
601601

602-
# No len().
603-
self.assertRaises(TypeError, len, m)
602+
# Exercise the sequence behaviour of match objects: ABC membership, len(),
# iteration, tuple/list conversion, unpacking and structural pattern matching.
def test_match_sequence(self):
603+
from collections.abc import Sequence
604+
605+
m = re.match(r"(a)(b)(c)", "abc")
606+
self.assertIsInstance(m, Sequence)
# Three capture groups plus group 0 (the whole match).
607+
self.assertEqual(len(m), 4)
608+
# Iteration yields group 0 first, then each capture group in order.
609+
it = iter(m)
610+
self.assertEqual(next(it), "abc")
611+
self.assertEqual(next(it), "a")
612+
self.assertEqual(next(it), "b")
613+
self.assertEqual(next(it), "c")
614+
# Conversion to the built-in sequence types sees the same four items.
615+
self.assertEqual(tuple(m), ("abc", "a", "b", "c"))
616+
self.assertEqual(list(m), ["abc", "a", "b", "c"])
617+
# Iterable unpacking.
618+
abc, a, b, c = m
619+
self.assertEqual(abc, "abc")
620+
self.assertEqual(a, "a")
621+
self.assertEqual(b, "b")
622+
self.assertEqual(c, "c")
623+
# A sequence pattern with literal sub-patterns must match the groups.
624+
match m:
625+
case [_, "a", "b", "c"]:
626+
pass
627+
case _:
628+
self.fail()
629+
# A sequence pattern with capture sub-patterns binds each group.
630+
match re.match(r"(\d+)-(\d+)-(\d+)", "2025-05-07"):
631+
case [_, year, month, day]:
632+
self.assertEqual(year, "2025")
633+
self.assertEqual(month, "05")
634+
self.assertEqual(day, "07")
635+
case _:
636+
self.fail()
604637

605638
def test_re_fullmatch(self):
606639
# Issue 16203: Proposal: add re.fullmatch() method.

Modules/_sre/sre.c

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2431,8 +2431,31 @@ match_group(PyObject *op, PyObject* args)
24312431
return result;
24322432
}
24332433

2434+
/* Sequence length slot (installed as Py_sq_length): report self->groups
 * as the length of the match object. The accompanying test expects
 * len() == 4 for a pattern with three capture groups, so this count
 * includes group 0 (the whole match).
 */
static Py_ssize_t
2435+
match_length(PyObject *op)
2436+
{
2437+
MatchObject *self = _MatchObject_CAST(op);
2438+
return self->groups;
2439+
}
2440+
2441+
/* Sequence item slot (installed as Py_sq_item): return the group at
 * integer position `index`.
 *
 * An index outside [0, self->groups) yields NULL with IndexError
 * ("no such group"). Otherwise the lookup is delegated to
 * match_getslice_by_index() with Py_None as the default value.
 */
static PyObject*
2442+
match_item(PyObject *op, Py_ssize_t index)
2443+
{
2444+
MatchObject *self = _MatchObject_CAST(op);
2445+
2446+
if (index < 0 || index >= self->groups) {
2447+
/* raise IndexError if we were given a bad group number */
/* NOTE(review): nothing earlier in this function can set an exception,
 * so the PyErr_Occurred() guard looks redundant here -- confirm. */
2448+
if (!PyErr_Occurred()) {
2449+
PyErr_SetString(PyExc_IndexError, "no such group");
2450+
}
2451+
return NULL;
2452+
}
2453+
2454+
return match_getslice_by_index(self, index, Py_None);
2455+
}
2456+
24342457
static PyObject*
2435-
match_getitem(PyObject *op, PyObject* name)
2458+
match_subscript(PyObject *op, PyObject* name)
24362459
{
24372460
MatchObject *self = _MatchObject_CAST(op);
24382461
return match_getslice(self, name, Py_None);
@@ -3268,12 +3291,12 @@ static PyType_Slot match_slots[] = {
32683291
{Py_tp_traverse, match_traverse},
32693292
{Py_tp_clear, match_clear},
32703293

3271-
/* As mapping.
3272-
*
3273-
* Match objects do not support length or assignment, but do support
3274-
* __getitem__.
3275-
*/
3276-
{Py_mp_subscript, match_getitem},
3294+
// Sequence protocol
3295+
{Py_sq_length, match_length},
3296+
{Py_sq_item, match_item},
3297+
3298+
// Support group names provided as subscripts
3299+
{Py_mp_subscript, match_subscript},
32773300

32783301
{0, NULL},
32793302
};
@@ -3282,7 +3305,7 @@ static PyType_Spec match_spec = {
32823305
.name = "re.Match",
32833306
.basicsize = sizeof(MatchObject),
32843307
.itemsize = sizeof(Py_ssize_t),
3285-
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3308+
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_SEQUENCE | Py_TPFLAGS_IMMUTABLETYPE |
32863309
Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
32873310
.slots = match_slots,
32883311
};

0 commit comments

Comments
 (0)