Skip to content

Commit d2e1abf

Browse files
committed
Merge pull request pandas-dev#6888 from cpcloud/single-group-extract-match-naming
BUG: properly rename single group match in Series.str.extract()
2 parents f418d6c + fa57c0d commit d2e1abf

File tree

3 files changed

+15
-1
lines changed

3 files changed

+15
-1
lines changed

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,8 @@ Bug Fixes
374374
- Bug in ``groupby.get_group`` where a datetimelike wasn't always accepted (:issue:`5267`)
375375
- Bug in ``DatetimeIndex.tz_localize`` and ``DatetimeIndex.tz_convert`` affecting NaT (:issue:`5546`)
376376
- Bug in arithmetic operations affecting NaT (:issue:`6873`)
377+
- Bug in ``Series.str.extract`` where the resulting ``Series`` from a single
378+
group match wasn't renamed to the group name
377379

378380
pandas 0.13.1
379381
-------------

pandas/core/strings.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,13 @@ def f(x):
387387
return _na_map(f, arr, na)
388388

389389

390+
def _get_single_group_name(rx):
    """
    Return the name of the named group of a compiled regular expression.

    Parameters
    ----------
    rx : compiled regular expression
        Any object exposing a ``groupindex`` mapping whose keys are the
        group names.

    Returns
    -------
    name : str or None
        The last key of ``rx.groupindex``, or None when the pattern has no
        named groups.
    """
    # The names are the *keys* of groupindex, so they cannot be looked up
    # by group number with ``groupindex.get(1)``.
    names = list(rx.groupindex.keys())
    # An unnamed pattern is an ordinary case, not an error: test for it
    # directly rather than popping and catching IndexError.
    return names[-1] if names else None
395+
396+
390397
def str_extract(arr, pat, flags=0):
391398
"""
392399
Find groups in each string using passed regular expression
@@ -452,7 +459,7 @@ def f(x):
452459
return empty_row
453460
if regex.groups == 1:
454461
result = Series([f(val)[0] for val in arr],
455-
name=regex.groupindex.get(1),
462+
name=_get_single_group_name(regex),
456463
index=arr.index)
457464
else:
458465
names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))

pandas/tests/test_strings.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,11 @@ def test_extract(self):
556556
exp = DataFrame([['A', '1'], ['B', '2'], ['C', NA]], columns=['letter', 'number'])
557557
tm.assert_frame_equal(result, exp)
558558

559+
# single group renames series properly
560+
s = Series(['A1', 'A2'])
561+
result = s.str.extract(r'(?P<uno>A)\d')
562+
tm.assert_equal(result.name, 'uno')
563+
559564
# GH6348
560565
# not passing index to the extractor
561566
def check_index(index):

0 commit comments

Comments
 (0)