Merge pull request pandas-dev#6888 from cpcloud/single-group-extract-match-naming

cpcloud · cpcloud · commit d2e1abff612a · 2014-04-15T21:22:38.000-04:00
BUG: properly rename single group match in Series.str.extract()
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -374,6 +374,8 @@ Bug Fixes
 - Bug in ``groupby.get_group`` where a datetlike wasn't always accepted (:issue:`5267`)
 - Bug in ``DatetimeIndex.tz_localize`` and ``DatetimeIndex.tz_convert`` affects to NaT (:issue:`5546`)
 - Bug in arithmetic operations affecting to NaT (:issue:`6873`)
+- Bug in ``Series.str.extract`` where the resulting ``Series`` from a single
+  group match wasn't renamed to the group name (:issue:`6888`)
 
 pandas 0.13.1
 -------------
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
@@ -387,6 +387,13 @@ def f(x):
     return _na_map(f, arr, na)
 
 
+def _get_single_group_name(rx):
+    """Return the last name in ``rx.groupindex``, or None if it is empty."""
+    # Meant for one-group patterns, where at most one name can exist.
+    group_names = list(rx.groupindex)
+    return group_names[-1] if group_names else None
+
+
 def str_extract(arr, pat, flags=0):
     """
     Find groups in each string using passed regular expression
@@ -452,7 +459,7 @@ def f(x):
             return empty_row
     if regex.groups == 1:
         result = Series([f(val)[0] for val in arr],
-                        name=regex.groupindex.get(1),
+                        name=_get_single_group_name(regex),
                         index=arr.index)
     else:
         names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
@@ -556,6 +556,11 @@ def test_extract(self):
         exp = DataFrame([['A', '1'], ['B', '2'], ['C', NA]], columns=['letter', 'number'])
         tm.assert_frame_equal(result, exp)
 
+        # single group renames series properly
+        s = Series(['A1', 'A2'])
+        result = s.str.extract(r'(?P<uno>A)\d')
+        tm.assert_equal(result.name, 'uno')
+
         # GH6348
         # not passing index to the extractor
         def check_index(index):