Skip to content

Commit

Permalink
BUG: Fix DataFrame.astype(ExtensionDtype) with duplicate column names
Browse files Browse the repository at this point in the history
  • Loading branch information
jschendel authored and Pingviinituutti committed Feb 28, 2019
1 parent 9ed29b7 commit cd44f94
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1639,6 +1639,7 @@ Conversion
- Bug in :meth:`DataFrame.combine_first` in which column types were unexpectedly converted to float (:issue:`20699`)
- Bug in :meth:`DataFrame.clip` in which column types are not preserved and are cast to float (:issue:`24162`)
- Bug in :meth:`DataFrame.clip` in which the result has wrong numeric values when the column order of the dataframes doesn't match (:issue:`20911`)
- Bug in :meth:`DataFrame.astype` where converting to an extension dtype when duplicate column names are present causes a ``RecursionError`` (:issue:`24704`)

Strings
^^^^^^^
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5670,9 +5670,10 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
results.append(results.append(col.copy() if copy else col))

elif is_extension_array_dtype(dtype) and self.ndim > 1:
# GH 18099: columnwise conversion to categorical
# and extension dtype
results = (self[col].astype(dtype, copy=copy) for col in self)
# GH 18099/22869: columnwise conversion to extension dtype
# GH 24704: use iloc to handle duplicate column names
results = (self.iloc[:, i].astype(dtype, copy=copy)
for i in range(len(self.columns)))

else:
# else, only a single dtype is given
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,17 @@ def test_astype_extension_dtypes_1d(self, dtype):
tm.assert_frame_equal(df.astype(dtype), expected1)
tm.assert_frame_equal(df.astype('int64').astype(dtype), expected1)

# Regression test: DataFrame.astype with an extension dtype must work when
# the frame has duplicate column names (this used to raise RecursionError).
@pytest.mark.parametrize("dtype", ['category', 'Int64'])
def test_astype_extension_dtypes_duplicate_col(self, dtype):
# GH 24704
# Two Series sharing the name 'a', so the concatenated frame ends up with
# duplicate column labels.
a1 = Series([0, np.nan, 4], name='a')
a2 = Series([np.nan, 3, 5], name='a')
df = concat([a1, a2], axis=1)

# Casting the whole frame must match casting each column on its own and
# concatenating the results.
result = df.astype(dtype)
expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1)
assert_frame_equal(result, expected)

@pytest.mark.parametrize('dtype', [
{100: 'float64', 200: 'uint64'}, 'category', 'float64'])
def test_astype_column_metadata(self, dtype):
Expand Down

0 comments on commit cd44f94

Please sign in to comment.