Skip to content

Commit 07bdfb8

Browse files
committed
Merge pull request pandas-dev#8176 from jreback/expand
BUG/API: Previously an enlargement with a mixed-dtype frame would act unlike append (related GH2578)
2 parents 59c175f + 14d92d6 commit 07bdfb8

File tree

3 files changed

+70
-15
lines changed

3 files changed

+70
-15
lines changed

doc/source/v0.15.0.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,19 @@ API changes
296296

297297
To insert a NaN, you must explicitly use ``np.nan``. See the :ref:`docs <missing.inserting>`.
298298

299+
- Previously, enlarging a mixed-dtype frame by setting a new row did not preserve dtypes, unlike ``.append``, which does preserve them. Enlargement now behaves like ``.append`` and preserves dtypes (related :issue:`2578`, :issue:`8176`):
300+
301+
.. ipython:: python
302+
303+
df = DataFrame([[True, 1],[False, 2]], columns = ["female","fitness"])
304+
df
305+
df.dtypes
306+
307+
# dtypes are now preserved
308+
df.loc[2] = df.loc[1]
309+
df
310+
df.dtypes
311+
299312
.. _whatsnew_0150.dt:
300313

301314
.dt accessor

pandas/core/indexing.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -303,12 +303,27 @@ def _setitem_with_indexer(self, indexer, value):
303303
"cannot set a frame with no defined columns"
304304
)
305305

306-
index = self.obj._get_axis(0)
307-
labels = _safe_append_to_index(index, indexer)
308-
self.obj._data = self.obj.reindex_axis(labels, 0)._data
306+
# append a Series
307+
if isinstance(value, Series):
308+
309+
value = value.reindex(index=self.obj.columns,copy=True)
310+
value.name = indexer
311+
312+
# a list-list
313+
else:
314+
315+
# must have conforming columns
316+
if com.is_list_like(value):
317+
if len(value) != len(self.obj.columns):
318+
raise ValueError(
319+
"cannot set a row with mismatched columns"
320+
)
321+
322+
value = Series(value,index=self.obj.columns,name=indexer)
323+
324+
self.obj._data = self.obj.append(value)._data
309325
self.obj._maybe_update_cacher(clear=True)
310-
return getattr(self.obj, self.name).__setitem__(indexer,
311-
value)
326+
return self.obj
312327

313328
# set using setitem (Panel and > dims)
314329
elif self.ndim >= 3:

pandas/tests/test_indexing.py

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2812,7 +2812,8 @@ def f():
28122812
df.loc[1] = df.loc[2]
28132813
assert_frame_equal(df,expected)
28142814

2815-
expected = DataFrame(dict({ 'A' : [0,2,4,4], 'B' : [1,3,5,5] }),dtype='float64')
2815+
# like 2578, partial setting with dtype preservation
2816+
expected = DataFrame(dict({ 'A' : [0,2,4,4], 'B' : [1,3,5,5] }))
28162817
df = df_orig.copy()
28172818
df.loc[3] = df.loc[2]
28182819
assert_frame_equal(df,expected)
@@ -2864,6 +2865,41 @@ def f():
28642865
p.loc[:,:,'C'] = Series([30,32],index=p_orig.items)
28652866
assert_panel_equal(p,expected)
28662867

2868+
def test_partial_setting_mixed_dtype(self):
2869+
2870+
# in a mixed dtype environment, try to preserve dtypes
2871+
# by appending
2872+
df = DataFrame([[True, 1],[False, 2]],
2873+
columns = ["female","fitness"])
2874+
2875+
s = df.loc[1].copy()
2876+
s.name = 2
2877+
expected = df.append(s)
2878+
2879+
df.loc[2] = df.loc[1]
2880+
assert_frame_equal(df, expected)
2881+
2882+
# columns will align
2883+
df = DataFrame(columns=['A','B'])
2884+
df.loc[0] = Series(1,index=range(4))
2885+
assert_frame_equal(df,DataFrame(columns=['A','B'],index=[0]))
2886+
2887+
# columns will align
2888+
df = DataFrame(columns=['A','B'])
2889+
df.loc[0] = Series(1,index=['B'])
2890+
assert_frame_equal(df,DataFrame([[np.nan, 1]], columns=['A','B'],index=[0],dtype='float64'))
2891+
2892+
# list-like must conform
2893+
df = DataFrame(columns=['A','B'])
2894+
def f():
2895+
df.loc[0] = [1,2,3]
2896+
self.assertRaises(ValueError, f)
2897+
2898+
# these are unavoidably coerced to float (as the value is a list-like to begin with)
2899+
df = DataFrame(columns=['A','B'])
2900+
df.loc[3] = [6,7]
2901+
assert_frame_equal(df,DataFrame([[6,7]],index=[3],columns=['A','B'],dtype='float64'))
2902+
28672903
def test_series_partial_set(self):
28682904
# partial set with new index
28692905
# Regression from GH4825
@@ -3013,15 +3049,6 @@ def f():
30133049
assert_frame_equal(df,DataFrame([[1]],index=['foo'],columns=[1]))
30143050
assert_frame_equal(df,df2)
30153051

3016-
df = DataFrame(columns=['A','B'])
3017-
df.loc[3] = [6,7]
3018-
assert_frame_equal(df,DataFrame([[6,7]],index=[3],columns=['A','B']))
3019-
3020-
# no label overlap
3021-
df = DataFrame(columns=['A','B'])
3022-
df.loc[0] = Series(1,index=range(4))
3023-
assert_frame_equal(df,DataFrame(columns=['A','B'],index=[0]))
3024-
30253052
# no index to start
30263053
expected = DataFrame({ 0 : Series(1,index=range(4)) },columns=['A','B',0])
30273054

0 commit comments

Comments
 (0)