Description
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- (optional) I have confirmed this bug exists on the master branch of pandas.
Here's the code:
In [9]:
s = pd.Series([1, 3, 5, np.nan, 6, 8])
df = pd.DataFrame({'A': s,'F': 'foo'})
df.loc[1,'F']=np.nan
df
Out[9]:
A F
0 1.0 foo
1 3.0 NaN
2 5.0 foo
3 NaN foo
4 6.0 foo
5 8.0 foo
In [10]:
df['A']=df.groupby(['F'])['A'].transform(lambda x: x.fillna(x.mean()))
Out[10]:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-111-5c30febb854f> in <module>
----> 1 df['A']=df.groupby(['F'])['A'].transform(lambda x: x.fillna(x.mean()))
d:\python36\lib\site-packages\pandas\core\groupby\generic.py in transform(self, func, engine, engine_kwargs, *args, **kwargs)
492 if not isinstance(func, str):
493 return self._transform_general(
--> 494 func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
495 )
496
d:\python36\lib\site-packages\pandas\core\groupby\generic.py in _transform_general(self, func, engine, engine_kwargs, *args, **kwargs)
560
561 result.name = self._selected_obj.name
--> 562 result.index = self._selected_obj.index
563 return result
564
d:\python36\lib\site-packages\pandas\core\generic.py in __setattr__(self, name, value)
5152 try:
5153 object.__getattribute__(self, name)
-> 5154 return object.__setattr__(self, name, value)
5155 except AttributeError:
5156 pass
pandas\_libs\properties.pyx in pandas._libs.properties.AxisProperty.__set__()
d:\python36\lib\site-packages\pandas\core\series.py in _set_axis(self, axis, labels, fastpath)
422 if not fastpath:
423 # The ensure_index call above ensures we have an Index object
--> 424 self._mgr.set_axis(axis, labels)
425
426 # ndarray compatibility
d:\python36\lib\site-packages\pandas\core\internals\managers.py in set_axis(self, axis, new_labels)
225 if new_len != old_len:
226 raise ValueError(
--> 227 f"Length mismatch: Expected axis has {old_len} elements, new "
228 f"values have {new_len} elements"
229 )
ValueError: Length mismatch: Expected axis has 5 elements, new values have 6 elements
Problem description
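`groupby(...).transform(...)` raises `ValueError: Length mismatch` when the grouping column contains NaN. The row whose `F` value is NaN appears to be excluded from the groups, so the transformed result has 5 elements while the original index has 6. If I first convert column `F` to `str` (so the missing value becomes the string `'nan'` and forms its own group), the same call works as expected: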
In [11]:
df['F'] = df['F'].astype('str')
df['A']=df.groupby(['F'])['A'].transform(lambda x: x.fillna(x.mean()))
df
Out[11]:
A F
0 1.0 foo
1 3.0 nan
2 5.0 foo
3 5.0 foo
4 6.0 foo
5 8.0 foo
Output of pd.show_versions()
INSTALLED VERSIONS
commit : b5958ee
python : 3.6.8.final.0
python-bits : 64
OS : Windows
OS-release : 10
Version : 10.0.19041
machine : AMD64
processor : Intel64 Family 6 Model 94 Stepping 3, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : None.None
pandas : 1.1.5
numpy : 1.17.2
pytz : 2018.9
dateutil : 2.7.5
pip : 19.3.1
setuptools : 41.4.0
Cython : 0.29.3
pytest : None
hypothesis : None
sphinx : 2.4.4
blosc : None
feather : None
xlsxwriter : None
lxml.etree : 4.3.4
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 2.11.3
IPython : 7.2.0
pandas_datareader: None
bs4 : 4.9.0
bottleneck : None
fsspec : None
fastparquet : None
gcsfs : None
matplotlib : 3.0.2
numexpr : 2.6.9
odfpy : None
openpyxl : 2.6.2
pandas_gbq : None
pyarrow : None
pytables : None
pyxlsb : None
s3fs : None
scipy : 1.5.4
sqlalchemy : None
tables : None
tabulate : None
xarray : 0.11.2
xlrd : None
xlwt : None
numba : 0.42.0
[paste the output of pd.show_versions()
here leaving a blank line after the details tag]