Closed
Description
Code Sample, a copy-pastable example if possible
>>> import numpy as np
>>>
>>> import pandas as pd
>>>
>>> pd.__version__
'1.0.3'
>>>
>>> df_wide = pd.DataFrame(np.random.randint(1000, size=(1000, 100))).astype("Int64").copy()
>>>
>>> df_wide.mean(numeric_only=True)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Users\simon\pandas\pandas\core\generic.py", line 11215, in stat_func
f, name, axis=axis, skipna=skipna, numeric_only=numeric_only
File "C:\Users\simon\pandas\pandas\core\frame.py", line 7896, in _reduce
res = df._data.reduce(op, axis=1, skipna=skipna, **kwds)
File "C:\Users\simon\pandas\pandas\core\internals\managers.py", line 351, in reduce
bres = func(blk.values, *args, **kwargs)
File "C:\Users\simon\pandas\pandas\core\nanops.py", line 69, in _f
return f(*args, **kwargs)
File "C:\Users\simon\pandas\pandas\core\nanops.py", line 102, in f
if values.size == 0 and kwds.get("min_count") is None:
AttributeError: 'IntegerArray' object has no attribute 'size'
>>>
Problem description
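This is a regression from 0.25.3: there, `df_wide.mean(numeric_only=True)` on an all-`Int64` DataFrame returns the column means (see Expected Output below), whereas on 1.0.3 it raises the `AttributeError` shown above. Bisecting points to the following commit: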
0aa48f7 is the first bad commit
commit 0aa48f7
Author: jbrockmendel jbrockmendel@gmail.com
Date: Wed Jan 1 09:18:20 2020 -0800
PERF: perform reductions block-wise (#29847)
On master the same call still fails, but with a different error, `AttributeError: 'int' object has no attribute 'dtype'`:
>>> df_wide.mean(numeric_only=True)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Users\simon\pandas\pandas\core\generic.py", line 11114, in stat_func
func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
File "C:\Users\simon\pandas\pandas\core\frame.py", line 7990, in _reduce
res = df._data.reduce(blk_func)
File "C:\Users\simon\pandas\pandas\core\internals\managers.py", line 362, in reduce
bres = func(blk.values, *args, **kwargs)
File "C:\Users\simon\pandas\pandas\core\frame.py", line 7985, in blk_func
return op(values, axis=0, skipna=skipna, **kwds)
File "C:\Users\simon\pandas\pandas\core\nanops.py", line 120, in f
result = bn_func(values, axis=axis, **kwds)
File "<__array_function__ internals>", line 6, in nanmean
File "C:\Users\simon\Anaconda3\envs\pandas-dev\lib\site-packages\numpy\lib\nanfunctions.py", line 952, in nanmean
avg = _divide_by_count(tot, cnt, out=out)
File "C:\Users\simon\Anaconda3\envs\pandas-dev\lib\site-packages\numpy\lib\nanfunctions.py", line 219, in _divide_by_count
return a.dtype.type(a / b)
AttributeError: 'int' object has no attribute 'dtype'
Expected Output
>>> import numpy as np
>>>
>>> import pandas as pd
>>>
>>> pd.__version__
'0.25.3'
>>>
>>> df_wide = pd.DataFrame(np.random.randint(1000, size=(1000, 100))).astype("Int64").copy()
>>>
>>> df_wide.mean(numeric_only=True)
0 520.057
1 507.735
2 501.618
3 506.590
4 501.500
...
95 507.594
96 483.273
97 506.330
98 497.068
99 508.118
Length: 100, dtype: float64
>>>