Closed
Description
Code Sample, a copy-pastable example if possible
X = pd.DataFrame(data=np.random.rand(7, 3), columns=list('XYZ'), index=list('zxcvbnm'))
X['grouping'] = ['group 1', 'group 1', 'group 1', 2, 2 , 2, 'group 1']
X.groupby('grouping').aggregate(lambda x: x.tolist())
This is the exception and traceback that the code above raises:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/Users/nicolaireeve/miniconda2/envs/skbiodev/lib/python3.4/site-packages/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
3482 result = self._aggregate_multiple_funcs(
-> 3483 [arg], _level=_level, _axis=self.axis)
3484 result.columns = Index(
/Users/nicolaireeve/miniconda2/envs/skbiodev/lib/python3.4/site-packages/pandas/core/base.py in _aggregate_multiple_funcs(self, arg, _level, _axis)
690 if not len(results):
--> 691 raise ValueError("no results")
692
ValueError: no results
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
/Users/nicolaireeve/miniconda2/envs/skbiodev/lib/python3.4/site-packages/pandas/core/groupby.py in _aggregate_generic(self, func, *args, **kwargs)
3508 for name, data in self:
-> 3509 result[name] = self._try_cast(func(data, *args, **kwargs),
3510 data)
<ipython-input-25-18b24604e98f> in <lambda>(x)
2 X['grouping'] = ['group 1', 'group 1', 'group 1', 2, 2 , 2, 'group 1']
----> 3 X.groupby('grouping').aggregate(lambda x: x.tolist())
/Users/nicolaireeve/miniconda2/envs/skbiodev/lib/python3.4/site-packages/pandas/core/generic.py in __getattr__(self, name)
3080 return self[name]
-> 3081 return object.__getattribute__(self, name)
3082
AttributeError: 'DataFrame' object has no attribute 'tolist'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-25-18b24604e98f> in <module>()
1 X = pd.DataFrame(data=np.random.rand(7, 3), columns=list('XYZ'), index=list('zxcvbnm'))
2 X['grouping'] = ['group 1', 'group 1', 'group 1', 2, 2 , 2, 'group 1']
----> 3 X.groupby('grouping').aggregate(lambda x: x.tolist())
/Users/nicolaireeve/miniconda2/envs/skbiodev/lib/python3.4/site-packages/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
4034 versionadded=''))
4035 def aggregate(self, arg, *args, **kwargs):
-> 4036 return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)
4037
4038 agg = aggregate
/Users/nicolaireeve/miniconda2/envs/skbiodev/lib/python3.4/site-packages/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
3486 name=self._selected_obj.columns.name)
3487 except:
-> 3488 result = self._aggregate_generic(arg, *args, **kwargs)
3489
3490 if not self.as_index:
/Users/nicolaireeve/miniconda2/envs/skbiodev/lib/python3.4/site-packages/pandas/core/groupby.py in _aggregate_generic(self, func, *args, **kwargs)
3510 data)
3511 except Exception:
-> 3512 return self._aggregate_item_by_item(func, *args, **kwargs)
3513 else:
3514 for name in self.indices:
/Users/nicolaireeve/miniconda2/envs/skbiodev/lib/python3.4/site-packages/pandas/core/groupby.py in _aggregate_item_by_item(self, func, *args, **kwargs)
3554 # GH6337
3555 if not len(result_columns) and errors is not None:
-> 3556 raise errors
3557
3558 return DataFrame(result, columns=result_columns)
/Users/nicolaireeve/miniconda2/envs/skbiodev/lib/python3.4/site-packages/pandas/core/groupby.py in _aggregate_item_by_item(self, func, *args, **kwargs)
3539 grouper=self.grouper)
3540 result[item] = self._try_cast(
-> 3541 colg.aggregate(func, *args, **kwargs), data)
3542 except ValueError:
3543 cannot_agg.append(item)
/Users/nicolaireeve/miniconda2/envs/skbiodev/lib/python3.4/site-packages/pandas/core/groupby.py in aggregate(self, func_or_funcs, *args, **kwargs)
2885 result = self._aggregate_named(func_or_funcs, *args, **kwargs)
2886
-> 2887 index = Index(sorted(result), name=self.grouper.names[0])
2888 ret = Series(result, index=index)
2889
TypeError: unorderable types: str() < int()
Problem description
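If a grouping vector is of mixed type (here, both str and int values) and aggregate is used after groupby(...), an exception is raised. Execution reaches the line `index = Index(sorted(result), name=self.grouper.names[0])` in pandas/core/groupby.py (line 2887 in the traceback above) and fails there because, in Python 3, sorted() cannot order values of mixed types such as str and int.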
Expected Output
This is what we would expect to see if the exception were not raised. This output was obtained by using a groupby column whose values are all of a single type. In this instance, the integer 2 was changed to the string '2'.
X = pd.DataFrame(data=np.random.rand(7, 3), columns=list('XYZ'), index=list('zxcvbnm'))
X['grouping'] = ['group 1', 'group 1', 'group 1', '2', '2' , '2', 'group 1']
X.groupby('grouping').aggregate(lambda x: x.tolist())
X \
grouping
2 [0.9219120799240533, 0.6439069401684864, 0.035...
group 1 [0.6884732212797477, 0.326906484996646, 0.6718...
Y \
grouping
2 [0.7796923828539405, 0.7668459596180287, 0.868...
group 1 [0.20259205506065203, 0.9138593138141587, 0.95...
Z
grouping
2 [0.9863526134877422, 0.6342347501171951, 0.873...
group 1 [0.054465751087565906, 0.9026560581041934, 0.9...
Output of pd.show_versions()
# Paste the output of pd.show_versions() here
INSTALLED VERSIONS
------------------
commit: None
python: 3.4.5.final.0
python-bits: 64
OS: Darwin
OS-release: 16.6.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8
pandas: 0.20.3
pytest: None
pip: 9.0.1
setuptools: 35.0.2
Cython: None
numpy: 1.13.1
scipy: 0.19.0
xarray: None
IPython: 6.0.0
sphinx: None
patsy: None
dateutil: 2.6.1
pytz: 2017.2
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: 2.0.2
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: None
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: None
s3fs: None
pandas_gbq: None
pandas_datareader: None
cc @ElDeveloper