"TypeError: unorderable types" in Python3 when initializing Categorical with array values including tuples #15457 #22080
Description
Code Sample, a copy-pastable example if possible
In [2]: pd.Categorical(np.array([1, 'a'], dtype='object'), ordered=True)
Out[2]:
[1, a]
Categories (2, object): [1 < a]
In [3]: pd.Categorical(np.array([1, (1, 2)], dtype='object'), ordered=True)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/nobackup/repo/pandas/pandas/core/algorithms.py in factorize(values, sort, order, na_sentinel, size_hint)
634 try:
--> 635 order = uniques.argsort()
636 order2 = order.argsort()
TypeError: unorderable types: tuple() < int()
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
~/nobackup/repo/pandas/pandas/core/arrays/categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
397 try:
--> 398 codes, categories = factorize(values, sort=True)
399 except TypeError:
~/nobackup/repo/pandas/pandas/util/_decorators.py in wrapper(*args, **kwargs)
177 kwargs[new_arg_name] = new_arg_value
--> 178 return func(*args, **kwargs)
179 return wrapper
~/nobackup/repo/pandas/pandas/core/algorithms.py in factorize(values, sort, order, na_sentinel, size_hint)
642 na_sentinel=na_sentinel,
--> 643 assume_unique=True)
644
~/nobackup/repo/pandas/pandas/core/sorting.py in safe_sort(values, labels, na_sentinel, assume_unique)
447 # unorderable in py3 if mixed str/int
--> 448 ordered = sort_mixed(values)
449 else:
~/nobackup/repo/pandas/pandas/core/sorting.py in sort_mixed(values)
440 dtype=bool)
--> 441 nums = np.sort(values[~str_pos])
442 strs = np.sort(values[str_pos])
~/.local/lib/python3.5/site-packages/numpy/core/fromnumeric.py in sort(a, axis, kind, order)
846 a = asanyarray(a).copy(order="K")
--> 847 a.sort(axis=axis, kind=kind, order=order)
848 return a
TypeError: unorderable types: tuple() < int()
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-3-02fe967cd88d> in <module>()
----> 1 pd.Categorical(np.array([1, (1, 2)], dtype='object'), ordered=True)
~/nobackup/repo/pandas/pandas/core/arrays/categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
402 # raise, as we don't have a sortable data structure and so
403 # the user should give us one by specifying categories
--> 404 raise TypeError("'values' is not ordered, please "
405 "explicitly specify the categories order "
406 "by passing in a categories argument.")
TypeError: 'values' is not ordered, please explicitly specify the categories order by passing in a categories argument.
In [4]: pd.Categorical(np.array([1, (1, 2)], dtype='object'), ordered=False)
Out[4]:
[1, (1, 2)]
Categories (2, object): [1, (1, 2)]
Problem description
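The problem lies in `pd.core.sorting.safe_sort`, which copes with strings mixed with integers or with tuples, but raises when an integer is mixed with a tuple: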
In [2]: pd.core.sorting.safe_sort(['a', 1])
Out[2]: array([1, 'a'], dtype=object)
In [3]: pd.core.sorting.safe_sort([1,'a'])
Out[3]: array([1, 'a'], dtype=object)
In [4]: pd.core.sorting.safe_sort(['a', (1, 2)])
Out[4]: array([(1, 2), 'a'], dtype=object)
In [5]: pd.core.sorting.safe_sort([(1, 2), 'a'])
Out[5]: array([(1, 2), 'a'], dtype=object)
In [6]: pd.core.sorting.safe_sort([1, (1, 2)])
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-6-14e22dd6134e> in <module>()
----> 1 pd.core.sorting.safe_sort([1, (1, 2)])
/home/nobackup/repo/pandas/pandas/core/sorting.py in safe_sort(values, labels, na_sentinel, assume_unique)
446 if PY3 and lib.infer_dtype(values) == 'mixed-integer':
447 # unorderable in py3 if mixed str/int
--> 448 ordered = sort_mixed(values)
449 else:
450 try:
/home/nobackup/repo/pandas/pandas/core/sorting.py in sort_mixed(values)
439 str_pos = np.array([isinstance(x, string_types) for x in values],
440 dtype=bool)
--> 441 nums = np.sort(values[~str_pos])
442 strs = np.sort(values[str_pos])
443 return np.concatenate([nums, np.asarray(strs, dtype=object)])
~/.local/lib/python3.5/site-packages/numpy/core/fromnumeric.py in sort(a, axis, kind, order)
845 else:
846 a = asanyarray(a).copy(order="K")
--> 847 a.sort(axis=axis, kind=kind, order=order)
848 return a
849
TypeError: unorderable types: tuple() < int()
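If `safe_sort` is meant to support sorting values of different types, it must do so more generally: `sort_mixed` only separates strings from non-strings, so the non-string remainder (here an int and a tuple) is still passed to `np.sort` together and is unorderable in Python 3.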
Expected Output
A sorted array.
Output of pd.show_versions()
INSTALLED VERSIONS
commit: 24fd90f
python: 3.5.3.final.0
python-bits: 64
OS: Linux
OS-release: 4.9.0-6-amd64
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: it_IT.UTF-8
LOCALE: it_IT.UTF-8
pandas: 0.24.0.dev0+360.g24fd90f66.dirty
pytest: 3.5.0
pip: 9.0.1
setuptools: 39.2.0
Cython: 0.28.4
numpy: 1.14.3
scipy: 0.19.0
pyarrow: None
xarray: None
IPython: 6.2.1
sphinx: 1.5.6
patsy: 0.5.0
dateutil: 2.7.3
pytz: 2018.4
blosc: None
bottleneck: 1.2.0dev
tables: 3.3.0
numexpr: 2.6.1
feather: 0.3.1
matplotlib: 2.2.2.post1634.dev0+ge8120cf6d
openpyxl: 2.3.0
xlrd: 1.0.0
xlwt: 1.3.0
xlsxwriter: 0.9.6
lxml: 4.1.1
bs4: 4.5.3
html5lib: 0.999999999
sqlalchemy: 1.0.15
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: 0.2.1
gcsfs: None