Skip to content

Commit

Permalink
BUG: make order of index from pd.concat deterministic (pandas-dev#17364)
Browse files Browse the repository at this point in the history
  • Loading branch information
toobaz authored and alanbato committed Nov 10, 2017
1 parent c0f221c commit 867c512
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 8 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ Reshaping
- Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`)
- :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`).
- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`)
- Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`)

Numeric
^^^^^^^
Expand Down
14 changes: 14 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,3 +629,17 @@ def _random_state(state=None):
else:
raise ValueError("random_state must be an integer, a numpy "
"RandomState, or None")


def _get_distinct_objs(objs):
"""
Return a list with distinct elements of "objs" (different ids).
Preserves order.
"""
ids = set()
res = []
for obj in objs:
if not id(obj) in ids:
ids.add(id(obj))
res.append(obj)
return res
9 changes: 2 additions & 7 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
'PeriodIndex', 'DatetimeIndex',
'_new_Index', 'NaT',
'_ensure_index', '_get_na_value', '_get_combined_index',
'_get_objs_combined_axis',
'_get_distinct_indexes', '_union_indexes',
'_get_objs_combined_axis', '_union_indexes',
'_get_consensus_names',
'_all_indexes_same']

Expand All @@ -41,7 +40,7 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0):

def _get_combined_index(indexes, intersect=False):
# TODO: handle index names!
indexes = _get_distinct_indexes(indexes)
indexes = com._get_distinct_objs(indexes)
if len(indexes) == 0:
return Index([])
if len(indexes) == 1:
Expand All @@ -55,10 +54,6 @@ def _get_combined_index(indexes, intersect=False):
return _ensure_index(union)


def _get_distinct_indexes(indexes):
return list(dict((id(x), x) for x in indexes).values())


def _union_indexes(indexes):
if len(indexes) == 0:
raise AssertionError('Must have at least 1 Index to union')
Expand Down
13 changes: 12 additions & 1 deletion pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from numpy.random import randn

from datetime import datetime
from pandas.compat import StringIO, iteritems
from pandas.compat import StringIO, iteritems, PY2
import pandas as pd
from pandas import (DataFrame, concat,
read_csv, isna, Series, date_range,
Expand Down Expand Up @@ -1944,6 +1944,17 @@ def test_concat_categoricalindex(self):
index=exp_idx)
tm.assert_frame_equal(result, exp)

def test_concat_order(self):
    # GH 17344
    # One frame with columns ['a', 1, None], followed by 100 frames whose
    # columns hold the same labels in the opposite order.
    first = pd.DataFrame(index=range(3), columns=['a', 1, None])
    frames = [first]
    frames.extend(pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
                  for _ in range(100))

    result = pd.concat(frames).columns

    # The result columns must match the first frame's columns; on PY2 the
    # comparison is made against their sorted form.
    expected = first.columns.sort_values() if PY2 else first.columns
    tm.assert_index_equal(result, expected)


@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
@pytest.mark.parametrize('dt', np.sctypes['float'])
Expand Down

0 comments on commit 867c512

Please sign in to comment.