Skip to content

BUG: Fix index order for Index.intersection() #15583

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add new tests
  • Loading branch information
Albert Villanova del Moral authored and jreback committed Mar 25, 2017
commit 3c200fe734423e0c5c58fc9c1e198d52022b200c
86 changes: 86 additions & 0 deletions pandas/tests/frame/test_merge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-

from __future__ import print_function
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, these need to go in pandas/tests/tools/test_merge.py. Just fit them in where appropriate.

.join is fine here, as it's a method used directly on DataFrame. .merge is a direct call to pd.merge, where LOTS of tests already exist. Test location is very important to avoid future confusion.

In fact, I suspect most of these are duplicated tests; please only add as appropriate.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see you already added these to the correct location. There is no point in duplicating tests; delete these.


import numpy as np
import pandas as pd
import pandas.util.testing as tm


class TestDataFrameMerge(object):
    """Index-on-index ``DataFrame.merge`` checks for each ``how`` value.

    Both tests merge the same pair of single-column frames. Their integer
    indexes share the labels 1 and 2; the first frame's index is listed in
    descending order (2, 1, 0) and the second in ascending order (1, 2, 3).
    """

    def test_merge_on_indexes(self):
        """Merge on both indexes without ``sort`` and compare each result."""
        left = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0])
        right = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3])

        inner_rows = pd.DataFrame({'a': [20, 10], 'b': [200, 100]},
                                  index=[2, 1])
        left_rows = pd.DataFrame({'a': [20, 10, 0],
                                  'b': [200, 100, np.nan]},
                                 index=[2, 1, 0])
        right_rows = pd.DataFrame({'a': [10, 20, np.nan],
                                   'b': [100, 200, 300]},
                                  index=[1, 2, 3])
        outer_rows = pd.DataFrame({'a': [0, 10, 20, np.nan],
                                   'b': [np.nan, 100, 200, 300]},
                                  index=[0, 1, 2, 3])

        # (extra merge keywords, expected frame); the empty dict exercises
        # the default ``how``, which is expected to match how='inner'.
        scenarios = [
            ({}, inner_rows),
            ({'how': 'left'}, left_rows),
            ({'how': 'right'}, right_rows),
            ({'how': 'inner'}, inner_rows),
            ({'how': 'outer'}, outer_rows),
        ]
        for extra, expected in scenarios:
            result = left.merge(right, left_index=True, right_index=True,
                                **extra)
            tm.assert_frame_equal(result, expected)

    def test_merge_on_indexes_sort(self):
        """Merge on both indexes with ``sort=True`` and compare each result."""
        frame_a = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0])
        frame_b = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3])

        inner_rows = pd.DataFrame({'a': [10, 20], 'b': [100, 200]},
                                  index=[1, 2])
        left_rows = pd.DataFrame({'a': [0, 10, 20],
                                  'b': [np.nan, 100, 200]},
                                 index=[0, 1, 2])
        right_rows = pd.DataFrame({'a': [10, 20, np.nan],
                                   'b': [100, 200, 300]},
                                  index=[1, 2, 3])
        swapped_right_rows = pd.DataFrame([[np.nan, 0],
                                           [100, 10],
                                           [200, 20]],
                                          columns=['b', 'a'],
                                          index=[0, 1, 2])
        outer_rows = pd.DataFrame({'a': [0, 10, 20, np.nan],
                                   'b': [np.nan, 100, 200, 300]},
                                  index=[0, 1, 2, 3])

        # (calling frame, other frame, extra merge keywords, expected frame).
        # The fourth scenario swaps the operands so that the right-hand
        # frame is the one whose index is not already ascending.
        scenarios = [
            (frame_a, frame_b, {}, inner_rows),
            (frame_a, frame_b, {'how': 'left'}, left_rows),
            (frame_a, frame_b, {'how': 'right'}, right_rows),
            (frame_b, frame_a, {'how': 'right'}, swapped_right_rows),
            (frame_a, frame_b, {'how': 'inner'}, inner_rows),
            (frame_a, frame_b, {'how': 'outer'}, outer_rows),
        ]
        for caller, other, extra, expected in scenarios:
            result = caller.merge(other, left_index=True, right_index=True,
                                  sort=True, **extra)
            tm.assert_frame_equal(result, expected)
78 changes: 78 additions & 0 deletions pandas/tests/tools/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1355,3 +1355,81 @@ def test_dtype_on_merged_different(self, change, how, left, right):
np.dtype('int64')],
index=['X', 'Y', 'Z'])
assert_series_equal(result, expected)

class TestMergeOnIndexes(object):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, you added them here; good.


def test_merge_on_indexes(self):
# Fixtures: two single-column frames with integer indexes. Labels 1 and 2
# appear in both; 0 appears only in df1 and 3 only in df2. df1's index is
# listed in descending order, df2's in ascending order.
df1 = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0])
df2 = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3])

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would change this to be parametrized like join above (e.g. a matrix of how/sort).

# `how` omitted: the expected frame is the same as for the explicit
# how='inner' case further down -- only the shared labels, in the order
# 2, 1.
result = pd.merge(df1, df2, left_index=True, right_index=True)
expected = pd.DataFrame({'a': [20, 10], 'b': [200, 100]},
index=[2, 1])
tm.assert_frame_equal(result, expected)

# how='left': all three df1 labels are expected, in the order 2, 1, 0;
# label 0 has no df2 row, so 'b' is NaN there.
result = pd.merge(df1, df2, left_index=True, right_index=True, how='left')
expected = pd.DataFrame({'a': [20, 10, 0], 'b': [200, 100, np.nan]},
index=[2, 1, 0])
tm.assert_frame_equal(result, expected)

# how='right': all three df2 labels are expected, in the order 1, 2, 3;
# label 3 has no df1 row, so 'a' is NaN there.
result = pd.merge(df1, df2, left_index=True, right_index=True, how='right')
expected = pd.DataFrame({'a': [10, 20, np.nan], 'b': [100, 200, 300]},
index=[1, 2, 3])
tm.assert_frame_equal(result, expected)

# how='inner' given explicitly: only the shared labels, in the order 2, 1.
result = pd.merge(df1, df2, left_index=True, right_index=True, how='inner')
expected = pd.DataFrame({'a': [20, 10], 'b': [200, 100]},
index=[2, 1])
tm.assert_frame_equal(result, expected)

# how='outer': every label from either frame is expected, in ascending
# order 0..3, with NaN wherever one side has no row for the label.
result = pd.merge(df1, df2, left_index=True, right_index=True, how='outer')
expected = pd.DataFrame({'a': [0, 10, 20, np.nan],
'b': [np.nan, 100, 200, 300]},
index=[0, 1, 2, 3])
tm.assert_frame_equal(result, expected)

def test_merge_on_indexes_sort(self):
df1 = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0])
df2 = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3])

# default how='inner'
result = pd.merge(df1, df2, left_index=True, right_index=True, sort=True)
expected = pd.DataFrame({'a': [10, 20], 'b': [100, 200]},
index=[1, 2])
tm.assert_frame_equal(result, expected)

# how='left'
result = pd.merge(df1, df2, left_index=True, right_index=True, how='left', sort=True)
expected = pd.DataFrame({'a': [0, 10, 20], 'b': [np.nan, 100, 200]},
index=[0, 1, 2])
tm.assert_frame_equal(result, expected)

# how='right' (already sorted)
result = pd.merge(df1, df2, left_index=True, right_index=True, how='right', sort=True)
expected = pd.DataFrame({'a': [10, 20, np.nan], 'b': [100, 200, 300]},
index=[1, 2, 3])
tm.assert_frame_equal(result, expected)

# how='right'
result = pd.merge(df2, df1, left_index=True, right_index=True, how='right', sort=True)
expected = pd.DataFrame([[np.nan, 0], [100, 10], [200, 20]],
columns=['b', 'a'], index=[0, 1, 2])
tm.assert_frame_equal(result, expected)

# how='inner'
result = pd.merge(df1, df2, left_index=True, right_index=True, how='inner', sort=True)
expected = pd.DataFrame({'a': [10, 20], 'b': [100, 200]},
index=[1, 2])
tm.assert_frame_equal(result, expected)

# how='outer'
result = pd.merge(df1, df2, left_index=True, right_index=True, how='outer', sort=True)
expected = pd.DataFrame({'a': [0, 10, 20, np.nan],
'b': [np.nan, 100, 200, 300]},
index=[0, 1, 2, 3])
tm.assert_frame_equal(result, expected)