Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DataFrame] Test bugfixes #2111

Merged
Merged 3 commits on May 22, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions python/ray/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1563,7 +1563,7 @@ def div(self, other, axis='columns', level=None, fill_value=None):
Returns:
A new DataFrame with the Divide applied.
"""
return self._operator_helper(pd.DataFrame.add, other, axis, level,
return self._operator_helper(pd.DataFrame.div, other, axis, level,
fill_value)

def divide(self, other, axis='columns', level=None, fill_value=None):
Expand Down Expand Up @@ -3991,15 +3991,15 @@ def _sort_helper(df, index, axis, *args):
self._col_partitions)

new_columns = self.columns
new_index = self.index.sort_values()
new_index = self.index.sort_values(ascending=ascending)
new_row_parts = None
else:
columns = self.columns
new_row_parts = _map_partitions(
lambda df: _sort_helper(df, columns, axis, *args),
self._row_partitions)

new_columns = self.columns.sort_values()
new_columns = self.columns.sort_values(ascending=ascending)
new_index = self.index
new_column_parts = None

Expand Down
104 changes: 53 additions & 51 deletions python/ray/dataframe/test/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,27 +861,27 @@ def test_inter_df_math(op, simple=False):
pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7],
"col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]})

ray_df_equals_pandas(getattr(ray_df, op)(ray_df),
getattr(pandas_df, op)(pandas_df))
ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))
assert ray_df_equals_pandas(getattr(ray_df, op)(ray_df),
getattr(pandas_df, op)(pandas_df))
assert ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
assert ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))

ray_df2 = rdf.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]})
pandas_df2 = pd.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]})

ray_df_equals_pandas(getattr(ray_df, op)(ray_df2),
getattr(pandas_df, op)(pandas_df2))
assert ray_df_equals_pandas(getattr(ray_df, op)(ray_df2),
getattr(pandas_df, op)(pandas_df2))

list_test = [0, 1, 2, 4]

if not simple:
ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=1),
getattr(pandas_df, op)(list_test, axis=1))
assert ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=1),
getattr(pandas_df, op)(list_test, axis=1))

ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=0),
getattr(pandas_df, op)(list_test, axis=0))
assert ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=0),
getattr(pandas_df, op)(list_test, axis=0))


@pytest.fixture
Expand All @@ -892,18 +892,18 @@ def test_comparison_inter_ops(op):
pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7],
"col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]})

ray_df_equals_pandas(getattr(ray_df, op)(ray_df),
getattr(pandas_df, op)(pandas_df))
ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))
assert ray_df_equals_pandas(getattr(ray_df, op)(ray_df),
getattr(pandas_df, op)(pandas_df))
assert ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
assert ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))

ray_df2 = rdf.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]})
pandas_df2 = pd.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]})

ray_df_equals_pandas(getattr(ray_df2, op)(ray_df2),
getattr(pandas_df2, op)(pandas_df2))
assert ray_df_equals_pandas(getattr(ray_df2, op)(ray_df2),
getattr(pandas_df2, op)(pandas_df2))


@pytest.fixture
Expand All @@ -914,10 +914,10 @@ def test_inter_df_math_right_ops(op):
pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7],
"col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]})

ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))
assert ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
assert ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))


def test_add():
Expand Down Expand Up @@ -1938,14 +1938,14 @@ def test_fillna_datetime_columns(num_partitions=2):

@pytest.fixture
def test_filter(ray_df, pandas_df, by):
ray_df_equals_pandas(ray_df.filter(items=by['items']),
pandas_df.filter(items=by['items']))
assert ray_df_equals_pandas(ray_df.filter(items=by['items']),
pandas_df.filter(items=by['items']))

ray_df_equals_pandas(ray_df.filter(regex=by['regex']),
pandas_df.filter(regex=by['regex']))
assert ray_df_equals_pandas(ray_df.filter(regex=by['regex']),
pandas_df.filter(regex=by['regex']))

ray_df_equals_pandas(ray_df.filter(like=by['like']),
pandas_df.filter(like=by['like']))
assert ray_df_equals_pandas(ray_df.filter(like=by['like']),
pandas_df.filter(like=by['like']))


def test_first():
Expand Down Expand Up @@ -2008,7 +2008,7 @@ def test_gt():

@pytest.fixture
def test_head(ray_df, pandas_df, n=5):
ray_df_equals_pandas(ray_df.head(n), pandas_df.head(n))
assert ray_df_equals_pandas(ray_df.head(n), pandas_df.head(n))


def test_hist():
Expand Down Expand Up @@ -2129,7 +2129,7 @@ def test_join():
for how in join_types:
ray_join = ray_df.join(ray_df2, how=how)
pandas_join = pandas_df.join(pandas_df2, how=how)
ray_df_equals_pandas(ray_join, pandas_join)
assert ray_df_equals_pandas(ray_join, pandas_join)

ray_df3 = rdf.DataFrame({"col7": [1, 2, 3, 5, 6, 7, 8]})

Expand All @@ -2139,7 +2139,7 @@ def test_join():
for how in join_types:
ray_join = ray_df.join([ray_df2, ray_df3], how=how)
pandas_join = pandas_df.join([pandas_df2, pandas_df3], how=how)
ray_df_equals_pandas(ray_join, pandas_join)
assert ray_df_equals_pandas(ray_join, pandas_join)


def test_kurt():
Expand Down Expand Up @@ -2408,7 +2408,7 @@ def test_pop(ray_df, pandas_df):
ray_popped = temp_ray_df.pop('col2')
pandas_popped = temp_pandas_df.pop('col2')
assert ray_popped.sort_index().equals(pandas_popped.sort_index())
ray_df_equals_pandas(temp_ray_df, temp_pandas_df)
assert ray_df_equals_pandas(temp_ray_df, temp_pandas_df)


def test_pow():
Expand Down Expand Up @@ -2759,10 +2759,14 @@ def test_reset_index(ray_df, pandas_df, inplace=False):
assert to_pandas(ray_df_cp).equals(pd_df_cp)


@pytest.mark.skip(reason="dtypes on different partitions may not match up, "
"no fix for this yet")
def test_rfloordiv():
test_inter_df_math_right_ops("rfloordiv")


@pytest.mark.skip(reason="dtypes on different partitions may not match up, "
"no fix for this yet")
def test_rmod():
test_inter_df_math_right_ops("rmod")

Expand Down Expand Up @@ -2792,6 +2796,8 @@ def test_rsub():
test_inter_df_math_right_ops("rsub")


@pytest.mark.skip(reason="dtypes on different partitions may not match up, "
"no fix for this yet")
def test_rtruediv():
test_inter_df_math_right_ops("rtruediv")

Expand Down Expand Up @@ -2893,12 +2899,12 @@ def test_sort_index():
pandas_result = pandas_df.sort_index()
ray_result = ray_df.sort_index()

ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)

pandas_result = pandas_df.sort_index(ascending=False)
ray_result = ray_df.sort_index(ascending=False)

ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)


def test_sort_values():
Expand All @@ -2908,22 +2914,22 @@ def test_sort_values():
pandas_result = pandas_df.sort_values(by=1)
ray_result = ray_df.sort_values(by=1)

ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)

pandas_result = pandas_df.sort_values(by=1, axis=1)
ray_result = ray_df.sort_values(by=1, axis=1)

ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)

pandas_result = pandas_df.sort_values(by=[1, 3])
ray_result = ray_df.sort_values(by=[1, 3])

ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)

pandas_result = pandas_df.sort_values(by=[1, 67], axis=1)
ray_result = ray_df.sort_values(by=[1, 67], axis=1)

ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)


def test_sortlevel():
Expand Down Expand Up @@ -2976,7 +2982,7 @@ def test_swaplevel():

@pytest.fixture
def test_tail(ray_df, pandas_df):
ray_df_equals_pandas(ray_df.tail(), pandas_df.tail())
assert ray_df_equals_pandas(ray_df.tail(), pandas_df.tail())


def test_take():
Expand Down Expand Up @@ -3023,10 +3029,10 @@ def test_to_xarray():

@pytest.fixture
def test_transform(ray_df, pandas_df):
ray_df_equals_pandas(ray_df.transform(lambda df: df.isna()),
pandas_df.transform(lambda df: df.isna()))
ray_df_equals_pandas(ray_df.transform('isna'),
pandas_df.transform('isna'))
assert ray_df_equals_pandas(ray_df.transform(lambda df: df.isna()),
pandas_df.transform(lambda df: df.isna()))
assert ray_df_equals_pandas(ray_df.transform('isna'),
pandas_df.transform('isna'))


def test_truediv():
Expand Down Expand Up @@ -3272,7 +3278,7 @@ def test___delitem__(ray_df, pd_df):
pd_df = pd_df.copy()
ray_df.__delitem__('col1')
pd_df.__delitem__('col1')
ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pd_df)

# Issue 2027
last_label = pd_df.iloc[:, -1].name
Expand Down Expand Up @@ -3348,10 +3354,6 @@ def test_is_copy():
ray_df.is_copy


def test___itruediv__():
test_inter_df_math("__itruediv__", simple=True)


def test___div__():
test_inter_df_math("__div__", simple=True)

Expand Down Expand Up @@ -3416,4 +3418,4 @@ def test_get_dummies():
'B': ['b', 'a', 'c'],
'C': [1, 2, 3]})

ray_df_equals_pandas(rdf.get_dummies(ray_df), pd.get_dummies(pd_df))
assert ray_df_equals_pandas(rdf.get_dummies(ray_df), pd.get_dummies(pd_df))