Skip to content

Commit

Permalink
[DataFrame] Test bugfixes (ray-project#2111)
Browse files Browse the repository at this point in the history
  • Loading branch information
devin-petersohn authored and robertnishihara committed May 22, 2018
1 parent f1fc373 commit 317c945
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 54 deletions.
6 changes: 3 additions & 3 deletions python/ray/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1563,7 +1563,7 @@ def div(self, other, axis='columns', level=None, fill_value=None):
Returns:
A new DataFrame with the Divide applied.
"""
return self._operator_helper(pd.DataFrame.add, other, axis, level,
return self._operator_helper(pd.DataFrame.div, other, axis, level,
fill_value)

def divide(self, other, axis='columns', level=None, fill_value=None):
Expand Down Expand Up @@ -3991,15 +3991,15 @@ def _sort_helper(df, index, axis, *args):
self._col_partitions)

new_columns = self.columns
new_index = self.index.sort_values()
new_index = self.index.sort_values(ascending=ascending)
new_row_parts = None
else:
columns = self.columns
new_row_parts = _map_partitions(
lambda df: _sort_helper(df, columns, axis, *args),
self._row_partitions)

new_columns = self.columns.sort_values()
new_columns = self.columns.sort_values(ascending=ascending)
new_index = self.index
new_column_parts = None

Expand Down
104 changes: 53 additions & 51 deletions python/ray/dataframe/test/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,27 +861,27 @@ def test_inter_df_math(op, simple=False):
pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7],
"col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]})

ray_df_equals_pandas(getattr(ray_df, op)(ray_df),
getattr(pandas_df, op)(pandas_df))
ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))
assert ray_df_equals_pandas(getattr(ray_df, op)(ray_df),
getattr(pandas_df, op)(pandas_df))
assert ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
assert ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))

ray_df2 = rdf.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]})
pandas_df2 = pd.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]})

ray_df_equals_pandas(getattr(ray_df, op)(ray_df2),
getattr(pandas_df, op)(pandas_df2))
assert ray_df_equals_pandas(getattr(ray_df, op)(ray_df2),
getattr(pandas_df, op)(pandas_df2))

list_test = [0, 1, 2, 4]

if not simple:
ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=1),
getattr(pandas_df, op)(list_test, axis=1))
assert ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=1),
getattr(pandas_df, op)(list_test, axis=1))

ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=0),
getattr(pandas_df, op)(list_test, axis=0))
assert ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=0),
getattr(pandas_df, op)(list_test, axis=0))


@pytest.fixture
Expand All @@ -892,18 +892,18 @@ def test_comparison_inter_ops(op):
pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7],
"col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]})

ray_df_equals_pandas(getattr(ray_df, op)(ray_df),
getattr(pandas_df, op)(pandas_df))
ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))
assert ray_df_equals_pandas(getattr(ray_df, op)(ray_df),
getattr(pandas_df, op)(pandas_df))
assert ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
assert ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))

ray_df2 = rdf.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]})
pandas_df2 = pd.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]})

ray_df_equals_pandas(getattr(ray_df2, op)(ray_df2),
getattr(pandas_df2, op)(pandas_df2))
assert ray_df_equals_pandas(getattr(ray_df2, op)(ray_df2),
getattr(pandas_df2, op)(pandas_df2))


@pytest.fixture
Expand All @@ -914,10 +914,10 @@ def test_inter_df_math_right_ops(op):
pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7],
"col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]})

ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))
assert ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
assert ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))


def test_add():
Expand Down Expand Up @@ -1938,14 +1938,14 @@ def test_fillna_datetime_columns(num_partitions=2):

@pytest.fixture
def test_filter(ray_df, pandas_df, by):
ray_df_equals_pandas(ray_df.filter(items=by['items']),
pandas_df.filter(items=by['items']))
assert ray_df_equals_pandas(ray_df.filter(items=by['items']),
pandas_df.filter(items=by['items']))

ray_df_equals_pandas(ray_df.filter(regex=by['regex']),
pandas_df.filter(regex=by['regex']))
assert ray_df_equals_pandas(ray_df.filter(regex=by['regex']),
pandas_df.filter(regex=by['regex']))

ray_df_equals_pandas(ray_df.filter(like=by['like']),
pandas_df.filter(like=by['like']))
assert ray_df_equals_pandas(ray_df.filter(like=by['like']),
pandas_df.filter(like=by['like']))


def test_first():
Expand Down Expand Up @@ -2008,7 +2008,7 @@ def test_gt():

@pytest.fixture
def test_head(ray_df, pandas_df, n=5):
ray_df_equals_pandas(ray_df.head(n), pandas_df.head(n))
assert ray_df_equals_pandas(ray_df.head(n), pandas_df.head(n))


def test_hist():
Expand Down Expand Up @@ -2129,7 +2129,7 @@ def test_join():
for how in join_types:
ray_join = ray_df.join(ray_df2, how=how)
pandas_join = pandas_df.join(pandas_df2, how=how)
ray_df_equals_pandas(ray_join, pandas_join)
assert ray_df_equals_pandas(ray_join, pandas_join)

ray_df3 = rdf.DataFrame({"col7": [1, 2, 3, 5, 6, 7, 8]})

Expand All @@ -2139,7 +2139,7 @@ def test_join():
for how in join_types:
ray_join = ray_df.join([ray_df2, ray_df3], how=how)
pandas_join = pandas_df.join([pandas_df2, pandas_df3], how=how)
ray_df_equals_pandas(ray_join, pandas_join)
assert ray_df_equals_pandas(ray_join, pandas_join)


def test_kurt():
Expand Down Expand Up @@ -2408,7 +2408,7 @@ def test_pop(ray_df, pandas_df):
ray_popped = temp_ray_df.pop('col2')
pandas_popped = temp_pandas_df.pop('col2')
assert ray_popped.sort_index().equals(pandas_popped.sort_index())
ray_df_equals_pandas(temp_ray_df, temp_pandas_df)
assert ray_df_equals_pandas(temp_ray_df, temp_pandas_df)


def test_pow():
Expand Down Expand Up @@ -2759,10 +2759,14 @@ def test_reset_index(ray_df, pandas_df, inplace=False):
assert to_pandas(ray_df_cp).equals(pd_df_cp)


@pytest.mark.skip(reason="dtypes on different partitions may not match up, "
"no fix for this yet")
def test_rfloordiv():
test_inter_df_math_right_ops("rfloordiv")


@pytest.mark.skip(reason="dtypes on different partitions may not match up, "
"no fix for this yet")
def test_rmod():
test_inter_df_math_right_ops("rmod")

Expand Down Expand Up @@ -2792,6 +2796,8 @@ def test_rsub():
test_inter_df_math_right_ops("rsub")


@pytest.mark.skip(reason="dtypes on different partitions may not match up, "
"no fix for this yet")
def test_rtruediv():
test_inter_df_math_right_ops("rtruediv")

Expand Down Expand Up @@ -2893,12 +2899,12 @@ def test_sort_index():
pandas_result = pandas_df.sort_index()
ray_result = ray_df.sort_index()

ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)

pandas_result = pandas_df.sort_index(ascending=False)
ray_result = ray_df.sort_index(ascending=False)

ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)


def test_sort_values():
Expand All @@ -2908,22 +2914,22 @@ def test_sort_values():
pandas_result = pandas_df.sort_values(by=1)
ray_result = ray_df.sort_values(by=1)

ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)

pandas_result = pandas_df.sort_values(by=1, axis=1)
ray_result = ray_df.sort_values(by=1, axis=1)

ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)

pandas_result = pandas_df.sort_values(by=[1, 3])
ray_result = ray_df.sort_values(by=[1, 3])

ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)

pandas_result = pandas_df.sort_values(by=[1, 67], axis=1)
ray_result = ray_df.sort_values(by=[1, 67], axis=1)

ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)


def test_sortlevel():
Expand Down Expand Up @@ -2976,7 +2982,7 @@ def test_swaplevel():

@pytest.fixture
def test_tail(ray_df, pandas_df):
ray_df_equals_pandas(ray_df.tail(), pandas_df.tail())
assert ray_df_equals_pandas(ray_df.tail(), pandas_df.tail())


def test_take():
Expand Down Expand Up @@ -3023,10 +3029,10 @@ def test_to_xarray():

@pytest.fixture
def test_transform(ray_df, pandas_df):
ray_df_equals_pandas(ray_df.transform(lambda df: df.isna()),
pandas_df.transform(lambda df: df.isna()))
ray_df_equals_pandas(ray_df.transform('isna'),
pandas_df.transform('isna'))
assert ray_df_equals_pandas(ray_df.transform(lambda df: df.isna()),
pandas_df.transform(lambda df: df.isna()))
assert ray_df_equals_pandas(ray_df.transform('isna'),
pandas_df.transform('isna'))


def test_truediv():
Expand Down Expand Up @@ -3272,7 +3278,7 @@ def test___delitem__(ray_df, pd_df):
pd_df = pd_df.copy()
ray_df.__delitem__('col1')
pd_df.__delitem__('col1')
ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pd_df)

# Issue 2027
last_label = pd_df.iloc[:, -1].name
Expand Down Expand Up @@ -3348,10 +3354,6 @@ def test_is_copy():
ray_df.is_copy


def test___itruediv__():
test_inter_df_math("__itruediv__", simple=True)


def test___div__():
test_inter_df_math("__div__", simple=True)

Expand Down Expand Up @@ -3416,4 +3418,4 @@ def test_get_dummies():
'B': ['b', 'a', 'c'],
'C': [1, 2, 3]})

ray_df_equals_pandas(rdf.get_dummies(ray_df), pd.get_dummies(pd_df))
assert ray_df_equals_pandas(rdf.get_dummies(ray_df), pd.get_dummies(pd_df))

0 comments on commit 317c945

Please sign in to comment.