ray-project · devin-petersohn · May 6, 2018 · May 3, 2018 · May 3, 2018 · May 3, 2018
diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py
@@ -3137,9 +3137,52 @@ def radd(self, other, axis='columns', level=None, fill_value=None):
 
     def rank(self, axis=0, method='average', numeric_only=None,
              na_option='keep', ascending=True, pct=False):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+
+        """
+        Compute numerical data ranks (1 through n) along axis.
+        Equal values are assigned a rank that is the [method] of
+        the ranks of those values.
+
+        Args:
+            axis (int): 0 or 'index' for row-wise,
+                        1 or 'columns' for column-wise
+            method: {'average', 'min', 'max', 'first', 'dense'}
+                Specifies which method to use for equal values
+            numeric_only (boolean):
+                Include only float, int, boolean data.
+            na_option: {'keep', 'top', 'bottom'}
+                Specifies how to handle NA values
+            ascending (boolean):
+                Decides ranking order
+            pct (boolean):
+                Computes percentage ranking of data
+        Returns:
+            A new DataFrame
+        """
+
+        def rank_helper(df):
+            return df.rank(axis=axis, method=method,
+                           numeric_only=numeric_only,
+                           na_option=na_option,
+                           ascending=ascending, pct=pct)
+
+        axis = pd.DataFrame()._get_axis_number(axis)
+
+        if (axis == 1):
+            new_cols = self.dtypes[self.dtypes.apply(
+                                   lambda x: is_numeric_dtype(x))].index
+            result = _map_partitions(rank_helper,
+                                     self._row_partitions)
+            return DataFrame(row_partitions=result,
+                             columns=new_cols,
+                             index=self.index)
+
+        if (axis == 0):
+            result = _map_partitions(rank_helper,
+                                     self._col_partitions)
+            return DataFrame(col_partitions=result,
+                             columns=self.columns,
+                             index=self.index)
 
     def rdiv(self, other, axis='columns', level=None, fill_value=None):
         return self._single_df_op_helper(

diff --git a/python/ray/dataframe/test/test_dataframe.py b/python/ray/dataframe/test/test_dataframe.py
@@ -227,6 +227,7 @@ def test_int_dataframe():
     test_quantile(ray_df, pandas_df, .75)
     test_describe(ray_df, pandas_df)
     test_diff(ray_df, pandas_df)
+    test_rank(ray_df, pandas_df)
 
     test_all(ray_df, pandas_df)
     test_any(ray_df, pandas_df)
@@ -392,6 +393,7 @@ def test_float_dataframe():
     test_quantile(ray_df, pandas_df, .75)
     test_describe(ray_df, pandas_df)
     test_diff(ray_df, pandas_df)
+    test_rank(ray_df, pandas_df)
 
     test_all(ray_df, pandas_df)
     test_any(ray_df, pandas_df)
@@ -560,6 +562,9 @@ def test_mixed_dtype_dataframe():
     test_quantile(ray_df, pandas_df, .75)
     test_describe(ray_df, pandas_df)
 
+    # TODO: Re-enable once Pandas-20962 is resolved.
+    # test_rank(ray_df, pandas_df)
+
     test_all(ray_df, pandas_df)
     test_any(ray_df, pandas_df)
     test___getitem__(ray_df, pandas_df)
@@ -718,6 +723,7 @@ def test_nan_dataframe():
     test_quantile(ray_df, pandas_df, .75)
     test_describe(ray_df, pandas_df)
     test_diff(ray_df, pandas_df)
+    test_rank(ray_df, pandas_df)
 
     test_all(ray_df, pandas_df)
     test_any(ray_df, pandas_df)
@@ -2377,11 +2383,10 @@ def test_radd():
     test_inter_df_math_right_ops("radd")
 
 
-def test_rank():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.rank()
+@pytest.fixture
+def test_rank(ray_df, pandas_df):
+    assert(ray_df_equals_pandas(ray_df.rank(), pandas_df.rank()))
+    assert(ray_df_equals_pandas(ray_df.rank(axis=1), pandas_df.rank(axis=1)))
 
 
 def test_rdiv():