Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inference for float64 random forests using FIL #4739

Merged
merged 2 commits into from
May 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 2 additions & 13 deletions python/cuml/ensemble/randomforest_common.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,8 @@ class BaseRandomForestModel(Base):
check_rows=self.n_rows, check_cols=1)

if self.dtype == np.float64:
warnings.warn("To use pickling or GPU-based prediction first "
"train using float32 data to fit the estimator")
warnings.warn("To use pickling first train using float32 data "
"to fit the estimator")

max_feature_val = self._get_max_feat_val()
if type(self.min_samples_leaf) == float:
Expand Down Expand Up @@ -348,18 +348,7 @@ class BaseRandomForestModel(Base):
_, n_rows, n_cols, dtype = \
input_to_cuml_array(X, order='F',
check_cols=self.n_cols)

if dtype == np.float64 and not convert_dtype:
warnings.warn("GPU based predict only accepts "
"np.float32 data. The model was "
"trained on np.float64 data hence "
"cannot use GPU-based prediction! "
"\nDefaulting to CPU-based Prediction. "
"\nTo predict on float-64 data, set "
"parameter predict_model = 'CPU'")
return self._predict_model_on_cpu(X, convert_dtype=convert_dtype)
treelite_handle = self._obtain_treelite_handle()

storage_type = \
_check_fil_parameter_validity(depth=self.max_depth,
fil_sparse_format=fil_sparse_format,
Expand Down
26 changes: 2 additions & 24 deletions python/cuml/ensemble/randomforestclassifier.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -562,10 +562,7 @@ class RandomForestClassifier(BaseRandomForestModel,
----------
X : {}
predict_model : String (default = 'GPU')
'GPU' to predict using the GPU, 'CPU' otherwise. The 'GPU' can only
be used if the model was trained on float32 data and `X` is float32
or convert_dtype is set to True. Also the 'GPU' should only be
used for classification problems.
'GPU' to predict using the GPU, 'CPU' otherwise.
algo : string (default = ``'auto'``)
This is optional and required only while performing the
predict operation on the GPU.
Expand Down Expand Up @@ -605,16 +602,6 @@ class RandomForestClassifier(BaseRandomForestModel,
if predict_model == "CPU":
preds = self._predict_model_on_cpu(X,
convert_dtype=convert_dtype)
elif self.dtype == np.float64:
warnings.warn("GPU based predict only accepts "
"np.float32 data. The model was "
"trained on np.float64 data hence "
"cannot use GPU-based prediction! "
"\nDefaulting to CPU-based Prediction. "
"\nTo predict on float-64 data, set "
"parameter predict_model = 'CPU'")
preds = self._predict_model_on_cpu(X,
convert_dtype=convert_dtype)
else:
preds = \
self._predict_model_on_gpu(X=X, output_class=True,
Expand All @@ -633,8 +620,7 @@ class RandomForestClassifier(BaseRandomForestModel,
fil_sparse_format='auto') -> CumlArray:
"""
Predicts class probabilities for X. This function uses the GPU
implementation of predict. Therefore, data with 'dtype = np.float32'
should be used with this function.
implementation of predict.

Parameters
----------
Expand Down Expand Up @@ -671,14 +657,6 @@ class RandomForestClassifier(BaseRandomForestModel,
-------
y : {}
"""
if self.dtype == np.float64:
raise TypeError("GPU based predict only accepts np.float32 data. \
In order use the GPU predict the model should \
also be trained using a np.float32 dataset. \
If you would like to use np.float64 dtype \
then please use the CPU based predict by \
setting predict_model = 'CPU'")

preds_proba = \
self._predict_model_on_gpu(X, output_class=True,
algo=algo,
Expand Down
14 changes: 1 addition & 13 deletions python/cuml/ensemble/randomforestregressor.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -544,9 +544,7 @@ class RandomForestRegressor(BaseRandomForestModel,
----------
X : {}
predict_model : String (default = 'GPU')
'GPU' to predict using the GPU, 'CPU' otherwise. The GPU can only
be used if the model was trained on float32 data and `X` is float32
or convert_dtype is set to True.
'GPU' to predict using the GPU, 'CPU' otherwise.
algo : string (default = 'auto')
This is optional and required only while performing the
predict operation on the GPU.
Expand Down Expand Up @@ -582,16 +580,6 @@ class RandomForestRegressor(BaseRandomForestModel,
"""
if predict_model == "CPU":
preds = self._predict_model_on_cpu(X, convert_dtype)
elif self.dtype == np.float64:
warnings.warn("GPU based predict only accepts "
"np.float32 data. The model was "
"trained on np.float64 data hence "
"cannot use GPU-based prediction! "
"\nDefaulting to CPU-based Prediction. "
"\nTo predict on float-64 data, set "
"parameter predict_model = 'CPU'")
preds = self._predict_model_on_cpu(X,
convert_dtype=convert_dtype)
else:
preds = self._predict_model_on_gpu(
X=X,
Expand Down
77 changes: 21 additions & 56 deletions python/cuml/tests/test_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def test_tweedie_convergence(max_depth, split_criterion):
@pytest.mark.parametrize(
"max_samples", [unit_param(1.0), quality_param(0.90), stress_param(0.95)]
)
@pytest.mark.parametrize("datatype", [np.float32])
@pytest.mark.parametrize("datatype", [np.float32, np.float64])
@pytest.mark.parametrize("max_features", [1.0, "auto", "log2", "sqrt"])
def test_rf_classification(small_clf, datatype, max_samples, max_features):
use_handle = True
Expand Down Expand Up @@ -310,7 +310,7 @@ def test_rf_classification(small_clf, datatype, max_samples, max_features):
@pytest.mark.parametrize(
"max_samples", [unit_param(1.0), quality_param(0.90), stress_param(0.95)]
)
@pytest.mark.parametrize("datatype", [np.float32])
@pytest.mark.parametrize("datatype", [np.float32, np.float64])
@pytest.mark.parametrize(
"max_features,n_bins",
[
Expand Down Expand Up @@ -379,7 +379,7 @@ def test_rf_regression(
assert fil_r2 >= (cu_r2 - 0.02)


@pytest.mark.parametrize("datatype", [np.float32])
@pytest.mark.parametrize("datatype", [np.float32, np.float64])
def test_rf_classification_seed(small_clf, datatype):

X, y = small_clf
Expand Down Expand Up @@ -455,30 +455,13 @@ def test_rf_classification_float64(small_clf, datatype, convert_dtype):
assert cu_acc >= (sk_acc - 0.07)

# predict using cuML's GPU based prediction
if datatype[0] == np.float32 and convert_dtype:
fil_preds = cuml_model.predict(
X_test, predict_model="GPU", convert_dtype=convert_dtype
)
fil_preds = np.reshape(fil_preds, np.shape(cu_preds))
fil_preds = cuml_model.predict(
X_test, predict_model="GPU", convert_dtype=convert_dtype
)
fil_preds = np.reshape(fil_preds, np.shape(cu_preds))

fil_acc = accuracy_score(y_test, fil_preds)
assert fil_acc >= (cu_acc - 0.07) # to be changed to 0.02. see issue #3910: https://github.com/rapidsai/cuml/issues/3910 # noqa
# if GPU predict cannot be used, display warning and use CPU predict
elif datatype[1] == np.float64:
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
fil_preds = cuml_model.predict(
X_test, predict_model="GPU",
convert_dtype=convert_dtype
)
assert("GPU based predict only accepts "
"np.float32 data. The model was "
"trained on np.float64 data hence "
"cannot use GPU-based prediction! "
"\nDefaulting to CPU-based Prediction. "
"\nTo predict on float-64 data, set "
"parameter predict_model = 'CPU'"
in str(w[-1].message))
fil_acc = accuracy_score(y_test, fil_preds)
assert fil_acc >= (cu_acc - 0.07) # to be changed to 0.02. see issue #3910: https://github.com/rapidsai/cuml/issues/3910 # noqa


@pytest.mark.parametrize(
Expand Down Expand Up @@ -513,30 +496,12 @@ def test_rf_regression_float64(large_reg, datatype):
assert cu_r2 >= (sk_r2 - 0.09)

# predict using cuML's GPU based prediction
if datatype[0] == np.float32:
fil_preds = cuml_model.predict(
X_test, predict_model="GPU", convert_dtype=True
)
fil_preds = np.reshape(fil_preds, np.shape(cu_preds))
fil_r2 = r2_score(y_test, fil_preds, convert_dtype=datatype[0])
assert fil_r2 >= (cu_r2 - 0.02)

# because datatype[0] != np.float32 or datatype[0] != datatype[1]
# display warning when GPU-predict cannot be used and revert to CPU-predict
elif datatype[1] == np.float64:
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
fil_preds = cuml_model.predict(
X_test, predict_model="GPU"
)
assert("GPU based predict only accepts "
"np.float32 data. The model was "
"trained on np.float64 data hence "
"cannot use GPU-based prediction! "
"\nDefaulting to CPU-based Prediction. "
"\nTo predict on float-64 data, set "
"parameter predict_model = 'CPU'"
in str(w[-1].message))
fil_preds = cuml_model.predict(
X_test, predict_model="GPU", convert_dtype=True
)
fil_preds = np.reshape(fil_preds, np.shape(cu_preds))
fil_r2 = r2_score(y_test, fil_preds, convert_dtype=datatype[0])
assert fil_r2 >= (cu_r2 - 0.02)


def check_predict_proba(test_proba, baseline_proba, y_test, rel_err):
Expand Down Expand Up @@ -624,13 +589,13 @@ def rf_classification(
check_predict_proba(cu_proba_gpu, sk_proba, y_test, 0.1)


@pytest.mark.parametrize("datatype", [(np.float32, np.float32)])
@pytest.mark.parametrize("datatype", [(np.float32, np.float64)])
@pytest.mark.parametrize("array_type", ["dataframe", "numpy"])
def test_rf_classification_multi_class(mclass_clf, datatype, array_type):
rf_classification(datatype, array_type, 1.0, 1.0, mclass_clf)


@pytest.mark.parametrize("datatype", [(np.float32, np.float32)])
@pytest.mark.parametrize("datatype", [(np.float32, np.float64)])
@pytest.mark.parametrize("max_samples", [unit_param(1.0), stress_param(0.95)])
@pytest.mark.parametrize("max_features", [1.0, "auto", "log2", "sqrt"])
def test_rf_classification_proba(
Expand All @@ -639,7 +604,7 @@ def test_rf_classification_proba(
rf_classification(datatype, "numpy", max_features, max_samples, small_clf)


@pytest.mark.parametrize("datatype", [np.float32])
@pytest.mark.parametrize("datatype", [np.float32, np.float64])
@pytest.mark.parametrize(
"fil_sparse_format", ["not_supported", True, "auto", False]
)
Expand Down Expand Up @@ -727,7 +692,7 @@ def test_rf_classification_sparse(
assert fil_acc >= (sk_acc - 0.07)


@pytest.mark.parametrize("datatype", [np.float32])
@pytest.mark.parametrize("datatype", [np.float32, np.float64])
@pytest.mark.parametrize(
"fil_sparse_format", ["not_supported", True, "auto", False]
)
Expand Down Expand Up @@ -817,12 +782,12 @@ def test_rf_regression_sparse(special_reg, datatype, fil_sparse_format, algo):

@pytest.mark.xfail(reason="Need rapidsai/rmm#415 to detect memleak robustly")
@pytest.mark.memleak
@pytest.mark.parametrize("datatype", [np.float32, np.float64])
@pytest.mark.parametrize("fil_sparse_format", [True, False, "auto"])
@pytest.mark.parametrize(
"n_iter", [unit_param(5), quality_param(30), stress_param(80)]
)
def test_rf_memory_leakage(small_clf, fil_sparse_format, n_iter):
datatype = np.float32
def test_rf_memory_leakage(small_clf, datatype, fil_sparse_format, n_iter):
use_handle = True

X, y = small_clf
Expand Down