Clean up warnings (#1238)

eddiebergman · mfeurer · eddiebergman · commit b1a453b34e13 · 2022-08-18T20:08:47.000+02:00
* np.bool deprecation * Invalid escape sequence \_ * Series specify dtype * drop na requires keyword args deprecation * unspecified np.int size deprecated, use int instead * deprecated unspecified np.int precision * Element wise comparison failed, will raise error in the future * Specify explicit dtype for empty series * metric warnings for mismatch between y_pred and y_true label count * Quantile transformer n_quantiles larger than n_samples warning ignored * Silenced convergence warnings * pass sklearn args as keywords * np.bool deprecation * Invalid escape sequence \_ * Series specify dtype * drop na requires keyword args deprecation * unspecified np.int size deprecated, use int instead * deprecated unspecified np.int precision * Element wise comparison failed, will raise error in the future * Specify explicit dtype for empty series * metric warnings for mismatch between y_pred and y_true label count * Quantile transformer n_quantiles larger than n_samples warning ignored * Silenced convergence warnings * pass sklearn args as keywords * flake8'd * flake8'd * Fixed CategoricalImputation not accounting for sparse matrices * Updated to use distro for linux distribution * Ignore convergence warnings for gaussian process regressor * Averaging metrics now use zero_division parameter * Readded scorers to module scope * flake8'd * Fix * Fixed dtype for metalearner no run * Catch gaussian process iterative fit warning * Moved ignored warnings to tests * Correctly type pd.Series * Revert back to usual iterative fit * Readded missing iteration increment * Removed odd backslash * Fixed imputer for sparse matrices * Ignore warnings we are aware about in tests * Flake'd: * Revert "Fixed imputer for sparse matrices" This reverts commit 05675ad. * Revert "Revert "Fixed imputer for sparse matrices"" This reverts commit d031b0d. 
* Back to default values * Reverted to default behaviour with comment * Added xfail test to document * flaked * Fixed test, moved to np.testing for assertion * Update autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py Co-authored-by: Matthias Feurer <feurerm@informatik.uni-freiburg.de> Co-authored-by: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
diff --git a/autosklearn/metalearning/optimizers/metalearn_optimizer/metalearner.py b/autosklearn/metalearning/optimizers/metalearn_optimizer/metalearner.py
@@ -111,7 +111,7 @@ def _learn(self, exclude_double_configurations=True):
                 except KeyError:
                     # TODO should I really except this?
                     self.logger.info("Could not find runs for instance %s" % task_id)
-                    runs[task_id] = pd.Series([], name=task_id, dtype=float)
+                    runs[task_id] = pd.Series([], name=task_id, dtype=np.float64)
 
             runs = pd.DataFrame(runs)
 
diff --git a/autosklearn/pipeline/components/regression/gaussian_process.py b/autosklearn/pipeline/components/regression/gaussian_process.py
@@ -37,9 +37,6 @@ def fit(self, X, y):
             normalize_y=True
         )
 
-        if y.ndim == 2 and y.shape[1] == 1:
-            y = y.flatten()
-
         self.estimator.fit(X, y)
 
         return self
diff --git a/test/test_pipeline/test_classification.py b/test/test_pipeline/test_classification.py
@@ -7,6 +7,7 @@
 import tempfile
 import unittest
 import unittest.mock
+import warnings
 
 from joblib import Memory
 import numpy as np
@@ -18,6 +19,7 @@
 import sklearn.ensemble
 import sklearn.svm
 from sklearn.utils.validation import check_is_fitted
+from sklearn.exceptions import ConvergenceWarning
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
@@ -32,7 +34,42 @@
 from autosklearn.pipeline.constants import \
     DENSE, SPARSE, UNSIGNED_DATA, PREDICTIONS, SIGNED_DATA, INPUT
 
-from test.test_pipeline.ignored_warnings import classifier_warnings, ignore_warnings
+ignored_warnings = [
+    (
+        UserWarning, (  # From QuantileTransformer
+            r"n_quantiles \(\d+\) is greater than the total number of samples \(\d+\)\."
+            r" n_quantiles is set to n_samples\."
+        )
+    ),
+    (
+        UserWarning, (  # From FastICA
+            r"n_components is too large: it will be set to \d+"
+        )
+
+    ),
+    (
+        ConvergenceWarning, (  # From Liblinear
+            r"Liblinear failed to converge, increase the number of iterations\."
+        )
+    ),
+    (
+        ConvergenceWarning, (  # From SGD
+            r"Maximum number of iteration reached before convergence\. Consider increasing"
+            r" max_iter to improve the fit\."
+        )
+    ),
+    (
+        ConvergenceWarning, (  # From MLP
+            r"Stochastic Optimizer: Maximum iterations \(\d+\) reached and the"
+            r" optimization hasn't converged yet\."
+        )
+    ),
+    (
+        UserWarning, (  # From LDA (Linear Discriminant Analysis)
+            r"Variables are collinear"
+        )
+    ),
+]
 
 
 class DummyClassifier(AutoSklearnClassificationAlgorithm):
@@ -498,7 +535,10 @@ def _test_configurations(
                     check_is_fitted(step)
 
             try:
-                with ignore_warnings(classifier_warnings):
+                with warnings.catch_warnings():
+                    for category, message in ignored_warnings:
+                        warnings.filterwarnings('ignore', category=category, message=message)
+
                     cls.fit(X_train, Y_train)
 
                 # After fit, all components should be tagged as fitted
diff --git a/test/test_pipeline/test_regression.py b/test/test_pipeline/test_regression.py
@@ -4,6 +4,7 @@
 import tempfile
 import unittest
 import unittest.mock
+import warnings
 
 from joblib import Memory
 import numpy as np
@@ -13,6 +14,7 @@
 import sklearn.ensemble
 import sklearn.svm
 from sklearn.utils.validation import check_is_fitted
+from sklearn.exceptions import ConvergenceWarning
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
@@ -26,7 +28,32 @@
 from autosklearn.pipeline.util import get_dataset
 from autosklearn.pipeline.constants import SPARSE, DENSE, SIGNED_DATA, UNSIGNED_DATA, PREDICTIONS
 
-from test.test_pipeline.ignored_warnings import regressor_warnings, ignore_warnings
+ignored_warnings = [
+    (
+        UserWarning, (  # From QuantileTransformer
+            r"n_quantiles \(\d+\) is greater than the total number of samples \(\d+\)\."
+            r" n_quantiles is set to n_samples\."
+        )
+    ),
+    (
+        ConvergenceWarning, (  # From GaussianProcesses
+            r"The optimal value found for dimension \d+ of parameter \w+ is close"
+            r" to the specified (upper|lower) bound .*(Increasing|Decreasing) the bound"
+            r" and calling fit again may find a better value."
+        )
+    ),
+    (
+        UserWarning, (  # From FastICA
+            r"n_components is too large: it will be set to \d+"
+        )
+    ),
+    (
+        ConvergenceWarning, (  # From SGD
+            r"Maximum number of iteration reached before convergence\. Consider increasing"
+            r" max_iter to improve the fit\."
+        )
+    ),
+]
 
 
 class SimpleRegressionPipelineTest(unittest.TestCase):
@@ -180,19 +207,21 @@ def _test_configurations(self, configurations_space, make_sparse=False,
                     check_is_fitted(step)
 
             try:
-                with ignore_warnings(regressor_warnings):
-                    cls.fit(X_train, Y_train)
+                with warnings.catch_warnings():
+                    for category, message in ignored_warnings:
+                        warnings.filterwarnings('ignore', category=category, message=message)
 
-                # After fit, all components should be tagged as fitted
-                # by sklearn. Check is fitted raises an exception if that
-                # is not the case
-                try:
-                    for name, step in cls.named_steps.items():
-                        check_is_fitted(step)
-                except sklearn.exceptions.NotFittedError:
-                    self.fail("config={} raised NotFittedError unexpectedly!".format(
-                        config
-                    ))
+                    cls.fit(X_train, Y_train)
+                    # After fit, all components should be tagged as fitted
+                    # by sklearn. Check is fitted raises an exception if that
+                    # is not the case
+                    try:
+                        for name, step in cls.named_steps.items():
+                            check_is_fitted(step)
+                    except sklearn.exceptions.NotFittedError:
+                        self.fail("config={} raised NotFittedError unexpectedly!".format(
+                            config
+                        ))
 
                 cls.predict(X_test)
             except MemoryError:

Original file line number	Diff line number	Diff line change
`@@ -37,9 +37,6 @@ def fit(self, X, y):`
`37`	`37`	`normalize_y=True`
`38`	`38`	`)`
`39`	`39`
`40`		`- if y.ndim == 2 and y.shape[1] == 1:`
`41`		`- y = y.flatten()`
`42`		`-`
`43`	`40`	`self.estimator.fit(X, y)`
`44`	`41`
`45`	`42`	`return self`