From ad717075f0fed247b7de10c4f8cb9394f3f8990a Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 5 Dec 2022 16:59:30 +0100 Subject: [PATCH] MAINT validate parameters for public functions (#956) --- imblearn/datasets/_imbalance.py | 25 ++-- imblearn/datasets/_zenodo.py | 12 ++ imblearn/datasets/tests/test_imbalance.py | 1 - imblearn/metrics/_classification.py | 135 +++++++++++++++--- imblearn/pipeline.py | 23 ++- imblearn/tests/test_common.py | 1 - imblearn/tests/test_pipeline.py | 24 ++-- imblearn/tests/test_public_functions.py | 105 ++++++++++++++ imblearn/utils/_param_validation.py | 24 +--- imblearn/utils/tests/test_param_validation.py | 36 ----- 10 files changed, 277 insertions(+), 109 deletions(-) create mode 100644 imblearn/tests/test_public_functions.py diff --git a/imblearn/datasets/_imbalance.py b/imblearn/datasets/_imbalance.py index ffa822037..8c1c15aec 100644 --- a/imblearn/datasets/_imbalance.py +++ b/imblearn/datasets/_imbalance.py @@ -6,11 +6,22 @@ # License: MIT from collections import Counter +from collections.abc import Mapping from ..under_sampling import RandomUnderSampler from ..utils import check_sampling_strategy - - +from ..utils._param_validation import validate_params + + +@validate_params( + { + "X": ["array-like"], + "y": ["array-like"], + "sampling_strategy": [Mapping, callable, None], + "random_state": ["random_state"], + "verbose": ["boolean"], + } +) def make_imbalance( X, y, *, sampling_strategy=None, random_state=None, verbose=False, **kwargs ): @@ -26,7 +37,7 @@ def make_imbalance( X : {array-like, dataframe} of shape (n_samples, n_features) Matrix containing the data to be imbalanced. - y : ndarray of shape (n_samples,) + y : array-like of shape (n_samples,) Corresponding label for each sample in X. sampling_strategy : dict or callable, @@ -86,16 +97,10 @@ def make_imbalance( """ target_stats = Counter(y) # restrict ratio to be a dict or a callable - if isinstance(sampling_strategy, dict) or callable(sampling_strategy): + if isinstance(sampling_strategy, Mapping) or callable(sampling_strategy): sampling_strategy_ = check_sampling_strategy( sampling_strategy, y, "under-sampling", **kwargs ) - else: - raise ValueError( - f"'sampling_strategy' has to be a dictionary or a " - f"function returning a dictionary. Got {type(sampling_strategy)} " - f"instead." 
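For context on the change above: the hand-written type check inside ``make_imbalance`` is replaced by the ``validate_params`` decorator, which checks every argument against the declared constraints before the function body runs. A minimal usage sketch, assuming an imbalanced-learn build that includes this patch (the requested class counts below are arbitrary):

    from collections import Counter

    from sklearn.datasets import load_iris

    from imblearn.datasets import make_imbalance

    X, y = load_iris(return_X_y=True)

    # A Mapping (here a plain dict) satisfies the declared constraint
    # [Mapping, callable, None]; the requested counts must not exceed the
    # original class counts (50 per class for iris).
    X_res, y_res = make_imbalance(X, y, sampling_strategy={0: 25, 1: 35, 2: 50})
    print(Counter(y_res))  # e.g. Counter({2: 50, 1: 35, 0: 25})

    # A bare string no longer reaches the function body: the decorator raises
    # an informative error listing the accepted types, which is why the old
    # hand-written ValueError (and its dedicated test) can be dropped.
    try:
        make_imbalance(X, y, sampling_strategy="random-string")
    except (TypeError, ValueError) as exc:
        print(exc)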
- ) if verbose: print(f"The original target distribution in the dataset is: {target_stats}") diff --git a/imblearn/datasets/_zenodo.py b/imblearn/datasets/_zenodo.py index 72bafe7a6..3a2c679a0 100644 --- a/imblearn/datasets/_zenodo.py +++ b/imblearn/datasets/_zenodo.py @@ -54,6 +54,8 @@ from sklearn.datasets import get_data_home from sklearn.utils import Bunch, check_random_state +from ..utils._param_validation import validate_params + URL = "https://zenodo.org/record/61452/files/benchmark-imbalanced-learn.tar.gz" PRE_FILENAME = "x" POST_FILENAME = "data.npz" @@ -95,6 +97,16 @@ MAP_ID_NAME[v + 1] = k +@validate_params( + { + "data_home": [None, str], + "filter_data": [None, tuple], + "download_if_missing": ["boolean"], + "random_state": ["random_state"], + "shuffle": ["boolean"], + "verbose": ["boolean"], + } +) def fetch_datasets( *, data_home=None, diff --git a/imblearn/datasets/tests/test_imbalance.py b/imblearn/datasets/tests/test_imbalance.py index 1b98d3aae..2d8e278fa 100644 --- a/imblearn/datasets/tests/test_imbalance.py +++ b/imblearn/datasets/tests/test_imbalance.py @@ -22,7 +22,6 @@ def iris(): [ ({0: -100, 1: 50, 2: 50}, "in a class cannot be negative"), ({0: 10, 1: 70}, "should be less or equal to the original"), - ("random-string", "has to be a dictionary or a function"), ], ) def test_make_imbalance_error(iris, sampling_strategy, err_msg): diff --git a/imblearn/metrics/_classification.py b/imblearn/metrics/_classification.py index 797fb56a8..b377db592 100644 --- a/imblearn/metrics/_classification.py +++ b/imblearn/metrics/_classification.py @@ -15,6 +15,7 @@ # License: MIT import functools +import numbers import warnings from inspect import signature @@ -26,7 +27,23 @@ from sklearn.utils.multiclass import unique_labels from sklearn.utils.validation import check_consistent_length, column_or_1d +from ..utils._param_validation import Interval, StrOptions, validate_params + +@validate_params( + { + "y_true": ["array-like"], + "y_pred": ["array-like"], + "labels": ["array-like", None], + "pos_label": [str, numbers.Integral, None], + "average": [ + None, + StrOptions({"binary", "micro", "macro", "weighted", "samples"}), + ], + "warn_for": ["array-like"], + "sample_weight": ["array-like", None], + } +) def sensitivity_specificity_support( y_true, y_pred, @@ -57,13 +74,13 @@ def sensitivity_specificity_support( Parameters ---------- - y_true : ndarray of shape (n_samples,) + y_true : array-like of shape (n_samples,) Ground truth (correct) target values. - y_pred : ndarray of shape (n_samples,) + y_pred : array-like of shape (n_samples,) Estimated targets as returned by a classifier. - labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a @@ -72,8 +89,11 @@ def sensitivity_specificity_support( labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. - pos_label : str or int, default=1 + pos_label : str, int or None, default=1 The class to report if ``average='binary'`` and the data is binary. + If ``pos_label is None`` and in binary classification, this function + returns the average sensitivity and specificity if ``average`` + is one of ``'weighted'``. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. 
@@ -105,7 +125,7 @@ def sensitivity_specificity_support( This determines which warnings will be made in the case that this function is being used to return only one of its metrics. - sample_weight : ndarray of shape (n_samples,), default=None + sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns @@ -274,6 +294,19 @@ def sensitivity_specificity_support( return sensitivity, specificity, true_sum +@validate_params( + { + "y_true": ["array-like"], + "y_pred": ["array-like"], + "labels": ["array-like", None], + "pos_label": [str, numbers.Integral, None], + "average": [ + None, + StrOptions({"binary", "micro", "macro", "weighted", "samples"}), + ], + "sample_weight": ["array-like", None], + } +) def sensitivity_score( y_true, y_pred, @@ -295,21 +328,23 @@ def sensitivity_score( Parameters ---------- - y_true : ndarray of shape (n_samples,) + y_true : array-like of shape (n_samples,) Ground truth (correct) target values. - y_pred : ndarray of shape (n_samples,) + y_pred : array-like of shape (n_samples,) Estimated targets as returned by a classifier. - labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. - pos_label : str or int, default=1 + pos_label : str, int or None, default=1 The class to report if ``average='binary'`` and the data is binary. + If ``pos_label is None`` and in binary classification, this function + returns the average sensitivity if ``average`` is one of ``'weighted'``. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. @@ -337,7 +372,7 @@ def sensitivity_score( meaningful for multilabel classification where this differs from :func:`accuracy_score`). - sample_weight : ndarray of shape (n_samples,), default=None + sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns @@ -374,6 +409,19 @@ def sensitivity_score( return s +@validate_params( + { + "y_true": ["array-like"], + "y_pred": ["array-like"], + "labels": ["array-like", None], + "pos_label": [str, numbers.Integral, None], + "average": [ + None, + StrOptions({"binary", "micro", "macro", "weighted", "samples"}), + ], + "sample_weight": ["array-like", None], + } +) def specificity_score( y_true, y_pred, @@ -395,21 +443,23 @@ def specificity_score( Parameters ---------- - y_true : ndarray of shape (n_samples,) + y_true : array-like of shape (n_samples,) Ground truth (correct) target values. - y_pred : ndarray of shape (n_samples,) + y_pred : array-like of shape (n_samples,) Estimated targets as returned by a classifier. - labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. - pos_label : str or int, default=1 + pos_label : str, int or None, default=1 The class to report if ``average='binary'`` and the data is binary. 
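To make the ``StrOptions`` constraint concrete, here is a small sketch with made-up binary labels: the validated metrics behave as before for accepted values of ``average``, while anything outside the declared set is rejected before any computation happens:

    from imblearn.metrics import sensitivity_score, specificity_score

    y_true = [0, 0, 0, 1, 1, 1, 1, 1]
    y_pred = [0, 1, 0, 1, 1, 1, 0, 1]

    # 'binary' is one of the declared StrOptions, so these calls go through.
    print(sensitivity_score(y_true, y_pred, average="binary"))  # 0.8
    print(specificity_score(y_true, y_pred, average="binary"))  # ~0.667

    # An unknown averaging mode now fails fast with a message listing the
    # accepted options instead of reaching the metric internals.
    try:
        sensitivity_score(y_true, y_pred, average="total")
    except (TypeError, ValueError) as exc:
        print(exc)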
+ If ``pos_label is None`` and in binary classification, this function + returns the average specificity if ``average`` is one of ``'weighted'``. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. @@ -437,7 +487,7 @@ def specificity_score( meaningful for multilabel classification where this differs from :func:`accuracy_score`). - sample_weight : ndarray of shape (n_samples,), default=None + sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns @@ -474,6 +524,22 @@ def specificity_score( return s +@validate_params( + { + "y_true": ["array-like"], + "y_pred": ["array-like"], + "labels": ["array-like", None], + "pos_label": [str, numbers.Integral, None], + "average": [ + None, + StrOptions( + {"binary", "micro", "macro", "weighted", "samples", "multiclass"} + ), + ], + "sample_weight": ["array-like", None], + "correction": [Interval(numbers.Real, 0, None, closed="left")], + } +) def geometric_mean_score( y_true, y_pred, @@ -507,21 +573,24 @@ class is unrecognized by the classifier, G-mean resolves to zero. To Parameters ---------- - y_true : ndarray of shape (n_samples,) + y_true : array-like of shape (n_samples,) Ground truth (correct) target values. - y_pred : ndarray of shape (n_samples,) + y_pred : array-like of shape (n_samples,) Estimated targets as returned by a classifier. - labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. - pos_label : str or int, default=1 + pos_label : str, int or None, default=1 The class to report if ``average='binary'`` and the data is binary. + If ``pos_label is None`` and in binary classification, this function + returns the average geometric mean if ``average`` is one of + ``'weighted'``. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. @@ -539,6 +608,8 @@ class is unrecognized by the classifier, G-mean resolves to zero. To ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. + ``'multiclass'``: + No average is taken. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This @@ -549,7 +620,7 @@ class is unrecognized by the classifier, G-mean resolves to zero. To meaningful for multilabel classification where this differs from :func:`accuracy_score`). - sample_weight : ndarray of shape (n_samples,), default=None + sample_weight : array-like of shape (n_samples,), default=None Sample weights. correction : float, default=0.0 @@ -658,6 +729,7 @@ class is unrecognized by the classifier, G-mean resolves to zero. To return gmean +@validate_params({"alpha": [numbers.Real], "squared": ["boolean"]}) def make_index_balanced_accuracy(*, alpha=0.1, squared=True): """Balance any scoring function using the index balanced accuracy. 
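The constraints declared here for ``geometric_mean_score`` (note the extra ``'multiclass'`` option and the non-negative ``correction``) and for ``make_index_balanced_accuracy`` can be exercised as below; the labels are a made-up binary toy example and the printed values are approximate:

    from imblearn.metrics import geometric_mean_score, make_index_balanced_accuracy

    y_true = [1, 0, 0, 1, 0, 1]
    y_pred = [0, 0, 1, 1, 0, 1]

    # The default 'multiclass' average is the G-mean of the per-class recalls;
    # correction must be a real number >= 0 (Interval(..., closed="left")) and
    # only affects recalls that are exactly zero.
    print(geometric_mean_score(y_true, y_pred, correction=0.001))  # ~0.667

    # make_index_balanced_accuracy is itself validated (alpha: real number,
    # squared: boolean) and returns a decorator that weights any metric by the
    # index balanced accuracy.
    iba_gmean = make_index_balanced_accuracy(alpha=0.1, squared=True)(
        geometric_mean_score
    )
    print(iba_gmean(y_true, y_pred, average=None))  # roughly [0.44, 0.44]

    # A negative correction violates the declared interval and fails fast.
    try:
        geometric_mean_score(y_true, y_pred, correction=-1.0)
    except (TypeError, ValueError) as exc:
        print(exc)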
@@ -763,6 +835,22 @@ def compute_score(*args, **kwargs): return decorate +@validate_params( + { + "y_true": ["array-like"], + "y_pred": ["array-like"], + "labels": ["array-like", None], + "target_names": ["array-like", None], + "sample_weight": ["array-like", None], + "digits": [Interval(numbers.Integral, 0, None, closed="left")], + "alpha": [numbers.Real], + "output_dict": ["boolean"], + "zero_division": [ + StrOptions({"warn"}), + Interval(numbers.Integral, 0, 1, closed="both"), + ], + } +) def classification_report_imbalanced( y_true, y_pred, @@ -970,6 +1058,13 @@ class 2 1.00 0.67 1.00 0.80 0.82 0.64\ return report +@validate_params( + { + "y_true": ["array-like"], + "y_pred": ["array-like"], + "sample_weight": ["array-like", None], + } +) def macro_averaged_mean_absolute_error(y_true, y_pred, *, sample_weight=None): """Compute Macro-Averaged MAE for imbalanced ordinal classification. diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py index f6b5d5d24..738f89b49 100644 --- a/imblearn/pipeline.py +++ b/imblearn/pipeline.py @@ -12,16 +12,19 @@ # Christos Aridas # Guillaume Lemaitre # License: BSD +import joblib from sklearn import pipeline from sklearn.base import clone from sklearn.utils import _print_elapsed_time from sklearn.utils.metaestimators import available_if -from sklearn.utils.validation import check_memory + +from .base import _ParamsValidationMixin +from .utils._param_validation import HasMethods, validate_params __all__ = ["Pipeline", "make_pipeline"] -class Pipeline(pipeline.Pipeline): +class Pipeline(pipeline.Pipeline, _ParamsValidationMixin): """Pipeline of transforms and resamples with a final estimator. Sequentially apply a list of transforms, sampling, and a final estimator. @@ -128,6 +131,12 @@ class Pipeline(pipeline.Pipeline): """ + _parameter_constraints: dict = { + "steps": "no_validation", # validated in `_validate_steps` + "memory": [None, str, HasMethods(["cache"])], + "verbose": ["boolean"], + } + # BaseEstimator interface def _validate_steps(self): @@ -201,7 +210,10 @@ def _fit(self, X, y=None, **fit_params_steps): self.steps = list(self.steps) self._validate_steps() # Setup the memory - memory = check_memory(self.memory) + if self.memory is None or isinstance(self.memory, str): + memory = joblib.Memory(location=self.memory, verbose=0) + else: + memory = self.memory fit_transform_one_cached = memory.cache(pipeline._fit_transform_one) fit_resample_one_cached = memory.cache(_fit_resample_one) @@ -276,6 +288,7 @@ def fit(self, X, y=None, **fit_params): self : Pipeline This estimator. """ + self._validate_params() fit_params_steps = self._check_fit_params(**fit_params) Xt, yt = self._fit(X, y, **fit_params_steps) with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)): @@ -311,6 +324,7 @@ def fit_transform(self, X, y=None, **fit_params): Xt : array-like of shape (n_samples, n_transformed_features) Transformed samples. """ + self._validate_params() fit_params_steps = self._check_fit_params(**fit_params) Xt, yt = self._fit(X, y, **fit_params_steps) @@ -354,6 +368,7 @@ def fit_resample(self, X, y=None, **fit_params): yt : array-like of shape (n_samples, n_transformed_features) Transformed target. """ + self._validate_params() fit_params_steps = self._check_fit_params(**fit_params) Xt, yt = self._fit(X, y, **fit_params_steps) last_step = self._final_estimator @@ -392,6 +407,7 @@ def fit_predict(self, X, y=None, **fit_params): y_pred : ndarray of shape (n_samples,) The predicted target. 
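On the ``Pipeline`` side, the patch swaps ``check_memory`` for a small inline check and declares ``memory`` as ``[None, str, HasMethods(["cache"])]``, validated by ``self._validate_params()`` at the start of ``fit`` and friends. A sketch of the accepted and the rejected path, using stock imbalanced-learn estimators:

    from joblib import Memory
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    from imblearn.pipeline import Pipeline
    from imblearn.under_sampling import RandomUnderSampler

    X, y = make_classification(weights=[0.8, 0.2], random_state=0)

    # memory may be None, a str path, or any object exposing a ``cache``
    # method, e.g. a joblib.Memory instance.
    pipe = Pipeline(
        [
            ("sampler", RandomUnderSampler(random_state=0)),
            ("clf", LogisticRegression(max_iter=1000)),
        ],
        memory=Memory(location=None, verbose=0),
    )
    pipe.fit(X, y)

    # An invalid memory (here an integer) is now caught by
    # self._validate_params() when fit is called, replacing the old
    # check_memory-based error and its dedicated test.
    bad = Pipeline([("clf", LogisticRegression())], memory=1)
    try:
        bad.fit(X, y)
    except (TypeError, ValueError) as exc:
        print(exc)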
""" + self._validate_params() fit_params_steps = self._check_fit_params(**fit_params) Xt, yt = self._fit(X, y, **fit_params_steps) @@ -408,6 +424,7 @@ def _fit_resample_one(sampler, X, y, message_clsname="", message=None, **fit_par return X_res, y_res, sampler +@validate_params({"memory": [None, str, HasMethods(["cache"])], "verbose": ["boolean"]}) def make_pipeline(*steps, memory=None, verbose=False): """Construct a Pipeline from the given estimators. diff --git a/imblearn/tests/test_common.py b/imblearn/tests/test_common.py index 0f0b06494..0c2e2f301 100644 --- a/imblearn/tests/test_common.py +++ b/imblearn/tests/test_common.py @@ -74,7 +74,6 @@ def test_estimators_imblearn(estimator, check, request): ) def test_check_param_validation(estimator): name = estimator.__class__.__name__ - print(name) _set_checking_parameters(estimator) check_param_validation(name, estimator) diff --git a/imblearn/tests/test_pipeline.py b/imblearn/tests/test_pipeline.py index d2f0b8f5c..8355f0228 100644 --- a/imblearn/tests/test_pipeline.py +++ b/imblearn/tests/test_pipeline.py @@ -35,6 +35,7 @@ from imblearn.pipeline import Pipeline, make_pipeline from imblearn.under_sampling import EditedNearestNeighbours as ENN from imblearn.under_sampling import RandomUnderSampler +from imblearn.utils.estimator_checks import check_param_validation JUNK_FOOD_DOCS = ( "the pizza pizza beer copyright", @@ -642,22 +643,6 @@ def test_classes_property(): assert_array_equal(clf.classes_, np.unique(y)) -def test_pipeline_wrong_memory(): - # Test that an error is raised when memory is not a string or a Memory - # instance - iris = load_iris() - X = iris.data - y = iris.target - # Define memory as an integer - memory = 1 - cached_pipe = Pipeline( - [("transf", DummyTransf()), ("svc", SVC(gamma="scale"))], memory=memory - ) - error_regex = "string or have the same interface as" - with raises(ValueError, match=error_regex): - cached_pipe.fit(X, y) - - def test_pipeline_memory_transformer(): iris = load_iris() X = iris.data @@ -1341,3 +1326,10 @@ def test_pipeline_score_samples_pca_lof_multiclass(): # Check the values lof.fit(pca.fit_transform(X)) assert_allclose(pipe.score_samples(X), lof.score_samples(pca.transform(X))) + + +def test_pipeline_param_validation(): + model = Pipeline( + [("sampler", RandomUnderSampler()), ("classifier", LogisticRegression())] + ) + check_param_validation("Pipeline", model) diff --git a/imblearn/tests/test_public_functions.py b/imblearn/tests/test_public_functions.py new file mode 100644 index 000000000..d84732007 --- /dev/null +++ b/imblearn/tests/test_public_functions.py @@ -0,0 +1,105 @@ +"""This is a copy of sklearn/tests/test_public_functions.py. It can be +removed when we support scikit-learn >= 1.2. 
+""" +from importlib import import_module +from inspect import signature + +import pytest + +from imblearn.utils._param_validation import ( + generate_invalid_param_val, + generate_valid_param, + make_constraint, +) + +PARAM_VALIDATION_FUNCTION_LIST = [ + "imblearn.datasets.fetch_datasets", + "imblearn.datasets.make_imbalance", + "imblearn.metrics.classification_report_imbalanced", + "imblearn.metrics.geometric_mean_score", + "imblearn.metrics.macro_averaged_mean_absolute_error", + "imblearn.metrics.make_index_balanced_accuracy", + "imblearn.metrics.sensitivity_specificity_support", + "imblearn.metrics.sensitivity_score", + "imblearn.metrics.specificity_score", + "imblearn.pipeline.make_pipeline", +] + + +@pytest.mark.parametrize("func_module", PARAM_VALIDATION_FUNCTION_LIST) +def test_function_param_validation(func_module): + """Check that an informative error is raised when the value of a parameter does not + have an appropriate type or value. + """ + module_name, func_name = func_module.rsplit(".", 1) + module = import_module(module_name) + func = getattr(module, func_name) + + func_sig = signature(func) + func_params = [ + p.name + for p in func_sig.parameters.values() + if p.kind not in (p.VAR_POSITIONAL, p.VAR_KEYWORD) + ] + parameter_constraints = getattr(func, "_skl_parameter_constraints") + + # Generate valid values for the required parameters + # The parameters `*args` and `**kwargs` are ignored since we cannot generate + # constraints. + required_params = [ + p.name + for p in func_sig.parameters.values() + if p.default is p.empty and p.kind not in (p.VAR_POSITIONAL, p.VAR_KEYWORD) + ] + valid_required_params = {} + for param_name in required_params: + if parameter_constraints[param_name] == "no_validation": + valid_required_params[param_name] = 1 + else: + valid_required_params[param_name] = generate_valid_param( + make_constraint(parameter_constraints[param_name][0]) + ) + + # check that there is a constraint for each parameter + if func_params: + validation_params = parameter_constraints.keys() + unexpected_params = set(validation_params) - set(func_params) + missing_params = set(func_params) - set(validation_params) + err_msg = ( + "Mismatch between _parameter_constraints and the parameters of" + f" {func_name}.\nConsider the unexpected parameters {unexpected_params} and" + f" expected but missing parameters {missing_params}\n" + ) + assert set(validation_params) == set(func_params), err_msg + + # this object does not have a valid type for sure for all params + param_with_bad_type = type("BadType", (), {})() + + for param_name in func_params: + constraints = parameter_constraints[param_name] + + if constraints == "no_validation": + # This parameter is not validated + continue + + match = ( + rf"The '{param_name}' parameter of {func_name} must be .* Got .* instead." + ) + + # First, check that the error is raised if param doesn't match any valid type. + with pytest.raises(ValueError, match=match): + func(**{**valid_required_params, param_name: param_with_bad_type}) + + # Then, for constraints that are more than a type constraint, check that the + # error is raised if param does match a valid type but does not match any valid + # value for this type. 
+ constraints = [make_constraint(constraint) for constraint in constraints] + + for constraint in constraints: + try: + bad_value = generate_invalid_param_val(constraint) + except NotImplementedError: + continue + + with pytest.raises(ValueError, match=match): + func(**{**valid_required_params, param_name: bad_value}) diff --git a/imblearn/utils/_param_validation.py b/imblearn/utils/_param_validation.py index a45292c63..ae3855945 100644 --- a/imblearn/utils/_param_validation.py +++ b/imblearn/utils/_param_validation.py @@ -20,10 +20,9 @@ sklearn_version = parse_version(sklearn.__version__) -# if sklearn_version < parse_version("1.2"): -if True: +if sklearn_version < parse_version("1.2"): # TODO: remove `if True` when we have clear support for: - # - dataframe + # - ignoring `*args` and `**kwargs` in the signature def validate_parameter_constraints(parameter_constraints, params, caller_name): """Validate types and values of given parameters. @@ -38,7 +37,6 @@ def validate_parameter_constraints(parameter_constraints, params, caller_name): Constraints can be: - an Interval object, representing a continuous or discrete range of numbers - the string "array-like" - - the string "dataframe" - the string "sparse matrix" - the string "random_state" - callable @@ -119,8 +117,6 @@ def make_constraint(constraint): return _ArrayLikes() if isinstance(constraint, str) and constraint == "sparse matrix": return _SparseMatrices() - if isinstance(constraint, str) and constraint == "dataframe": - return _DataFrames() if isinstance(constraint, str) and constraint == "random_state": return _RandomStates() if constraint is callable: @@ -472,17 +468,6 @@ def is_satisfied_by(self, val): def __str__(self): return "a sparse matrix" - class _DataFrames(_Constraint): - """Constraint representing a DataFrame""" - - def is_satisfied_by(self, val): - # Let's first try the dataframe protocol and then duck-typing for the older - # pandas versions. 
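The guard above replaces the temporary ``if True:`` with a real version check, so the vendored helpers are only defined when running against scikit-learn older than 1.2; on newer releases the module presumably defers to the upstream implementation (that branch is outside this hunk). Reduced to its shape, and with the fallback import being an assumption rather than part of the diff shown here, the pattern is:

    import sklearn
    from sklearn.utils.fixes import parse_version

    if parse_version(sklearn.__version__) < parse_version("1.2"):
        # Vendored copies of validate_params, Interval, StrOptions, ... exactly
        # as in the hunk above.
        pass
    else:
        # Assumed fallback: reuse scikit-learn's own helpers, available from
        # 1.2 onwards.
        from sklearn.utils._param_validation import validate_params  # noqa: F401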
- return hasattr(val, "__dataframe__") or hasattr(val, "iloc") - - def __str__(self): - return "a DataFrame" - class _Callables(_Constraint): """Constraint representing callables.""" @@ -862,11 +847,6 @@ def generate_valid_param(constraint): if isinstance(constraint, _SparseMatrices): return csr_matrix([[0, 1], [1, 0]]) - if isinstance(constraint, _DataFrames): - import pandas as pd - - return pd.DataFrame({"a": [1, 2, 3]}) - if isinstance(constraint, _RandomStates): return np.random.RandomState(42) diff --git a/imblearn/utils/tests/test_param_validation.py b/imblearn/utils/tests/test_param_validation.py index ec3a37e13..dae58a790 100644 --- a/imblearn/utils/tests/test_param_validation.py +++ b/imblearn/utils/tests/test_param_validation.py @@ -21,7 +21,6 @@ _Booleans, _Callables, _CVObjects, - _DataFrames, _InstancesOf, _IterablesNotString, _MissingValues, @@ -37,15 +36,6 @@ ) -def has_pandas(): - try: - import pandas as pd - - return True, pd.DataFrame({"a": [1, 2, 3]}) - except ImportError: - return False, None - - # Some helpers for the tests @validate_params({"a": [Real], "b": [Real], "c": [Real], "d": [Real]}) def _func(a, b=0, *args, c, d=0, **kwargs): @@ -327,12 +317,6 @@ def test_generate_invalid_param_val_2_intervals(integer_interval, real_interval) "constraints", [ [_ArrayLikes()], - pytest.param( - [_DataFrames()], - marks=pytest.mark.skipif( - not has_pandas()[0], reason="Pandas not installed" - ), - ), [_InstancesOf(list)], [_Callables()], [_NoneConstraint()], @@ -358,12 +342,6 @@ def test_generate_invalid_param_val_all_valid(constraints): "constraint", [ _ArrayLikes(), - pytest.param( - _DataFrames(), - marks=pytest.mark.skipif( - not has_pandas()[0], reason="Pandas not installed" - ), - ), _Callables(), _InstancesOf(list), _NoneConstraint(), @@ -403,13 +381,6 @@ def test_generate_valid_param(constraint): (None, None), ("array-like", [[1, 2], [3, 4]]), ("array-like", np.array([[1, 2], [3, 4]])), - pytest.param( - "dataframe", - has_pandas()[1], - marks=pytest.mark.skipif( - not has_pandas()[0], reason="Pandas not installed" - ), - ), ("sparse matrix", csr_matrix([[1, 2], [3, 4]])), ("random_state", 0), ("random_state", np.random.RandomState(0)), @@ -443,13 +414,6 @@ def test_is_satisfied_by(constraint_declaration, value): (Options(Real, {0.42, 1.23}), Options), ("array-like", _ArrayLikes), ("sparse matrix", _SparseMatrices), - pytest.param( - "dataframe", - _DataFrames, - marks=pytest.mark.skipif( - not has_pandas()[0], reason="Pandas not installed" - ), - ), ("random_state", _RandomStates), (None, _NoneConstraint), (callable, _Callables),