Skip to content

Commit

Permalink
FIX raise informative error message when SV are all noise (scikit-lea…
Browse files Browse the repository at this point in the history
  • Loading branch information
glemaitre authored Jul 10, 2023
1 parent a8e44ae commit d431b9d
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 0 deletions.
4 changes: 4 additions & 0 deletions doc/whats_new/v0.11.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ Bug fixes
they are plugged into a Euclidean distance computation.
:pr:`1014` by :user:`Guillaume Lemaitre <glemaitre>`.

- Raise an informative error message when all support vectors are tagged as noise in
:class:`~imblearn.over_sampling.SVMSMOTE`.
:pr:`1016` by :user:`Guillaume Lemaitre <glemaitre>`.

- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` where the median of the
standard deviations of the continuous features was only computed on the minority
class. This statistic is now computed for each class that is up-sampled.
Expand Down
5 changes: 5 additions & 0 deletions imblearn/over_sampling/_smote/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,11 @@ def _fit_resample(self, X, y):
support_vector = _safe_indexing(
support_vector, np.flatnonzero(np.logical_not(noise_bool))
)
if support_vector.shape[0] == 0:
raise ValueError(
"All support vectors are considered as noise. SVM-SMOTE is not "
"adapted to your dataset. Try another SMOTE variant."
)
danger_bool = self._in_danger_noise(
self.nn_m_, support_vector, class_sample, y, kind="danger"
)
Expand Down
25 changes: 25 additions & 0 deletions imblearn/over_sampling/_smote/tests/test_svm_smote.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import pytest
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import NearestNeighbors
from sklearn.svm import SVC
Expand Down Expand Up @@ -61,3 +62,27 @@ def test_svm_smote_not_svm(data):
err_msg = "`svm_estimator` is required to exposed a `support_` fitted attribute."
with pytest.raises(RuntimeError, match=err_msg):
SVMSMOTE(svm_estimator=LogisticRegression()).fit_resample(*data)


def test_svm_smote_all_noise(data):
    """Check the error raised when every support vector is flagged as noise.

    A three-class dataset is generated with an almost null class separation
    and a very small first class (weight 0.004). Resampling it with
    `SVMSMOTE` is expected to raise a `ValueError` whose message tells the
    user that SVM-SMOTE is not adapted to the dataset.

    The `data` argument is accepted but not used: the test builds its own
    dataset.

    Non-regression test for:
    https://github.com/scikit-learn-contrib/imbalanced-learn/issues/742
    """
    # Settings of the synthetic dataset; classes overlap almost entirely.
    dataset_params = dict(
        n_samples=1000,
        n_features=3,
        n_informative=3,
        n_redundant=0,
        n_classes=3,
        n_clusters_per_class=2,
        weights=[0.004, 0.451, 0.545],
        class_sep=0.001,
        flip_y=0,
        random_state=10,
    )
    X, y = make_classification(**dataset_params)

    sampler_params = dict(k_neighbors=4, random_state=42)
    expected_msg = "SVM-SMOTE is not adapted to your dataset"
    with pytest.raises(ValueError, match=expected_msg):
        SVMSMOTE(**sampler_params).fit_resample(X, y)

0 comments on commit d431b9d

Please sign in to comment.