Skip to content

Commit

Permalink
Add stopping criteria
Browse files Browse the repository at this point in the history
  • Loading branch information
Guillaume Lemaitre committed Aug 30, 2016
1 parent f905274 commit 75e886e
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Bug fixes

- Fixed a bug in :class:`under_sampling.NearMiss` which was not picking the right samples during under sampling for the method 3. By `Guillaume Lemaitre`_.
- Fixed a bug in :class:`ensemble.EasyEnsemble`, correction of the `random_state` generation. By `Guillaume Lemaitre`_ and `Christos Aridas`_.
- Fixed a bug in :class:`under_sampling.AllKNN`: added stopping criteria to prevent the minority class from becoming a majority class or a class from disappearing. By `Guillaume Lemaitre`_.

New features
~~~~~~~~~~~~
Expand Down
38 changes: 35 additions & 3 deletions imblearn/under_sampling/edited_nearest_neighbours.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,10 +537,42 @@ def _sample(self, X, y):
# updating ENN size_ngh
self.enn_.size_ngh = curr_size_ngh
if self.return_indices:
X_, y_, idx_ = self.enn_.fit_sample(X_, y_)
idx_under = idx_under[idx_]
X_enn, y_enn, idx_enn = self.enn_.fit_sample(X_, y_)
else:
X_, y_ = self.enn_.fit_sample(X_, y_)
X_enn, y_enn = self.enn_.fit_sample(X_, y_)

# Check the stopping criterion
# 1. If the number of samples in any other class becomes smaller than
# the number of samples in the minority class
# 2. If one of the classes disappears
# Case 1
stats_enn = Counter(y_enn)
self.logger.debug('Current ENN stats: %s', stats_enn)
# Get the number of samples in the non-minority classes
count_non_min = np.array([val for val, key
in zip(stats_enn.itervalues(),
stats_enn.iterkeys())
if key != self.min_c_])
self.logger.debug('Number of samples in the non-majority'
' classes: %s', count_non_min)
# Check whether the minority class stops being the minority
b_min_bec_maj = np.any(count_non_min < self.stats_c_[self.min_c_])

# Case 2
b_remove_maj_class = (len(stats_enn) < len(self.stats_c_))

if b_min_bec_maj or b_remove_maj_class:
# Log the variables to explain the stop of the algorithm
self.logger.debug('AllKNN minority become majority: %s',
b_min_bec_maj)
self.logger.debug('AllKNN remove one class: %s',
b_remove_maj_class)
break

# Update the data for the next iteration
X_, y_, = X_enn, y_enn
if self.return_indices:
idx_under = idx_under[idx_enn]

self.logger.info('Under-sampling performed: %s', Counter(y_))

Expand Down
6 changes: 3 additions & 3 deletions imblearn/under_sampling/tests/test_allknn.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,6 @@ def test_multiclass_fit_sample():

# Check the size of y
count_y_res = Counter(y_resampled)
assert_equal(count_y_res[0], 341)
assert_equal(count_y_res[1], 2485)
assert_equal(count_y_res[2], 212)
assert_equal(count_y_res[0], 400)
assert_equal(count_y_res[1], 3600)
assert_equal(count_y_res[2], 1000)

0 comments on commit 75e886e

Please sign in to comment.