From 4fd18e32997a2a28888bb0992d3e84dbac034768 Mon Sep 17 00:00:00 2001 From: Zion <34517628+zionsteiner@users.noreply.github.com> Date: Sun, 6 Dec 2020 03:35:17 -0700 Subject: [PATCH] scikit_wrappers.py bug fix --- scikit_wrappers.py | 66 +++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/scikit_wrappers.py b/scikit_wrappers.py index 56f6026..d0ca856 100644 --- a/scikit_wrappers.py +++ b/scikit_wrappers.py @@ -172,40 +172,40 @@ def fit_classifier(self, features, y): if train_size // nb_classes < 5 or train_size < 50 or self.penalty is not None: return self.classifier.fit(features, y) else: - grid_search = sklearn.model_selection.GridSearchCV( - self.classifier, { - 'C': [ - 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, - numpy.inf - ], - 'kernel': ['rbf'], - 'degree': [3], - 'gamma': ['scale'], - 'coef0': [0], - 'shrinking': [True], - 'probability': [False], - 'tol': [0.001], - 'cache_size': [200], - 'class_weight': [None], - 'verbose': [False], - 'max_iter': [10000000], - 'decision_function_shape': ['ovr'], - 'random_state': [None] - }, - cv=5, iid=False, n_jobs=5 + grid_search = sklearn.model_selection.GridSearchCV( + self.classifier, { + 'C': [ + 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, + numpy.inf + ], + 'kernel': ['rbf'], + 'degree': [3], + 'gamma': ['scale'], + 'coef0': [0], + 'shrinking': [True], + 'probability': [False], + 'tol': [0.001], + 'cache_size': [200], + 'class_weight': [None], + 'verbose': [False], + 'max_iter': [10000000], + 'decision_function_shape': ['ovr'], + 'random_state': [None] + }, + cv=5, iid=False, n_jobs=5 + ) + if train_size <= 10000: + grid_search.fit(features, y) + else: + # If the training set is too large, subsample 10000 train + # examples + split = sklearn.model_selection.train_test_split( + features, y, + train_size=10000, random_state=0, stratify=y ) - if train_size <= 10000: - grid_search.fit(features, y) - else: - # If the training set is too large, subsample 10000 train - # examples - split = sklearn.model_selection.train_test_split( - features, y, - train_size=10000, random_state=0, stratify=y - ) - grid_search.fit(split[0], split[2]) - self.classifier = grid_search.best_estimator_ - return self.classifier + grid_search.fit(split[0], split[2]) + self.classifier = grid_search.best_estimator_ + return self.classifier def fit_encoder(self, X, y=None, save_memory=False, verbose=False): """