no limit for training after Optuna tuning (mljar#397)
pplonski committed Jun 8, 2021
1 parent 8e797a8 commit 27c0a76
Showing 3 changed files with 34 additions and 18 deletions.
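The recurring fix in this commit: in Optuna mode the overall budget is None, so every derived budget must propagate None instead of evaluating 0.1 * None, which raises a TypeError. A minimal standalone sketch of that guard in Python (the helper name and demo values are illustrative, not from the diff):

    from typing import Optional

    def model_selection_budget(total_time_limit: Optional[float]) -> Optional[float]:
        # Propagate "no limit" instead of multiplying None by a float.
        return None if total_time_limit is None else 0.1 * total_time_limit

    print(model_selection_budget(3600))  # 360.0
    print(model_selection_budget(None))  # None -> no limit (Optuna mode)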
8 changes: 3 additions & 5 deletions supervised/base_automl.py
@@ -936,9 +936,7 @@ def _fit(self, X, y, sample_weight=None, cv=None):

self.verbose_print(f"AutoML directory: {self._results_path}")
if self._mode == "Optuna":
-            ttl = int(
-                len(self._algorithms) * self._optuna_time_budget
-            )
+            ttl = int(len(self._algorithms) * self._optuna_time_budget)
self.verbose_print("Expected computing time:")
self.verbose_print(
f"Time for tuning with Optuna: len(algorithms) * optuna_time_budget = {int(len(self._algorithms) * self._optuna_time_budget)} seconds"
@@ -1411,8 +1409,8 @@ def _get_total_time_limit(self):
""" Gets the current total_time_limit"""
self._validate_total_time_limit()
if self._get_mode() == "Optuna":
-            return None # there no training limit for model in the Optuna mode
-            # just train and be happy with super models :)
+            return None  # there is no training time limit for models in Optuna mode
+            # just train and be happy with super models :)
return deepcopy(self.total_time_limit)

def _get_model_time_limit(self):
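The base_automl.py hunks above make _get_total_time_limit return None in Optuna mode, so post-tuning training runs without a clock. A condensed, runnable sketch of that behavior (the Config class is a hypothetical stand-in for the AutoML object, not the real API):

    from copy import deepcopy

    class Config:
        # Hypothetical stand-in for AutoML's time-limit accessor.
        def __init__(self, mode, total_time_limit):
            self._mode = mode
            self.total_time_limit = total_time_limit

        def get_total_time_limit(self):
            if self._mode == "Optuna":
                return None  # no training limit after Optuna tuning
            return deepcopy(self.total_time_limit)

    assert Config("Optuna", 3600).get_total_time_limit() is None
    assert Config("Compete", 3600).get_total_time_limit() == 3600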
40 changes: 29 additions & 11 deletions supervised/tuner/mljar_tuner.py
@@ -613,8 +613,11 @@ def get_loo_categorical_strategy(self, current_models, total_time_limit):

def get_categorical_strategy(self, current_models, strategy, total_time_limit):

+        model_selection_time_limit = (
+            None if total_time_limit is None else 0.1 * total_time_limit
+        )
         df_models, algorithms = self.df_models_algorithms(
-            current_models, time_limit=0.1 * total_time_limit
+            current_models, time_limit=model_selection_time_limit
         )
generated_params = []
for m_type in algorithms:
@@ -731,9 +734,11 @@ def df_models_algorithms(
def get_golden_features_params(
self, current_models, results_path, total_time_limit
):

+        model_selection_time_limit = (
+            None if total_time_limit is None else 0.1 * total_time_limit
+        )
         df_models, algorithms = self.df_models_algorithms(
-            current_models, time_limit=0.1 * total_time_limit
+            current_models, time_limit=model_selection_time_limit
         )

generated_params = []
@@ -774,9 +779,11 @@ def get_golden_features_params(
def get_kmeans_features_params(
self, current_models, results_path, total_time_limit
):

+        model_selection_time_limit = (
+            None if total_time_limit is None else 0.1 * total_time_limit
+        )
         df_models, algorithms = self.df_models_algorithms(
-            current_models, time_limit=0.1 * total_time_limit, exclude_golden=True
+            current_models, time_limit=model_selection_time_limit, exclude_golden=True
         )

generated_params = []
@@ -803,9 +810,11 @@ def get_kmeans_features_params(
return generated_params

def time_features_selection(self, current_models, total_time_limit):

+        model_selection_time_limit = (
+            None if total_time_limit is None else 0.1 * total_time_limit
+        )
         df_models, algorithms = self.df_models_algorithms(
-            current_models, time_limit=0.1 * total_time_limit
+            current_models, time_limit=model_selection_time_limit
         )

time_needed = 0
@@ -838,7 +847,7 @@ def get_params_to_insert_random_feature(self, current_models, total_time_limit):

time_needed = self.time_features_selection(current_models, total_time_limit)

-        if time_needed > 0.1 * total_time_limit:
+        if total_time_limit is not None and time_needed > 0.1 * total_time_limit:
print("Not enough time to perform features selection. Skip")
print(
"Time needed for features selection ~", np.round(time_needed), "seconds"
)
return None

+        model_selection_time_limit = (
+            None if total_time_limit is None else 0.1 * total_time_limit
+        )
         df_models, algorithms = self.df_models_algorithms(
-            current_models, time_limit=0.1 * total_time_limit
+            current_models, time_limit=model_selection_time_limit
         )
if df_models.shape[0] == 0:
return None
@@ -892,8 +904,11 @@ def get_features_selection_params(
if len(drop_features) <= 1:
return None

+        model_selection_time_limit = (
+            None if total_time_limit is None else 0.1 * total_time_limit
+        )
         df_models, algorithms = self.df_models_algorithms(
-            current_models, time_limit=0.1 * total_time_limit
+            current_models, time_limit=model_selection_time_limit
         )

generated_params = []
@@ -1015,8 +1030,11 @@ def add_key(self, model):

def boost_params(self, current_models, results_path, total_time_limit):

+        model_selection_time_limit = (
+            None if total_time_limit is None else 0.1 * total_time_limit
+        )
         df_models, algorithms = self.df_models_algorithms(
-            current_models, time_limit=0.1 * total_time_limit
+            current_models, time_limit=model_selection_time_limit
         )
best_model = None
for i in range(df_models.shape[0]):
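All of the mljar_tuner.py hunks above apply the same two guards: derive the 10% model-selection budget only when a total limit exists, and short-circuit the feature-selection time check, since in Python 3 comparing a float against None raises a TypeError. A minimal reproduction of the comparison guard (function name and values are illustrative):

    def skip_feature_selection(time_needed, total_time_limit):
        # With no overall limit (Optuna mode) there is always enough time;
        # without the guard, `time_needed > 0.1 * None` would raise TypeError.
        return total_time_limit is not None and time_needed > 0.1 * total_time_limit

    print(skip_feature_selection(120, 600))   # True: needs ~120s, only 60s allotted
    print(skip_feature_selection(120, None))  # False: unlimited, run selection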
4 changes: 2 additions & 2 deletions supervised/tuner/time_controller.py
@@ -60,7 +60,7 @@ def already_spend(self):
def time_should_use(self, fit_level):

if self._total_time_limit is None:
-            return 7*24*3600 # 7 days
+            return 7 * 24 * 3600  # 7 days

ratios = {
"default_algorithms": 0.3,
@@ -213,7 +213,7 @@ def enough_time(self, model_type, step):
def learner_time_limit(self, model_type, fit_level, k_folds):

if self._total_time_limit is None:
-            return 7*24*3600
+            return 7 * 24 * 3600

if self._model_time_limit is not None:
return self._model_time_limit / k_folds
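When the overall limit is None, time_controller.py falls back to a one-week ceiling instead. A simplified sketch combining that fallback with the per-fold split visible in the surrounding context (only the two branches shown in the hunks; the ratio-based logic of the real method is omitted):

    def learner_time_limit(total_time_limit, model_time_limit, k_folds):
        if total_time_limit is None:
            return 7 * 24 * 3600  # unlimited mode: cap each learner at 7 days
        if model_time_limit is not None:
            return model_time_limit / k_folds  # split per-model budget across folds
        raise NotImplementedError("ratio-based branches omitted in this sketch")

    print(learner_time_limit(None, None, 5))  # 604800
    print(learner_time_limit(3600, 600, 5))   # 120.0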
