Skip to content
This repository has been archived by the owner on Jul 19, 2024. It is now read-only.

Commit

Permalink
Require explicit arguments for mlos_core optimizers (microsoft#760)
Browse files Browse the repository at this point in the history
This is a simple PR that makes all arguments explicit (keyword-only) for
optimizer-related function calls, in preparation for adding additional
arguments in microsoft#751 and to make that change easier to review.

---------

Co-authored-by: Brian Kroth <bpkroth@users.noreply.github.com>
Co-authored-by: Brian Kroth <bpkroth@microsoft.com>
  • Loading branch information
3 people authored Jun 28, 2024
1 parent c7a4823 commit bb6c6f2
Show file tree
Hide file tree
Showing 11 changed files with 99 additions and 99 deletions.
4 changes: 2 additions & 2 deletions mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def bulk_register(self,

# TODO: Specify (in the config) which metrics to pass to the optimizer.
# Issue: https://github.com/microsoft/MLOS/issues/745
self._opt.register(df_configs, df_scores[opt_targets].astype(float))
self._opt.register(configs=df_configs, scores=df_scores[opt_targets].astype(float))

if _LOG.isEnabledFor(logging.DEBUG):
(score, _) = self.get_best_observation()
Expand Down Expand Up @@ -195,7 +195,7 @@ def register(self, tunables: TunableGroups, status: Status,
_LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
# TODO: Specify (in the config) which metrics to pass to the optimizer.
# Issue: https://github.com/microsoft/MLOS/issues/745
self._opt.register(df_config, pd.DataFrame([registered_score], dtype=float))
self._opt.register(configs=df_config, scores=pd.DataFrame([registered_score], dtype=float))
return registered_score

def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _optimize(env: Environment, opt: Optimizer) -> Tuple[float, TunableGroups]:
config_df = config_to_dataframe(config)
logger("config: %s", str(config))
try:
logger("prediction: %s", opt._opt.surrogate_predict(config_df))
logger("prediction: %s", opt._opt.surrogate_predict(configs=config_df))
except RuntimeError:
pass

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,29 @@ class BaseBayesianOptimizer(BaseOptimizer, metaclass=ABCMeta):
"""Abstract base class defining the interface for Bayesian optimization."""

@abstractmethod
def surrogate_predict(self, configurations: pd.DataFrame,
def surrogate_predict(self, *, configs: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> npt.NDArray:
"""Obtain a prediction from this Bayesian optimizer's surrogate model for the given configuration(s).
Parameters
----------
configurations : pd.DataFrame
Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
configs : pd.DataFrame
Dataframe of configs / parameters. The columns are parameter names and the rows are the configs.
context : pd.DataFrame
Not Yet Implemented.
"""
pass # pylint: disable=unnecessary-pass # pragma: no cover

@abstractmethod
def acquisition_function(self, configurations: pd.DataFrame,
def acquisition_function(self, *, configs: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> npt.NDArray:
"""Invokes the acquisition function from this Bayesian optimizer for the given configuration.
Parameters
----------
configurations : pd.DataFrame
Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
configs : pd.DataFrame
Dataframe of configs / parameters. The columns are parameter names and the rows are the configs.
context : pd.DataFrame
Not Yet Implemented.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,10 @@ def __init__(self, *, # pylint: disable=too-many-locals,too-many-arguments
See Also: mlos_bench.optimizer.bulk_register
max_ratio : Optional[int]
Maximum ratio of max_trials to be random configurations to be evaluated
Maximum ratio of max_trials to be random configs to be evaluated
at start to bootstrap the optimizer.
Useful if you want to explicitly control the number of random
configurations evaluated at start.
configs evaluated at start.
use_default_config: bool
Whether to use the default config for the first trial after random initialization.
Expand Down Expand Up @@ -168,7 +168,7 @@ def __init__(self, *, # pylint: disable=too-many-locals,too-many-arguments
initial_design_args['n_configs'] = n_random_init
if n_random_init > 0.25 * max_trials and max_ratio is None:
warning(
'Number of random initial configurations (%d) is ' +
'Number of random initial configs (%d) is ' +
'greater than 25%% of max_trials (%d). ' +
'Consider setting max_ratio to avoid SMAC overriding n_random_init.',
n_random_init,
Expand Down Expand Up @@ -241,17 +241,17 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None
# -- this is planned to be fixed in some future release: https://github.com/automl/SMAC3/issues/946
raise RuntimeError('This function should never be called.')

def _register(self, configurations: pd.DataFrame,
def _register(self, *, configs: pd.DataFrame,
scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configurations and scores.
"""Registers the given configs and scores.
Parameters
----------
configurations : pd.DataFrame
Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
configs : pd.DataFrame
Dataframe of configs / parameters. The columns are parameter names and the rows are the configs.
scores : pd.DataFrame
Scores from running the configurations. The index is the same as the index of the configurations.
Scores from running the configs. The index is the same as the index of the configs.
context : pd.DataFrame
Not Yet Implemented.
Expand All @@ -262,7 +262,7 @@ def _register(self, configurations: pd.DataFrame,
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)

# Register each trial (one-by-one)
for (config, (_i, score)) in zip(self._to_configspace_configs(configurations), scores.iterrows()):
for (config, (_i, score)) in zip(self._to_configspace_configs(configs=configs), scores.iterrows()):
# Retrieve previously generated TrialInfo (returned by .ask()) or create new TrialInfo instance
info: TrialInfo = self.trial_info_map.get(
config, TrialInfo(config=config, seed=self.base_optimizer.scenario.seed))
Expand All @@ -272,7 +272,7 @@ def _register(self, configurations: pd.DataFrame,
# Save optimizer once we register all configs
self.base_optimizer.optimizer.save()

def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
"""Suggests a new configuration.
Parameters
Expand All @@ -299,10 +299,10 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys()))
return config_df

def register_pending(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
raise NotImplementedError()

def surrogate_predict(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray:
def surrogate_predict(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray:
from smac.utils.configspace import convert_configurations_to_array # pylint: disable=import-outside-toplevel

if context is not None:
Expand All @@ -318,11 +318,11 @@ def surrogate_predict(self, configurations: pd.DataFrame, context: Optional[pd.D
if self.base_optimizer._config_selector._model is None:
raise RuntimeError('Surrogate model is not yet trained')

configs: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configurations))
mean_predictions, _ = self.base_optimizer._config_selector._model.predict(configs)
config_array: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configs=configs))
mean_predictions, _ = self.base_optimizer._config_selector._model.predict(config_array)
return mean_predictions.reshape(-1,)

def acquisition_function(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray:
def acquisition_function(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray:
if context is not None:
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
if self._space_adapter:
Expand All @@ -332,28 +332,28 @@ def acquisition_function(self, configurations: pd.DataFrame, context: Optional[p
if self.base_optimizer._config_selector._acquisition_function is None:
raise RuntimeError('Acquisition function is not yet initialized')

configs: list = self._to_configspace_configs(configurations)
return self.base_optimizer._config_selector._acquisition_function(configs).reshape(-1,)
cs_configs: list = self._to_configspace_configs(configs=configs)
return self.base_optimizer._config_selector._acquisition_function(cs_configs).reshape(-1,)

def cleanup(self) -> None:
if self._temp_output_directory is not None:
self._temp_output_directory.cleanup()
self._temp_output_directory = None

def _to_configspace_configs(self, configurations: pd.DataFrame) -> List[ConfigSpace.Configuration]:
"""Convert a dataframe of configurations to a list of ConfigSpace configurations.
def _to_configspace_configs(self, *, configs: pd.DataFrame) -> List[ConfigSpace.Configuration]:
"""Convert a dataframe of configs to a list of ConfigSpace configs.
Parameters
----------
configurations : pd.DataFrame
Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
configs : pd.DataFrame
Dataframe of configs / parameters. The columns are parameter names and the rows are the configs.
Returns
-------
configurations : list
List of ConfigSpace configurations.
configs : list
List of ConfigSpace configs.
"""
return [
ConfigSpace.Configuration(self.optimizer_parameter_space, values=config.to_dict())
for (_, config) in configurations.astype('O').iterrows()
for (_, config) in configs.astype('O').iterrows()
]
18 changes: 9 additions & 9 deletions mlos_core/mlos_core/optimizers/flaml_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,24 +85,24 @@ def __init__(self, *, # pylint: disable=too-many-arguments
self.evaluated_samples: Dict[ConfigSpace.Configuration, EvaluatedSample] = {}
self._suggested_config: Optional[dict]

def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configurations and scores.
"""Registers the given configs and scores.
Parameters
----------
configurations : pd.DataFrame
Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
configs : pd.DataFrame
Dataframe of configs / parameters. The columns are parameter names and the rows are the configs.
scores : pd.DataFrame
Scores from running the configurations. The index is the same as the index of the configurations.
Scores from running the configs. The index is the same as the index of the configs.
context : None
Not Yet Implemented.
"""
if context is not None:
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
for (_, config), (_, score) in zip(configurations.astype('O').iterrows(), scores.iterrows()):
for (_, config), (_, score) in zip(configs.astype('O').iterrows(), scores.iterrows()):
cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration(
self.optimizer_parameter_space, values=config.to_dict())
if cs_config in self.evaluated_samples:
Expand All @@ -112,7 +112,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
score=float(np.average(score.astype(float), weights=self._objective_weights)),
)

def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
"""Suggests a new configuration.
Sampled at random using ConfigSpace.
Expand All @@ -132,7 +132,7 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
config: dict = self._get_next_config()
return pd.DataFrame(config, index=[0])

def register_pending(self, configurations: pd.DataFrame,
def register_pending(self, *, configs: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
raise NotImplementedError()

Expand Down Expand Up @@ -165,7 +165,7 @@ def _get_next_config(self) -> dict:
Since FLAML does not provide an ask-and-tell interface, we need to create a new instance of FLAML
each time we get asked for a new suggestion. This is suboptimal performance-wise, but works.
To do so, we use any previously evaluated configurations to bootstrap FLAML (i.e., warm-start).
To do so, we use any previously evaluated configs to bootstrap FLAML (i.e., warm-start).
For more info: https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function#warm-start
Returns
Expand Down
Loading

0 comments on commit bb6c6f2

Please sign in to comment.