TUM-DAML · n-gao · Dec 13, 2021 · Dec 3, 2021 · Dec 3, 2021 · Dec 8, 2021
diff --git a/seml/add.py b/seml/add.py
@@ -110,7 +110,7 @@ def add_configs(collection, seml_config, slurm_config, configs, source_files=Non
     collection.insert_many(db_dicts)
 
 
-def add_experiments(db_collection_name, config_file, force_duplicates, no_hash=False, no_sanity_check=False,
+def add_experiments(db_collection_name, config_file, force_duplicates, overwrite_params=None, no_hash=False, no_sanity_check=False,
                     no_code_checkpoint=False):
     """
     Add configurations from a config file into the database.
@@ -120,6 +120,7 @@ def add_experiments(db_collection_name, config_file, force_duplicates, no_hash=F
     db_collection_name: the MongoDB collection name.
     config_file: path to the YAML configuration.
     force_duplicates: if True, disable duplicate detection.
+    overwrite_params: optional flat dictionary to overwrite parameters in all configs.
     no_hash: if True, disable hashing of the configurations for duplicate detection. This is much slower, so use only
         if you have a good reason to.
     no_sanity_check: if True, do not check the config for missing/unused arguments.
@@ -136,7 +137,7 @@ def add_experiments(db_collection_name, config_file, force_duplicates, no_hash=F
     if 'conda_environment' not in seml_config:
         seml_config['conda_environment'] = os.environ.get('CONDA_DEFAULT_ENV')
 
-    # Set Slurm config with default parameters as fall-back option
+    # Set Slurm config with default parameters as fall-back option
     slurm_config = merge_dicts(SETTINGS.SLURM_DEFAULT, slurm_config)
 
     # Check for and use sbatch options template
@@ -149,7 +150,7 @@ def add_experiments(db_collection_name, config_file, force_duplicates, no_hash=F
                 slurm_config['sbatch_options'][k] = v
 
     slurm_config['sbatch_options'] = remove_prepended_dashes(slurm_config['sbatch_options'])
-    configs = generate_configs(experiment_config)
+    configs = generate_configs(experiment_config, overwrite_params=overwrite_params)
     collection = get_collection(db_collection_name)
 
     batch_id = get_max_in_collection(collection, "batch_id")

diff --git a/seml/config.py b/seml/config.py
@@ -11,7 +11,7 @@
 
 from seml.sources import import_exe
 from seml.parameters import sample_random_configs, generate_grid, cartesian_product_dict
-from seml.utils import merge_dicts, flatten, unflatten
+from seml.utils import Hashabledict, merge_dicts, flatten, unflatten
 from seml.errors import ConfigError, ExecutableError
 from seml.settings import SETTINGS
 
@@ -133,7 +133,7 @@ def detect_duplicate_parameters(inverted_config: dict, sub_config_name: str = No
                 raise ConfigError(error_str.format(p1=p1, p2=p2))
 
 
-def generate_configs(experiment_config):
+def generate_configs(experiment_config, overwrite_params=None):
     """Generate parameter configurations based on an input configuration.
 
     Input is a nested configuration where on each level there can be 'fixed', 'grid', and 'random' parameters.
@@ -155,6 +155,8 @@ def generate_configs(experiment_config):
     experiment_config: dict
         Dictionary that specifies the "search space" of parameters that will be enumerated. Should be
         parsed from a YAML file.
+    overwrite_params: Optional[dict]
+        Flat dictionary that overwrites configs. Resulting duplicates will be removed.
 
     Returns
     -------
@@ -235,6 +237,16 @@ def generate_configs(experiment_config):
                     for k, v in config.items()}
                    for config in all_configs]
 
+    if overwrite_params is not None:
+        all_configs = [merge_dicts(config, overwrite_params) for config in all_configs]
+        base_length = len(all_configs)
+        # We use a dictionary instead of a set because dictionary keys keep insertion order (Python 3.7+)
+        all_configs = list({Hashabledict(config): None for config in all_configs})
+        new_length = len(all_configs)
+        if base_length != new_length:
+            diff = base_length - new_length
+            logging.warning(f'Parameter overwrite caused {diff} identical configs. Duplicates were removed.')
+
     all_configs = [unflatten(conf) for conf in all_configs]
     return all_configs
 

diff --git a/seml/main.py b/seml/main.py
@@ -36,6 +36,17 @@ def parse_args(parser, commands):
     return commands
 
 
+class ParameterAction(argparse.Action):
+    """Store ``<param>=<value>`` tokens as a flat ``{param: evaluated value}`` dict on the namespace."""
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        pairs = [value.partition('=') for value in values]  # split on the first '=' only
+        if not all(raw for _, _, raw in pairs):
+            raise argparse.ArgumentError(self, "expected <param>=<value> with a non-empty value")
+        # NOTE(review): eval() executes arbitrary Python from the command line; consider ast.literal_eval.
+        setattr(namespace, self.dest, {key: eval(raw) for key, _, raw in pairs})
+
+
 def main():
     parser = argparse.ArgumentParser(
             description="Manage experiments for the given configuration. "
@@ -98,6 +109,11 @@ def main():
             '-f', '--force-duplicates', action='store_true',
             help="Add experiments to the database even when experiments with identical configurations "
                  "are already in the database.")
+    parser_add.add_argument(
+            '-o', '--overwrite-params', action=ParameterAction, nargs='+', default=None,
+            help="Specifies parameters that overwrite their respective values in all configs. "
+                 "Format: <param>=<value>, use flat dictionary notation with key1.key2=value."
+    )
     parser_add.set_defaults(func=add_experiments)
 
     parser_start = subparsers.add_parser(

diff --git a/seml/utils.py b/seml/utils.py
@@ -1,3 +1,4 @@
+from collections.abc import Iterable
 import logging
 import json
 import copy
@@ -254,3 +255,38 @@ def format(self, record):
         log_fmt = self.FORMATS.get(record.levelno, self.FORMATS['DEFAULT'])
         formatter = logging.Formatter(log_fmt)
         return formatter.format(record)
+
+
+class Hashabledict(dict):
+    """A ``dict`` that is hashable by content, e.g. to deduplicate configs via dict keys.
+
+    Equality is inherited from ``dict``; the hash is built from the keys and from
+    hashable stand-ins for the values. Do not mutate an instance while it is used
+    as a key or set member, because its hash follows its content.
+    """
+
+    @staticmethod
+    def _freeze(value):
+        """Return a hashable stand-in for ``value``, recursing into nested containers."""
+        if isinstance(value, (str, bytes)):
+            # Iterable but already hashable; recursing into a one-character
+            # string would never terminate.
+            return value
+        if isinstance(value, dict):
+            # Equal dicts may differ in key order, so the stand-in is unordered.
+            return frozenset((key, Hashabledict._freeze(item)) for key, item in value.items())
+        if isinstance(value, (set, frozenset)):
+            # Equal sets may iterate in different orders; tuple() would leak that order.
+            return frozenset(Hashabledict._freeze(item) for item in value)
+        if isinstance(value, Iterable):
+            return tuple(Hashabledict._freeze(item) for item in value)
+        return value
+
+    def hashable_values(self):
+        """Yield a hashable stand-in for each value, in insertion order."""
+        for value in self.values():
+            yield self._freeze(value)
+
+    def __hash__(self):
+        # Keys and values are hashed as two unordered collections.
+        return hash((frozenset(self), frozenset(self.hashable_values())))
diff --git a/test/resources/config/config_with_grid.yaml b/test/resources/config/config_with_grid.yaml
@@ -0,0 +1,13 @@
+grid:
+  dataset:
+    type: choice
+    options:
+      - small
+      - big
+
+  lr:
+    type: choice
+    options:
+      - 0.1
+      - 0.01
+
diff --git a/test/test_config.py b/test/test_config.py
@@ -16,6 +16,7 @@ class TestParseConfigDicts(unittest.TestCase):
     CONFIG_WITH_DUPLICATE_RDM_PARAMETERS_2 = "resources/config/config_with_duplicate_random_parameters_1.yaml"
     CONFIG_WITH_ALL_TYPES = "resources/config/config_with_all_types.yaml"
     CONFIG_WITH_EMPTY_DICT = "resources/config/config_with_empty_dictionary.yaml"
+    CONFIG_WITH_GRID = "resources/config/config_with_grid.yaml"
 
     def load_config_dict(self, path):
         with open(path, 'r') as conf:
@@ -80,6 +81,23 @@ def test_empty_dictionary(self):
             }
         }
         self.assertEqual(configs, expected_config)
+
+    def test_overwrite_parameters(self):
+        """Overwriting a grid parameter must collapse the grid and drop duplicates."""
+        # The fixture spans dataset in {small, big} x lr in {0.1, 0.01}, i.e.
+        # four configs before the overwrite is applied.
+        grid_config = self.load_config_dict(self.CONFIG_WITH_GRID)
+        overwrite = {'dataset': 'small'}
+
+        generated = config.generate_configs(grid_config, overwrite)
+
+        # Forcing dataset='small' makes the two dataset variants identical,
+        # so exactly one config per learning rate should remain.
+        expected = [
+            {'dataset': 'small', 'lr': 0.1},
+            {'dataset': 'small', 'lr': 0.01},
+        ]
+        self.assertEqual(generated, expected)
 
     def test_duplicate_parameters(self):
         config_dict = self.load_config_dict(self.CONFIG_WITH_DUPLICATE_PARAMETERS_1)