
[FEATURE] Creating Scheduler V2 (same as create_optimizer_v2 concept) #1168

Closed
@timothylimyl

Description

When building optimizers, we can now use the function create_optimizer_v2, which takes keyword arguments instead of argparse arguments. This makes it easier to set up configuration files (we can just pass in kwargs), as in the example below.
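For reference, this is roughly how the optimizer side already works (a minimal sketch using the existing timm API; the model choice and hyperparameter values are arbitrary examples):

import torch
from timm import create_model
from timm.optim import create_optimizer_v2

model = create_model("resnet18")
# Keyword args can come straight from a config file / dict, no argparse required.
optimizer = create_optimizer_v2(model, opt="adamw", lr=1e-3, weight_decay=0.05)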

I believe the same can be done for the scheduler too. Please let me know what is stopping us from doing this; I do not mind spending time to contribute this feature, but I am concerned that I may be missing something fundamental about schedulers that is hindering us from doing it.

Amendments can be made here: https://github.com/rwightman/pytorch-image-models/blob/7c67d6aca992f039eece0af5f7c29a43d48c00e4/timm/scheduler/scheduler_factory.py

So far I have written some rough code that should do the trick. Please let me know if this is something you are looking for in a PR, and I will submit it after testing with various training scripts:

from .cosine_lr import CosineLRScheduler
from .multistep_lr import MultiStepLRScheduler
from .plateau_lr import PlateauLRScheduler
from .poly_lr import PolyLRScheduler
from .step_lr import StepLRScheduler
from .tanh_lr import TanhLRScheduler

def scheduler_kwargs(cfg):
    """cfg/argparse to kwargs helper
    Convert scheduler args in an argparse namespace or cfg-like object to keyword args for the updated create fn.
    """
    # Pass everything through so we stay compatible with the v1 create_scheduler signature and train.py.
    kwargs = vars(cfg)
    return kwargs


def create_scheduler(args, optimizer):
    return create_scheduler_v2(optimizer, **scheduler_kwargs(cfg=args))


def create_scheduler_v2(
    optimizer,
    sched="cosine",
    epochs=300,
    min_lr=1e-6,
    warmup_lr=1e-4,
    warmup_epochs=10,
    lr_k_decay=1.0,
    decay_epochs=100,
    decay_rate=0.1,
    patience_epochs=10,
    cooldown_epochs=10,
    eval_metric="loss",  # used to pick the mode for the plateau scheduler
    lr_noise=None,
    lr_noise_pct=0.67,
    lr_noise_std=1.0,
    seed=42,
    lr_cycle_mul=1.0,
    lr_cycle_decay=0.1,
    lr_cycle_limit=1,
    **kwargs,  # absorb any extra cfg entries passed through by scheduler_kwargs
):

    schedulers_available = [
        "cosine",
        "tanh",
        "step",
        "multistep",
        "plateau",
        "poly",
        "none",
    ]
    if sched not in schedulers_available:
        raise ValueError(
            f"Scheduler '{sched}' does not exist in this library. "
            f"Available schedulers: {schedulers_available}"
        )

    num_epochs = epochs
    if sched == "none":
        return None, num_epochs

    if lr_noise is not None:
        if isinstance(lr_noise, (list, tuple)):
            noise_range = [n * num_epochs for n in lr_noise]
            if len(noise_range) == 1:
                noise_range = noise_range[0]
        else:
            noise_range = lr_noise * num_epochs
    else:
        noise_range = None

    kwargs["noise_args"] = dict(
        noise_range_t=noise_range,
        noise_pct=lr_noise_pct,
        noise_std=lr_noise_std,
        noise_seed=seed,
    )
    kwargs["cycle_args"] = dict(
        cycle_mul=lr_cycle_mul,
        cycle_decay=lr_cycle_decay,
        cycle_limit=lr_cycle_limit,
    )

    if sched == "cosine":
        lr_scheduler = CosineLRScheduler(
            optimizer,
            t_initial=num_epochs,
            lr_min=min_lr,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            k_decay=lr_k_decay,
            **kwargs["cycle_args"],
            **kwargs["noise_args"],
        )
        num_epochs = lr_scheduler.get_cycle_length() + cooldown_epochs

    elif sched == "tanh":
        lr_scheduler = TanhLRScheduler(
            optimizer,
            t_initial=num_epochs,
            lr_min=min_lr,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            t_in_epochs=True,
            **kwargs["cycle_args"],
            **kwargs["noise_args"],
        )
        num_epochs = lr_scheduler.get_cycle_length() + cooldown_epochs

    elif sched == "step":
        lr_scheduler = StepLRScheduler(
            optimizer,
            decay_t=decay_epochs,
            decay_rate=decay_rate,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            **kwargs["noise_args"],
        )

    elif sched == "multistep":
        lr_scheduler = MultiStepLRScheduler(
            optimizer,
            decay_t=decay_epochs,
            decay_rate=decay_rate,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            **kwargs["noise_args"],
        )

    elif sched == "plateau":
        mode = "min" if "loss" in kwargs["eval_metric"] else "max"
        lr_scheduler = PlateauLRScheduler(
            optimizer,
            decay_rate=decay_rate,
            patience_t=patience_epochs,
            lr_min=min_lr,
            mode=mode,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            cooldown_t=0,
            **kwargs["noise_args"],
        )
    elif sched == "poly":
        lr_scheduler = PolyLRScheduler(
            optimizer,
            power=decay_rate,  # overloading 'decay_rate' as polynomial power
            t_initial=num_epochs,
            lr_min=min_lr,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            k_decay=lr_k_decay,
            **kwargs["cycle_args"],
            **kwargs["noise_args"],
        )
        num_epochs = lr_scheduler.get_cycle_length() + cooldown_epochs

    return lr_scheduler, num_epochs
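
For completeness, this is how I would expect the new factory to be used in a training loop (a rough sketch with arbitrary model and hyperparameter values; the scheduler is stepped per epoch, as in train.py):

import torch

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Keyword args could be loaded from a config dict instead of argparse.
lr_scheduler, num_epochs = create_scheduler_v2(
    optimizer,
    sched="cosine",
    epochs=100,
    min_lr=1e-6,
    warmup_lr=1e-4,
    warmup_epochs=5,
)

for epoch in range(num_epochs):
    # ... train and validate for one epoch ...
    lr_scheduler.step(epoch + 1)  # timm schedulers step on the epoch index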
