From 67b467740732558841d9f5802a29ad57c7ad4348 Mon Sep 17 00:00:00 2001 From: Emanuele Ballarin Date: Thu, 17 Aug 2023 19:06:34 +0200 Subject: [PATCH] New scheduler: Epochwise OneCycleLR Signed-off-by: Emanuele Ballarin --- ebtorch/__init__.py | 1 + ebtorch/data/datasets.py | 576 +++++++++++++++++++------------------- ebtorch/optim/__init__.py | 1 + ebtorch/optim/custom.py | 33 +++ setup.py | 2 +- 5 files changed, 324 insertions(+), 289 deletions(-) diff --git a/ebtorch/__init__.py b/ebtorch/__init__.py index 5f79711..e034ec3 100644 --- a/ebtorch/__init__.py +++ b/ebtorch/__init__.py @@ -92,6 +92,7 @@ del ralah_optim del wfneal del tricyc1c +del epochwise_onecycle # Deletions (from .logging) del AverageMeter diff --git a/ebtorch/data/datasets.py b/ebtorch/data/datasets.py index 8aae4bb..e9b3578 100644 --- a/ebtorch/data/datasets.py +++ b/ebtorch/data/datasets.py @@ -1,288 +1,288 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Emanuele Ballarin -# Released under the terms of the MIT License -# (see: https://url.ballarin.cc/mitlicense) -# -# ------------------------------------------------------------------------------ -import os -from typing import Optional -from typing import Tuple - -from torch.utils.data import DataLoader -from torchvision.datasets import CIFAR10 -from torchvision.datasets import CIFAR100 -from torchvision.datasets import DatasetFolder -from torchvision.datasets import FashionMNIST -from torchvision.datasets import ImageFolder -from torchvision.datasets import MNIST -from torchvision.transforms import CenterCrop -from torchvision.transforms import Compose -from torchvision.transforms import RandomHorizontalFlip -from torchvision.transforms import RandomResizedCrop -from torchvision.transforms import Resize -from torchvision.transforms import ToTensor - - -data_root_literal: str = "../datasets/" -cuda_args_true: dict = {"num_workers": 8, "pin_memory": True} - - -def _dataloader_dispatcher( - dataset: str, - data_root: str = data_root_literal, - batch_size_train: Optional[int] = None, - batch_size_test: Optional[int] = None, - cuda_accel: bool = False, - unshuffle_train: bool = False, - shuffle_test: bool = False, - dataset_kwargs: Optional[dict] = None, - dataloader_kwargs: Optional[dict] = None, -) -> Tuple[DataLoader, DataLoader, DataLoader]: - if dataset == "mnist": - dataset_fx = MNIST - if batch_size_train is None: - batch_size_train: int = 256 - if batch_size_test is None: - batch_size_test: int = 512 - - elif dataset == "fashionmnist": - dataset_fx = FashionMNIST - if batch_size_train is None: - batch_size_train: int = 256 - if batch_size_test is None: - batch_size_test: int = 512 - - elif dataset == "cifar10": - dataset_fx = CIFAR10 - if batch_size_train is None: - batch_size_train: int = 256 - if batch_size_test is None: - batch_size_test: int = 512 - - elif dataset == "cifar100": - dataset_fx = CIFAR100 - if batch_size_train is None: - batch_size_train: int = 256 - if batch_size_test is None: - batch_size_test: int = 512 - - else: - raise ValueError("Dataset not supported... yet!") - - os.makedirs(name=data_root, exist_ok=True) - - transforms = Compose([ToTensor()]) - - # Address dictionary mutability as default argument - if dataset_kwargs is None: - dataset_kwargs: dict = {} - if dataloader_kwargs is None: - dataloader_kwargs: dict = {} - - trainset = dataset_fx( - root=data_root, - train=True, - transform=transforms, - download=True, - **dataset_kwargs, - ) - testset = dataset_fx( - root=data_root, - train=False, - transform=transforms, - download=True, - **dataset_kwargs, - ) - - cuda_args: dict = {} - if cuda_accel: - cuda_args: dict = cuda_args_true - - trainloader = DataLoader( - dataset=trainset, - batch_size=batch_size_train, - shuffle=(not unshuffle_train), - **cuda_args, - **dataloader_kwargs, - ) - testloader = DataLoader( - dataset=testset, - batch_size=batch_size_test, - shuffle=shuffle_test, - **cuda_args, - **dataloader_kwargs, - ) - test_on_train_loader = DataLoader( - dataset=trainset, - batch_size=batch_size_test, - shuffle=shuffle_test, - **cuda_args, - **dataloader_kwargs, - ) - - return trainloader, testloader, test_on_train_loader - - -def mnist_dataloader_dispatcher( - data_root: str = data_root_literal, - batch_size_train: Optional[int] = None, - batch_size_test: Optional[int] = None, - cuda_accel: bool = False, - unshuffle_train: bool = False, - shuffle_test: bool = False, - dataset_kwargs: Optional[dict] = None, - dataloader_kwargs: Optional[dict] = None, -) -> Tuple[DataLoader, DataLoader, DataLoader]: - return _dataloader_dispatcher( - dataset="mnist", - data_root=data_root, - batch_size_train=batch_size_train, - batch_size_test=batch_size_test, - cuda_accel=cuda_accel, - unshuffle_train=unshuffle_train, - shuffle_test=shuffle_test, - dataset_kwargs=dataset_kwargs, - dataloader_kwargs=dataloader_kwargs, - ) - - -def fashionmnist_dataloader_dispatcher( - data_root: str = data_root_literal, - batch_size_train: Optional[int] = None, - batch_size_test: Optional[int] = None, - cuda_accel: bool = False, - unshuffle_train: bool = False, - shuffle_test: bool = False, - dataset_kwargs: Optional[dict] = None, - dataloader_kwargs: Optional[dict] = None, -) -> Tuple[DataLoader, DataLoader, DataLoader]: - return _dataloader_dispatcher( - dataset="fashionmnist", - data_root=data_root, - batch_size_train=batch_size_train, - batch_size_test=batch_size_test, - cuda_accel=cuda_accel, - unshuffle_train=unshuffle_train, - shuffle_test=shuffle_test, - dataset_kwargs=dataset_kwargs, - dataloader_kwargs=dataloader_kwargs, - ) - - -def cifarten_dataloader_dispatcher( - data_root: str = data_root_literal, - batch_size_train: Optional[int] = None, - batch_size_test: Optional[int] = None, - cuda_accel: bool = False, - unshuffle_train: bool = False, - shuffle_test: bool = False, - dataset_kwargs: Optional[dict] = None, - dataloader_kwargs: Optional[dict] = None, -) -> Tuple[DataLoader, DataLoader, DataLoader]: - return _dataloader_dispatcher( - dataset="cifar10", - data_root=data_root, - batch_size_train=batch_size_train, - batch_size_test=batch_size_test, - cuda_accel=cuda_accel, - unshuffle_train=unshuffle_train, - shuffle_test=shuffle_test, - dataset_kwargs=dataset_kwargs, - dataloader_kwargs=dataloader_kwargs, - ) - - -def cifarhundred_dataloader_dispatcher( - data_root: str = data_root_literal, - batch_size_train: Optional[int] = None, - batch_size_test: Optional[int] = None, - cuda_accel: bool = False, - unshuffle_train: bool = False, - shuffle_test: bool = False, - dataset_kwargs: Optional[dict] = None, - dataloader_kwargs: Optional[dict] = None, -) -> Tuple[DataLoader, DataLoader, DataLoader]: - return _dataloader_dispatcher( - dataset="cifar100", - data_root=data_root, - batch_size_train=batch_size_train, - batch_size_test=batch_size_test, - cuda_accel=cuda_accel, - unshuffle_train=unshuffle_train, - shuffle_test=shuffle_test, - dataset_kwargs=dataset_kwargs, - dataloader_kwargs=dataloader_kwargs, - ) - - -def imagenette_dataloader_dispatcher( - data_root: str = data_root_literal, - batch_size_train: int = 64, - batch_size_test: int = 128, - cuda_accel: bool = False, - unshuffle_train: bool = False, - shuffle_test: bool = False, - dataset_kwargs: Optional[dict] = None, - dataloader_kwargs: Optional[dict] = None, -) -> Tuple[DataLoader, DataLoader, DataLoader]: - if dataset_kwargs is None: - dataset_kwargs: dict = {} - - train_ds: DatasetFolder = ImageFolder( - root=data_root + "imagenette2-320/train", - transform=Compose( - [ - RandomResizedCrop(224), - RandomHorizontalFlip(), - ToTensor(), - ] - ), - **dataset_kwargs, - ) - - test_ds: DatasetFolder = ImageFolder( - root=data_root + "imagenette2-320/val", - transform=Compose( - [ - Resize(256), - CenterCrop(224), - ToTensor(), - ] - ), - **dataset_kwargs, - ) - - if dataloader_kwargs is None: - dataloader_kwargs: dict = {} - - cuda_kwargs: dict = {} - if cuda_accel: - cuda_kwargs: dict = cuda_args_true - - train_dl: DataLoader = DataLoader( - dataset=train_ds, - batch_size=batch_size_train, - shuffle=(not unshuffle_train), - **cuda_kwargs, - **dataloader_kwargs, - ) - - test_dl: DataLoader = DataLoader( - dataset=test_ds, - batch_size=batch_size_test, - shuffle=shuffle_test, - **cuda_kwargs, - **dataloader_kwargs, - ) - - tot_dl: DataLoader = DataLoader( - dataset=train_ds, - batch_size=batch_size_test, - shuffle=shuffle_test, - **cuda_kwargs, - **dataloader_kwargs, - ) - - return train_dl, test_dl, tot_dl +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Emanuele Ballarin +# Released under the terms of the MIT License +# (see: https://url.ballarin.cc/mitlicense) +# +# ------------------------------------------------------------------------------ +import os +from typing import Optional +from typing import Tuple + +from torch.utils.data import DataLoader +from torchvision.datasets import CIFAR10 +from torchvision.datasets import CIFAR100 +from torchvision.datasets import DatasetFolder +from torchvision.datasets import FashionMNIST +from torchvision.datasets import ImageFolder +from torchvision.datasets import MNIST +from torchvision.transforms import CenterCrop +from torchvision.transforms import Compose +from torchvision.transforms import RandomHorizontalFlip +from torchvision.transforms import RandomResizedCrop +from torchvision.transforms import Resize +from torchvision.transforms import ToTensor + + +data_root_literal: str = "../datasets/" +cuda_args_true: dict = {"num_workers": 8, "pin_memory": True} + + +def _dataloader_dispatcher( + dataset: str, + data_root: str = data_root_literal, + batch_size_train: Optional[int] = None, + batch_size_test: Optional[int] = None, + cuda_accel: bool = False, + unshuffle_train: bool = False, + shuffle_test: bool = False, + dataset_kwargs: Optional[dict] = None, + dataloader_kwargs: Optional[dict] = None, +) -> Tuple[DataLoader, DataLoader, DataLoader]: + if dataset == "mnist": + dataset_fx = MNIST + if batch_size_train is None: + batch_size_train: int = 256 + if batch_size_test is None: + batch_size_test: int = 512 + + elif dataset == "fashionmnist": + dataset_fx = FashionMNIST + if batch_size_train is None: + batch_size_train: int = 256 + if batch_size_test is None: + batch_size_test: int = 512 + + elif dataset == "cifar10": + dataset_fx = CIFAR10 + if batch_size_train is None: + batch_size_train: int = 256 + if batch_size_test is None: + batch_size_test: int = 512 + + elif dataset == "cifar100": + dataset_fx = CIFAR100 + if batch_size_train is None: + batch_size_train: int = 256 + if batch_size_test is None: + batch_size_test: int = 512 + + else: + raise ValueError("Dataset not supported... yet!") + + os.makedirs(name=data_root, exist_ok=True) + + transforms = Compose([ToTensor()]) + + # Address dictionary mutability as default argument + if dataset_kwargs is None: + dataset_kwargs: dict = {} + if dataloader_kwargs is None: + dataloader_kwargs: dict = {} + + trainset = dataset_fx( + root=data_root, + train=True, + transform=transforms, + download=True, + **dataset_kwargs, + ) + testset = dataset_fx( + root=data_root, + train=False, + transform=transforms, + download=True, + **dataset_kwargs, + ) + + cuda_args: dict = {} + if cuda_accel: + cuda_args: dict = cuda_args_true + + trainloader = DataLoader( + dataset=trainset, + batch_size=batch_size_train, + shuffle=(not unshuffle_train), + **cuda_args, + **dataloader_kwargs, + ) + testloader = DataLoader( + dataset=testset, + batch_size=batch_size_test, + shuffle=shuffle_test, + **cuda_args, + **dataloader_kwargs, + ) + test_on_train_loader = DataLoader( + dataset=trainset, + batch_size=batch_size_test, + shuffle=shuffle_test, + **cuda_args, + **dataloader_kwargs, + ) + + return trainloader, testloader, test_on_train_loader + + +def mnist_dataloader_dispatcher( + data_root: str = data_root_literal, + batch_size_train: Optional[int] = None, + batch_size_test: Optional[int] = None, + cuda_accel: bool = False, + unshuffle_train: bool = False, + shuffle_test: bool = False, + dataset_kwargs: Optional[dict] = None, + dataloader_kwargs: Optional[dict] = None, +) -> Tuple[DataLoader, DataLoader, DataLoader]: + return _dataloader_dispatcher( + dataset="mnist", + data_root=data_root, + batch_size_train=batch_size_train, + batch_size_test=batch_size_test, + cuda_accel=cuda_accel, + unshuffle_train=unshuffle_train, + shuffle_test=shuffle_test, + dataset_kwargs=dataset_kwargs, + dataloader_kwargs=dataloader_kwargs, + ) + + +def fashionmnist_dataloader_dispatcher( + data_root: str = data_root_literal, + batch_size_train: Optional[int] = None, + batch_size_test: Optional[int] = None, + cuda_accel: bool = False, + unshuffle_train: bool = False, + shuffle_test: bool = False, + dataset_kwargs: Optional[dict] = None, + dataloader_kwargs: Optional[dict] = None, +) -> Tuple[DataLoader, DataLoader, DataLoader]: + return _dataloader_dispatcher( + dataset="fashionmnist", + data_root=data_root, + batch_size_train=batch_size_train, + batch_size_test=batch_size_test, + cuda_accel=cuda_accel, + unshuffle_train=unshuffle_train, + shuffle_test=shuffle_test, + dataset_kwargs=dataset_kwargs, + dataloader_kwargs=dataloader_kwargs, + ) + + +def cifarten_dataloader_dispatcher( + data_root: str = data_root_literal, + batch_size_train: Optional[int] = None, + batch_size_test: Optional[int] = None, + cuda_accel: bool = False, + unshuffle_train: bool = False, + shuffle_test: bool = False, + dataset_kwargs: Optional[dict] = None, + dataloader_kwargs: Optional[dict] = None, +) -> Tuple[DataLoader, DataLoader, DataLoader]: + return _dataloader_dispatcher( + dataset="cifar10", + data_root=data_root, + batch_size_train=batch_size_train, + batch_size_test=batch_size_test, + cuda_accel=cuda_accel, + unshuffle_train=unshuffle_train, + shuffle_test=shuffle_test, + dataset_kwargs=dataset_kwargs, + dataloader_kwargs=dataloader_kwargs, + ) + + +def cifarhundred_dataloader_dispatcher( + data_root: str = data_root_literal, + batch_size_train: Optional[int] = None, + batch_size_test: Optional[int] = None, + cuda_accel: bool = False, + unshuffle_train: bool = False, + shuffle_test: bool = False, + dataset_kwargs: Optional[dict] = None, + dataloader_kwargs: Optional[dict] = None, +) -> Tuple[DataLoader, DataLoader, DataLoader]: + return _dataloader_dispatcher( + dataset="cifar100", + data_root=data_root, + batch_size_train=batch_size_train, + batch_size_test=batch_size_test, + cuda_accel=cuda_accel, + unshuffle_train=unshuffle_train, + shuffle_test=shuffle_test, + dataset_kwargs=dataset_kwargs, + dataloader_kwargs=dataloader_kwargs, + ) + + +def imagenette_dataloader_dispatcher( + data_root: str = data_root_literal, + batch_size_train: int = 64, + batch_size_test: int = 128, + cuda_accel: bool = False, + unshuffle_train: bool = False, + shuffle_test: bool = False, + dataset_kwargs: Optional[dict] = None, + dataloader_kwargs: Optional[dict] = None, +) -> Tuple[DataLoader, DataLoader, DataLoader]: + if dataset_kwargs is None: + dataset_kwargs: dict = {} + + train_ds: DatasetFolder = ImageFolder( + root=data_root + "imagenette2-320/train", + transform=Compose( + [ + RandomResizedCrop(224), + RandomHorizontalFlip(), + ToTensor(), + ] + ), + **dataset_kwargs, + ) + + test_ds: DatasetFolder = ImageFolder( + root=data_root + "imagenette2-320/val", + transform=Compose( + [ + Resize(256), + CenterCrop(224), + ToTensor(), + ] + ), + **dataset_kwargs, + ) + + if dataloader_kwargs is None: + dataloader_kwargs: dict = {} + + cuda_kwargs: dict = {} + if cuda_accel: + cuda_kwargs: dict = cuda_args_true + + train_dl: DataLoader = DataLoader( + dataset=train_ds, + batch_size=batch_size_train, + shuffle=(not unshuffle_train), + **cuda_kwargs, + **dataloader_kwargs, + ) + + test_dl: DataLoader = DataLoader( + dataset=test_ds, + batch_size=batch_size_test, + shuffle=shuffle_test, + **cuda_kwargs, + **dataloader_kwargs, + ) + + tot_dl: DataLoader = DataLoader( + dataset=train_ds, + batch_size=batch_size_test, + shuffle=shuffle_test, + **cuda_kwargs, + **dataloader_kwargs, + ) + + return train_dl, test_dl, tot_dl diff --git a/ebtorch/optim/__init__.py b/ebtorch/optim/__init__.py index 91c676d..68d2e7c 100644 --- a/ebtorch/optim/__init__.py +++ b/ebtorch/optim/__init__.py @@ -25,6 +25,7 @@ from .adahessian import AdaHessian from .adan import Adan from .autowu import AutoWU +from .custom import epochwise_onecycle from .custom import ralah_optim from .custom import tricyc1c from .custom import wfneal diff --git a/ebtorch/optim/custom.py b/ebtorch/optim/custom.py index 035ea5e..d961158 100644 --- a/ebtorch/optim/custom.py +++ b/ebtorch/optim/custom.py @@ -148,3 +148,36 @@ def tricyc1c( # Return return optim, sched + + +def epochwise_onecycle( + optim: torch.optim.Optimizer, + init_lr: float, + max_lr: float, + final_lr: float, + up_frac: float, + total_steps: int, +) -> Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LRScheduler]: + """Epochwise OneCycleLR learning rate scheduler.""" + + # Prepare optim + for grp in optim.param_groups: + grp["lr"] = init_lr + + sched = torch.optim.lr_scheduler.OneCycleLR( + optimizer=optim, + max_lr=max_lr, + total_steps=total_steps, + epochs=total_steps, + steps_per_epoch=1, + pct_start=up_frac, + anneal_strategy="cos", + cycle_momentum=False, + div_factor=max_lr / init_lr, + final_div_factor=init_lr / final_lr, + three_phase=False, + verbose=False, + ) + + # Return + return optim, sched diff --git a/setup.py b/setup.py index 6834d03..7126bd0 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ def check_dependencies(dependencies: list[str]): setup( name=PACKAGENAME, - version="0.10.2", + version="0.11.0", author="Emanuele Ballarin", author_email="emanuele@ballarin.cc", url="https://github.com/emaballarin/ebtorch",