-
Notifications
You must be signed in to change notification settings - Fork 119
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
jbloom-md
committed
Dec 10, 2023
1 parent
3843c39
commit ce73042
Showing
11 changed files
with
233 additions
and
145 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
''' | ||
Took the LR scheduler from my previous work: https://github.com/jbloomAus/DecisionTransformerInterpretability/blob/ee55df35cdb92e81d689c72fb9dd5a7252893363/src/decision_transformer/utils.py#L425 | ||
''' | ||
import math | ||
from typing import Optional | ||
|
||
import torch.optim as optim | ||
import torch.optim.lr_scheduler as lr_scheduler | ||
|
||
|
||
# Supported schedules:
#   None / "constant"              - constant LR
#   "constantwithwarmup"           - linear warmup, then constant
#   "linearwarmupdecay"            - linear warmup, then linear decay to 0
#   "cosineannealing"              - cosine annealing (no warmup)
#   "cosineannealingwarmup"        - linear warmup, then cosine decay
#   "cosineannealingwarmrestarts"  - cosine annealing with warm restarts
def get_scheduler(
    scheduler_name: Optional[str], optimizer: optim.Optimizer, **kwargs
):
    """
    Build a learning-rate scheduler by name.

    Loosely based on this, seemed simpler to write than import
    transformers: https://huggingface.co/docs/transformers/main_classes/optimizer_schedules

    Args:
        scheduler_name (Optional[str]): Name of the scheduler to use
            (case-insensitive). If None or "constant", returns a constant
            scheduler.
        optimizer (optim.Optimizer): Optimizer whose learning rate is scheduled.
        **kwargs: Additional arguments to pass to the scheduler, including
            warm_up_steps (default 0), training_steps (required by the
            decaying schedules), num_cycles (default 1), and lr_end
            (default 0; a multiplier on the base LR for the warmup-cosine
            schedule, an absolute eta_min for the CosineAnnealing* ones).

    Raises:
        ValueError: If scheduler_name is not one of the supported schedules,
            or if training_steps is missing for a schedule that requires it.
    """

    def _require_training_steps():
        # Fail early with a clear message rather than letting a None leak
        # into arithmetic below and raise an opaque TypeError.
        training_steps = kwargs.get("training_steps")
        if training_steps is None:
            raise ValueError(
                f"training_steps must be provided for scheduler: {scheduler_name}"
            )
        return training_steps

    def get_warmup_lambda(warm_up_steps, training_steps):
        # Linear ramp from 1/warm_up_steps up to 1.0, then linear decay
        # reaching 0 at training_steps.
        def lr_lambda(steps):
            if steps < warm_up_steps:
                return (steps + 1) / warm_up_steps
            return (training_steps - steps) / (training_steps - warm_up_steps)

        return lr_lambda

    # heavily derived from hugging face although copilot helped.
    def get_warmup_cosine_lambda(warm_up_steps, training_steps, lr_end):
        # Linear ramp up, then a half-cosine decay from 1.0 down to lr_end.
        def lr_lambda(steps):
            if steps < warm_up_steps:
                return (steps + 1) / warm_up_steps
            progress = (steps - warm_up_steps) / (training_steps - warm_up_steps)
            return lr_end + 0.5 * (1 - lr_end) * (1 + math.cos(math.pi * progress))

        return lr_lambda

    # Normalize the name once instead of calling .lower() in every branch.
    name = None if scheduler_name is None else scheduler_name.lower()

    if name is None or name == "constant":
        return lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda steps: 1.0)
    elif name == "constantwithwarmup":
        # max(1, ...) guards the ZeroDivisionError that the default
        # warm_up_steps=0 would otherwise cause; it degrades gracefully
        # to a constant schedule.
        warm_up_steps = max(1, kwargs.get("warm_up_steps", 0))
        return lr_scheduler.LambdaLR(
            optimizer,
            lr_lambda=lambda steps: min(1.0, (steps + 1) / warm_up_steps),
        )
    elif name == "linearwarmupdecay":
        warm_up_steps = kwargs.get("warm_up_steps", 0)
        training_steps = _require_training_steps()
        lr_lambda = get_warmup_lambda(warm_up_steps, training_steps)
        return lr_scheduler.LambdaLR(optimizer, lr_lambda)
    elif name == "cosineannealing":
        training_steps = _require_training_steps()
        eta_min = kwargs.get("lr_end", 0)
        return lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=training_steps, eta_min=eta_min
        )
    elif name == "cosineannealingwarmup":
        warm_up_steps = kwargs.get("warm_up_steps", 0)
        training_steps = _require_training_steps()
        eta_min = kwargs.get("lr_end", 0)
        lr_lambda = get_warmup_cosine_lambda(warm_up_steps, training_steps, eta_min)
        return lr_scheduler.LambdaLR(optimizer, lr_lambda)
    elif name == "cosineannealingwarmrestarts":
        training_steps = _require_training_steps()
        eta_min = kwargs.get("lr_end", 0)
        num_cycles = kwargs.get("num_cycles", 1)
        # Each cycle restarts after training_steps // num_cycles steps.
        T_0 = training_steps // num_cycles
        return lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=T_0, eta_min=eta_min
        )
    else:
        raise ValueError(f"Unsupported scheduler: {scheduler_name}")
Oops, something went wrong.