Commit

Check out to native AdamW
yzhangcs committed Jun 27, 2023
1 parent 854c7ba · commit 21afbee
Showing 1 changed file with 1 addition and 3 deletions.
4 changes: 1 addition & 3 deletions supar/parser.py
@@ -16,7 +16,7 @@
 import torch.distributed as dist
 import torch.nn as nn
 from torch.cuda.amp import GradScaler
-from torch.optim import Adam, Optimizer
+from torch.optim import Adam, AdamW, Optimizer
 from torch.optim.lr_scheduler import ExponentialLR, _LRScheduler
 
 import supar
@@ -501,8 +501,6 @@ def init_optimizer(self) -> Optimizer:
                              eps=self.args.get('eps', 1e-8),
                              weight_decay=self.args.get('weight_decay', 0))
         else:
-            # we found that Huggingface's AdamW is more robust and empirically better than the native implementation
-            from transformers import AdamW
             optimizer = AdamW(params=[{'params': p, 'lr': self.args.lr * (1 if n.startswith('encoder') else self.args.lr_rate)}
                                       for n, p in self.model.named_parameters()],
                               lr=self.args.lr,
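In effect, the change swaps HuggingFace's transformers.AdamW for PyTorch's native torch.optim.AdamW while keeping the same per-parameter-group learning-rate scheme: one addition for the new import, three deletions for the old comment, the transformers import, and the old torch.optim import line. Below is a minimal, self-contained sketch of that parameter-group pattern with the native AdamW; the toy model and hyperparameter values are placeholders, not taken from supar.

# Minimal sketch (placeholders, not supar code): native torch.optim.AdamW
# with a distinct learning rate for encoder vs. non-encoder parameters,
# mirroring the parameter-group pattern kept by the diff above.
import torch.nn as nn
from torch.optim import AdamW

model = nn.ModuleDict({
    'encoder': nn.Linear(16, 16),  # parameter names start with 'encoder.'
    'decoder': nn.Linear(16, 4),   # parameter names start with 'decoder.'
})

lr, lr_rate = 5e-5, 20  # placeholder values; supar reads these from self.args

optimizer = AdamW(
    params=[{'params': p,
             'lr': lr * (1 if n.startswith('encoder') else lr_rate)}
            for n, p in model.named_parameters()],
    lr=lr,                # default lr for any group that omits its own 'lr'
    betas=(0.9, 0.999),
    eps=1e-8,
    weight_decay=0)

Since both AdamW implementations accept these keyword arguments, the switch only needs to touch the import, which is why the diff amounts to one changed import line plus two deleted lines in init_optimizer.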

