-
Notifications
You must be signed in to change notification settings - Fork 155
/
train.py
82 lines (64 loc) · 2.79 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
from argparse import ArgumentParser
import torch
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger, TensorBoardLogger
from data import CIFAR10Data
from module import CIFAR10Module
def main(args):
    """Download pretrained weights, or train / evaluate a CIFAR-10 classifier.

    Args:
        args: Parsed command-line namespace. This function reads
            ``download_weights``, ``gpu_id``, ``logger``, ``classifier``,
            ``dev``, ``test_phase``, ``max_epochs``, ``precision`` and
            ``pretrained``; the whole namespace is also forwarded to
            ``CIFAR10Module`` and ``CIFAR10Data``.

    Raises:
        ValueError: If a logger is required (neither ``dev`` nor
            ``test_phase`` is set) and ``args.logger`` is not ``"wandb"``
            or ``"tensorboard"``.
    """
    # Download-only mode: fetch the weights and do nothing else.
    if bool(args.download_weights):
        CIFAR10Data.download_weights()
        return

    seed_everything(0)
    # The Trainer below is created with gpus=-1, so this variable is what
    # restricts which devices the run can see.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

    # Dev runs and test-only runs are not logged.
    logging_enabled = not (args.dev or args.test_phase)

    if args.logger == "wandb":
        logger = WandbLogger(name=args.classifier, project="cifar10")
    elif args.logger == "tensorboard":
        logger = TensorBoardLogger("cifar10", name=args.classifier)
    elif logging_enabled:
        # Fail with a clear message instead of an UnboundLocalError when the
        # function is called with a logger name argparse would have rejected.
        raise ValueError(
            "Unsupported logger {!r}; expected 'wandb' or 'tensorboard'.".format(
                args.logger
            )
        )
    else:
        logger = None

    # Keep the checkpoint with the highest validation accuracy.
    checkpoint = ModelCheckpoint(monitor="acc/val", mode="max", save_last=False)

    trainer = Trainer(
        fast_dev_run=bool(args.dev),
        logger=logger if logging_enabled else None,
        gpus=-1,
        deterministic=True,
        weights_summary=None,
        log_every_n_steps=1,
        max_epochs=args.max_epochs,
        checkpoint_callback=checkpoint,
        precision=args.precision,
    )

    model = CIFAR10Module(args)
    data = CIFAR10Data(args)

    if bool(args.pretrained):
        weights_path = os.path.join(
            "cifar10_models", "state_dicts", args.classifier + ".pt"
        )
        # Deserialize onto the CPU: the file may reference a CUDA device index
        # that no longer exists once CUDA_VISIBLE_DEVICES has remapped the
        # visible GPUs. load_state_dict copies the values into the model's
        # existing parameters, so the load device does not matter.
        model.model.load_state_dict(torch.load(weights_path, map_location="cpu"))

    if bool(args.test_phase):
        trainer.test(model, data.test_dataloader())
    else:
        trainer.fit(model, data)
        # NOTE(review): called without arguments; which weights are evaluated
        # depends on the installed pytorch_lightning version - confirm.
        trainer.test()
if __name__ == "__main__":
    # Script entry point: declare the command-line options, parse them and
    # hand the resulting namespace to main().
    parser = ArgumentParser()

    # Shared definition of the integer on/off switches (0 = off, 1 = on).
    binary_switch = dict(type=int, default=0, choices=[0, 1])

    # PROGRAM level args
    parser.add_argument("--data_dir", default="/data/huy/cifar10", type=str)
    for switch in ("--download_weights", "--test_phase", "--dev"):
        parser.add_argument(switch, **binary_switch)
    parser.add_argument(
        "--logger",
        type=str,
        default="tensorboard",
        choices=["tensorboard", "wandb"],
    )

    # TRAINER args
    parser.add_argument("--classifier", default="resnet18", type=str)
    parser.add_argument("--pretrained", **binary_switch)
    parser.add_argument("--precision", default=32, type=int, choices=[16, 32])
    parser.add_argument("--batch_size", default=256, type=int)
    parser.add_argument("--max_epochs", default=100, type=int)
    parser.add_argument("--num_workers", default=8, type=int)
    parser.add_argument("--gpu_id", default="3", type=str)
    parser.add_argument("--learning_rate", default=1e-2, type=float)
    parser.add_argument("--weight_decay", default=1e-2, type=float)

    args = parser.parse_args()
    main(args)