-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
127 lines (99 loc) · 3.94 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import argparse
import copy
import os
import torch
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch import nn
from torch.utils.data.dataloader import DataLoader
from tqdm import tqdm
from dataset import EvalDataset, TrainDataset
from srcnn import SRCNN
from utils import AverageMeter, calculate_psnr
def parse_args(argv=None):
    """Parse the command-line options for a training run.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--train-file", type=str, required=True, help="training dataset"
    )
    parser.add_argument(
        "--eval-file", type=str, required=True, help="evaluation dataset"
    )
    parser.add_argument(
        "--outputs-dir", type=str, required=True, help="outputs directory"
    )
    parser.add_argument("--scale", type=int, default=3, help="super resolution scale")
    parser.add_argument("--lr", type=float, default=1e-4, help="learning rate")
    parser.add_argument("--batch-size", type=int, default=16, help="batch size")
    parser.add_argument("--num-epochs", type=int, default=400, help="number of epochs")
    parser.add_argument("--num-workers", type=int, default=8, help="number of workers")
    parser.add_argument("--seed", type=int, default=123, help="seed")
    return parser.parse_args(argv)


def epoch_checkpoint_path(outputs_dir, epoch):
    """Return the checkpoint file path for ``epoch`` inside ``outputs_dir``.

    The epoch number is interpolated into the file name so that each epoch
    gets its own file instead of all epochs overwriting a single one.
    """
    return os.path.join(outputs_dir, f"epoch_{epoch}.pth")


def train_one_epoch(
    model, dataloader, criterion, optimizer, device, total, description
):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; switched to training mode here.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(preds, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.
        total: Number of samples the progress bar should expect.
        description: Text shown in front of the progress bar.

    Returns:
        The ``avg`` attribute of the ``AverageMeter`` that was updated with
        each batch loss (presumably the sample-weighted mean loss; see
        ``utils.AverageMeter`` to confirm).
    """
    model.train()
    epoch_losses = AverageMeter()
    with tqdm(total=total) as progress_bar:
        progress_bar.set_description(description)
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            preds = model(inputs)
            loss = criterion(preds, labels)
            epoch_losses.update(loss.item(), len(inputs))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            progress_bar.set_postfix(loss=f"{epoch_losses.avg:.6f}")
            progress_bar.update(len(inputs))
    return epoch_losses.avg


def evaluate(model, dataloader, device):
    """Compute the PSNR of ``model`` over ``dataloader``.

    The model is switched to eval mode and predictions are clamped to
    ``[0.0, 1.0]`` before being passed to ``calculate_psnr``.

    Returns:
        The ``avg`` attribute of the ``AverageMeter`` that was updated with
        each batch PSNR.
    """
    model.eval()
    epoch_psnr = AverageMeter()
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        with torch.no_grad():
            preds = model(inputs).clamp(0.0, 1.0)
        epoch_psnr.update(calculate_psnr(preds, labels), len(inputs))
    return epoch_psnr.avg


def main():
    """Train SRCNN, checkpoint every epoch, and keep the best-PSNR weights.

    Outputs are written to ``<outputs-dir>/x<scale>/``: one
    ``epoch_<n>.pth`` per epoch plus ``best.pth`` holding the weights of the
    epoch with the highest evaluation PSNR.
    """
    args = parse_args()
    args.outputs_dir = os.path.join(args.outputs_dir, f"x{args.scale}")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(args.outputs_dir, exist_ok=True)

    cudnn.benchmark = True
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    torch.manual_seed(args.seed)

    model = SRCNN().to(device)
    criterion = nn.MSELoss()
    # conv3 gets its own parameter group at one tenth of the base rate.
    optimizer = optim.Adam(
        [
            {"params": model.conv1.parameters()},
            {"params": model.conv2.parameters()},
            {"params": model.conv3.parameters(), "lr": args.lr * 0.1},
        ],
        lr=args.lr,
    )

    train_dataset = TrainDataset(args.train_file)
    train_dataloader = DataLoader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True,
    )
    eval_dataset = EvalDataset(args.eval_file)
    eval_dataloader = DataLoader(dataset=eval_dataset, batch_size=1)

    # drop_last=True discards the trailing partial batch, so the progress
    # bar total is the dataset size rounded down to a batch multiple.
    samples_per_epoch = len(train_dataset) - len(train_dataset) % args.batch_size

    best_weights = copy.deepcopy(model.state_dict())
    best_epoch = 0
    best_psnr = 0.0

    for epoch in range(args.num_epochs):
        train_one_epoch(
            model,
            train_dataloader,
            criterion,
            optimizer,
            device,
            total=samples_per_epoch,
            description=f"epoch: {epoch}/{args.num_epochs - 1}",
        )

        torch.save(
            model.state_dict(),
            epoch_checkpoint_path(args.outputs_dir, epoch),
        )

        psnr = evaluate(model, eval_dataloader, device)
        print(f"eval psnr: {psnr:.2f}")

        if psnr > best_psnr:
            best_epoch = epoch
            best_psnr = psnr
            # Deep copy so later optimizer steps do not mutate the snapshot.
            best_weights = copy.deepcopy(model.state_dict())

    print(f"best epoch: {best_epoch}, psnr: {best_psnr:.2f}")
    torch.save(best_weights, os.path.join(args.outputs_dir, "best.pth"))


if __name__ == "__main__":
    main()