# fast-b.yml
# Experiment options for the run named below: training schedule, dataset
# splits, model, optimizer and checkpoint paths.
#
# NOTE(review): the nesting in this file was reconstructed from a copy whose
# indentation had been flattened (which made keys such as `type`, `args` and
# `fragments_h` collide as duplicates in one top-level mapping). Confirm the
# structure against the code that loads it -- in particular that `clip_len`,
# `frame_interval` and `num_clips` belong directly under `args` (as written
# here) and not under `sample_types.fragments`.
name: FAST-VQA-B-Refactor-1*4
num_epochs: 30
l_num_epochs: 0
warmup_epochs: 2.5
ema: true
save_model: true
batch_size: 16
num_workers: 6

wandb:
  project_name: VQA_Experiments_2022

# One entry per dataset split. Every split uses the same `fragments` sampling
# parameters; `train` uses num_clips 1, the `val-*` splits use num_clips 4.
data:
  train:
    type: FusionDataset
    args:
      phase: train
      anno_file: ./examplar_data_labels/train_labels.txt
      data_prefix: ../datasets/LSVQ
      sample_types:
        fragments:
          fragments_h: 7
          fragments_w: 7
          fsize_h: 32
          fsize_w: 32
          aligned: 32
      clip_len: 32
      frame_interval: 2
      num_clips: 1

  val-livevqc:
    type: FusionDataset
    args:
      phase: test
      anno_file: ./examplar_data_labels/LIVE_VQC/labels.txt
      data_prefix: ../datasets/LIVE_VQC/
      sample_types:
        fragments:
          fragments_h: 7
          fragments_w: 7
          fsize_h: 32
          fsize_w: 32
          aligned: 32
      clip_len: 32
      frame_interval: 2
      num_clips: 4

  val-kv1k:
    type: FusionDataset
    args:
      phase: test
      anno_file: ./examplar_data_labels/KoNViD/labels.txt
      data_prefix: ../datasets/KoNViD/
      sample_types:
        # Disabled alternative sample type, kept for reference:
        # resize:
        #   size_h: 224
        #   size_w: 224
        fragments:
          fragments_h: 7
          fragments_w: 7
          fsize_h: 32
          fsize_w: 32
          aligned: 32
      clip_len: 32
      frame_interval: 2
      num_clips: 4

  val-ltest:
    type: FusionDataset
    args:
      phase: test
      anno_file: ./examplar_data_labels/LSVQ/labels_test.txt
      data_prefix: ../datasets/LSVQ/
      sample_types:
        # Disabled alternative sample type, kept for reference:
        # resize:
        #   size_h: 224
        #   size_w: 224
        fragments:
          fragments_h: 7
          fragments_w: 7
          fsize_h: 32
          fsize_w: 32
          aligned: 32
      clip_len: 32
      frame_interval: 2
      num_clips: 4

  val-l1080p:
    type: FusionDataset
    args:
      phase: test
      anno_file: ./examplar_data_labels/LSVQ/labels_1080p.txt
      data_prefix: ../datasets/LSVQ/
      sample_types:
        # Disabled alternative sample type, kept for reference:
        # resize:
        #   size_h: 224
        #   size_w: 224
        fragments:
          fragments_h: 7
          fragments_w: 7
          fsize_h: 32
          fsize_w: 32
          aligned: 32
      clip_len: 32
      frame_interval: 2
      num_clips: 4

model:
  type: DiViDeAddEvaluator
  args:
    backbone:
      fragments:
        checkpoint: false
        # Was a bare `pretrained:`; that already parses as null, written
        # explicitly here so the empty value is clearly intentional.
        pretrained: null
    backbone_size: swin_tiny_grpb
    backbone_preserve_keys: fragments
    divide_head: false
    vqa_head:
      in_channels: 768
      hidden_channels: 64

optimizer:
  # The explicit !!float tag forces a float: some YAML 1.1 loaders read a
  # bare exponent literal without a decimal point (1e-3) as a string.
  lr: !!float 1e-3
  backbone_lr_mult: !!float 1e-1
  wd: 0.05

load_path: ../pretrained/swin_tiny_patch244_window877_kinetics400_1k.pth
test_load_path: ./pretrained_weights/FAST_VQA_B_1*4.pth