-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathaudio_talking_heads.json
177 lines (167 loc) · 7.59 KB
/
audio_talking_heads.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
{
"name": "TalkingHeadDataset", // experiments name
"gpu_ids": [0,1,2,3], // gpu ids list, default is single 0
"seed" : -1, // random seed, seed <0 represents randomization not used
"finetune_norm": false, // find the parameters to optimize
"path": { //set every part file path
"base_dir": "experiments", // base path for all log except resume_state
"code": "code", // code backup
"tb_logger": "tb_logger", // path of tensorboard logger
"results": "results",
"checkpoint": "checkpoint",
"resume_state": "weights/multi-speaker_checkpoint/735"
//"resume_state": null
},
"datasets": { // train or test
"train": {
"which_dataset": { // import designated dataset using arguments
"name":["data.dataset", "VideoEditing"], // import Dataset() class / function(not recommend) from data.dataset.py (default is [data.dataset.py])
"args":{ // arguments to initialize dataset
"img_dataset_root": "datasets/CREMAD/Cropped_Frames_Train",
"audio_path": "datasets/CREMAD/Audio_16Khz_Numpy",
"mask_mode": "face_mask_square"
}
},
"dataloader":{
"validation_split": 2, // percent or number
"args":{ // arguments to initialize train_dataloader
"batch_size": 40, // batch size in each gpu
"num_workers": 40,
"shuffle": true,
"pin_memory": true,
"drop_last": true
},
"val_args":{ // arguments to initialize valid_dataloader, will overwrite the parameters in train_dataloader
"batch_size": 2, // batch size in each gpu
"num_workers": 0,
"shuffle": false,
"pin_memory": true,
"drop_last": false
}
}
},
"test": {
"which_dataset": {
"name": "VideoEditing", // import Dataset() class / function(not recommend) from default file
"args":{
"img_data_root": "datasets/CREMAD/Cropped_Frames_Train",
"audio_data_root": "datasets/CREMAD/Audio_16Khz_Numpy",
"mask_mode": "face_mask_square"
}
},
"dataloader":{
"args":{
"batch_size": 1,
"num_workers": 0,
"pin_memory": true
}
}
},
"infer": {
"which_dataset": { // import designated dataset using arguments
"name": ["data.dataset", "VideoEditing_Inference"], // import Dataset() class / function(not recommend) from data.dataset.py (default is [data.dataset.py])
"args":{ // arguments to initialize dataset
"img_dataset_root": "datasets/CREMAD/Cropped_Frames_Test/15",
"audio_path": "datasets/CREMAD/Audio_16Khz_Numpy",
"mask_mode": "face_mask_square"
}
},
"dataloader":{
"args":{
"batch_size": 1,
"num_workers": 0,
"pin_memory": true
}
}
}
},
"model": { // networks/metrics/losses/optimizers/lr_schedulers is a list and model is a dict
"which_model": { // import designated model(trainer) using arguments
"name": ["models.model", "Palette"], // import Model() class / function(not recommend) from models.model.py (default is [models.model.py])
"args": {
"sample_num": 8, // process of each image
"task": "inpainting",
"ema_scheduler": {
"ema_start": 1,
"ema_iter": 1,
"ema_decay": 0.9999
},
"optimizers": [
{ "lr": 5e-5, "weight_decay": 0}
]
}
},
"which_networks": [ // import designated list of networks using arguments
{
"name": ["models.network", "Network"], // import Network() class / function(not recommend) from default file (default is [models/network.py])
"args": { // arguments to initialize network
"init_type": "kaiming", // method can be [normal | xavier| xavier_uniform | kaiming | orthogonal], default is kaiming
"module_name": "guided_diffusion", // sr3 | guided_diffusion
"unet": {
"in_channel": 9,
"out_channel": 3,
"inner_channel": 64,
"use_audio_emb": "True",
"channel_mults": [
1,
2,
3
],
"attn_res": [
32
//16
// 8
],
"num_head_channels": 64,
"res_blocks": 2,
"dropout": 0.1,
"image_size": 128
},
"beta_schedule": {
"train": {
"schedule": "cosine",
"n_timestep": 1000,
// "n_timestep": 10, // debug
"linear_start": 0.0015,
"linear_end": 0.0195
},
"test": {
"schedule": "cosine",
"n_timestep": 1000,
// "n_timestep": 10, // debug
"linear_start": 0.0015,
"linear_end": 0.0195
},
"infer": {
"schedule": "cosine",
"n_timestep": 100,
// "n_timestep": 10, // debug
"linear_start": 0.0015,
"linear_end": 0.0195
}
}
}
}
],
"which_losses": [ // import designated list of losses without arguments
"mse_loss" // import mse_loss() function/class from default file (default is [models/losses.py]), equivalent to { "name": "mse_loss", "args":{}}
],
"which_metrics": [ // import designated list of metrics without arguments
"mae" // import mae() function/class from default file (default is [models/metrics.py]), equivalent to { "name": "mae", "args":{}}
]
},
"train": { // arguments for basic training
"n_epoch": 1e8, // max epochs, not limited now
"n_iter": 1e8, // max interations
"val_epoch": 5, // valdation every specified number of epochs
"save_checkpoint_epoch": 5,
"log_iter": 1e3, // log every specified number of iterations
"tensorboard" : false // tensorboardX enable
},
"debug": { // arguments in debug mode, which will replace arguments in train
"val_epoch": 1,
"save_checkpoint_epoch": 1,
"log_iter": 2,
"debug_split": 50 // percent or number, change the size of dataloder to debug_split.
}
}