-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathconfig.py
103 lines (96 loc) · 3.27 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from dataclasses import dataclass
from typing import ClassVar, List, Optional

import torch

from .conditioner_config import ConditionerConfig
@dataclass
class DataConfig:
    """Dataset / dataloader settings for the audio training pipeline.

    NOTE: the original attributes carried no type annotations, so
    ``@dataclass`` generated no fields (``DataConfig(batch_size=8)`` raised
    TypeError). Annotating them makes the dataclass functional while keeping
    class-level access (``DataConfig.batch_size``) working as before.
    """
    dataset_dir: str = ''
    sr: int = 48000          # audio sample rate (Hz)
    channels: int = 2        # number of audio channels (2 = stereo)
    # min_duration, max_duration, and sample_duration are all listed in seconds.
    min_duration: int = 0
    max_duration: int = 300
    # This length determines the length of the latent variable. Adjust the
    # length of the latent variable so that it is 2**(num_layers).
    sample_duration: int = 10
    aug_shift: bool = True   # presumably enables random time-shift augmentation — confirm in dataset code
    batch_size: int = 3
    shuffle: bool = True
    train_test_split: float = 0.5
    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
    durations_path: Optional[str] = None        # if you use preprocess, specify your path
    cumsum_path: Optional[str] = None           # if you use preprocess, specify your path
    audio_file_txt_path: Optional[str] = None   # if you use preprocess, specify your path
@dataclass
class GDM_Config:
    """Gaussian diffusion hyper-parameters.

    Annotations were missing originally, so ``@dataclass`` generated no
    fields; they are added here so the class behaves as a real dataclass.
    """
    steps: int = 1000                # num timesteps
    noise_schedule: str = 'linear'   # noise scheduler
    objective: str = 'noise'         # training objective, one of ['noise', 'x0', 'v']
    loss_type: str = 'l2'            # loss type
    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
    cfg_dropout_proba: float = 0.2   # presumably classifier-free-guidance conditioning dropout — confirm in trainer
    embedding_scale: float = 0.8
    batch_cfg: bool = True
    scale_cfg: bool = True
@dataclass
class VDM_Config:
    """Variational diffusion hyper-parameters (mirrors ``GDM_Config``).

    Annotations were missing originally, so ``@dataclass`` generated no
    fields; they are added here so the class behaves as a real dataclass.
    """
    loss_type: str = 'l2'
    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
    cfg_dropout_proba: float = 0.2   # presumably classifier-free-guidance conditioning dropout — confirm in trainer
    embedding_scale: float = 0.8
    batch_cfg: bool = True
    scale_cfg: bool = True
@dataclass
class DiffusionConfig:
    """Groups the per-diffusion-type configs.

    NOTE: the defaults are the config *classes* themselves, not instances —
    downstream code selects one (see ``Config.diffusion_type``) and reads its
    class attributes. Annotated as ``type`` so the dataclass machinery works
    (the original had no annotations, so no fields were generated).
    """
    gaussian_diffusion: type = GDM_Config
    variational_diffusion: type = VDM_Config
@dataclass
class ModelConfig:
    """UNet architecture settings.

    Only ``kernel_multiplier_downsample`` was annotated originally, so it was
    the sole real dataclass field. Scalar attributes are now annotated as
    fields. The list attributes are annotated ``ClassVar``: a plain annotation
    would trip dataclass's mutable-default ValueError, and a
    ``field(default_factory=...)`` would break the existing class-level access
    pattern (``ModelConfig.multipliers``), so they stay shared class constants.
    """
    in_channels: int = 128   # number of potential embedded channels to be entered
    channels: int = 128
    # indicates how many times the input channels of each block of UNet are in_channels.
    multipliers: ClassVar[List[int]] = [1, 1, 1, 2, 2, 4, 4, 4, 8, 8]
    # convolution layer parameters for each block
    factors: ClassVar[List[int]] = [1, 4, 4, 4, 2, 2, 2, 2, 2]
    # number of ResNet Blocks in each block
    num_blocks: ClassVar[List[int]] = [1, 3, 3, 3, 3, 3, 3, 3, 1]
    # number of Attention layers in each block
    attentions: ClassVar[List[int]] = [0, 0, 0, 1, 1, 1, 1, 1, 1]
    patch_size: int = 1
    resnet_groups: int = 8
    use_context_time: bool = True
    kernel_multiplier_downsample: int = 2
    use_nearest_upsample: bool = False
    use_skip_scale: bool = True
    use_snake: bool = False
    use_stft: bool = False
    use_stft_context: bool = False
    use_xattn_time: bool = True
    out_channels: int = 128
    context_features: Optional[int] = None   # if you want to use cond['global_cond']
    context_features_multiplier: int = 4     # if you want to use cond['global_cond'] or use_context_time == True
    context_channels: ClassVar[List[int]] = [129]
    context_embedding_features: int = 1024
    context_embedding_max_length: int = 128
    attention_heads: int = 8
    attention_multiplier: int = 1
@dataclass
class OptimizerConfig:
    """Optimizer hyper-parameters.

    ``beta_1``/``beta_2`` suggest an Adam-family optimizer — confirm at the
    construction site. Annotations added so ``@dataclass`` actually generates
    fields (none were generated originally).
    """
    lr: float = 3e-5
    beta_1: float = 0.9
    beta_2: float = 0.95
    weight_decay: float = 0.1
    grad_clip: float = 0.7   # presumably a gradient-norm clipping threshold — confirm in the training loop
@dataclass
class Config:
    """Top-level training configuration aggregating all sub-configs.

    The sub-config defaults are the config *classes* (not instances), so both
    ``Config.model_config.channels`` and ``Config().model_config.channels``
    resolve to the class attributes. Annotations added so ``@dataclass``
    actually generates fields; ``tasks`` is a list and therefore annotated
    ``ClassVar`` (a plain annotation would raise dataclass's mutable-default
    ValueError, and ``default_factory`` would break class-level access).
    """
    save_dir: str = ''
    log_dir: str = ''
    use_ddp: bool = False
    use_fp16: bool = True
    use_ema: bool = False
    is_finetuning: bool = False
    seed: int = 4996
    tasks: ClassVar[List[str]] = ['text_guided', 'music_inpaint', 'music_cont']
    num_epoch: int = 100
    eval_interval: int = 30
    grad_accum_every: int = 10
    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
    diffusion_type: str = 'gdm'   # presumably selects GDM_Config vs VDM_Config — confirm in trainer
    dataset_config: type = DataConfig
    diffusion_config: type = DiffusionConfig
    model_config: type = ModelConfig
    optimizer_config: type = OptimizerConfig
    conditioner_config: type = ConditionerConfig