|
8 | 8 | # "latent_dim": 32,
|
9 | 9 | # "latent_dim": 48,
|
10 | 10 | # "latent_dim": 64,
|
11 |
| - # "latent_dim": 128, |
12 |
| - "latent_dim": 256, |
13 |
| - #"num_primitives": 4, |
| 11 | + "latent_dim": 128, |
| 12 | + # "latent_dim": 256, |
| 13 | + # "num_primitives": 4, |
14 | 14 | "num_primitives": 6,
|
15 | 15 | # "num_primitives": 8,
|
16 | 16 | # "hidden_dim": 16,
|
17 | 17 | # "hidden_dim": 32,
|
18 | 18 | # "hidden_dim": 48,
|
19 | 19 | # "hidden_dim": 64,
|
20 |
| - # "hidden_dim": 128, |
21 |
| - "hidden_dim": 256, |
| 20 | + "hidden_dim": 128, |
| 21 | + # "hidden_dim": 256, |
22 | 22 | "learn_segmentation": True,
|
23 | 23 | "masking_slope": 1,
|
24 | 24 | # "masking_slope": 0.75,
|
|
27 | 27 | # "kl_weight": 5e-3,
|
28 | 28 | # "kl_weight": 1e-2,
|
29 | 29 | # "kl_weight": 2e-2,
|
30 |
| - "kl_weight": 1e-3, |
| 30 | + # "kl_weight": 5e-2, |
| 31 | + "kl_weight": 1e-1, |
| 32 | + # "kl_weight": 1e-3, |
31 | 33 | # "kl_weight": 1e-4,
|
32 | 34 | "anneal_start": 10,
|
33 | 35 | # "anneal_start": 5,
|
|
41 | 43 | # "cycle_length": 100,
|
42 | 44 | # "cycle_length": 200,
|
43 | 45 | # "durations_weight": 1e-6,
|
44 |
| - #"durations_weight": 1e-4, |
| 46 | + # "durations_weight": 1e-4, |
45 | 47 | "durations_weight": 0,
|
46 | 48 | # "durations_weight": 1e-5,
|
47 | 49 | "lr": 1e-4,
|
|
53 | 55 | "N_val": 40000,
|
54 | 56 | # "N_val": 2,
|
55 | 57 | "sequence_length": 128,
|
56 |
| - #"epochs": 200, |
| 58 | + # "epochs": 200, |
57 | 59 | # "epochs": 250,
|
58 | 60 | # "epochs": 230,
|
59 |
| - # "epochs": 3000, |
| 61 | + # "epochs": 2000, |
60 | 62 | # "epochs": 1000,
|
61 | 63 | # "epochs": 300,
|
62 | 64 | "epochs": 500,
|
|
67 | 69 | # "run_name": "fresh-Transformer",
|
68 | 70 | # "run_name": "smol-Transformer",
|
69 | 71 | # "run_name": "resume-Transformer",
|
70 |
| - #"run_name": "smol-Transformer", |
71 |
| - #"run_name": "notrafo-Transformer", |
| 72 | + # "run_name": "smol-Transformer", |
| 73 | + # "run_name": "notrafo-Transformer", |
72 | 74 | # "run_name": "notrafo-sigmoid-Transformer",
|
73 | 75 | # "run_name": "noanneal-sigmoid-Transformer",
|
74 | 76 | # "run_name": "sigmoid-Transformer",
|
75 | 77 | # "run_name": "midKL-Transformer",
|
76 |
| - "run_name": "lowKL-Transformer", |
| 78 | + # "run_name": "lowKL-Transformer", |
| 79 | + # "run_name": "highKL-Transformer", |
| 80 | + "run_name": "veryhighKL-Transformer", |
| 81 | + # "run_name": "noanneal-highKL-Transformer", |
77 | 82 | # "run_name": "cyclical-Transformer",
|
| 83 | + # "run_name": "cyclical-lowKL-Transformer", |
78 | 84 | # "run_name": "nosigmoid-Transformer",
|
79 | 85 | # "run_name": "nosigmoid-Transformer",
|
80 | 86 | # "run_name": "relu-sigmoid-Transformer",
|
|
0 commit comments