experiments.conf
basic {
data_dir = /xxx/data_dir # Edit this
dataset_dir = ${basic.data_dir}/dataset
log_root = ${basic.data_dir}
use_amp = true
}
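# Note: ${basic.data_dir} is a HOCON substitution, so dataset_dir and log_root
# above resolve against the data_dir value set in this block; only data_dir
# needs to be edited for a new machine.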
#*************** Data-specific config ***************
data_full = ${basic} {
dataset_name = full_seed100
only_title = false
emb_similarity = 0.95
similarity_layer = -2
num_epochs = 3
report_frequency = 100
eval_frequency = 400
start_eval_after_ratio = 0
}
#*************** Transformer encoder (or encoder-decoder) config ***************
segment_encoder {
max_seq_len = 80
}
bert_large = ${segment_encoder} {
model_type = bert
pretrained = bert-large-uncased
tokenizer = bert-large-uncased
}
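# A hypothetical sketch of adding another encoder by extending segment_encoder
# (not part of the original experiments; the model_type value is an assumption
# about what the training code accepts):
# roberta_large = ${segment_encoder} {
#   model_type = roberta
#   pretrained = roberta-large
#   tokenizer = roberta-large
# }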
#*************** Model config ***************
model {
# Only for tagging-based
do_tag = false
opentag = false
# Steps
batch_size = 128
eval_batch_size = 128
gradient_accumulation_steps = 1
warmup_ratio = 0.01
# Optimizer
bert_learning_rate = 2e-5
task_learning_rate = 1e-4
bert_wd = 1e-2
task_wd = 0
adam_eps = 1e-8
max_grad_norm = 1 # Set 0 to disable clipping
freeze_layers = -4 # to freeze: > 0 or < 0
# Seed expansion
expand_by_sim = false
# Model hyperparameters
dropout_rate = 0.1
max_itr = 1
num_views = 1
baseline_rpl = false
baseline_triplet_coef = false
baseline_triplet_channels = 8
baseline_triplet_margin = 1
contrastive_temp = 0.1
attr_cls_coef = false
attr_cls_th = 5
# Regularization
vi_coef = false
vi_diverse = false
vi_only_ngram = true
  vi_asin_bow = false # If true, use BoW; if false, use CLS
vi_no_kl = false
num_topics = 200
vi_sampling = 4
usp_bullet_coef = false
usp_bullet_temp = 1
# Gradual clustering
cluster_sim_relax = 0.8
dbscan_eps = 0.05
dbscan_min_samples = 4
# Baseline clustering
baseline_clustering = false
}
#*************** RE experiment config ***************
exp_full_bert_large = ${data_full} ${model} ${bert_large} {
}
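# ${data_full} ${model} ${bert_large} above are merged by HOCON from left to
# right, so a key defined in a later object overrides the same key from an
# earlier one; the trailing block holds experiment-specific overrides (empty here).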
exp_full_bert_large_v4 = ${exp_full_bert_large} {
num_topics = 50
vi_coef = 0.02
usp_bullet_coef = 0.01
}
exp_full_bert_large_bs_v6 = ${exp_full_bert_large} {
attr_cls_coef = 1
baseline_triplet_margin = 1
baseline_triplet_coef = 2
}
exp_full_bert_large_tag = ${exp_full_bert_large} {
do_tag = true
use_crf = false
num_epochs = 1
start_eval_after_ratio = 10
}
exp_full_bert_large_tag_v3 = ${exp_full_bert_large_tag} {
use_crf = true
}
exp_full_bert_large_opentag = ${exp_full_bert_large} {
do_tag = true
opentag = true
use_crf = true
num_epochs = 1
batch_size = 96
eval_batch_size = 64
start_eval_after_ratio = 10
}
exp_full_chunk_bert_large = ${exp_full_bert_large} {
dataset_name = full_seed100chunk
}
exp_chunk_no_rpl_gradual = ${exp_full_chunk_bert_large} {
dbscan_eps = 0.1
}
exp_no_rpl_baseline = ${exp_full_bert_large} {
baseline_clustering = true
dbscan_eps = 0.1
}
exp_no_rpl_gradual = ${exp_full_bert_large} {
dbscan_eps = 0.1
}
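# A hypothetical sketch of defining a further variant (not one of the original
# experiments): inherit from an existing block and override only the keys that
# change, exactly as the blocks above do.
# exp_no_rpl_gradual_small_batch = ${exp_no_rpl_gradual} {
#   batch_size = 64
#   gradient_accumulation_steps = 2
# }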