train.nezha_large_wwm_v2.json
{
"gpu": {
"use": true,
"id": "2",
"local_rank": -1,
"data_parallel": false
},
"general": {
"do_train": true,
"do_eval": true,
"fp16": true,
"fp16_opt_level": "O1",
"exp_name": "train.nezha_large_wwm_v2",
"task": "daguan",
"model_type": "nezha_large",
"model_encoder_type": "nezha",
"vocab_mapping": true,
"main_data_dir": "/data2/code/DaguanFengxian/bert_model/data/",
"data_dir": "splits/fold_0_nezha_base_vocab",
"label_file_level_1": "labels_level_1.txt",
"label_file_level_2": "labels_level_2.txt",
"label2freq_level_1": "label2freq_level_1.json",
"label2freq_level_2": "label2freq_level_2.json",
"encoder_name_or_path": "/data2/pre-model/nezha/NEZHA-Large-WWM"
},
"params": {
"seed": 1234,
"max_time": 24,
"metric_key_for_early_stop": "macro avg__f1-score__level_2",
"logging_steps": 200,
"save_steps": 200,
"max_steps": -1,
"patience": 6,
"print_epoch_interval": 5,
"lr": {
"lr": 5e-5,
"linear_lr": 9e-4,
"encoder_learning_rate": 0.9e-4,
"min_lr": 1e-6,
"embeddings_learning_rate": 0.9e-4
},
"schedule": {
"weight_decay": 0.0,
"warmup_steps": 200,
"max_grad_norm": 1.0,
"num_train_epochs": 50,
"per_gpu_train_batch_size": 64,
"gradient_accumulation_steps": 1,
"eval_batch_size": 2
},
"loss": {
"loss_fct_name": "ce",
"use_focal_loss": true,
"focal_loss_gamma": 2.0,
"use_class_weights": true,
"use_weighted_sampler": false
}
},
"net_params": {
"hidden_dim": 768,
"use_lstm": true,
"aggregator": "bert_pooler,dr_pooler,slf_attn_pooler",
"dropout": 0.15,
"use_fgm": false,
"use_pgd": false
},
"encoder_params": {
"max_seq_len": 132,
"do_lower_case": true
}
}
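
For reference, a minimal sketch of how a config like this might be loaded and read in a training script. The `load_config` helper and the file path are assumptions for illustration, not part of this repository's code:

```python
import json

def load_config(path):
    # Hypothetical helper: read a JSON training config such as
    # train.nezha_large_wwm_v2.json into a plain dict.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

config = load_config("train.nezha_large_wwm_v2.json")

# Nested sections are accessed by key, e.g. the encoder checkpoint
# and the per-GPU training batch size defined above.
encoder_path = config["general"]["encoder_name_or_path"]
train_batch_size = config["params"]["schedule"]["per_gpu_train_batch_size"]
print(encoder_path, train_batch_size)
```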