-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathtrain.nezha_base_lr0.7e-4_ce_v2.json
executable file
·84 lines (83 loc) · 2.65 KB
/
train.nezha_base_lr0.7e-4_ce_v2.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/*
Online score: 0.52170734544
loss = 2.575510252846612
09/01/2021 20:33:04 - INFO - training.Trainer - macro avg__f1-score__level_2 = 0.5323257686923674
09/01/2021 20:33:04 - INFO - training.Trainer - macro avg__precision__level_2 = 0.5433092793565473
09/01/2021 20:33:04 - INFO - training.Trainer - macro avg__recall__level_2 = 0.5547449039221721
09/01/2021 20:33:04 - INFO - training.Trainer - macro avg__support__level_2 = 2821
09/01/2021 20:33:04 - INFO - training.Trainer - weighted avg__f1-score__level_2 = 0.6697822619742119
09/01/2021 20:33:04 - INFO - training.Trainer - weighted avg__precision__level_2 = 0.6897326029522685
09/01/2021 20:33:04 - INFO - training.Trainer - weighted avg__recall__level_2 = 0.6618220489188231
09/01/2021 20:33:04 - INFO - training.Trainer - weighted avg__support__level_2 = 2821
*/
{
"gpu": {
"use": true,
"id": "2",
"local_rank": -1,
"data_parallel": false
},
"general": {
"do_train": true,
"do_eval": true,
"fp16": true,
"fp16_opt_level": "O1",
"exp_name": "train.nezha_base_lr0.7e-4_ce_v2",
"task": "daguan",
"model_type": "nezha_base",
"model_encoder_type": "nezha",
"vocab_mapping": true,
"main_data_dir": "/data2/code/DaguanFengxian/bert_model/data/",
"data_dir": "splits/fold_0_nezha_base_vocab",
"label_file_level_1": "labels_level_1.txt",
"label_file_level_2": "labels_level_2.txt",
"label2freq_level_1": "label2freq_level_1.json",
"label2freq_level_2": "label2freq_level_2.json",
"encoder_name_or_path": "/data2/pre-model/nezha/NEZHA-Base-WWM"
},
"params": {
"seed": 1234,
"max_time": 24,
"metric_key_for_early_stop": "macro avg__f1-score__level_2",
"logging_steps": 200,
"save_steps": 200,
"max_steps": -1,
"patience": 12,
"print_epoch_interval": 5,
"lr": {
"lr": 5e-5,
"linear_lr": 5e-4,
"encoder_learning_rate": 0.5e-4,
"min_lr": 1e-6,
"embeddings_learning_rate": 0.5e-4
},
"schedule": {
"weight_decay": 0.0,
"warmup_steps": 200,
"max_grad_norm": 1.0,
"num_train_epochs": 50,
"per_gpu_train_batch_size": 32,
"gradient_accumulation_steps": 1,
"eval_batch_size": 64
},
"loss": {
"loss_fct_name": "ce",
"use_focal_loss": true,
"focal_loss_gamma": 2.0,
"use_class_weights": true,
"use_weighted_sampler": false
}
},
"net_params": {
"hidden_dim": 768,
"use_lstm": true,
"aggregator": "bert_pooler,dr_pooler,slf_attn_pooler",
"dropout": 0.15,
"use_fgm": false,
"use_pgd": false
},
"encoder_params": {
"max_seq_len": 133,
"do_lower_case": true
}
}