forked from deepspeedai/DeepSpeedExamples
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbert_large.json
More file actions
59 lines (59 loc) · 1.77 KB
/
bert_large.json
File metadata and controls
59 lines (59 loc) · 1.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
{
"name": "bing_bert_large_seq",
"bert_token_file": "bert-large-uncased",
"bert_model_file": "bert-large-uncased",
"bert_model_config": {
"vocab_size_or_config_json_file": 119547,
"hidden_size": 1024,
"num_hidden_layers": 24,
"num_attention_heads": 16,
"intermediate_size": 4096,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"attention_probs_dropout_prob": 0.1,
"max_position_embeddings": 512,
"type_vocab_size": 2,
"initializer_range": 0.02
},
"data": {
"flags": {
"pretrain_dataset": true,
"pretrain_type": "wiki_bc"
},
"mixed_seq_datasets": {
"128": {
"wiki_pretrain_dataset": "bnorick_format/128/wiki_pretrain",
"bc_pretrain_dataset": "bnorick_format/128/bookcorpus_pretrain"
},
"512": {
"wiki_pretrain_dataset": "bnorick_format/512/wiki_pretrain",
"bc_pretrain_dataset": "bnorick_format/512/bookcorpus_pretrain"
}
}
},
"mixed_seq_training": {
"128": {
"num_epochs": 500,
"warmup_proportion": 0.1,
"learning_rate": 4e-4,
"num_workers": 0,
"async_worker": true,
"decay_rate": 0.99,
"decay_step": 520,
"total_training_steps": 125000
},
"512": {
"num_epochs": 160,
"warmup_proportion": 0.02,
"learning_rate": 1e-5,
"num_workers": 0,
"async_worker": true,
"decay_rate": 0.90,
"decay_step": 150,
"total_training_steps": 7500
}
},
"validation": {
"path": "validation_set/"
}
}