train.sh
#!/bin/bash
batch_size=2
question_mask_ratio=0.5
distillation_temp=1.0
compressor_hidden_size=4096
num_compressor_layers=4
num_compressor_encoder_layers=2
pool_window_size=4
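# candidate window sizes to draw from when --random_pool_window_size is enabled (see launch flags below)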
cand_pool_window_sizes=(4 6 8 10)
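# datasets (assumed expansions): NQ (Natural Questions), TQA (TriviaQA), HQA (HotpotQA)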
min_num_documents=1 # 1 for NQ and TQA, 2 for HQA
max_num_documents=5
compressor_path=/path-to-llama-2-7B # used to initialize compressor parameters
# target LLM is LongChat-13B
lm_model_name=longchat
lm_model_hidden_size=5120
lm_model_path=/path-to-longchat-13B
# # target LLM is LLaMA-2-7B
# lm_model_name=llama
# lm_model_hidden_size=4096
# lm_model_path=/path-to-llama-2-7B
data_path=/path-to-dataset
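# training schedule: evaluation, checkpointing, and logging intervals (in steps)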
max_steps=20000
dev_steps=500
test_steps=500
save_steps=1000
logging_steps=100
benchmark_dev_steps=1000
benchmark_test_steps=1000
instruction_name=base # 'base' for NQ, 'short' for TQA and HQA
benchmark_metric=accuracy # NQ: accuracy; TQA: em; HQA: f1
output_dir=/path-to-save
mkdir -p ${output_dir}
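# launch training with the bf16 accelerate config; console output is also written to ${output_dir}/train.log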
accelerate launch --config_file config/bf16.yaml \
src/train.py \
--data_path $data_path \
--compressor_path $compressor_path \
--lm_model_name $lm_model_name \
--lm_model_path $lm_model_path \
--output_dir $output_dir \
--question_mask_ratio $question_mask_ratio \
--instruction_name $instruction_name \
--compressor_hidden_size $compressor_hidden_size \
--lm_model_hidden_size $lm_model_hidden_size \
--num_compressor_layers $num_compressor_layers \
--num_compressor_encoder_layers $num_compressor_encoder_layers \
--random_num_documents \
--max_num_documents $max_num_documents \
--min_num_documents $min_num_documents \
--pool_window_size $pool_window_size \
--train_batch_size $batch_size \
--eval_batch_size $batch_size \
--max_steps $max_steps \
--dev_steps $dev_steps \
--test_steps $test_steps \
--save_steps $save_steps \
--logging_steps $logging_steps \
--do_benchmark \
--benchmark_dev_steps $benchmark_dev_steps \
--benchmark_test_steps $benchmark_test_steps \
--benchmark_metric $benchmark_metric \
--gold_first_for_kd \
--random_pool_window_size \
--cand_pool_window_sizes "${cand_pool_window_sizes[@]}" \
| tee ${output_dir}/train.log
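
# Usage: replace the /path-to-* placeholders above with real paths, then run:
#   bash train.sh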