Split the memory-efficient Alpaca launcher into its own script and retune run_alpaca.sh.

run_alpaca.sh: save a checkpoint every 500 steps instead of 2000, add
--warmup_steps 50, enable --gradient_checkpointing and --fp16, and drop the
second command that launched train_alpaca_mem.py.

run_alpaca_mem.sh: new file holding the train_alpaca_mem.py command that was
removed from run_alpaca.sh, with its flags unchanged.

Review notes:

The last flag of run_alpaca.sh no longer ends in a line-continuation
backslash, and the file now ends with a newline. The post-image blob id on
the run_alpaca.sh index line predates that fix.

Both --warmup_steps and --warmup_ratio are passed to train_alpaca.py.
Assuming the script parses Hugging Face TrainingArguments, warmup_steps
overrides warmup_ratio. TODO confirm which one is intended and drop the other.

Indentation inside the hunks was reconstructed as four spaces.
TODO confirm against the working tree before applying.

diff --git a/examples/alpaca/scripts/run_alpaca.sh b/examples/alpaca/scripts/run_alpaca.sh
index fa42bb3..61bc512 100644
--- a/examples/alpaca/scripts/run_alpaca.sh
+++ b/examples/alpaca/scripts/run_alpaca.sh
@@ -8,28 +8,13 @@ python train_alpaca.py \
     --gradient_accumulation_steps 8 \
     --evaluation_strategy "no" \
     --save_strategy "steps" \
-    --save_steps 2000 \
+    --save_steps 500 \
     --save_total_limit 5 \
     --learning_rate 2e-5 \
     --weight_decay 0. \
+    --warmup_steps 50 \
     --warmup_ratio 0.03 \
     --lr_scheduler_type "cosine" \
-    --logging_steps 1
-
-python train_alpaca_mem.py \
-    --model_name_or_path facebook/opt-125m \
-    --data_path tatsu-lab/alpaca \
-    --output_dir work_dir/ \
-    --num_train_epochs 3 \
-    --per_device_train_batch_size 4 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 8 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 2000 \
-    --save_total_limit 5 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1
+    --logging_steps 1 \
+    --gradient_checkpointing True \
+    --fp16 True
diff --git a/examples/alpaca/scripts/run_alpaca_mem.sh b/examples/alpaca/scripts/run_alpaca_mem.sh
new file mode 100644
index 0000000..59fa75b
--- /dev/null
+++ b/examples/alpaca/scripts/run_alpaca_mem.sh
@@ -0,0 +1,17 @@
+python train_alpaca_mem.py \
+    --model_name_or_path facebook/opt-125m \
+    --data_path tatsu-lab/alpaca \
+    --output_dir work_dir/ \
+    --num_train_epochs 3 \
+    --per_device_train_batch_size 4 \
+    --per_device_eval_batch_size 4 \
+    --gradient_accumulation_steps 8 \
+    --evaluation_strategy "no" \
+    --save_strategy "steps" \
+    --save_steps 2000 \
+    --save_total_limit 5 \
+    --learning_rate 2e-5 \
+    --weight_decay 0. \
+    --warmup_ratio 0.03 \
+    --lr_scheduler_type "cosine" \
+    --logging_steps 1