
Add DPO support for DeepSpeed-Chat #828

Merged (11 commits) on Jan 6, 2025
Add training scripts for step2 DPO in DeepSpeed-Chat.
stceum committed Jan 27, 2024
commit 7efa35a2f20b622d68b64f4a6a5357d25f18bd05
@@ -0,0 +1,6 @@
### 💁 For each folder, the bash scripts are examples for the "facebook/opt" model family.

If you want to train a different model, such as EleutherAI/gpt-j-6b, simply replace
``--model_name_or_path facebook/opt-1.3b`` with ``--model_name_or_path EleutherAI/gpt-j-6b``.

For the models we support, please see [our landing page](./../../../README.md#-supported-models-).
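As a hedged example, one way to apply that swap to every launcher script in the current folder (assuming GNU sed is available; adjust the model names to your case):

<pre>
# Replace the default OPT model with GPT-J in all launcher scripts in this folder.
sed -i 's|facebook/opt-1.3b|EleutherAI/gpt-j-6b|g' *.sh
</pre>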
@@ -0,0 +1,34 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
OUTPUT=$1
ZERO_STAGE=$2
if [ "$OUTPUT" == "" ]; then
OUTPUT=./output_step1_llama2_7b
fi
if [ "$ZERO_STAGE" == "" ]; then
ZERO_STAGE=3
fi
mkdir -p $OUTPUT

deepspeed main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_split 2,4,4 \
--model_name_or_path meta-llama/Llama-2-7b-hf \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--max_seq_len 512 \
--learning_rate 9.65e-6 \
--weight_decay 0. \
--num_train_epochs 4 \
--gradient_accumulation_steps 1 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--gradient_checkpointing \
--zero_stage $ZERO_STAGE \
--deepspeed \
--output_dir $OUTPUT \
&> $OUTPUT/training.log
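As the argument parsing above shows, each launcher accepts an optional output directory and ZeRO stage as positional arguments and falls back to its built-in defaults when they are omitted. A hypothetical invocation (the script file name is illustrative, since file names are not shown in this diff):

<pre>
# Train with ZeRO stage 2 and write logs/checkpoints to ./my_output.
bash run_llama2_7b.sh ./my_output 2

# Or rely on the defaults baked into the script.
bash run_llama2_7b.sh
</pre>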
@@ -0,0 +1,36 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
OUTPUT=$1
ZERO_STAGE=$2
if [ "$OUTPUT" == "" ]; then
OUTPUT=./output_step1_llama2_7b_lora
fi
if [ "$ZERO_STAGE" == "" ]; then
ZERO_STAGE=3
fi
mkdir -p $OUTPUT

deepspeed main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_split 2,4,4 \
--model_name_or_path meta-llama/Llama-2-7b-hf \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--max_seq_len 512 \
--learning_rate 9.65e-6 \
--weight_decay 0. \
--num_train_epochs 4 \
--gradient_accumulation_steps 1 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--gradient_checkpointing \
--zero_stage $ZERO_STAGE \
--deepspeed \
--lora_dim 128 \
--lora_module_name "layers." \
--output_dir $OUTPUT \
&> $OUTPUT/training.log
@@ -0,0 +1,36 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
OUTPUT=$1
ZERO_STAGE=$2
if [ "$OUTPUT" == "" ]; then
OUTPUT=./output
fi
if [ "$ZERO_STAGE" == "" ]; then
ZERO_STAGE=3
fi
mkdir -p $OUTPUT

deepspeed main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_split 2,4,4 \
--model_name_or_path facebook/opt-66b \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--max_seq_len 512 \
--learning_rate 1e-4 \
--weight_decay 0.1 \
--num_train_epochs 2 \
--gradient_accumulation_steps 1 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--gradient_checkpointing \
--zero_stage $ZERO_STAGE \
--lora_dim 128 \
--lora_module_name decoder.layers. \
--deepspeed \
--output_dir $OUTPUT \
&> $OUTPUT/training.log
@@ -0,0 +1,22 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Note that usually LoRA needs to use larger learning rate
OUTPUT=$1
ZERO_STAGE=$2
if [ "$OUTPUT" == "" ]; then
OUTPUT=./output
fi
if [ "$ZERO_STAGE" == "" ]; then
ZERO_STAGE=0
fi
mkdir -p $OUTPUT

deepspeed --num_gpus 1 main.py --model_name_or_path facebook/opt-1.3b \
--gradient_accumulation_steps 8 --lora_dim 128 --zero_stage $ZERO_STAGE \
--enable_tensorboard \
--tensorboard_path $OUTPUT \
--deepspeed --output_dir $OUTPUT &> $OUTPUT/training.log
@@ -0,0 +1,31 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Note that usually LoRA needs to use larger learning rate
OUTPUT_PATH=./output
mkdir -p $OUTPUT_PATH

deepspeed --num_gpus 1 main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_split 2,4,4 \
--model_name_or_path facebook/opt-6.7b \
--per_device_train_batch_size 8 \
--per_device_eval_batch_size 8 \
--max_seq_len 512 \
--learning_rate 1e-3 \
--weight_decay 0. \
--num_train_epochs 16 \
--gradient_accumulation_steps 16 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--gradient_checkpointing \
--zero_stage 0 \
--lora_dim 128 \
--lora_module_name decoder.layers. \
--deepspeed \
--output_dir $OUTPUT_PATH \
&> $OUTPUT_PATH/training.log
@@ -0,0 +1,35 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
OUTPUT=$1
ZERO_STAGE=$2
if [ "$OUTPUT" == "" ]; then
OUTPUT=./output
fi
if [ "$ZERO_STAGE" == "" ]; then
ZERO_STAGE=2
fi
mkdir -p $OUTPUT

deepspeed main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_split 2,4,4 \
--model_name_or_path facebook/opt-1.3b \
--per_device_train_batch_size 8 \
--per_device_eval_batch_size 8 \
--max_seq_len 512 \
--learning_rate 9.65e-6 \
--weight_decay 0. \
--num_train_epochs 16 \
--gradient_accumulation_steps 1 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--zero_stage $ZERO_STAGE \
--deepspeed \
--enable_tensorboard \
--tensorboard_path $OUTPUT \
--output_dir $OUTPUT \
&> $OUTPUT/training.log
@@ -0,0 +1,31 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Note that usually LoRA needs to use larger learning rate
OUTPUT_PATH=./output
mkdir -p $OUTPUT_PATH

deepspeed main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_split 2,4,4 \
--model_name_or_path facebook/opt-1.3b \
--per_device_train_batch_size 8 \
--per_device_eval_batch_size 8 \
--max_seq_len 512 \
--learning_rate 1e-3 \
--weight_decay 0.1 \
--num_train_epochs 16 \
--gradient_accumulation_steps 1 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--zero_stage 0 \
--lora_dim 128 \
--lora_module_name decoder.layers. \
--only_optimize_lora \
--deepspeed \
--output_dir $OUTPUT_PATH \
&> $OUTPUT_PATH/training.log
@@ -0,0 +1,36 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
OUTPUT=$1
ZERO_STAGE=$2
if [ "$OUTPUT" == "" ]; then
OUTPUT=./output
fi
if [ "$ZERO_STAGE" == "" ]; then
ZERO_STAGE=3
fi
mkdir -p $OUTPUT

deepspeed main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_split 2,4,4 \
--model_name_or_path facebook/opt-13b \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--max_seq_len 512 \
--learning_rate 1e-4 \
--weight_decay 0. \
--num_train_epochs 16 \
--gradient_accumulation_steps 1 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--gradient_checkpointing \
--zero_stage $ZERO_STAGE \
--lora_dim 128 \
--lora_module_name decoder.layers. \
--deepspeed \
--output_dir $OUTPUT \
&> $OUTPUT/training.log
@@ -0,0 +1,28 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
OUTPUT_PATH=./output
mkdir -p $OUTPUT_PATH

deepspeed main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_split 2,4,4 \
--model_name_or_path facebook/opt-30b \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--max_seq_len 512 \
--learning_rate 9.65e-6 \
--weight_decay 0. \
--num_train_epochs 16 \
--gradient_accumulation_steps 1 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--lora_dim 128 \
--gradient_checkpointing \
--zero_stage 3 \
--deepspeed \
--output_dir $OUTPUT_PATH \
&> $OUTPUT_PATH/training.log
@@ -0,0 +1,34 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
OUTPUT=$1
ZERO_STAGE=$2
if [ "$OUTPUT" == "" ]; then
OUTPUT=./output
fi
if [ "$ZERO_STAGE" == "" ]; then
ZERO_STAGE=3
fi
mkdir -p $OUTPUT

deepspeed main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
--data_split 2,4,4 \
--model_name_or_path facebook/opt-6.7b \
--per_device_train_batch_size 6 \
--per_device_eval_batch_size 6 \
--max_seq_len 512 \
--learning_rate 9.65e-6 \
--weight_decay 0. \
--num_train_epochs 16 \
--gradient_accumulation_steps 1 \
--lr_scheduler_type cosine \
--num_warmup_steps 0 \
--seed 1234 \
--gradient_checkpointing \
--zero_stage $ZERO_STAGE \
--deepspeed \
--output_dir $OUTPUT \
&> $OUTPUT/training.log
@@ -0,0 +1,21 @@
# DeepSpeed Characterization Script

# Contents
* [Introduction](#introduction)
* [Usage](#usage)

# Introduction
The step 1 characterization script sweeps across various training parameters. Currently, the following parameters are swept:
<pre>
Zero Stage: 2, 3
Offload: True, False
Lora: True, False
</pre>

The `run_step1_sweep.sh` script passes configuration arguments to `run_single.sh`, and it can be extended to sweep over additional parameters (e.g., learning rate or weight decay), as sketched below.
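A minimal sketch of such a sweep loop, assuming `run_single.sh` accepts the ZeRO stage, offload flag, and LoRA flag as positional arguments (the actual interface of the scripts in this repository may differ):

<pre>
#!/bin/bash
# Hypothetical sweep loop: iterate over the swept parameters and
# hand each combination to run_single.sh as positional arguments.
for zero_stage in 2 3; do
   for offload in true false; do
      for lora in true false; do
         bash run_single.sh "$zero_stage" "$offload" "$lora"
      done
   done
done
</pre>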

# Usage
The sweep script can be run as follows:
<pre>
DeepSpeedExamples/applications/DeepSpeed-Chat/training/step1_supervised_finetuning$ bash training_scripts/opt/single_node/sweep/run_step1_sweep.sh
</pre>