Skip to content

Commit 9912290

Browse files
committed
temp: only test ppo_trainer for sglang
1 parent 766a082 commit 9912290

17 files changed

+0
-1119
lines changed

.github/workflows/dataset.yml

Lines changed: 0 additions & 62 deletions
This file was deleted.

.github/workflows/e2e_ascend.yml

Lines changed: 0 additions & 49 deletions
This file was deleted.

.github/workflows/e2e_dapo.yml

Lines changed: 0 additions & 58 deletions
This file was deleted.

.github/workflows/e2e_eval_aime24.yml

Lines changed: 0 additions & 60 deletions
This file was deleted.

.github/workflows/e2e_ppo_trainer.yml

Lines changed: 0 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -55,118 +55,6 @@ jobs:
5555
git add .pre-commit-config.yaml
5656
- uses: pre-commit/action@v3.0.1
5757

58-
e2e_ppo_trainer_vllm:
59-
runs-on: [L20x8]
60-
timeout-minutes: 40 # Increase this timeout value as needed
61-
env:
62-
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
63-
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
64-
NO_PROXY: "localhost,127.0.0.1"
65-
HF_ENDPOINT: "https://hf-mirror.com"
66-
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
67-
container:
68-
image: whatcanyousee/verl:ngc-cu124-vllm0.8.3-sglang0.4.5-mcore0.12.0-te2.2
69-
options: --gpus all --shm-size=10g
70-
steps:
71-
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
72-
with:
73-
fetch-depth: 0
74-
- name: Install the current repository
75-
run: |
76-
pip3 install -e .[test,vllm]
77-
- name: Prepare GSM8K dataset
78-
run: |
79-
ray stop --force
80-
python3 examples/data_preprocess/gsm8k.py
81-
# Function RM
82-
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving
83-
run: |
84-
ray stop --force
85-
VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 bash tests/e2e/ppo_trainer/run_function_reward.sh
86-
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm after resuming
87-
run: |
88-
ray stop --force
89-
RESUME_MODE=auto bash tests/e2e/ppo_trainer/run_function_reward.sh
90-
- name: Running GSM8K E2E without rmpad using function rm
91-
run: |
92-
ray stop --force
93-
RM_PAD=False bash tests/e2e/ppo_trainer/run_function_reward.sh
94-
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (GRPO)
95-
run: |
96-
ray stop --force
97-
ADV_ESTIMATOR=grpo USE_KL=True bash tests/e2e/ppo_trainer/run_function_reward.sh
98-
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (ReMax)
99-
run: |
100-
ray stop --force
101-
ADV_ESTIMATOR=remax USE_KL=True bash tests/e2e/ppo_trainer/run_function_reward.sh
102-
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using customized reward function
103-
run: |
104-
ray stop --force
105-
CUSTOM_REWARD_FN=True bash tests/e2e/ppo_trainer/run_function_reward.sh
106-
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with in-reward kl and kl loss
107-
run: |
108-
ray stop --force
109-
USE_KL=True bash tests/e2e/ppo_trainer/run_function_reward.sh
110-
# Model RM
111-
- name: Running GRPO GSM8K E2E training tests with FSDP on 8 L20 GPUs (DeepSeek)
112-
run: |
113-
ray stop --force
114-
MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/e2e/ppo_trainer/run_function_reward.sh
115-
- name: Running GSM8K E2E with rmpad using model rm
116-
run: |
117-
ray stop --force
118-
bash tests/e2e/ppo_trainer/run_model_reward.sh
119-
- name: Running GSM8K E2E without rmpad using model rm
120-
run: |
121-
ray stop --force
122-
RM_PAD=False bash tests/e2e/ppo_trainer/run_model_reward.sh
123-
- name: Running GSM8K E2E with rmpad using model rm and ulysses sp=2
124-
run: |
125-
ray stop --force
126-
SP_SIZE=2 bash tests/e2e/ppo_trainer/run_model_reward.sh
127-
- name: Running GSM8K E2E with rmpad using model rm and dynamic batch size
128-
run: |
129-
ray stop --force
130-
SEQ_BALANCE=True bash tests/e2e/ppo_trainer/run_model_reward.sh
131-
- name: Running GSM8K E2E with rmpad using model rm with Liger Kernel enabled
132-
run: |
133-
ray stop --force
134-
LIGER=True bash tests/e2e/ppo_trainer/run_model_reward.sh
135-
136-
e2e_ppo_trainer_vllm_vlm:
137-
runs-on: [L20x8]
138-
needs: pre_commit_for_ppo
139-
timeout-minutes: 40 # Increase this timeout value as needed
140-
env:
141-
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
142-
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
143-
NO_PROXY: "localhost,127.0.0.1"
144-
HF_ENDPOINT: "https://hf-mirror.com"
145-
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
146-
container:
147-
image: hiyouga/verl:ngc-th2.6.0-cu126-vllm0.8.3-flashinfer0.2.2-cxx11abi0
148-
options: --gpus all --shm-size=50g # Visual dataloader requires large memory
149-
steps:
150-
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
151-
with:
152-
fetch-depth: 0
153-
- name: Install the current repository
154-
run: |
155-
pip3 install -e .[test,geo,vllm]
156-
# Geo3k
157-
- name: Prepare Geo3k dataset
158-
run: |
159-
ray stop --force
160-
python3 examples/data_preprocess/geo3k.py
161-
- name: Running Geo3k VLM E2E training tests on 8 L20 GPUs with rmpad using function rm
162-
run: |
163-
ray stop --force
164-
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
165-
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
166-
MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
167-
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
168-
bash tests/e2e/ppo_trainer/run_function_reward.sh
169-
17058
e2e_ppo_trainer_sglang:
17159
runs-on: [L20x8]
17260
needs: pre_commit_for_ppo

0 commit comments

Comments (0)