@@ -55,118 +55,6 @@ jobs:
55
55
git add .pre-commit-config.yaml
56
56
- uses : pre-commit/action@v3.0.1
57
57
58
- e2e_ppo_trainer_vllm :
59
- runs-on : [L20x8]
60
- timeout-minutes : 40 # Increase this timeout value as needed
61
- env :
62
- HTTP_PROXY : ${{ secrets.PROXY_HTTP }}
63
- HTTPS_PROXY : ${{ secrets.PROXY_HTTPS }}
64
- NO_PROXY : " localhost,127.0.0.1"
65
- HF_ENDPOINT : " https://hf-mirror.com"
66
- HF_HUB_ENABLE_HF_TRANSFER : " 0" # This is more stable
67
- container :
68
- image : whatcanyousee/verl:ngc-cu124-vllm0.8.3-sglang0.4.5-mcore0.12.0-te2.2
69
- options : --gpus all --shm-size=10g
70
- steps :
71
- - uses : actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
72
- with :
73
- fetch-depth : 0
74
- - name : Install the current repository
75
- run : |
76
- pip3 install -e .[test,vllm]
77
- - name : Prepare GSM8K dataset
78
- run : |
79
- ray stop --force
80
- python3 examples/data_preprocess/gsm8k.py
81
- # Function RM
82
- - name : Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving
83
- run : |
84
- ray stop --force
85
- VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 bash tests/e2e/ppo_trainer/run_function_reward.sh
86
- - name : Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm after resuming
87
- run : |
88
- ray stop --force
89
- RESUME_MODE=auto bash tests/e2e/ppo_trainer/run_function_reward.sh
90
- - name : Running GSM8K E2E without rmpad using function rm
91
- run : |
92
- ray stop --force
93
- RM_PAD=False bash tests/e2e/ppo_trainer/run_function_reward.sh
94
- - name : Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (GRPO)
95
- run : |
96
- ray stop --force
97
- ADV_ESTIMATOR=grpo USE_KL=True bash tests/e2e/ppo_trainer/run_function_reward.sh
98
- - name : Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (ReMax)
99
- run : |
100
- ray stop --force
101
- ADV_ESTIMATOR=remax USE_KL=True bash tests/e2e/ppo_trainer/run_function_reward.sh
102
- - name : Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using customized reward function
103
- run : |
104
- ray stop --force
105
- CUSTOM_REWARD_FN=True bash tests/e2e/ppo_trainer/run_function_reward.sh
106
- - name : Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with in-reward kl and kl loss
107
- run : |
108
- ray stop --force
109
- USE_KL=True bash tests/e2e/ppo_trainer/run_function_reward.sh
110
- # Model RM
111
- - name : Running GRPO GSM8K E2E training tests with FSDP on 8 L20 GPUs (DeepSeek)
112
- run : |
113
- ray stop --force
114
- MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/e2e/ppo_trainer/run_function_reward.sh
115
- - name : Running GSM8K E2E with rmpad using model rm
116
- run : |
117
- ray stop --force
118
- bash tests/e2e/ppo_trainer/run_model_reward.sh
119
- - name : Running GSM8K E2E without rmpad using model rm
120
- run : |
121
- ray stop --force
122
- RM_PAD=False bash tests/e2e/ppo_trainer/run_model_reward.sh
123
- - name : Running GSM8K E2E with rmpad using model rm and ulysses sp=2
124
- run : |
125
- ray stop --force
126
- SP_SIZE=2 bash tests/e2e/ppo_trainer/run_model_reward.sh
127
- - name : Running GSM8K E2E with rmpad using model rm and dynamic batch size
128
- run : |
129
- ray stop --force
130
- SEQ_BALANCE=True bash tests/e2e/ppo_trainer/run_model_reward.sh
131
- - name : Running GSM8K E2E with rmpad using model rm with Liger Kernel enabled
132
- run : |
133
- ray stop --force
134
- LIGER=True bash tests/e2e/ppo_trainer/run_model_reward.sh
135
-
136
- e2e_ppo_trainer_vllm_vlm :
137
- runs-on : [L20x8]
138
- needs : pre_commit_for_ppo
139
- timeout-minutes : 40 # Increase this timeout value as needed
140
- env :
141
- HTTP_PROXY : ${{ secrets.PROXY_HTTP }}
142
- HTTPS_PROXY : ${{ secrets.PROXY_HTTPS }}
143
- NO_PROXY : " localhost,127.0.0.1"
144
- HF_ENDPOINT : " https://hf-mirror.com"
145
- HF_HUB_ENABLE_HF_TRANSFER : " 0" # This is more stable
146
- container :
147
- image : hiyouga/verl:ngc-th2.6.0-cu126-vllm0.8.3-flashinfer0.2.2-cxx11abi0
148
- options : --gpus all --shm-size=50g # Visual dataloader requires large memory
149
- steps :
150
- - uses : actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
151
- with :
152
- fetch-depth : 0
153
- - name : Install the current repository
154
- run : |
155
- pip3 install -e .[test,geo,vllm]
156
- # Geo3k
157
- - name : Prepare Geo3k dataset
158
- run : |
159
- ray stop --force
160
- python3 examples/data_preprocess/geo3k.py
161
- - name : Running Geo3k VLM E2E training tests on 8 L20 GPUs with rmpad using function rm
162
- run : |
163
- ray stop --force
164
- TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
165
- MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
166
- MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
167
- ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
168
- bash tests/e2e/ppo_trainer/run_function_reward.sh
169
-
170
58
e2e_ppo_trainer_sglang :
171
59
runs-on : [L20x8]
172
60
needs : pre_commit_for_ppo
0 commit comments