Skip to content

Commit 61a284d

Browse files
authored
reorganize the worker initializer (#63)
1 parent 7bd73e9 commit 61a284d

File tree

4 files changed

+252
-287
lines changed

4 files changed

+252
-287
lines changed

train_ppo.sh

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,16 @@
11
#!/bin/bash
22

33
# --- Configuration (defaults, can be overridden via env vars) ---
4-
export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-1,2,3,4}
4+
export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0,5,9}
55
WAND_PROJECT=${WAND_PROJECT:-'OpenManus-rl'}
66
export BASE_MODEL=${BASE_MODEL:-'Qwen/Qwen2.5-3B'}
77
AGENTGYM_HOST=${AGENTGYM_HOST:-'0.0.0.0'} # Default to 0.0.0.0 for external access
88
AGENTGYM_SQL_BIRD_PATH=${AGENTGYM_SQL_BIRD_PATH:-} # Used only for sqlgym
99
export NCCL_IB_DISABLE=1
1010
export NCCL_P2P_DISABLE=1
1111
export PYTHONPATH="./openmanus_rl/agentgym/agentenv:${PYTHONPATH}"
12+
export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues
13+
1214

1315
# --- Argument Parsing ---
1416
usage() {
@@ -228,7 +230,6 @@ export EXPERIMENT_NAME="OpenManus-rl-ppo-${BASE_MODEL##*/}-${AGENTGYM_ENV_NAME}$
228230

229231
# --- Run PPO Training in Base Environment ---
230232
echo -e "\\n[Trainer] Running PPO training in base environment '$BASE_CONDA_ENV'..."
231-
export VLLM_ATTENTION_BACKEND=${VLLM_ATTENTION_BACKEND:-XFORMERS}
232233

233234
# Construct server base URL, adding path if needed
234235
AGENTGYM_SERVER_BASE="http://$AGENTGYM_HOST" # Base URL without port
@@ -283,8 +284,8 @@ hydra_overrides=(
283284
"data.env_ports=[${AGENTGYM_PORTS_STR}]"
284285
"data.train_data_num=null"
285286
"data.val_data_num=null"
286-
"data.train_batch_size=4"
287-
"data.val_batch_size=2"
287+
"data.train_batch_size=6"
288+
"data.val_batch_size=3"
288289
"data.max_prompt_length=4096"
289290
"data.max_response_length=1000"
290291
"data.max_start_length=2048"
@@ -296,8 +297,8 @@ hydra_overrides=(
296297
"actor_rollout_ref.model.enable_gradient_checkpointing=true"
297298
"actor_rollout_ref.model.use_remove_padding=True"
298299
"actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.95"
299-
"actor_rollout_ref.actor.ppo_mini_batch_size=4"
300-
"actor_rollout_ref.actor.ppo_micro_batch_size=4"
300+
"actor_rollout_ref.actor.ppo_mini_batch_size=6"
301+
"actor_rollout_ref.actor.ppo_micro_batch_size=6"
301302
"actor_rollout_ref.actor.fsdp_config.param_offload=true"
302303
"actor_rollout_ref.actor.fsdp_config.grad_offload=true"
303304
"actor_rollout_ref.actor.fsdp_config.optimizer_offload=true"
@@ -329,7 +330,7 @@ hydra_overrides=(
329330
"+trainer.val_only=false"
330331
"+trainer.val_before_train=true"
331332
"trainer.default_hdfs_dir=null"
332-
"trainer.n_gpus_per_node=4"
333+
"trainer.n_gpus_per_node=3"
333334
"trainer.nnodes=1"
334335
"trainer.save_freq=100"
335336
"trainer.test_freq=50"

0 commit comments

Comments
 (0)