#!/bin/bash

# --- Configuration (defaults, can be overridden via env vars) ---
-export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-1,2,3,4}
+export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0,5,9}
WAND_PROJECT=${WAND_PROJECT:-'OpenManus-rl'}
export BASE_MODEL=${BASE_MODEL:-'Qwen/Qwen2.5-3B'}
AGENTGYM_HOST=${AGENTGYM_HOST:-'0.0.0.0'} # Default to 0.0.0.0 for external access
AGENTGYM_SQL_BIRD_PATH=${AGENTGYM_SQL_BIRD_PATH:-} # Used only for sqlgym
export NCCL_IB_DISABLE=1
export NCCL_P2P_DISABLE=1
export PYTHONPATH="./openmanus_rl/agentgym/agentenv:${PYTHONPATH}"
+export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues
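# Example: any of the defaults above can be overridden at invocation time,
# e.g. (the script filename here is illustrative, not the actual name):
#   CUDA_VISIBLE_DEVICES=0,1 BASE_MODEL='Qwen/Qwen2.5-7B' WAND_PROJECT='my-project' bash train_ppo.sh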
+

# --- Argument Parsing ---
usage () {
@@ -228,7 +230,6 @@ export EXPERIMENT_NAME="OpenManus-rl-ppo-${BASE_MODEL##*/}-${AGENTGYM_ENV_NAME}$

# --- Run PPO Training in Base Environment ---
echo -e "\n[Trainer] Running PPO training in base environment '$BASE_CONDA_ENV'..."
-export VLLM_ATTENTION_BACKEND=${VLLM_ATTENTION_BACKEND:-XFORMERS}

# Construct server base URL, adding path if needed
AGENTGYM_SERVER_BASE="http://$AGENTGYM_HOST" # Base URL without port
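# With the defaults above this resolves to "http://0.0.0.0"; the per-environment
# ports are presumably supplied separately via data.env_ports in the Hydra overrides below.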
@@ -283,8 +284,8 @@ hydra_overrides=(
283284 " data.env_ports=[${AGENTGYM_PORTS_STR} ]"
284285 " data.train_data_num=null"
285286 " data.val_data_num=null"
286- " data.train_batch_size=4 "
287- " data.val_batch_size=2 "
287+ " data.train_batch_size=6 "
288+ " data.val_batch_size=3 "
288289 " data.max_prompt_length=4096"
289290 " data.max_response_length=1000"
290291 " data.max_start_length=2048"
@@ -296,8 +297,8 @@ hydra_overrides=(
296297 " actor_rollout_ref.model.enable_gradient_checkpointing=true"
297298 " actor_rollout_ref.model.use_remove_padding=True"
298299 " actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.95"
299- " actor_rollout_ref.actor.ppo_mini_batch_size=4 "
300- " actor_rollout_ref.actor.ppo_micro_batch_size=4 "
300+ " actor_rollout_ref.actor.ppo_mini_batch_size=6 "
301+ " actor_rollout_ref.actor.ppo_micro_batch_size=6 "
301302 " actor_rollout_ref.actor.fsdp_config.param_offload=true"
302303 " actor_rollout_ref.actor.fsdp_config.grad_offload=true"
303304 " actor_rollout_ref.actor.fsdp_config.optimizer_offload=true"
@@ -329,7 +330,7 @@ hydra_overrides=(
329330 " +trainer.val_only=false"
330331 " +trainer.val_before_train=true"
331332 " trainer.default_hdfs_dir=null"
332- " trainer.n_gpus_per_node=4 "
333+ " trainer.n_gpus_per_node=3 "
333334 " trainer.nnodes=1"
334335 " trainer.save_freq=100"
335336 " trainer.test_freq=50"