- [2025/05/06] We present MAS-Orchestra [Project Page | Paper | Code]
- 🏠 [Project Page]
- 📜 [Paper]
- 💻 [Code]
# Create and activate a dedicated conda environment for the project.
conda create -n mas-orchestra python==3.10
conda activate mas-orchestra

# System packages installed through apt.
apt update && apt install -y wget curl

# Run install.sh from inside ./verl, then install that checkout in editable
# mode without letting pip resolve its dependencies (--no-deps).
cd ./verl
./install.sh
pip install --no-deps -e .

# Pinned versions; --force-reinstall replaces any copy already installed.
pip install ray==2.49.2 --force-reinstall
pip install protobuf==4.25.8 --force-reinstall

pip install together
# The requirement is quoted because an unquoted [...] is a glob pattern to the
# shell (zsh aborts with "no matches found" instead of running pip).
pip install "math-verify[antlr4_13_2]"
# NOTE(review): this pin runs after the antlr4_13_2 extra above and may replace
# the ANTLR runtime that extra selected; confirm the order is intentional.
pip install antlr4-python3-runtime==4.9.3
pip install langchain-core langchain-together langchain-community duckduckgo-search tavily-python pydantic ddgs langchain_brightdata bs4
pip install pyserini faiss-gpu
pip install git+https://github.com/texttron/tevatron.git
# Export the API keys for the run; replace each {YourKey} placeholder with a
# real key before executing.
export OPENAI_API_KEY={YourKey}
export TOGETHER_API_KEY={YourKey}
export WANDB_API_KEY={YourKey}
# File that receives an appended copy (tee -a) of the run's stdout and stderr.
LOG_FILE={YourLogFile}

# Launch mas_r1_reasoner.main_mas_r1 with the grpo_trainer config from configs/.
# The key=value arguments look like Hydra overrides (confirm against the entry
# point). Every continued line must have whitespace before its trailing
# backslash, otherwise it fuses with the next argument into one word.
# The list override is double-quoted so the shell neither strips its inner
# quotes nor treats [...] as a glob pattern. "$@" forwards any extra arguments
# given to this script unchanged.
python -u -m mas_r1_reasoner.main_mas_r1 \
  --config-path=configs \
  --config-name=grpo_trainer \
  data.max_prompt_length=15000 \
  data.max_validation_prompt_length=15000 \
  data.val_files=data/browse_comp/test_subset_200.parquet \
  data.train_files=data/browse_comp/train_subset_1066.parquet \
  azr.mas_r1.use_llm_judge=True \
  data.raw_data=True \
  data.train_batch_size=64 \
  actor_rollout_ref.rollout.n=32 \
  azr.mas_r1.execution_success_weight=0.0 \
  azr.mas_r1.final_answer_weight=1.0 \
  azr.mas_r1.agent.model_name=gpt-oss-120b \
  azr.mas_r1.multiply_processes=0 \
  azr.mas_r1.max_ray_workers=1 \
  azr.problem_type=harmony_medium \
  "azr.mas_r1.agent.init_archive=['COT','COT_SC','Reflexion','LLM_debate','WebSearch']" \
  trainer.val_before_train=True \
  trainer.test_freq=5 \
  trainer.save_freq=10 \
  actor_rollout_ref.model.path=Qwen/Qwen2.5-7B-Instruct \
  trainer.experiment_name=harmony_medium_grpo_7b_gpt_oss_120b_browse_comp_plus \
  "$@" 2>&1 | tee -a "$LOG_FILE"

If you find MAS-Orchestra helpful, please consider starring this repo and citing our work. We would be very grateful!
@misc{Ke2026MASOrchestra,
title = {MAS-Orchestra: Understanding and Improving Multi-Agent Reasoning Through Holistic Orchestration and Controlled Benchmarks},
author = {Zixuan Ke and Yifei Ming and Austin Xu and Ryan Chin and Xuan-Phi Nguyen and Prathyusha Jwalapuram and Semih Yavuz and Caiming Xiong and Shafiq Joty},
year = {2026},
eprint = {2601.14652},
archivePrefix= {arXiv},
primaryClass = {cs.AI},
note = {Preprint; Work in Progress},
}

This project received help from many researchers at Salesforce AI Research. We also thank the authors of verl for their excellent contributions to the community!
Feel free to contact Zixuan Ke via email: zixuan.ke@salesforce.com
