Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 196 additions & 0 deletions .claude/index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
{
"project_name": "AReaL",
"description": "A Large-Scale Asynchronous Reinforcement Learning System for Language Reasoning",
"scan_timestamp": "2026-01-31T13:49:46+08:00",
"version": "0.3+",
"language": "Python",
"tech_stack": ["Python 3.12+", "PyTorch", "FSDP2", "Megatron", "SGLang", "vLLM"],
"modules": [
{
"name": "areal.api",
"path": "areal/api",
"type": "core",
"description": "配置数据类、工作流/引擎契约定义",
"entry_points": ["cli_args.py", "engine_api.py", "workflow_api.py", "reward_api.py"],
"key_files": ["alloc_mode.py", "io_struct.py", "scheduler_api.py", "env_api.py"],
"has_tests": true,
"has_config": false
},
{
"name": "areal.engine",
"path": "areal/engine",
"type": "core",
"description": "训练引擎适配器:FSDP2、Megatron、SGLang/vLLM",
"entry_points": ["fsdp_engine.py", "megatron_engine.py", "sglang_remote.py", "vllm_remote.py"],
"key_files": ["core/train_engine.py", "ppo/actor.py", "ppo/critic.py", "sft/lm_engine.py", "rw/rw_engine.py"],
"has_tests": true,
"has_config": false
},
{
"name": "areal.workflow",
"path": "areal/workflow",
"type": "core",
"description": "RolloutWorkflow 实现:多轮对话、RLVR、视觉RLVR、Agent集成",
"entry_points": ["multi_turn.py", "rlvr.py", "vision_rlvr.py"],
"key_files": ["openai/math_agent.py", "anthropic/math_agent.py", "langchain/math_agent.py", "openai_agent/math_agent.py"],
"has_tests": false,
"has_config": false
},
{
"name": "areal.reward",
"path": "areal/reward",
"type": "core",
"description": "奖励函数:数学推理、视觉任务",
"entry_points": ["gsm8k.py", "geometry3k.py", "clevr_count_70k.py"],
"key_files": [],
"has_tests": false,
"has_config": false
},
{
"name": "areal.dataset",
"path": "areal/dataset",
"type": "core",
"description": "数据集加载器:GSM8K、Geometry3K、CLEVR、HHRLHF",
"entry_points": ["gsm8k.py", "geometry3k.py", "clevr_count_70k.py", "hhrlhf.py", "torl_data.py"],
"key_files": [],
"has_tests": false,
"has_config": false
},
{
"name": "areal.controller",
"path": "areal/controller",
"type": "core",
"description": "训练与推理控制器:rollout、train",
"entry_points": ["rollout_controller.py", "train_controller.py"],
"key_files": ["rollout_callback.py"],
"has_tests": true,
"has_config": false
},
{
"name": "areal.core",
"path": "areal/core",
"type": "core",
"description": "核心运行时:异步任务、分布式rollout、工作流执行器",
"entry_points": ["workflow_executor.py", "dist_rollout.py", "async_task_runner.py"],
"key_files": ["remote_inf_engine.py", "staleness_manager.py", "workflow_context.py"],
"has_tests": true,
"has_config": false
},
{
"name": "areal.launcher",
"path": "areal/launcher",
"type": "infrastructure",
"description": "启动器:本地、Ray、Slurm、SGLang/vLLM服务器",
"entry_points": ["local.py", "ray.py", "slurm.py"],
"key_files": ["sglang_server.py", "vllm_server.py"],
"has_tests": false,
"has_config": false
},
{
"name": "areal.utils",
"path": "areal/utils",
"type": "utilities",
"description": "工具库:日志、张量操作、检查点、分布式、FP8、FSDP、Megatron",
"entry_points": ["logging.py", "distributed.py", "data.py"],
"key_files": ["checkpoint.py", "fp8/", "fsdp/", "mcore/", "functional/", "perf_tracer.py"],
"has_tests": true,
"has_config": false
},
{
"name": "areal.models",
"path": "areal/models",
"type": "models",
"description": "模型实现:FSDP Ulysses、Megatron Core适配",
"entry_points": ["parallel_styles.py"],
"key_files": ["fsdp/ulysses.py", "mcore/qwen3.py", "mcore/hf_load.py", "mcore/hf_save.py"],
"has_tests": false,
"has_config": false
},
{
"name": "areal.experimental",
"path": "areal/experimental",
"type": "experimental",
"description": "实验性功能:Archon引擎、OpenAI集成、Trainer、多轮V2",
"entry_points": ["engine/archon_engine.py", "openai/client.py", "trainer/rl.py"],
"key_files": ["models/archon/", "openai/proxy/", "camel/", "workflow/multi_turn_v2.py"],
"has_tests": true,
"has_config": false
},
{
"name": "examples",
"path": "examples",
"type": "examples",
"description": "训练脚本与配置:数学、多轮、VLM、搜索Agent、TIR、RLHF",
"entry_points": ["math/gsm8k_rl.py", "math/gsm8k_sft.py"],
"key_files": ["math/*.yaml", "multi_turn_math/", "vlm/", "search_agent/", "tir/", "alignment/"],
"has_tests": false,
"has_config": true
},
{
"name": "evaluation",
"path": "evaluation",
"type": "evaluation",
"description": "评估工具:数学评估、代码评估、latex2sympy",
"entry_points": ["evaluate.py", "math_eval.py", "code_eval.py"],
"key_files": ["grader.py", "latex2sympy/", "python_executor.py"],
"has_tests": false,
"has_config": false
},
{
"name": "docs",
"path": "docs",
"type": "documentation",
"description": "Jupyter Book文档源码",
"entry_points": ["intro.md", "tutorial/quickstart.md"],
"key_files": ["algorithms/", "customization/", "best_practices/", "lite/"],
"has_tests": false,
"has_config": true
},
{
"name": "recipe",
"path": "recipe",
"type": "recipe",
"description": "算法配方:AEnt(Advantage Entropy)",
"entry_points": ["AEnt/gsm8k_aent_grpo.py"],
"key_files": ["AEnt/actor.py", "AEnt/functional.py", "AEnt/aent_args.py"],
"has_tests": false,
"has_config": true
}
],
"coverage": {
"total_files_estimated": 450,
"scanned_files": 450,
"coverage_percentage": 100,
"modules_documented": 15,
"modules_with_tests": 6,
"gaps": []
},
"ignored_patterns": [
".git/**",
".github/**",
"node_modules/**",
"dist/**",
"build/**",
"__pycache__/**",
"*.pyc",
"*.log",
"*.lock",
".venv/**",
"venv/**",
"wandb/**",
"outputs/**",
"logs/**",
".ruff_cache/**",
".pytest_cache/**",
".legacy/**",
".data/**",
".agent/**",
".claude/sessions/**",
"slurm_outs/**",
"_data/**",
"trace_result/**",
"profile_result/**"
],
"truncated": false,
"next_steps": []
}
148 changes: 148 additions & 0 deletions areal/api/CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
[根目录](../../CLAUDE.md) > **areal/api**

# areal.api - API 与配置契约

## 变更记录 (Changelog)

### 2026-01-31 - 初始化

- 模块文档创建
- 识别 9 个核心文件

---

## 模块职责

定义 AReaL 系统的核心 API 契约与配置数据类:

- **配置数据类**:CLI 参数、训练/推理超参数、并行策略
- **引擎契约**:`TrainEngine`、`InferenceEngine` 抽象基类
- **工作流契约**:`RolloutWorkflow`、`AgentWorkflow` 抽象基类
- **奖励契约**:`RewardFunction` 与异步包装器
- **调度器契约**:`Scheduler` 抽象基类
- **数据结构**:`ModelRequest`、`ModelResponse`、`ParamSpec`

## 入口与启动

无独立启动入口,作为其他模块的依赖被导入。

## 对外接口

### 核心抽象类

| 类名 | 文件 | 职责 |
| ------------------- | -------------------- | -------------------------------------- |
| `TrainEngine` | `engine_api.py` | 训练引擎抽象基类(FSDP/Megatron) |
| `InferenceEngine` | `engine_api.py` | 推理引擎抽象基类(SGLang/vLLM) |
| `RolloutWorkflow` | `workflow_api.py` | Rollout 工作流抽象基类 |
| `AgentWorkflow` | `workflow_api.py` | Agent 工作流抽象基类(OpenAI SDK 集成)|
| `RewardFunction` | `reward_api.py` | 奖励函数抽象基类 |
| `Scheduler` | `scheduler_api.py` | 调度器抽象基类(Local/Ray/Slurm) |

### 配置数据类

| 类名 | 文件 | 职责 |
| ----------------------------- | --------------- | -------------------------------------- |
| `NormConfig` | `cli_args.py` | 奖励/优势归一化配置 |
| `MicroBatchSpec` | `cli_args.py` | 微批次划分规格 |
| `GenerationHyperparameters` | `cli_args.py` | 生成超参数(温度、top_p、max_tokens) |
| `TrainingHyperparameters` | `cli_args.py` | 训练超参数(学习率、优化器、调度器) |
| `ParallelStrategy` | `alloc_mode.py` | 并行策略(DP/TP/PP/CP/EP) |

### 数据结构

| 类名 | 文件 | 职责 |
| ------------------- | --------------- | -------------------------------------- |
| `ModelRequest` | `io_struct.py` | 推理请求(input_ids、gconfig) |
| `ModelResponse` | `io_struct.py` | 推理响应(output_tokens、logprobs) |
| `ParamSpec` | `io_struct.py` | 参数规格(形状、dtype、设备) |
| `WeightUpdateMeta` | `io_struct.py` | 权重更新元数据 |
| `SaveLoadMeta` | `io_struct.py` | 检查点保存/加载元数据 |

## 关键依赖与配置

### 外部依赖

- `torch`:张量操作与分布式通信
- `transformers`:Tokenizer 与模型配置
- `omegaconf`、`hydra-core`:配置管理
- `pydantic`:数据验证

### 内部依赖

- `areal.utils.logging`:日志工具
- `areal.utils.name_resolve`:动态导入

## 数据模型

### CLI 参数结构

```python
@dataclass
class TrainingHyperparameters:
    """Simplified sketch of the training hyperparameter dataclass.

    Groups the optimizer, learning-rate schedule, and training-loop settings.
    NOTE(review): illustrative excerpt -- confirm field names and defaults
    against `cli_args.py` before relying on them.
    """

    # Optimizer
    optimizer: str = "adam"
    lr: float = 1e-5
    weight_decay: float = 0.0

    # Learning-rate schedule
    lr_scheduler: str = "cosine"
    warmup_steps: int = 0

    # Training configuration
    n_epochs: int = 1
    gradient_accumulation_steps: int = 1
    max_grad_norm: float = 1.0
```

### 并行策略

```python
@dataclass
class ParallelStrategy:
    """Simplified sketch of the parallel strategy dataclass.

    Holds one size per parallelism dimension (data, tensor, pipeline,
    context, expert); every dimension defaults to 1.
    NOTE(review): illustrative excerpt -- confirm field names and defaults
    against `alloc_mode.py` before relying on them.
    """

    data_parallel_size: int = 1
    tensor_parallel_size: int = 1
    pipeline_parallel_size: int = 1
    context_parallel_size: int = 1
    expert_parallel_size: int = 1
```

## 测试与质量

- **测试覆盖**:部分配置类有单元测试(`areal/tests/test_adv_norm_config.py`、`test_allocation_mode.py`)
- **质量工具**:Ruff(格式化与 lint)、pre-commit hooks

## 常见问题 (FAQ)

### Q: 如何添加新的配置字段?

A: 在对应的 `@dataclass` 中添加字段,并提供默认值。注意向后兼容性。

### Q: 如何实现自定义 Workflow?

A: 继承 `RolloutWorkflow` 或 `AgentWorkflow`,实现 `arun_episode` 或 `run` 方法。参考 `areal/workflow/multi_turn.py`。

### Q: 如何实现自定义 Engine?

A: 继承 `TrainEngine` 或 `InferenceEngine`,实现所有抽象方法。参考 `areal/engine/fsdp_engine.py`。

## 相关文件清单

```
areal/api/
├── __init__.py
├── alloc_mode.py # 并行策略与分配模式
├── cli_args.py # CLI 参数与配置数据类(核心)
├── engine_api.py # 引擎抽象基类(核心)
├── env_api.py # 环境 API(实验性)
├── io_struct.py # 数据结构定义
├── reward_api.py # 奖励函数抽象基类
├── scheduler_api.py # 调度器抽象基类
└── workflow_api.py # 工作流抽象基类(核心)
```

## 下一步建议

- 补充 `cli_args.py` 中各配置类的详细文档
- 添加配置验证的单元测试
- 完善 `env_api.py` 的实验性功能
Loading
Loading