inclusionAI · flying-dragon-ai · Jan 31, 2026 · Jan 31, 2026 · Jan 31, 2026 · Jan 31, 2026
diff --git a/.claude/index.json b/.claude/index.json
@@ -0,0 +1,196 @@
+{
+  "project_name": "AReaL",
+  "description": "A Large-Scale Asynchronous Reinforcement Learning System for Language Reasoning",
+  "scan_timestamp": "2026-01-31T13:49:46+08:00",
+  "version": "0.3+",
+  "language": "Python",
+  "tech_stack": ["Python 3.12+", "PyTorch", "FSDP2", "Megatron", "SGLang", "vLLM"],
+  "modules": [
+    {
+      "name": "areal.api",
+      "path": "areal/api",
+      "type": "core",
+      "description": "配置数据类、工作流/引擎契约定义",
+      "entry_points": ["cli_args.py", "engine_api.py", "workflow_api.py", "reward_api.py"],
+      "key_files": ["alloc_mode.py", "io_struct.py", "scheduler_api.py", "env_api.py"],
+      "has_tests": true,
+      "has_config": false
+    },
+    {
+      "name": "areal.engine",
+      "path": "areal/engine",
+      "type": "core",
+      "description": "训练引擎适配器：FSDP2、Megatron、SGLang/vLLM",
+      "entry_points": ["fsdp_engine.py", "megatron_engine.py", "sglang_remote.py", "vllm_remote.py"],
+      "key_files": ["core/train_engine.py", "ppo/actor.py", "ppo/critic.py", "sft/lm_engine.py", "rw/rw_engine.py"],
+      "has_tests": true,
+      "has_config": false
+    },
+    {
+      "name": "areal.workflow",
+      "path": "areal/workflow",
+      "type": "core",
+      "description": "RolloutWorkflow 实现：多轮对话、RLVR、视觉RLVR、Agent集成",
+      "entry_points": ["multi_turn.py", "rlvr.py", "vision_rlvr.py"],
+      "key_files": ["openai/math_agent.py", "anthropic/math_agent.py", "langchain/math_agent.py", "openai_agent/math_agent.py"],
+      "has_tests": false,
+      "has_config": false
+    },
+    {
+      "name": "areal.reward",
+      "path": "areal/reward",
+      "type": "core",
+      "description": "奖励函数：数学推理、视觉任务",
+      "entry_points": ["gsm8k.py", "geometry3k.py", "clevr_count_70k.py"],
+      "key_files": [],
+      "has_tests": false,
+      "has_config": false
+    },
+    {
+      "name": "areal.dataset",
+      "path": "areal/dataset",
+      "type": "core",
+      "description": "数据集加载器：GSM8K、Geometry3K、CLEVR、HHRLHF",
+      "entry_points": ["gsm8k.py", "geometry3k.py", "clevr_count_70k.py", "hhrlhf.py", "torl_data.py"],
+      "key_files": [],
+      "has_tests": false,
+      "has_config": false
+    },
+    {
+      "name": "areal.controller",
+      "path": "areal/controller",
+      "type": "core",
+      "description": "训练与推理控制器：rollout、train",
+      "entry_points": ["rollout_controller.py", "train_controller.py"],
+      "key_files": ["rollout_callback.py"],
+      "has_tests": true,
+      "has_config": false
+    },
+    {
+      "name": "areal.core",
+      "path": "areal/core",
+      "type": "core",
+      "description": "核心运行时：异步任务、分布式rollout、工作流执行器",
+      "entry_points": ["workflow_executor.py", "dist_rollout.py", "async_task_runner.py"],
+      "key_files": ["remote_inf_engine.py", "staleness_manager.py", "workflow_context.py"],
+      "has_tests": true,
+      "has_config": false
+    },
+    {
+      "name": "areal.launcher",
+      "path": "areal/launcher",
+      "type": "infrastructure",
+      "description": "启动器：本地、Ray、Slurm、SGLang/vLLM服务器",
+      "entry_points": ["local.py", "ray.py", "slurm.py"],
+      "key_files": ["sglang_server.py", "vllm_server.py"],
+      "has_tests": false,
+      "has_config": false
+    },
+    {
+      "name": "areal.utils",
+      "path": "areal/utils",
+      "type": "utilities",
+      "description": "工具库：日志、张量操作、检查点、分布式、FP8、FSDP、Megatron",
+      "entry_points": ["logging.py", "distributed.py", "data.py"],
+      "key_files": ["checkpoint.py", "fp8/", "fsdp/", "mcore/", "functional/", "perf_tracer.py"],
+      "has_tests": true,
+      "has_config": false
+    },
+    {
+      "name": "areal.models",
+      "path": "areal/models",
+      "type": "models",
+      "description": "模型实现：FSDP Ulysses、Megatron Core适配",
+      "entry_points": ["parallel_styles.py"],
+      "key_files": ["fsdp/ulysses.py", "mcore/qwen3.py", "mcore/hf_load.py", "mcore/hf_save.py"],
+      "has_tests": false,
+      "has_config": false
+    },
+    {
+      "name": "areal.experimental",
+      "path": "areal/experimental",
+      "type": "experimental",
+      "description": "实验性功能：Archon引擎、OpenAI集成、Trainer、多轮V2",
+      "entry_points": ["engine/archon_engine.py", "openai/client.py", "trainer/rl.py"],
+      "key_files": ["models/archon/", "openai/proxy/", "camel/", "workflow/multi_turn_v2.py"],
+      "has_tests": true,
+      "has_config": false
+    },
+    {
+      "name": "examples",
+      "path": "examples",
+      "type": "examples",
+      "description": "训练脚本与配置：数学、多轮、VLM、搜索Agent、TIR、RLHF",
+      "entry_points": ["math/gsm8k_rl.py", "math/gsm8k_sft.py"],
+      "key_files": ["math/*.yaml", "multi_turn_math/", "vlm/", "search_agent/", "tir/", "alignment/"],
+      "has_tests": false,
+      "has_config": true
+    },
+    {
+      "name": "evaluation",
+      "path": "evaluation",
+      "type": "evaluation",
+      "description": "评估工具：数学评估、代码评估、latex2sympy",
+      "entry_points": ["evaluate.py", "math_eval.py", "code_eval.py"],
+      "key_files": ["grader.py", "latex2sympy/", "python_executor.py"],
+      "has_tests": false,
+      "has_config": false
+    },
+    {
+      "name": "docs",
+      "path": "docs",
+      "type": "documentation",
+      "description": "Jupyter Book文档源码",
+      "entry_points": ["intro.md", "tutorial/quickstart.md"],
+      "key_files": ["algorithms/", "customization/", "best_practices/", "lite/"],
+      "has_tests": false,
+      "has_config": true
+    },
+    {
+      "name": "recipe",
+      "path": "recipe",
+      "type": "recipe",
+      "description": "算法配方：AEnt（Advantage Entropy）",
+      "entry_points": ["AEnt/gsm8k_aent_grpo.py"],
+      "key_files": ["AEnt/actor.py", "AEnt/functional.py", "AEnt/aent_args.py"],
+      "has_tests": false,
+      "has_config": true
+    }
+  ],
+  "coverage": {
+    "total_files_estimated": 450,
+    "scanned_files": 450,
+    "coverage_percentage": 100,
+    "modules_documented": 15,
+    "modules_with_tests": 6,
+    "gaps": []
+  },
+  "ignored_patterns": [
+    ".git/**",
+    ".github/**",
+    "node_modules/**",
+    "dist/**",
+    "build/**",
+    "__pycache__/**",
+    "*.pyc",
+    "*.log",
+    "*.lock",
+    ".venv/**",
+    "venv/**",
+    "wandb/**",
+    "outputs/**",
+    "logs/**",
+    ".ruff_cache/**",
+    ".pytest_cache/**",
+    ".legacy/**",
+    ".data/**",
+    ".agent/**",
+    ".claude/sessions/**",
+    "slurm_outs/**",
+    "_data/**",
+    "trace_result/**",
+    "profile_result/**"
+  ],
+  "truncated": false,
+  "next_steps": []
+}
diff --git a/areal/api/CLAUDE.md b/areal/api/CLAUDE.md
@@ -0,0 +1,148 @@
+[根目录](../../CLAUDE.md) > **areal/api**
+
+# areal.api - API 与配置契约
+
+## 变更记录 (Changelog)
+
+### 2026-01-31 - 初始化
+
+- 模块文档创建
+- 识别 9 个核心文件
+
+---
+
+## 模块职责
+
+定义 AReaL 系统的核心 API 契约与配置数据类：
+
+- **配置数据类**：CLI 参数、训练/推理超参数、并行策略
+- **引擎契约**：`TrainEngine`、`InferenceEngine` 抽象基类
+- **工作流契约**：`RolloutWorkflow`、`AgentWorkflow` 抽象基类
+- **奖励契约**：`RewardFunction` 与异步包装器
+- **调度器契约**：`Scheduler` 抽象基类
+- **数据结构**：`ModelRequest`、`ModelResponse`、`ParamSpec` 等
+
+## 入口与启动
+
+无独立启动入口，作为其他模块的依赖被导入。
+
+## 对外接口
+
+### 核心抽象类
+
+| 类名                | 文件                 | 职责                                   |
+| ------------------- | -------------------- | -------------------------------------- |
+| `TrainEngine`       | `engine_api.py`      | 训练引擎抽象基类（FSDP/Megatron）      |
+| `InferenceEngine`   | `engine_api.py`      | 推理引擎抽象基类（SGLang/vLLM）        |
+| `RolloutWorkflow`   | `workflow_api.py`    | Rollout 工作流抽象基类                 |
+| `AgentWorkflow`     | `workflow_api.py`    | Agent 工作流抽象基类（OpenAI SDK 集成） |
+| `RewardFunction`    | `reward_api.py`      | 奖励函数抽象基类                       |
+| `Scheduler`         | `scheduler_api.py`   | 调度器抽象基类（Local/Ray/Slurm）      |
+
+### 配置数据类
+
+| 类名                          | 文件            | 职责                                   |
+| ----------------------------- | --------------- | -------------------------------------- |
+| `NormConfig`                  | `cli_args.py`   | 奖励/优势归一化配置                    |
+| `MicroBatchSpec`              | `cli_args.py`   | 微批次划分规格                         |
+| `GenerationHyperparameters`   | `cli_args.py`   | 生成超参数（温度、top_p、max_tokens）  |
+| `TrainingHyperparameters`     | `cli_args.py`   | 训练超参数（学习率、优化器、调度器）    |
+| `ParallelStrategy`            | `alloc_mode.py` | 并行策略（DP/TP/PP/CP/EP）             |
+
+### 数据结构
+
+| 类名                | 文件            | 职责                                   |
+| ------------------- | --------------- | -------------------------------------- |
+| `ModelRequest`      | `io_struct.py`  | 推理请求（input_ids、gconfig）         |
+| `ModelResponse`     | `io_struct.py`  | 推理响应（output_tokens、logprobs）    |
+| `ParamSpec`         | `io_struct.py`  | 参数规格（形状、dtype、设备）          |
+| `WeightUpdateMeta`  | `io_struct.py`  | 权重更新元数据                         |
+| `SaveLoadMeta`      | `io_struct.py`  | 检查点保存/加载元数据                  |
+
+## 关键依赖与配置
+
+### 外部依赖
+
+- `torch`：张量操作与分布式通信
+- `transformers`：Tokenizer 与模型配置
+- `omegaconf`、`hydra-core`：配置管理
+- `pydantic`：数据验证
+
+### 内部依赖
+
+- `areal.utils.logging`：日志工具
+- `areal.utils.name_resolve`：动态导入
+
+## 数据模型
+
+### CLI 参数结构
+
+```python
+@dataclass
+class TrainingHyperparameters:
+    # 优化器
+    optimizer: str = "adam"
+    lr: float = 1e-5
+    weight_decay: float = 0.0
+
+    # 学习率调度
+    lr_scheduler: str = "cosine"
+    warmup_steps: int = 0
+
+    # 训练配置
+    n_epochs: int = 1
+    gradient_accumulation_steps: int = 1
+    max_grad_norm: float = 1.0
+```
+
+### 并行策略
+
+```python
+@dataclass
+class ParallelStrategy:
+    data_parallel_size: int = 1
+    tensor_parallel_size: int = 1
+    pipeline_parallel_size: int = 1
+    context_parallel_size: int = 1
+    expert_parallel_size: int = 1
+```
+
+## 测试与质量
+
+- **测试覆盖**：部分配置类有单元测试（`areal/tests/test_adv_norm_config.py`、`areal/tests/test_allocation_mode.py`）
+- **质量工具**：Ruff（格式化与 lint）、pre-commit hooks
+
+## 常见问题 (FAQ)
+
+### Q: 如何添加新的配置字段？
+
+A: 在对应的 `@dataclass` 中添加字段，并提供默认值。注意向后兼容性。
+
+### Q: 如何实现自定义 Workflow？
+
+A: 继承 `RolloutWorkflow` 并实现 `arun_episode` 方法，或继承 `AgentWorkflow` 并实现 `run` 方法。参考 `areal/workflow/multi_turn.py`。
+
+### Q: 如何实现自定义 Engine？
+
+A: 继承 `TrainEngine` 或 `InferenceEngine`，实现所有抽象方法。参考 `areal/engine/fsdp_engine.py`。
+
+## 相关文件清单
+
+```
+areal/api/
+├── __init__.py
+├── alloc_mode.py          # 并行策略与分配模式
+├── cli_args.py            # CLI 参数与配置数据类（核心）
+├── engine_api.py          # 引擎抽象基类（核心）
+├── env_api.py             # 环境 API（实验性）
+├── io_struct.py           # 数据结构定义
+├── reward_api.py          # 奖励函数抽象基类
+├── scheduler_api.py       # 调度器抽象基类
+└── workflow_api.py        # 工作流抽象基类（核心）
+```
+
+## 下一步建议
+
+- 补充 `cli_args.py` 中各配置类的详细文档
+- 添加配置验证的单元测试
+- 完善 `env_api.py` 的实验性功能