PrimeIntellect-ai · snimu · Feb 7, 2026 · Feb 7, 2026 · Feb 7, 2026
diff --git a/docs/environments.md b/docs/environments.md
@@ -797,4 +797,4 @@ Newer and more experimental environment classes include:
 - **`GymEnv`** — universal runner for Gym-compatible environments (OpenAI Gym / Gymnasium API)
 - **`CliAgentEnv`** — runs custom agent code inside sandboxes, intercepting API requests. Accepts sandbox configuration parameters including `docker_image`, `cpu_cores`, `memory_gb`, `disk_size_gb`, `gpu_count`, `timeout_minutes`, `environment_vars`, and `labels` for sandbox categorization
 - **`HarborEnv`** — loads Harbor-format agent benchmark tasks
-- **`RLMEnv`** — implements Recursive Language Models for unbounded context processing. Execution supports both local and sandbox backends via `execution_backend` (`"local"` default, `"sandbox"` to run the REPL inside a Prime Sandbox). Context is still filesystem-based: a provided `context_dir` is copied into the working directory, or legacy JSON-serializable `context` data is written to `context.json`/`context.txt`. The RLM scaffolding prompt (filesystem availability note, REPL workflow, tool docs) is injected into the first user message wrapped in `<RLM_SCAFFOLDING>...</RLM_SCAFFOLDING>`, preserving any external system prompt; the model-visible prompt is stored in `state["prompt"]`, while the original input prompt is preserved in `state["raw_prompt"]`. The REPL language is configurable via `repl_language` (default: `bash`); use `repl_language="python"` to retain the Python REPL. Bash mode uses `call_bash_repl` and behaves like a terminal; Python mode uses `call_python_repl`. Sub-LLM and root-tool interception for sandboxes is routed through a Prime Tunnel unless `interception_url` is provided. Tooling can be split via `tools` (shared), `root_tools` (REPL-only), and `sub_tools` (sub-LLM tools). Fixed root tools like `llm_batch` are always present and cannot be overridden. Tool ordering is fixed tools → shared tools → role-specific tools, with per-list deduplication by name. Root tools are callable only inside the REPL; sub-LLM tools use standard tool-calling. When using the sandbox backend, the sandbox and worker are started eagerly during `setup_state`, and package installs are skipped when the package is already importable in the image.
+- **`RLMEnv`** — implements Recursive Language Models for unbounded context processing. Execution supports both local and sandbox backends via `execution_backend` (`"local"` default, `"sandbox"` to run the REPL inside a Prime Sandbox). Context is still filesystem-based: a provided `context_dir` is copied into the working directory, or legacy JSON-serializable `context` data is written to `context.json`/`context.txt`. The RLM scaffolding prompt (filesystem availability note, REPL workflow, tool docs) is injected into the first user message wrapped in `<RLM_SCAFFOLDING>...</RLM_SCAFFOLDING>`, preserving any external system prompt; the model-visible prompt is stored in `state["prompt"]`, while the original input prompt is preserved in `state["raw_prompt"]`. The REPL language is configurable via `repl_language` (default: `bash`); use `repl_language="python"` to retain the Python REPL. Bash mode uses `call_bash_repl` and behaves like a terminal; Python mode uses `call_python_repl`. Sub-LLM and root-tool interception for sandboxes is routed through a Prime Tunnel unless `interception_url` is provided. Tooling can be split via `tools` (shared), `root_tools` (REPL-only), and `sub_tools` (sub-LLM tools). Fixed root tools like `llm_batch` are always present and cannot be overridden. Tool ordering is fixed tools → shared tools → role-specific tools, with per-list deduplication by name. Root tools are callable only inside the REPL; sub-LLM tools use standard tool-calling. When using the sandbox backend, the sandbox and worker are started eagerly during `setup_state`, and package installs are skipped when the package is already importable in the image. Environments can pre-set `state["rlm_fs_root_remote"]` (and optionally `state["rlm_control_dir_remote"]`) before calling `super().setup_state` to point the worker at an existing filesystem path in the sandbox. For further customization, override `get_sandbox_request`, `on_sandbox_ready`, or `customize_worker_script` on `RLMEnv`.
diff --git a/verifiers/envs/experimental/README.md b/verifiers/envs/experimental/README.md
@@ -24,6 +24,10 @@ Environment implementing [Recursive Language Models](https://alexzhang13.github.
 
 Notes:
 - When using the sandbox backend, the sandbox and worker are started eagerly during `setup_state`.
+  Environments can pre-set `state["rlm_fs_root_remote"]` (and optionally `state["rlm_control_dir_remote"]`)
+  before calling `super().setup_state` to point the worker at an existing filesystem path in the sandbox.
+  You can also override `get_sandbox_request`, `on_sandbox_ready`, and `customize_worker_script` on `RLMEnv`
+  to customize sandbox creation, run setup steps (e.g., repo initialization), or tweak the worker script.
 - Package installation in sandboxes is best-effort: packages are only installed if they are not importable, which avoids unnecessary installs on images that already include them.
 
 Tool split:

diff --git a/verifiers/envs/experimental/rlm_env.py b/verifiers/envs/experimental/rlm_env.py
@@ -1768,6 +1768,7 @@ async def _start_worker(self, state: State, session: LocalRLMReplSession) -> Non
         worker_script = _render_worker_script(
             session.paths, repl_language=self.env.repl_language
         )
+        worker_script = self.env.customize_worker_script(worker_script, state)
         Path(session.paths.worker_path).write_text(worker_script, encoding="utf-8")
 
         env_vars = os.environ.copy()
@@ -1866,7 +1867,22 @@ async def prepare_filesystem(self, state: State) -> None:
             session.sandbox_id = sandbox.id
             self._active_sandboxes.add(sandbox.id)
             state["sandbox_id"] = sandbox.id
+            state["sandbox_state"] = {
+                "ready": False,
+                "ready_wait_time": 0.0,
+                "command_execution_times": [],
+            }
+            start_wait = time.time()
             await self._wait_for_sandbox_ready(sandbox.id)
+            state["sandbox_state"]["ready"] = True
+            state["sandbox_state"]["ready_wait_time"] = time.time() - start_wait
+            try:
+                # Allow environments to run repo/tool setup before the worker starts.
+                await self.env.on_sandbox_ready(state, sandbox.id)
+            except Exception as exc:
+                raise vf.SandboxError(
+                    f"Sandbox setup hook failed: {exc}"
+                ) from exc
 
         if not session.sandbox_id:
             raise vf.SandboxError() from Exception("Sandbox not initialized")
@@ -2063,22 +2079,7 @@ def _get_session(self, state: State) -> SandboxRLMReplSession:
         return self._sessions[rollout_id]
 
     def _build_sandbox_request(self, state: State) -> CreateSandboxRequest:
-        env = self.env
-        env_vars = dict(env.sandbox_environment_vars or {})
-        return CreateSandboxRequest(
-            name=f"rlm-{state.get('rollout_id', 'unknown')}",
-            docker_image=env.sandbox_docker_image,
-            start_command=env.sandbox_start_command,
-            cpu_cores=env.sandbox_cpu_cores,
-            memory_gb=env.sandbox_memory_gb,
-            disk_size_gb=env.sandbox_disk_size_gb,
-            gpu_count=env.sandbox_gpu_count,
-            timeout_minutes=env.sandbox_timeout_minutes,
-            environment_vars=env_vars,
-            team_id=env.sandbox_team_id,
-            advanced_configs=env.sandbox_advanced_configs,
-            labels=env.sandbox_labels or [],
-        )
+        return self.env.get_sandbox_request(state)
 
     async def _install_packages(self, session: SandboxRLMReplSession) -> None:
         sandbox_id = session.sandbox_id
@@ -2146,6 +2147,7 @@ async def _write_sandbox_files(
             repl_language=self.env.repl_language,
             sandboxed=True,
         )
+        worker_script = self.env.customize_worker_script(worker_script, state)
         worker_path.write_text(worker_script, encoding="utf-8")
 
         await self._sandbox_client.upload_file(
@@ -2419,6 +2421,15 @@ class RLMEnv(vf.StatefulToolEnv):
     Works with any dataset that has a normal prompt. Input data can optionally
     be provided via info[context_dir_key] (directory path) or info[context_key]
     (legacy builtin data written to a file).
+    When using the sandbox backend, the sandbox and worker are started eagerly
+    during setup_state. Environments that need the worker to start in an existing
+    sandbox path can set state["rlm_fs_root_remote"] (and optionally
+    state["rlm_control_dir_remote"]) before calling super().setup_state; otherwise
+    the default remote paths are /tmp/rlm_<id>/rlm_fs and /tmp/rlm_<id>/rlm_control.
+    Environments can also override get_sandbox_request, on_sandbox_ready, and
+    customize_worker_script to adjust sandbox creation, perform post-create setup
+    (e.g., repo initialization), or tweak the worker script without forking the
+    executor implementation.
 
     Args:
         tools: List of tools shared by both the root REPL and sub-LLMs.
@@ -2668,6 +2679,45 @@ def __init__(
         else:
             self.add_tool(self.call_python_repl, args_to_skip=["state"])
 
+    def get_sandbox_request(self, state: State) -> CreateSandboxRequest:
+        """Return the sandbox request for this rollout.
+
+        Override this to customize the sandbox image or resources per-state.
+        This is invoked by the sandbox executor when a sandbox needs to be created.
+        """
+        env_vars = dict(self.sandbox_environment_vars or {})
+        return CreateSandboxRequest(
+            name=f"rlm-{state.get('rollout_id', 'unknown')}",
+            docker_image=self.sandbox_docker_image,
+            start_command=self.sandbox_start_command,
+            cpu_cores=self.sandbox_cpu_cores,
+            memory_gb=self.sandbox_memory_gb,
+            disk_size_gb=self.sandbox_disk_size_gb,
+            gpu_count=self.sandbox_gpu_count,
+            timeout_minutes=self.sandbox_timeout_minutes,
+            environment_vars=env_vars,
+            team_id=self.sandbox_team_id,
+            advanced_configs=self.sandbox_advanced_configs,
+            labels=self.sandbox_labels or [],
+        )
+
+    async def on_sandbox_ready(self, state: State, sandbox_id: str) -> None:
+        """Hook for environment-specific sandbox setup.
+
+        Override to perform repo initialization, tool upload, or other sandbox
+        preparation steps after the sandbox is created and ready but before the
+        worker starts. Defaults to a no-op.
+        """
+        return None
+
+    def customize_worker_script(self, script: str, state: State) -> str:
+        """Hook to adjust the generated worker script before it is written.
+
+        Override to inject additional imports or tweaks without replacing the
+        executor implementation. Defaults to returning the script unchanged.
+        """
+        return script
+
     # =========================================================================
     # Sub-Agent Tool Infrastructure
     # =========================================================================
@@ -3636,8 +3686,10 @@ async def setup_state(self, state: State, **kwargs) -> State:
         rollout_id = f"rlm_{uuid.uuid4().hex[:8]}"
         state["rollout_id"] = rollout_id
         if self.execution_backend == "sandbox":
-            state["rlm_fs_root_remote"] = f"/tmp/rlm_{rollout_id}/rlm_fs"
-            state["rlm_control_dir_remote"] = f"/tmp/rlm_{rollout_id}/rlm_control"
+            state.setdefault("rlm_fs_root_remote", f"/tmp/rlm_{rollout_id}/rlm_fs")
+            state.setdefault(
+                "rlm_control_dir_remote", f"/tmp/rlm_{rollout_id}/rlm_control"
+            )
 
         if self.include_sub_llm_in_trajectory and self.interleaved_rollouts:
             raise ValueError(