PrimeIntellect-ai · snimu · Feb 6, 2026 · Feb 6, 2026 · Feb 6, 2026 · Feb 6, 2026
diff --git a/docs/environments.md b/docs/environments.md
@@ -797,4 +797,4 @@ Newer and more experimental environment classes include:
 - **`GymEnv`** — universal runner for Gym-compatible environments (OpenAI Gym / Gymnasium API)
 - **`CliAgentEnv`** — runs custom agent code inside sandboxes, intercepting API requests. Accepts sandbox configuration parameters including `docker_image`, `cpu_cores`, `memory_gb`, `disk_size_gb`, `gpu_count`, `timeout_minutes`, `environment_vars`, and `labels` for sandbox categorization
 - **`HarborEnv`** — loads Harbor-format agent benchmark tasks
-- **`RLMEnv`** — implements Recursive Language Models for unbounded context processing. Execution supports both local and sandbox backends via `execution_backend` (`"local"` default, `"sandbox"` to run the REPL inside a Prime Sandbox). Context is still filesystem-based: a provided `context_dir` is copied into the working directory, or legacy JSON-serializable `context` data is written to `context.json`/`context.txt`. The RLM scaffolding prompt (filesystem availability note, REPL workflow, tool docs) is injected into the first user message wrapped in `<RLM_SCAFFOLDING>...</RLM_SCAFFOLDING>`, preserving any external system prompt; the model-visible prompt is stored in `state["prompt"]`, while the original input prompt is preserved in `state["raw_prompt"]`. The REPL language is configurable via `repl_language` (default: `bash`); use `repl_language="python"` to retain the Python REPL. Bash mode uses `call_bash_repl` and behaves like a terminal; Python mode uses `call_python_repl`. Sub-LLM and root-tool interception for sandboxes is routed through a Prime Tunnel unless `interception_url` is provided. Tooling can be split via `tools` (shared), `root_tools` (REPL-only), and `sub_tools` (sub-LLM tools). Fixed root tools like `llm_batch` are always present and cannot be overridden. Tool ordering is fixed tools → shared tools → role-specific tools, with per-list deduplication by name. Root tools are callable only inside the REPL; sub-LLM tools use standard tool-calling.
+- **`RLMEnv`** — implements Recursive Language Models for unbounded context processing. Execution supports both local and sandbox backends via `execution_backend` (`"local"` default, `"sandbox"` to run the REPL inside a Prime Sandbox). Context is still filesystem-based: a provided `context_dir` is copied into the working directory, or legacy JSON-serializable `context` data is written to `context.json`/`context.txt`. The RLM scaffolding prompt (filesystem availability note, REPL workflow, tool docs) is injected into the first user message wrapped in `<RLM_SCAFFOLDING>...</RLM_SCAFFOLDING>`, preserving any external system prompt; the model-visible prompt is stored in `state["prompt"]`, while the original input prompt is preserved in `state["raw_prompt"]`. The REPL language is configurable via `repl_language` (default: `bash`); use `repl_language="python"` to retain the Python REPL. Bash mode uses `call_bash_repl` and behaves like a terminal; Python mode uses `call_python_repl`. Sub-LLM and root-tool interception for sandboxes is routed through a Prime Tunnel unless `interception_url` is provided. Tooling can be split via `tools` (shared), `root_tools` (REPL-only), and `sub_tools` (sub-LLM tools). Fixed root tools like `llm_batch` are always present and cannot be overridden. Tool ordering is fixed tools → shared tools → role-specific tools, with per-list deduplication by name. Root tools are callable only inside the REPL; sub-LLM tools use standard tool-calling. When using the sandbox backend, the sandbox and worker are started eagerly during `setup_state`, and package installs are skipped when the package is already importable in the image.
diff --git a/environments/rlm_secrets/rlm_secrets.py b/environments/rlm_secrets/rlm_secrets.py
@@ -28,6 +28,7 @@
 import random
 import shutil
 import string
+import tempfile
 from pathlib import Path
 from typing import Any
 
@@ -284,20 +285,29 @@ async def setup_state(self, state: State) -> State:
         """Setup puzzle files in the filesystem."""
         # Extract puzzle from info and store directly in state for easy access
         info = state.get("info", {})
+        if not isinstance(info, dict):
+            info = {}
         puzzle = info.get("puzzle", {})
         state["puzzle"] = puzzle
 
-        # Let RLMEnv do its setup (creates fs_root, starts worker, etc.)
-        state = await super().setup_state(state)
-
-        # Write puzzle files to the filesystem
-        fs_root = state.get("rlm_fs_root")
-        if fs_root and puzzle:
+        temp_dir: str | None = None
+        if puzzle:
+            temp_dir = tempfile.mkdtemp(prefix="rlm_secrets_")
             for filename, content in zip(
                 puzzle.get("filenames", []), puzzle.get("contents", [])
             ):
-                filepath = Path(fs_root) / filename
+                filepath = Path(temp_dir) / filename
                 filepath.write_text(content, encoding="utf-8")
+            info = dict(info)
+            info["context_dir"] = temp_dir
+            state["info"] = info
+
+        try:
+            # Let RLMEnv do its setup (creates fs_root, starts worker, etc.)
+            state = await super().setup_state(state)
+        finally:
+            if temp_dir:
+                shutil.rmtree(temp_dir, True)
 
         return state
 

diff --git a/verifiers/envs/experimental/README.md b/verifiers/envs/experimental/README.md
@@ -22,6 +22,10 @@ Environment for running custom agent code inside sandboxes. Intercepts the agent
 
 Environment implementing [Recursive Language Models](https://alexzhang13.github.io/blog/2025/rlm/) (RLMs), an inference strategy where language models can decompose and recursively interact with input context of unbounded length through REPL environments. The root model interacts with a REPL (`repl_language="bash"` by default, or `repl_language="python"` for the Python REPL) and can spawn sub-LLM calls to process chunks of the context recursively. Execution supports both local and sandbox backends via `execution_backend` (`"local"` default, `"sandbox"` to run inside a Prime Sandbox). Extra context is still provided as a filesystem (either a copied `context_dir` or JSON-serializable `context` written to `context.json`/`context.txt`). The RLM scaffolding prompt is injected into the first user message; the model-visible prompt is stored in `state["prompt"]`, while the original input prompt is preserved in `state["raw_prompt"]`. Sandbox interception for sub-LLM/root-tool calls is routed through a Prime Tunnel unless `interception_url` is provided.
 
+Notes:
+- When using the sandbox backend, the sandbox and worker are started eagerly during `setup_state`.
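+- Package installation in sandboxes is best-effort: a package is installed only if a quick import check shows it is not already importable, which avoids unnecessary installs on images that already include it. The check derives the module name from the package name (replacing `-` with `_`), so a package whose import name differs from its distribution name (for example, `scikit-learn`, imported as `sklearn`) is treated as missing and installed.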
+
 Tool split:
 
 - `tools`: shared between root and sub-LLMs

diff --git a/verifiers/envs/experimental/rlm_env.py b/verifiers/envs/experimental/rlm_env.py
@@ -28,6 +28,7 @@
 import os
 import pickle
 import random
+import re
 import shutil
 import signal
 import shlex
@@ -2088,8 +2089,35 @@ async def _install_packages(self, session: SandboxRLMReplSession) -> None:
         packages.extend(extras)
         if not packages:
             return
-        pkg_list = " ".join(packages)
-        cmd = f"bash -lc 'pip install -q {pkg_list}'"
+        # Check each package with a quick import and install only the ones
+        # that are missing. This avoids failures when pip is unavailable on
+        # PATH but the package is already present in the image (for example,
+        # in mini-swe-agent-plus-rlm).
+        missing: list[str] = []
+        for pkg in packages:
+            name = pkg.strip()
+            name = name.split("@", 1)[0].strip()
+            name = name.split("[", 1)[0].strip()
+            # Strip version constraints (e.g., "numpy>1.20,<2.0") at the first specifier.
+            name = re.split(r"[<>=!~]", name, 1)[0].strip()
+            module = name.replace("-", "_")
+            check_cmd = f"bash -lc 'python -c \"import {module}\"'"
+            try:
+                result = await self._execute_sandbox_command(
+                    sandbox_id,
+                    check_cmd,
+                    timeout=self.env.max_startup_wait_seconds,
+                )
+            except Exception:
+                missing.append(pkg)
+                continue
+            exit_code = getattr(result, "exit_code", 0)
+            if exit_code not in (0, None):
+                missing.append(pkg)
+        if not missing:
+            return
+        pkg_list = " ".join(missing)
+        cmd = f"bash -lc 'python -m pip install -q {pkg_list}'"
         result = await self._execute_sandbox_command(
             sandbox_id,
             cmd,
@@ -3617,82 +3645,102 @@ async def setup_state(self, state: State, **kwargs) -> State:
                 "include_sub_llm_in_trajectory=True. Use branched rollouts instead."
             )
 
-        # 1. Setup interception and register rollout
-        state = await self._setup_interception_and_register(state, rollout_id)
-
-        # 2. Create rollout directories
-        self._executor.create_rollout_dirs(state)
-
-        # 3. Build filesystem context
-        info = state.get("info") or {}
-        if not isinstance(info, dict):
-            info = {}
-        fs_root = state.get("rlm_fs_root")
-        if not fs_root:
-            raise ValueError("RLM filesystem root not initialized")
-        fs_has_data = False
-        fs_source: str | None = None
-
-        context_dir = info.get(self.context_dir_key)
-        if context_dir:
-            fs_source = str(context_dir)
-            self._copy_context_directory(fs_source, fs_root)
-            fs_has_data = True
-        else:
-            context_data = info.get(self.context_key, None)
-            if context_data is not None:
+        try:
+            # 1. Setup interception and register rollout
+            state = await self._setup_interception_and_register(state, rollout_id)
+
+            # 2. Create rollout directories
+            self._executor.create_rollout_dirs(state)
+
+            # 3. Build filesystem context
+            info = state.get("info") or {}
+            if not isinstance(info, dict):
+                info = {}
+            fs_root = state.get("rlm_fs_root")
+            if not fs_root:
+                raise ValueError("RLM filesystem root not initialized")
+            fs_has_data = False
+            fs_source: str | None = None
+
+            context_dir = info.get(self.context_dir_key)
+            if context_dir:
+                fs_source = str(context_dir)
+                self._copy_context_directory(fs_source, fs_root)
                 fs_has_data = True
-                self._write_builtin_context(context_data, fs_root)
-
-        state["rlm_fs_root"] = fs_root
-        state["rlm_fs_source"] = fs_source
-        state["rlm_fs_has_data"] = fs_has_data
-        state["retain_filesystem_after_rollout"] = self.retain_filesystem_after_rollout
-        if self.custom_system_prompt:
-            base_system_prompt = self.custom_system_prompt
-        elif self.repl_language == "bash":
-            base_system_prompt = _RLM_BASH_SYSTEM_PROMPT_STORE[
-                self.root_prompt_verbosity
+            else:
+                context_data = info.get(self.context_key, None)
+                if context_data is not None:
+                    fs_has_data = True
+                    self._write_builtin_context(context_data, fs_root)
+
+            state["rlm_fs_root"] = fs_root
+            state["rlm_fs_source"] = fs_source
+            state["rlm_fs_has_data"] = fs_has_data
+            state["retain_filesystem_after_rollout"] = (
+                self.retain_filesystem_after_rollout
+            )
+            if self.custom_system_prompt:
+                base_system_prompt = self.custom_system_prompt
+            elif self.repl_language == "bash":
+                base_system_prompt = _RLM_BASH_SYSTEM_PROMPT_STORE[
+                    self.root_prompt_verbosity
+                ]
+            else:
+                base_system_prompt = _RLM_PYTHON_SYSTEM_PROMPT_STORE[
+                    self.root_prompt_verbosity
+                ]
+
+            packages_docs = self._generate_packages_documentation()
+            root_tools_docs = self._generate_root_tools_documentation()
+            sub_tools_docs = self._generate_sub_tools_documentation()
+            state["rlm_system_prompt"] = (
+                base_system_prompt + packages_docs + root_tools_docs + sub_tools_docs
+            )
+            state["rlm_packages_docs"] = packages_docs
+            state["rlm_root_tools_docs"] = root_tools_docs
+            state["rlm_sub_tools_docs"] = sub_tools_docs
+            deduped_shared, _ = _dedupe_tools(
+                self.shared_tools, context="shared tools", reserved_names=set()
+            )
+            state["rlm_shared_tools"] = [
+                _tool_display_name(tool) for tool in deduped_shared
             ]
-        else:
-            base_system_prompt = _RLM_PYTHON_SYSTEM_PROMPT_STORE[
-                self.root_prompt_verbosity
+            state["rlm_root_tools"] = [
+                _tool_display_name(tool) for tool in self.root_tools
+            ]
+            state["rlm_sub_tools"] = [
+                _tool_display_name(tool) for tool in self.sub_tools
             ]
 
-        packages_docs = self._generate_packages_documentation()
-        root_tools_docs = self._generate_root_tools_documentation()
-        sub_tools_docs = self._generate_sub_tools_documentation()
-        state["rlm_system_prompt"] = (
-            base_system_prompt + packages_docs + root_tools_docs + sub_tools_docs
-        )
-        state["rlm_packages_docs"] = packages_docs
-        state["rlm_root_tools_docs"] = root_tools_docs
-        state["rlm_sub_tools_docs"] = sub_tools_docs
-        deduped_shared, _ = _dedupe_tools(
-            self.shared_tools, context="shared tools", reserved_names=set()
-        )
-        state["rlm_shared_tools"] = [
-            _tool_display_name(tool) for tool in deduped_shared
-        ]
-        state["rlm_root_tools"] = [_tool_display_name(tool) for tool in self.root_tools]
-        state["rlm_sub_tools"] = [_tool_display_name(tool) for tool in self.sub_tools]
-
-        # 4. Prepare backend and start worker (defer for sandbox to allow env setup)
-        if self.execution_backend != "sandbox":
+            # 4. Prepare backend and start worker (always eager)
+            await self._executor.prepare_filesystem(state)
             await self._executor.setup(state)
             state["rlm_worker_ready"] = True
-        else:
-            state["rlm_worker_ready"] = False
 
-        # Initialize context warning flag (feature enabled if max_seq_len is set)
-        state["context_warning_sent"] = False
+            # Initialize context warning flag (feature enabled if max_seq_len is set)
+            state["context_warning_sent"] = False
 
-        # Initialize FIFO sequence counter for detecting stale responses
-        state["_exec_seq"] = 0
+            # Initialize FIFO sequence counter for detecting stale responses
+            state["_exec_seq"] = 0
 
-        _ensure_rlm_metric_state(state)
+            _ensure_rlm_metric_state(state)
 
-        return state
+            return state
+        except Exception:
+            # Best-effort cleanup to avoid leaking tunnels/sandboxes on setup failure.
+            if rollout_id in self.active_rollouts:
+                del self.active_rollouts[rollout_id]
+            try:
+                await self._executor.cleanup(state)
+            except Exception:
+                logger.exception("Failed to cleanup RLM executor after setup error")
+            if not self.active_rollouts:
+                try:
+                    await self._teardown_interception_server()
+                finally:
+                    if self.execution_backend == "sandbox":
+                        await self._teardown_tunnel()
+            raise
 
     # =========================================================================
     # Code Execution