Merged
6 changes: 6 additions & 0 deletions .codex/environments/environment.toml
@@ -0,0 +1,6 @@
# THIS IS AUTOGENERATED. DO NOT EDIT MANUALLY
version = 1
name = "verifiers"

[setup]
script = "uv sync"
1 change: 1 addition & 0 deletions .gitignore
@@ -10,6 +10,7 @@ uv.lock
.ropeproject/
.scratch/
.chroma_db/
/.codex/environments/

# artifacts
core.*
8 changes: 8 additions & 0 deletions README.md
@@ -90,6 +90,14 @@ Environments built with Verifiers are self-contained Python modules. To initiali
```bash
prime env init my-env # creates a new template in ./environments/my_env
```
For OpenEnv integration, use:
```bash
prime env init my-openenv --openenv
```
Then copy your OpenEnv project into `environments/my_openenv/proj/` and build the image with:
```bash
uv run vf-build my-openenv
```

This will create a new module called `my_env` with a basic environment template.
```
3 changes: 2 additions & 1 deletion docs/environments.md
@@ -788,8 +788,9 @@ Supported third-party environment integrations include:
- **`TextArenaEnv`** — wraps [TextArena](https://github.com/LeonGuertler/TextArena) text-based game environments
- **`ReasoningGymEnv`** — wraps [reasoning-gym](https://github.com/open-thought/reasoning-gym) procedural datasets
- **`BrowserEnv`** — unified browser automation via [Browserbase](https://browserbase.com) with DOM and CUA modes
- **`OpenEnvEnv`** — wraps OpenEnv gym and MCP contracts using Prime Sandboxes with prebuilt images referenced from `.build.json`

These require additional dependencies installed via extras (e.g., `uv add 'verifiers[ta]'` for TextArena, `uv add 'verifiers[browser]'` for BrowserEnv).
These require additional dependencies installed via extras (e.g., `uv add 'verifiers[ta]'` for TextArena, `uv add 'verifiers[browser]'` for BrowserEnv, `uv add 'verifiers[openenv]'` for OpenEnvEnv). For OpenEnv environments, build the bundled project image with `uv run vf-build <env-id>` before evaluation or training.

Newer and more experimental environment classes include:

19 changes: 19 additions & 0 deletions docs/reference.md
@@ -399,6 +399,25 @@ Sandboxed container execution using `prime` sandboxes.

Persistent Python REPL in sandbox. Extends `SandboxEnv`.

#### OpenEnvEnv

```python
class OpenEnvEnv(MultiTurnEnv):
def __init__(
self,
openenv_project: str | Path,
num_train_examples: int = 100,
num_eval_examples: int = 50,
seed: int = 0,
prompt_renderer: Callable[..., ChatMessages] | None = None,
max_turns: int = -1,
rubric: Rubric | None = None,
**kwargs,
): ...
```

OpenEnv integration that runs OpenEnv projects in Prime Sandboxes using a prebuilt image manifest (`.build.json`), supports both gym and MCP contracts, and requires a `prompt_renderer` to convert observations into chat messages.
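A minimal construction sketch based on the signature above. The renderer below is a hypothetical example (real observation shapes depend on the wrapped OpenEnv project), and the commented-out `OpenEnvEnv` call assumes a project directory containing a built `.build.json` manifest:

```python
from pathlib import Path
from typing import Any


def my_prompt_renderer(observation: Any, **kwargs: Any) -> list[dict[str, str]]:
    # Hypothetical renderer: convert a raw OpenEnv observation into chat
    # messages. Real observation shapes depend on the wrapped project.
    prompt = observation.get("prompt") if isinstance(observation, dict) else None
    if isinstance(prompt, str) and prompt.strip():
        return [{"role": "user", "content": prompt}]
    # Fallback: stringify the observation for the model.
    return [{"role": "user", "content": str(observation)}]


# import verifiers as vf
# env = vf.OpenEnvEnv(
#     openenv_project=Path("environments/my_openenv/proj"),  # must contain .build.json
#     num_train_examples=100,
#     num_eval_examples=50,
#     seed=0,
#     prompt_renderer=my_prompt_renderer,
# )
```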

#### EnvGroup

```python
44 changes: 3 additions & 41 deletions environments/AGENTS.md
@@ -792,51 +792,13 @@ Supported third-party environment integrations include:
- **`TextArenaEnv`** — wraps [TextArena](https://github.com/LeonGuertler/TextArena) text-based game environments
- **`ReasoningGymEnv`** — wraps [reasoning-gym](https://github.com/open-thought/reasoning-gym) procedural datasets
- **`BrowserEnv`** — unified browser automation via [Browserbase](https://browserbase.com) with DOM and CUA modes
- **`OpenEnvEnv`** — wraps OpenEnv gym and MCP contracts using Prime Sandboxes with prebuilt images referenced from `.build.json`

These require additional dependencies installed via extras (e.g., `uv add 'verifiers[ta]'` for TextArena, `uv add 'verifiers[browser]'` for BrowserEnv).

### BrowserEnv

`BrowserEnv` provides browser automation with two modes:

- **DOM mode** (`mode="dom"`): Natural language browser control via Stagehand SDK. Uses semantic operations like `act("click the login button")`, `observe("find form fields")`, and `extract("get the table data")`.

- **CUA mode** (`mode="cua"`): Vision-based browser control using coordinate-based primitives. Uses low-level operations like `click(x, y)`, `type_text("hello")`, `scroll(0, 0, 0, 500)`, and `goto("https://example.com")`.

**CUA mode with automatic sandbox deployment** (default, recommended):

```python
env = BrowserEnv(
mode="cua",
dataset=dataset,
rubric=rubric,
)
```

When `use_sandbox=True` (the default), the CUA server is automatically deployed to a sandbox container. No manual server setup is required. The sandbox handles:
- Server file upload and initialization
- Server lifecycle management
- Browser session isolation
- Automatic cleanup on rollout completion

**CUA mode with manual server** (for local development):

```python
# First start the server manually:
# cd assets/templates/browserbase/cua && ./start.sh

env = BrowserEnv(
mode="cua",
use_sandbox=False,
server_url="http://localhost:3000",
dataset=dataset,
rubric=rubric,
)
```
These require additional dependencies installed via extras (e.g., `uv add 'verifiers[ta]'` for TextArena, `uv add 'verifiers[browser]'` for BrowserEnv, `uv add 'verifiers[openenv]'` for OpenEnvEnv). For OpenEnv environments, build the bundled project image with `uv run vf-build <env-id>` before evaluation or training.

Newer and more experimental environment classes include:

- **`GymEnv`** — universal runner for Gym-compatible environments (OpenAI Gym / Gymnasium API)
- **`CliAgentEnv`** — runs custom agent code inside sandboxes, intercepting API requests. Accepts sandbox configuration parameters including `docker_image`, `cpu_cores`, `memory_gb`, `disk_size_gb`, `gpu_count`, `timeout_minutes`, `environment_vars`, and `labels` for sandbox categorization
- **`HarborEnv`** — loads Harbor-format agent benchmark tasks
- **`RLMEnv`** — implements Recursive Language Models for unbounded context processing. Execution supports both local and sandbox backends via `execution_backend` (`"local"` default, `"sandbox"` to run the REPL inside a Prime Sandbox). Context is still filesystem-based: a provided `context_dir` is copied into the working directory, or legacy JSON-serializable `context` data is written to `context.json`/`context.txt`. The RLM scaffolding prompt (filesystem availability note, REPL workflow, tool docs) is injected into the first user message wrapped in `<RLM_SCAFFOLDING>...</RLM_SCAFFOLDING>`, preserving any external system prompt. The REPL language is configurable via `repl_language` (default: `bash`); use `repl_language="python"` to retain the Python REPL. Bash mode uses `call_bash_repl` and behaves like a terminal; Python mode uses `call_python_repl`. Sub-LLM and root-tool interception for sandboxes is routed through a Prime Tunnel unless `interception_url` is provided. Tooling can be split via `tools` (shared), `root_tools` (REPL-only), and `sub_tools` (sub-LLM tools). Fixed root tools like `llm_batch` are always present and cannot be overridden. Tool ordering is fixed tools → shared tools → role-specific tools, with per-list deduplication by name. Root tools are callable only inside the REPL; sub-LLM tools use standard tool-calling.
- **`RLMEnv`** — implements Recursive Language Models for unbounded context processing. Execution supports both local and sandbox backends via `execution_backend` (`"local"` default, `"sandbox"` to run the REPL inside a Prime Sandbox). Context is still filesystem-based: a provided `context_dir` is copied into the working directory, or legacy JSON-serializable `context` data is written to `context.json`/`context.txt`. The RLM scaffolding prompt (filesystem availability note, REPL workflow, tool docs) is injected into the first user message wrapped in `<RLM_SCAFFOLDING>...</RLM_SCAFFOLDING>`, preserving any external system prompt; the model-visible prompt is stored in `state["prompt"]`, while the original input prompt is preserved in `state["raw_prompt"]`. The REPL language is configurable via `repl_language` (default: `bash`); use `repl_language="python"` to retain the Python REPL. Bash mode uses `call_bash_repl` and behaves like a terminal; Python mode uses `call_python_repl`. Sub-LLM and root-tool interception for sandboxes is routed through a Prime Tunnel unless `interception_url` is provided. Tooling can be split via `tools` (shared), `root_tools` (REPL-only), and `sub_tools` (sub-LLM tools). Fixed root tools like `llm_batch` are always present and cannot be overridden. Tool ordering is fixed tools → shared tools → role-specific tools, with per-list deduplication by name. Root tools are callable only inside the REPL; sub-LLM tools use standard tool-calling.
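The RLMEnv tool-ordering rule described above (fixed tools → shared tools → role-specific tools, with per-list deduplication by name) can be sketched as follows; this is illustrative only, and `order_tools` is a hypothetical name, not library code:

```python
def order_tools(fixed: list[str], shared: list[str], role_specific: list[str]) -> list[str]:
    # Conceptual sketch: concatenate fixed, shared, and role-specific tool
    # names in that order, deduplicating by name within each list.
    def dedup(names: list[str]) -> list[str]:
        seen: set[str] = set()
        return [n for n in names if not (n in seen or seen.add(n))]

    return dedup(fixed) + dedup(shared) + dedup(role_specific)
```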
3 changes: 3 additions & 0 deletions environments/README.md
@@ -23,6 +23,8 @@ This folder contains installable example environments that showcase common usage
- **doublecheck**: Simple follow-up turn ("Are you sure?") with math rewards; minimal `is_completed`/`env_response` implementation.
- **sentence_repeater**: Multi-turn Q/A over a paragraph; rewards compare assistant messages to expected answers.
- **wordle**: Game-style interaction via `TextArenaEnv`; multiple rewards (correctness, partial credit, few-turn bonus) and XML formatting.
- **openenv_echo**: OpenEnv MCP integration example using upstream `echo_env`.
- **openenv_textarena**: OpenEnv gym integration example using upstream `textarena_env` (default `Wordle-v0`).

### Tool use
- **ToolEnv (native function-calling)**
@@ -69,6 +71,7 @@ This folder contains installable example environments that showcase common usage
- **ToolEnv with real tools**: `wiki_search`, `math_python`
- **Custom MultiTurnEnv**: `alphabet_sort`, `doublecheck`, `sentence_repeater`, `wordle`
- **GymEnv integration**: `gem_wordle`
- **OpenEnv integration (gym + MCP)**: `openenv_textarena`, `openenv_echo`
- **CLI agent sandboxes**: `opencode_harbor`, `terminus_harbor`
- **MCP integration**: `mcp_search_env`
- **RLM (recursive LLM)**: `rlm_secrets`
87 changes: 87 additions & 0 deletions environments/openenv_echo/README.md
@@ -0,0 +1,87 @@
# openenv-echo

<a href="https://github.com/PrimeIntellect-ai/verifiers/tree/main/environments/openenv_echo">
<img src="https://img.shields.io/badge/GitHub-181717?style=for-the-badge&logo=github&logoColor=white" alt="Source Code">
</a>

### Overview

- **Environment ID**: `openenv-echo`
- **Short description**: OpenEnv Echo environment via `OpenEnvEnv`, demonstrating MCP tool-calling in Prime Sandboxes.
- **Tags**: openenv, mcp, tools, example

### Datasets

- **Primary dataset(s)**: Seed-generated episodes (one seed per rollout).
- **Source links**: Bundled OpenEnv Echo project in `proj/` (copied from OpenEnv).
- **Split sizes**: 100 train / 50 eval by default (configurable).

### Task

- **Type**: Tool use, multi-turn.
- **Parser**: Default `Parser` (no special formatting).
- **Rubric overview**: `OpenEnvEpisodicSumRubric` sums per-step rewards; `MultiTurnMonitorRubric` tracks turn count.

### Quickstart

Build and register the bundled OpenEnv Docker image in the Prime registry:

```bash
uv run vf-build openenv-echo
```

This writes `environments/openenv_echo/proj/.build.json` with the fully qualified image reference and runtime metadata.

Verify the image is ready (status **Ready** or **Completed**):

```bash
prime images list
```

Run an evaluation with default settings:

```bash
prime eval run openenv-echo
```

Configure model and sampling:

```bash
prime eval run openenv-echo \
-m gpt-4.1-mini \
-n 20 -r 3 -t 1024 -T 0.7
```

Notes:
- If your environments directory is not `./environments`, run:
`uv run vf-build openenv-echo -p /path/to/environments`
- If you customize the bundled OpenEnv project, rerun `uv run vf-build openenv-echo` (the `proj/.build.json` manifest is updated).
- `openenv_echo.py` defines `render_echo_prompt()` and passes it via `prompt_renderer`
to keep the initial MCP prompt concise.

### Troubleshooting

If you see errors like `waiting to start: trying and failing to pull image`, it means the image is not available to the sandbox. Common causes:
- The image build is still running or failed (`prime images list` should show **Ready** or **Completed**).
- The image reference in `proj/.build.json` is stale or invalid.
- The image is private or not accessible to your team.

If `prime images list` shows **Ready** but the sandbox still cannot pull the image, escalate to the platform team with:
- Image name/tag
- Build status/output from `prime images list`
- Sandbox ID and timestamp from the error log

### Environment Arguments

| Arg | Type | Default | Description |
| --- | ---- | ------- | ----------- |
| `num_train_examples` | int | `100` | Number of training seeds to generate. |
| `num_eval_examples` | int | `50` | Number of eval seeds to generate. |
| `seed` | int | `0` | Base seed for episode generation. |

### Metrics

| Metric | Meaning |
| ------ | ------- |
| `reward` | Sum of per-step rewards from the OpenEnv environment. |
| `num_turns` | Number of turns taken in the rollout. |
58 changes: 58 additions & 0 deletions environments/openenv_echo/openenv_echo.py
@@ -0,0 +1,58 @@
from pathlib import Path
from typing import Any, cast

import verifiers as vf
from verifiers.types import ChatMessages


def render_echo_prompt(
observation: Any,
*,
action_schema: dict[str, Any] | None = None,
context: str = "reset",
**kwargs: Any,
) -> ChatMessages:
del kwargs
if not isinstance(observation, dict):
raise RuntimeError(
f"openenv-echo prompt renderer expected dict observation, got {type(observation).__name__}."
)

messages = observation.get("messages")
if isinstance(messages, list) and messages:
return cast(ChatMessages, messages)

prompt = observation.get("prompt")
if isinstance(prompt, str) and prompt.strip():
return cast(ChatMessages, [{"role": "user", "content": prompt}])

if context == "reset" and isinstance(action_schema, dict):
return cast(
ChatMessages,
[
{
"role": "user",
"content": (
"You are connected to an OpenEnv MCP environment. "
"Call at least one tool before your final response. "
"Action contract: call_tool(tool_name: str, arguments: object)."
),
}
],
)

raise RuntimeError("openenv-echo observation did not include a renderable prompt.")


def load_environment(
num_train_examples: int = 100,
num_eval_examples: int = 50,
seed: int = 0,
):
return vf.OpenEnvEnv(
openenv_project=Path(__file__).parent / "proj",
num_train_examples=num_train_examples,
num_eval_examples=num_eval_examples,
seed=seed,
prompt_renderer=render_echo_prompt,
)
10 changes: 10 additions & 0 deletions environments/openenv_echo/proj/.build.json
@@ -0,0 +1,10 @@
{
"app": "server.app:app",
"contract": "mcp",
"environment_id": "openenv-echo",
"image": "cmaeni8ji0001ql2z5gw8204f/openenv-echo:latest",
"image_status": "COMPLETED",
"port": 8000,
"schema_version": 1,
"start_command": "sh -lc \"cd /app/env && /app/.venv/bin/uvicorn server.app:app --host 0.0.0.0 --port 8000\""
}