Agenta-AI · junaway · Jul 2, 2026
diff --git a/docs/design/agent-workflows/projects/add-claude-e2b/research.md b/docs/design/agent-workflows/projects/add-claude-e2b/research.md
@@ -0,0 +1,95 @@
+# Claude on E2B — investigation
+
+## Goal
+
+Make `harness="claude"` + `sandbox="e2b"` a working combination. This worktree builds on the
+`add-sandbox-e2b` base (Pi-on-E2B already works). The Claude harness is the second matrix
+entry; Codex/opencode on E2B follow the same pattern.
+
+## What exists on the base branch
+
+### E2B provider
+
+`services/agent/src/engines/sandbox_agent/provider.ts` — `buildE2bCreate` + `buildSandboxProvider`
+E2B arm. Template defaults to `E2B_TEMPLATE` env or `"agenta-sandbox-agent"`. The daemon
+(`sandbox-agent`) auto-installs Claude at `createSession` time via `install-agent claude` —
+this is NOT baked into the template (Pi is baked; Claude is runtime-installed by the daemon).
+
+### Claude harness
+
+`sdks/python/agenta/sdk/agents/adapters/harnesses.py` — `ClaudeHarness` maps to `acpAgent="claude"`.
+
+`sdks/python/agenta/sdk/agents/dtos.py` — `ClaudeAgentTemplate.wire_harness_files()` calls
+`build_claude_settings_files` and returns `{"harnessFiles": [{"path": ".claude/settings.json",
+"content": "..."}]}`. When permissions are empty, returns `{}` (no harnessFiles).
+
+`sdks/python/agenta/sdk/agents/adapters/claude_settings.py` — `build_claude_settings_files`
+merges author permissions, sandbox-derived deny rules, and MCP/tool permissions into a single
+`.claude/settings.json` payload.
+
+`services/runner/src/engines/sandbox_agent.ts` — `applyClaudeConnectionEnv` sets `ENABLE_TOOL_SEARCH=false`,
+Bedrock/Vertex env, `ANTHROPIC_BASE_URL`, `ANTHROPIC_MODEL`. This env is set in the local daemon
+env dict; on E2B it reaches the daemon through the `buildE2BCreate` envs (plan secrets include
+`ANTHROPIC_API_KEY`; the additional vars set by `applyClaudeConnectionEnv` are merged into `env`
+before `buildSandboxProvider` is called).
+
+### `prepareE2bPiAssets` (Pi-only)
+
+`services/runner/src/engines/sandbox_agent/e2b.ts` — guards on `plan.isPi` and returns early for
+Claude. Pi-specific: uploads `auth.json` (OAuth fallback), `agenta.js` extension, skills,
+system-prompt files.
+
+Claude needs none of those Pi-specific assets. The daemon handles Claude install. What Claude
+does need provisioned into the E2B sandbox:
+
+- `harnessFiles` (`.claude/settings.json` etc.) written to `plan.cwd` inside the sandbox
+- skills under `<cwd>/.claude/skills/<name>/` (project-local tree, same as Daytona)
+- `ANTHROPIC_API_KEY` (or own-login credentials) in the daemon env
+
+### `prepareWorkspace` (Daytona + local, no E2B arm)
+
+`services/agent/src/engines/sandbox_agent/workspace.ts` — handles `isDaytona` (remote fs API)
+or falls through to local. For E2B Claude, the cwd lives in the E2B sandbox (`/root/work/agenta-<hex>`),
+not on the runner host. The local branch writes to the runner's filesystem — incorrect for E2B.
+
+The Daytona arm uses `sandbox.mkdirFs` + `sandbox.writeFsFile`; the E2B provider exposes the
+same API (same `sandbox-agent` package, same SandboxHandle interface).
+
+## The two gaps
+
+| Gap | Fix |
+|---|---|
+| `prepareWorkspace` falls through to local for E2B | Extend `PrepareWorkspaceInput` plan type with `isE2b`; add `isDaytona \|\| isE2b` arm that uses the sandbox fs API. The Daytona and E2B arms are identical in shape (both use `sandbox.mkdirFs` / `sandbox.writeFsFile`). |
+| `prepareE2BPiAssets` returns early for Claude | Add `prepareE2BClaudeAssets`, which uploads the Claude own-login credentials (only `.credentials.json` from the host's `~/.claude/`, never the whole directory) if `credentialMode === "runtime_provided"` (same gate as Pi's auth.json path). Wire it in `sandbox_agent.ts` next to the Pi call. |
+
+## Credential modes
+
+- `credentialMode="env"`: `ANTHROPIC_API_KEY` arrives in `plan.secrets`, merged into `env` before
+  `buildSandboxProvider`, and carried into the sandbox through `buildE2bCreate({}, secrets).envs`.
+  No file upload needed.
+- `credentialMode="runtime_provided"`: the user's own Claude login (`~/.claude/` OAuth state).
+  Upload only `.credentials.json` (never the whole `.claude/` directory) into the E2B sandbox —
+  mirrors Pi's `uploadPiAuthToE2BSandbox`. Best-effort (same policy as Pi auth upload).
+- `credentialMode="none"` / missing: no credential action.
+
+## Teardown / leak parity
+
+The per-run `finally` in `sandbox_agent.ts` calls `sandbox.destroySandbox()` on every path
+(normal, error, signal). `buildE2bCreate` sets `autoPause: true` and `timeoutMs` (default 30 min)
+as a backstop for process-KILL leaks. This is identical for Claude-on-E2B — no new teardown code.
+
+## Restricted-network refusal
+
+`buildRunPlan` already refuses any restricted-network E2B run before the harness is checked.
+Claude-on-E2B inherits this gate unchanged; no new code needed.
+
+## Foundation seam
+
+A parallel worktree is generalizing non-Pi remote bootstrap. This branch implements the Claude
+arm directly (clone + specialize), noting where the code would fold:
+
+- `prepareWorkspace` E2B arm → folds onto a single `isDaytona || isE2b` branch (already done here).
+- `prepareE2bClaudeAssets` own-login upload → folds onto a generic `prepareE2bHarnessAssets`
+  that dispatches by `acpAgent`. The Pi arm stays separate (pi-specific: extension, skills-in-pi-dir,
+  system-prompt).
+- `buildE2bCreate` envs param already carries arbitrary secrets → no change needed.
diff --git a/docs/design/agent-workflows/projects/add-claude-e2b/specs.md b/docs/design/agent-workflows/projects/add-claude-e2b/specs.md
@@ -0,0 +1,107 @@
+# Claude on E2B — specs
+
+## Scope
+
+Two TypeScript changes + Dockerfile comment + tests. Python is unchanged (the Python harness
+adapter already renders `harnessFiles` generically; it has no knowledge of sandbox provider).
+
+## workspace.ts — add E2B arm
+
+`PrepareWorkspaceInput.plan` gains `isE2b: boolean`. The remote branch becomes `isDaytona || isE2b`:
+both providers expose the same sandbox fs API (`mkdirFs`, `writeFsFile`). The cleanup returned
+for E2B is `async () => {}` (same as Daytona — sandbox teardown handles the remote cwd).
+
+The local arm is unchanged.
+
+```
+if (plan.isDaytona || plan.isE2b) {
+  // use sandbox.mkdirFs / sandbox.writeFsFile
+  return { cleanup: async () => {} };
+}
+// local arm unchanged
+```
+
+No new public exports.
+
+## e2b.ts — add prepareE2bClaudeAssets
+
+New export:
+
+```typescript
+export interface PrepareE2BClaudeAssetsInput {
+  sandbox: any;
+  plan: Pick<RunPlan, "isClaude" | "credentialMode" | "hasApiKey">;
+  log?: Log;
+}
+
+export async function prepareE2BClaudeAssets({
+  sandbox,
+  plan,
+  log = () => {},
+}: PrepareE2BClaudeAssetsInput): Promise<void>
+```
+
+Guards on `plan.isClaude`. When `shouldUploadOwnLogin(plan)` is true (i.e. `credentialMode ===
+"runtime_provided"` or back-compat no-key heuristic), uploads `.credentials.json` from the host's
+`~/.claude/` into the E2B sandbox at `/root/.claude/`. Best-effort (log on failure, do not throw).
+When `credentialMode === "env"` the key arrives via `buildE2BCreate` envs — no file upload.
+
+`RunPlan` gains `isClaude: boolean` (parallel to `isPi`).
+
+## run-plan.ts — add isClaude
+
+```typescript
+const isClaude = acpAgent === "claude";
+```
+
+Carried on the plan. The existing `isPi` assertion (`isPi === (acpAgent === "pi")`) already covers
+the negative case; add a parallel `assert` for Claude.
+
+## sandbox_agent.ts — wire prepareE2bClaudeAssets
+
+In the E2B asset-prep block (currently only `prepareE2bPiAssets`):
+
+```typescript
+} else if (plan.isE2B) {
+  await (deps.prepareE2BPiAssets ?? prepareE2BPiAssets)({ sandbox, plan, log: logger });
+  await (deps.prepareE2BClaudeAssets ?? prepareE2BClaudeAssets)({ sandbox, plan, log: logger });
+}
+```
+
+`SandboxAgentDeps` gains `prepareE2BClaudeAssets?: typeof prepareE2BClaudeAssets`.
+
+## Dockerfile / README
+
+`sandbox-images/e2b/e2b.Dockerfile` — no new layer (daemon already installs Claude via
+`install-agent claude` at `createSession`). Update the header comment to mention Claude.
+
+`sandbox-images/e2b/README.md` — update "What is baked in" to clarify Claude is runtime-installed
+by the daemon, not baked, and that Claude-on-E2B is now supported.
+
+## Credential flow summary
+
+```
+credentialMode="env":
+  ANTHROPIC_API_KEY in plan.secrets
+  → merged into env by sandbox_agent.ts
+  → carried into sandbox by buildE2bCreate({}, secrets).envs
+  (no file upload)
+
+credentialMode="runtime_provided":
+  prepareE2BClaudeAssets uploads ~/.claude/.credentials.json into /root/.claude/ in the sandbox
+  (best-effort; same pattern as Pi auth.json upload)
+```
+
+## Security invariants
+
+- Managed key never written to the sandbox filesystem (env-only, same as Pi-on-E2B).
+- Own-login upload is gated by `shouldUploadOwnLogin` (same function as Pi), so it never fires
+  when a resolved key is present.
+- Restricted-network refusal already in `buildRunPlan` — unchanged.
+- `autoPause: true` + `timeoutMs` backstop already in `buildE2bCreate` — unchanged.
+
+## Foundation seam
+
+When the non-Pi remote-bootstrap generalization lands, the `prepareE2bClaudeAssets` function
+folds into a generic `prepareE2bHarnessAssets(plan)` dispatcher. The `isDaytona || isE2b`
+workspace arm is already the generalized form.
diff --git a/docs/design/agent-workflows/projects/add-claude-e2b/tasks.md b/docs/design/agent-workflows/projects/add-claude-e2b/tasks.md
@@ -0,0 +1,42 @@
+# Claude on E2B — tasks
+
+## Done
+
+- [x] Research: understand Pi-on-E2B shape, Claude harness provisioning, and the two gaps.
+
+## Implementation
+
+- [x] `run-plan.ts`: add `isClaude: boolean` to `RunPlan`; derive from `acpAgent === "claude"`.
+- [x] `workspace.ts`: extend `PrepareWorkspaceInput.plan` with `isE2b`; change `if (plan.isDaytona)`
+      to `if (plan.isDaytona || plan.isE2b)` so E2B uses the sandbox fs API.
+- [x] `e2b.ts`: add `prepareE2BClaudeAssets` — uploads `~/.claude/.credentials.json` on
+      `runtime_provided` credential mode; export `PrepareE2BClaudeAssetsInput`.
+- [x] `sandbox_agent.ts`: import and call `prepareE2bClaudeAssets` in the E2B block; add it to
+      `SandboxAgentDeps`.
+- [x] `sandbox-images/e2b/e2b.Dockerfile`: update header comment (Claude is runtime-installed).
+- [x] `sandbox-images/e2b/README.md`: update scope section; Claude-on-E2B now supported.
+
+## Tests (unit)
+
+- [x] `tests/unit/sandbox-agent-workspace.test.ts`: E2B arm — harnessFiles written via sandbox fs
+      API; skills uploaded; no .claude path touched on Pi-on-E2B.
+- [x] `tests/unit/sandbox-agent-e2b-assets.test.ts`: `prepareE2bClaudeAssets` — own-login upload
+      on `runtime_provided`; skipped on `env`; skipped on `none`; skipped when `isClaude=false`;
+      best-effort (sandbox error logged, not thrown).
+- [x] `tests/unit/sandbox-agent-e2b-run-plan.test.ts`: `isClaude` flag — claude harness sets
+      `isClaude=true`, pi sets `isClaude=false`.
+- [x] `tests/unit/sandbox-agent-orchestration.test.ts`: Claude-on-E2B flow — uses sandbox fs API
+      for workspace; `prepareE2bClaudeAssets` injected and called; teardown fires.
+
+## Tests (integration — requires live E2B account)
+
+These are marked `@skip` (or described in a comment); they verify the end-to-end flow:
+
+- Claude-on-E2B returns a non-empty output and a trace ID.
+- E2B sandbox is torn down after the run (no leaked sandbox ID).
+
+## Not in scope
+
+- Codex/opencode on E2B (later matrix entries).
+- Baking Claude Code into the E2B template (daemon installs it at createSession via `install-agent claude`).
+- Any Python changes (harnessFiles are already emitted generically by `ClaudeAgentTemplate.wire_harness_files`).
diff --git a/services/runner/src/engines/sandbox_agent.ts b/services/runner/src/engines/sandbox_agent.ts
@@ -57,7 +57,10 @@ import {
   createCookieFetch,
   prepareDaytonaPiAssets,
 } from "./sandbox_agent/daytona.ts";
-import { prepareE2BPiAssets } from "./sandbox_agent/e2b.ts";
+import {
+  prepareE2BPiAssets,
+  prepareE2BClaudeAssets,
+} from "./sandbox_agent/e2b.ts";
 import {
   extendE2BSandboxTimeout,
   startE2BKeepalive,
@@ -230,6 +233,7 @@ export interface SandboxAgentDeps extends BuildRunPlanDeps {
   discoverTunnelEndpoint?: typeof discoverTunnelEndpoint;
   responderFactory?: (permissionPolicy: string | undefined) => Responder;
   prepareE2BPiAssets?: typeof prepareE2BPiAssets;
+  prepareE2BClaudeAssets?: typeof prepareE2BClaudeAssets;
   startE2BKeepalive?: typeof startE2BKeepalive;
   extendE2BSandboxTimeout?: typeof extendE2BSandboxTimeout;
   log?: Log;
@@ -494,6 +498,11 @@ export async function runSandboxAgent(
         plan,
         log: logger,
       });
+      await (deps.prepareE2BClaudeAssets ?? prepareE2BClaudeAssets)({
+        sandbox,
+        plan,
+        log: logger,
+      });
     }
 
     // Start the E2B idle-refresh keepalive as soon as the sandbox ID is known (D3): from this

diff --git a/services/runner/src/engines/sandbox_agent/e2b.ts b/services/runner/src/engines/sandbox_agent/e2b.ts
@@ -100,6 +100,75 @@ export async function uploadPiAuthToE2BSandbox(
   }
 }
 
+/**
+ * In-sandbox Claude config dir (daemon runs as root in the E2B template). Overridable via
+ * `AGENTA_AGENT_SANDBOX_CLAUDE_DIR`. Uses `||` rather than `??` so that an empty override
+ * falls back to the default instead of yielding root-relative paths like `/.credentials.json`.
+ */
+export const E2B_CLAUDE_DIR =
+  process.env.AGENTA_AGENT_SANDBOX_CLAUDE_DIR || "/root/.claude";
+
+/**
+ * Explicit allow-list of `~/.claude` files needed for the own-login (`runtime_provided`)
+ * path; `uploadClaudeAuthToE2BSandbox` iterates this list and uploads nothing else. Only
+ * `.credentials.json` — the OAuth/subscription login store (see
+ * docs/design/agent-workflows/projects/subscription-sidecar/README.md) — is required; Claude
+ * Code reads it via `$HOME` / `CLAUDE_CONFIG_DIR`. `settings.json` is deliberately excluded:
+ * the run's own rendered `.claude/settings.json` is already written into the sandbox from
+ * `harnessFiles` by `prepareWorkspace`, and that rendered copy must win over the host user's
+ * settings. Uploading the whole directory (previous behavior) over-shared `.mcp.json` (other
+ * services' MCP tokens), `settings.json` (env secrets/hooks), `history.jsonl`, and caches.
+ * Declared `as const` so the list is a readonly tuple of literal file names.
+ */
+const CLAUDE_OWN_LOGIN_ALLOWLIST = [".credentials.json"] as const;
+
+/**
+ * Upload the Claude own-login credentials from the host into an E2B sandbox.
+ *
+ * The host config dir is `CLAUDE_CONFIG_DIR` when set, otherwise `$HOME/.claude`. Only the
+ * files named in `CLAUDE_OWN_LOGIN_ALLOWLIST` are copied, into `E2B_CLAUDE_DIR`.
+ *
+ * Best-effort throughout: an unresolvable or missing host dir, a failed `mkdirFs`, or an
+ * absent/unreadable allow-listed file is logged (or silently skipped for a missing dir) and
+ * never thrown; one file's failure does not abort the others. In this file it is invoked by
+ * `prepareE2BClaudeAssets` when `shouldUploadOwnLogin(plan)` is true (own-login path).
+ *
+ * @param sandbox Sandbox handle; only `mkdirFs` and `writeFsFile` are called on it.
+ * @param log Optional logger for skip/success/failure messages; defaults to a no-op.
+ */
+export async function uploadClaudeAuthToE2BSandbox(
+  sandbox: any,
+  log: Log = () => {},
+): Promise<void> {
+  // Thrown values are not guaranteed to be `Error`s; avoid logging "undefined".
+  const describe = (err: unknown): string =>
+    err instanceof Error ? err.message : String(err);
+
+  // Without HOME, `join("", ".claude")` would resolve against the process cwd and could pick
+  // up an unrelated project-local `.claude/` — refuse rather than guess.
+  const home = process.env.HOME;
+  const localDir =
+    process.env.CLAUDE_CONFIG_DIR || (home ? join(home, ".claude") : undefined);
+  if (!localDir) {
+    log("claude auth upload skipped: neither CLAUDE_CONFIG_DIR nor HOME is set");
+    return;
+  }
+  if (!existsSync(localDir)) return;
+  try {
+    await sandbox.mkdirFs({ path: E2B_CLAUDE_DIR });
+  } catch (err) {
+    log(`claude auth upload skipped: ${describe(err)}`);
+    return;
+  }
+  for (const name of CLAUDE_OWN_LOGIN_ALLOWLIST) {
+    const filePath = join(localDir, name);
+    if (!existsSync(filePath)) {
+      log(`claude auth upload: ${name} not found in ${localDir}, skipping`);
+      continue;
+    }
+    try {
+      // Read and write share one try block so either failure is logged per file.
+      const content = readFileSync(filePath, "utf-8");
+      await sandbox.writeFsFile({ path: `${E2B_CLAUDE_DIR}/${name}` }, content);
+      log(`claude auth upload: uploaded ${name}`);
+    } catch (err) {
+      log(`claude auth upload failed for ${name}: ${describe(err)}`);
+    }
+  }
+}
+
+/** Input for `prepareE2BClaudeAssets`. */
+export interface PrepareE2BClaudeAssetsInput {
+  /** Sandbox handle, forwarded untouched to `uploadClaudeAuthToE2BSandbox`. */
+  sandbox: any;
+  /** Plan subset: `isClaude` gates the call; the whole pick is passed to `shouldUploadOwnLogin`. */
+  plan: Pick<RunPlan, "isClaude" | "credentialMode" | "hasApiKey">;
+  /** Optional logger; `prepareE2BClaudeAssets` defaults it to a no-op. */
+  log?: Log;
+}
+
+/**
+ * E2B asset prep for the Claude harness: copies the host's own-login credentials into the
+ * sandbox when `shouldUploadOwnLogin(plan)` approves. No-op for non-Claude plans (Pi assets
+ * go through `prepareE2BPiAssets`) and for managed-key runs, whose key arrives via the
+ * `buildE2BCreate` envs rather than a file.
+ */
+export async function prepareE2BClaudeAssets({
+  sandbox,
+  plan,
+  log = () => {},
+}: PrepareE2BClaudeAssetsInput): Promise<void> {
+  // Short-circuit keeps `shouldUploadOwnLogin` from being consulted for non-Claude plans.
+  const wantsOwnLogin = plan.isClaude && shouldUploadOwnLogin(plan);
+  if (!wantsOwnLogin) return;
+  await uploadClaudeAuthToE2BSandbox(sandbox, log);
+}
+
 export interface PrepareE2BPiAssetsInput {
   sandbox: any;
   plan: Pick<

diff --git a/services/runner/src/engines/sandbox_agent/run-plan.ts b/services/runner/src/engines/sandbox_agent/run-plan.ts
@@ -52,6 +52,7 @@ export interface RunPlan {
   acpAgent: string;
   sandboxId: string;
   isPi: boolean;
+  isClaude: boolean;
   isDaytona: boolean;
   isE2B: boolean;
   /** True for any remote sandbox (`isDaytona || isE2B`); use for remoteness-only checks. */
@@ -201,6 +202,7 @@ export function buildRunPlan(
   }
 
   const isPi = acpAgent === "pi";
+  const isClaude = acpAgent === "claude";
   const isDaytona = sandboxId === "daytona";
   const isE2B = sandboxId === "e2b";
   const isRemoteSandbox = isDaytona || isE2B;
@@ -339,6 +341,7 @@ export function buildRunPlan(
       acpAgent,
       sandboxId,
       isPi,
+      isClaude,
       isDaytona,
       isE2B,
       isRemoteSandbox,