Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 133 additions & 24 deletions src/cli/update.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
*/

import { execFileSync } from "node:child_process";
import { existsSync, readFileSync, realpathSync } from "node:fs";
import { dirname, sep } from "node:path";
import { closeSync, existsSync, mkdirSync, openSync, readFileSync, realpathSync, unlinkSync, writeSync } from "node:fs";
import { homedir } from "node:os";
import { dirname, join, sep } from "node:path";
import { fileURLToPath } from "node:url";
import { getVersion } from "./version.js";
import { log, warn } from "./util.js";
Expand All @@ -30,6 +31,16 @@ import { isNewer } from "../utils/version-check.js";
const NPM_REGISTRY_URL = "https://registry.npmjs.org/@deeplake/hivemind/latest";
const PKG_NAME = "@deeplake/hivemind";

/**
 * Location of the updater lockfile used when no override is supplied:
 * `~/.deeplake/hivemind-update.lock`.
 *
 * Per-user state lives under `~/.deeplake/`, the same convention followed by
 * `src/notifications/queue.ts` and `src/embeddings/protocol.ts`.
 *
 * @returns Path of the lockfile inside the current user's home directory.
 */
export function defaultLockPath(): string {
  const stateDir = join(homedir(), ".deeplake");
  return join(stateDir, "hivemind-update.lock");
}

export type InstallKind =
| "npm-global" // npm install -g @deeplake/hivemind — owns its own prefix dir
| "npx" // ran via `npx @deeplake/hivemind` — cached in ~/.npm/_npx
Expand Down Expand Up @@ -147,12 +158,96 @@ export interface UpdateOptions {
currentVersionOverride?: string;
/** Inject the spawn impl (tests). Default: execSync with stdio inherit. */
spawn?: (cmd: string, args: string[]) => void;
/** Override the lockfile path (tests). Default: `~/.deeplake/hivemind-update.lock`. */
lockPathOverride?: string;
}

/**
 * Production spawn implementation: runs `cmd` with `args` synchronously via
 * `execFileSync` (no shell), with the child sharing this process's stdio.
 * Any error raised by `execFileSync` propagates to the caller.
 */
const defaultSpawn = (cmd: string, args: string[]): void => {
  const inheritParentStdio = { stdio: "inherit" } as const;
  execFileSync(cmd, args, inheritParentStdio);
};

/**
 * Non-blocking O_EXCL pidfile lock around `npm install -g @deeplake/hivemind`.
 *
 * Why this exists: `SessionStart` hooks dispatch `hivemind update` detached
 * on every Claude Code session start (twice per session — from both
 * `session-start.ts` and `session-start-setup.ts`, by design). Multiple
 * sessions starting within the same second produced 2–N concurrent
 * `npm install -g @deeplake/hivemind@latest` invocations, which race in
 * npm's reify step: each one renames the existing install to the SAME
 * deterministic backup path (`.hivemind-<hash>`), all but one fail with
 * `ENOTEMPTY`, and the winner can still be SIGKILLed mid-extract — leaving
 * a partially-populated install on disk (node_modules/ present but
 * package.json / bundle/ missing → dangling bin symlink → `hivemind:
 * command not found`). Observed in production on 2026-05-19 with three
 * concurrent installs at 17:39:21 from cwd `~/al-projects/tests`.
 *
 * Semantics on contention: non-blocking. The autoupdate path is
 * fire-and-forget — late arrivals must exit 0 silently, not queue up and
 * eventually run a redundant install. The next session start will dispatch
 * again anyway (the cache was intentionally removed; see
 * src/hooks/shared/autoupdate.ts:37-54).
 *
 * Stale-lock reclaim: the lockfile holds the owner's PID. The owner is
 * probed with `process.kill(pid, 0)`. If the probe succeeds, or fails with
 * `EPERM` (the process exists but belongs to another user), the holder is
 * alive and the lock is left alone. Any other failure (ESRCH, or a PID value
 * Node rejects) means the previous holder is gone; the lock is then
 * reclaimed best-effort via unlink + one O_EXCL retry. An empty or
 * non-numeric lockfile is treated as stale as well.
 *
 * @param path Lockfile path; its parent directory is created if missing.
 * @returns The open fd on success (caller must `releaseLock(fd, path)` on
 *   every exit path), or `null` if a live holder owns it.
 * @throws Any error other than `EEXIST` raised by the first attempt to
 *   create the lockfile, and any error from creating the parent directory.
 */
function tryAcquireLock(path: string): number | null {
  mkdirSync(dirname(path), { recursive: true, mode: 0o700 });

  // Exclusively create the lockfile and stamp it with our PID. If the write
  // fails after the create succeeded, undo the create so we neither leak the
  // fd nor leave behind an empty lockfile that we do not actually hold.
  const claim = (): number => {
    const fd = openSync(path, "wx", 0o600);
    try {
      writeSync(fd, String(process.pid));
    } catch (e: unknown) {
      try { closeSync(fd); } catch { /* best-effort */ }
      try { unlinkSync(path); } catch { /* best-effort */ }
      throw e;
    }
    return fd;
  };

  // Signal 0 performs the existence/permission check without delivering a
  // signal. EPERM still proves the process exists, so it counts as alive.
  const holderIsAlive = (pid: number): boolean => {
    try {
      process.kill(pid, 0);
      return true;
    } catch (e: unknown) {
      return (e as NodeJS.ErrnoException).code === "EPERM";
    }
  };

  try {
    return claim();
  } catch (e: unknown) {
    if ((e as NodeJS.ErrnoException).code !== "EEXIST") throw e;
  }

  // EEXIST: check if the holder is alive.
  let holderPid = 0;
  try {
    holderPid = Number(readFileSync(path, "utf-8").trim()) || 0;
  } catch {
    // Lockfile vanished between EEXIST and read — another caller is mid-
    // cleanup. Try once more to acquire; if that also fails, treat the
    // current state as "someone else owns it" and bail.
    try { return claim(); } catch { return null; }
  }

  // Logging happens outside the liveness probe's try/catch so that a failure
  // while logging can never be mistaken for "holder is gone".
  if (holderPid > 0 && holderIsAlive(holderPid)) {
    log(`another hivemind update is already running (pid=${holderPid}); skipping.`);
    return null;
  }

  // Stale lock: unlink + retry once. If retry races against another
  // reclaim, give up — they own it now.
  try { unlinkSync(path); } catch { /* best-effort */ }
  try {
    return claim();
  } catch {
    log(`another hivemind update is already running; skipping.`);
    return null;
  }
}

/**
 * Give up a lock obtained from `tryAcquireLock`: close the descriptor, then
 * remove the lockfile. Both steps are best-effort — a failure in either one
 * is ignored and does not prevent the other from running.
 *
 * @param fd Open file descriptor of the lockfile.
 * @param path Path of the lockfile to delete.
 */
function releaseLock(fd: number, path: string): void {
  // Order matters: the descriptor is closed before the file is unlinked.
  const teardownSteps: Array<() => void> = [
    () => closeSync(fd),
    () => unlinkSync(path),
  ];
  for (const step of teardownSteps) {
    try {
      step();
    } catch {
      /* best-effort */
    }
  }
}

/**
* Run the update flow. Returns the exit code the CLI should use.
*
Expand Down Expand Up @@ -189,30 +284,44 @@ export async function runUpdate(opts: UpdateOptions = {}): Promise<number> {
log(`(dry-run) Would re-run: hivemind install --skip-auth`);
return 0;
}
log(`Upgrading via npm…`);
try {
spawn("npm", ["install", "-g", `${PKG_NAME}@latest`]);
} catch (e: any) {
warn(`npm install failed: ${e.message}`);
warn(`Try running it manually: npm install -g ${PKG_NAME}@latest`);
return 1;
}
log(``);
log(`Refreshing agent bundles…`);

// Serialize concurrent updaters. The autoupdate path can dispatch
// 2–N `hivemind update` processes within the same second (per-session
// double-fire × N concurrent sessions); without this lock they race
// on npm's reify step and corrupt the install. See `tryAcquireLock`
// for the full incident context.
const lockPath = opts.lockPathOverride ?? defaultLockPath();
const lockFd = tryAcquireLock(lockPath);
if (lockFd === null) return 0;

try {
// Re-exec the NEW binary to use new pkgRoot()/bundle paths. The
// user's $PATH is preserved through stdio: "inherit", so this
// resolves to the freshly-installed `hivemind` regardless of how
// npm laid it out.
spawn("hivemind", ["install", "--skip-auth"]);
} catch (e: any) {
warn(`Agent refresh failed: ${e.message}`);
warn(`Run manually: hivemind install`);
return 1;
log(`Upgrading via npm…`);
try {
spawn("npm", ["install", "-g", `${PKG_NAME}@latest`]);
} catch (e: any) {
warn(`npm install failed: ${e.message}`);
warn(`Try running it manually: npm install -g ${PKG_NAME}@latest`);
return 1;
}
log(``);
log(`Refreshing agent bundles…`);
try {
// Re-exec the NEW binary to use new pkgRoot()/bundle paths. The
// user's $PATH is preserved through stdio: "inherit", so this
// resolves to the freshly-installed `hivemind` regardless of how
// npm laid it out.
spawn("hivemind", ["install", "--skip-auth"]);
} catch (e: any) {
warn(`Agent refresh failed: ${e.message}`);
warn(`Run manually: hivemind install`);
return 1;
}
log(``);
log(`Updated to ${latest}.`);
return 0;
} finally {
releaseLock(lockFd, lockPath);
}
log(``);
log(`Updated to ${latest}.`);
return 0;
}

case "npx": {
Expand Down
142 changes: 141 additions & 1 deletion tests/cli/cli-update.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs";
import { existsSync, mkdtempSync, mkdirSync, openSync, writeFileSync, writeSync, closeSync, readFileSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";

Expand Down Expand Up @@ -469,3 +469,143 @@ describe("getLatestNpmVersion", () => {
fetchSpy.mockRestore();
});
});

/**
* Concurrency: regression guard for the "@deeplake/hivemind partial-install"
* incident — 3 concurrent `npm install -g @deeplake/hivemind@latest` racing
* on the same retired-backup path, ENOTEMPTY, partial install on disk, dead
* bin symlink, hivemind command-not-found.
*
* Root cause (verified 2026-05-19): SessionStart hooks fire `hivemind update`
* detached on every session start, twice per session (once from
* session-start.ts, once from session-start-setup.ts), and `runUpdate()` had
* no inter-process serialization despite the design comment in
* src/hooks/shared/autoupdate.ts:32-53 promising one. Multiple Claude Code
* sessions starting within the same second → multiple concurrent npm i -g
* runs → race.
*
* The fix: non-blocking O_EXCL pidfile lock around the npm-global branch of
* runUpdate(). Late-arriving callers exit 0 silently. Tests use
* `lockPathOverride` to point at a tmp lockfile per case (CLAUDE.md
* destructive-safety rule 2).
*/
describe("runUpdate — concurrency lock", () => {
  // Per-test scratch directory and the lockfile path inside it, so no test
  // ever touches the real per-user lockfile.
  let scratchDir = "";
  let lockFile = "";

  beforeEach(() => {
    scratchDir = mkdtempSync(join(tmpdir(), "hivemind-update-lock-test-"));
    lockFile = join(scratchDir, "hivemind-update.lock");
  });

  afterEach(() => {
    rmSync(scratchDir, { recursive: true, force: true });
  });

  // Shared scenario: a newer version is available and the install kind is
  // npm-global, i.e. the only path expected to take the lock.
  const upgradeViaNpmGlobal = (spawn: (cmd: string, args: string[]) => void): Promise<number> =>
    runUpdate({
      currentVersionOverride: "1.2.3",
      latestVersionOverride: "1.3.0",
      installKindOverride: { kind: "npm-global", installDir: "/x" },
      lockPathOverride: lockFile,
      spawn,
    });

  it("skips with exit 0 and does NOT spawn when an ALIVE holder already owns the lock", async () => {
    // Seed the lockfile with this test process's own PID, which is alive by
    // construction. runUpdate must notice it, back off, and never reach npm.
    mkdirSync(scratchDir, { recursive: true });
    const seedFd = openSync(lockFile, "wx", 0o600);
    writeSync(seedFd, String(process.pid));
    closeSync(seedFd);

    const spawn = vi.fn();
    const exitCode = await upgradeViaNpmGlobal(spawn);

    expect(exitCode).toBe(0);
    expect(spawn).not.toHaveBeenCalled();
    expect(stdoutText() + stderrText()).toMatch(/another.*update.*in.*flight|already.*running/i);
    // The lockfile belongs to the other holder and has to survive untouched.
    expect(existsSync(lockFile)).toBe(true);
    expect(readFileSync(lockFile, "utf-8")).toBe(String(process.pid));
  });

  it("reclaims a STALE lock (dead PID), runs the update, releases the lock", async () => {
    // 0x7FFFFFFF is used as a PID that is expected not to belong to any
    // running process, so the seeded lock looks abandoned.
    mkdirSync(scratchDir, { recursive: true });
    const deadPid = 0x7FFFFFFF;
    writeFileSync(lockFile, String(deadPid), { mode: 0o600 });

    const spawn = vi.fn();
    const exitCode = await upgradeViaNpmGlobal(spawn);

    expect(exitCode).toBe(0);
    expect(spawn).toHaveBeenCalledTimes(2);
    const [npmCall, refreshCall] = spawn.mock.calls;
    expect(npmCall).toEqual(["npm", ["install", "-g", "@deeplake/hivemind@latest"]]);
    expect(refreshCall).toEqual(["hivemind", ["install", "--skip-auth"]]);
    // The lock is gone again once the update has succeeded.
    expect(existsSync(lockFile)).toBe(false);
  });

  it("releases the lock on the SUCCESS path", async () => {
    const exitCode = await upgradeViaNpmGlobal(vi.fn());

    expect(exitCode).toBe(0);
    expect(existsSync(lockFile)).toBe(false);
  });

  it("releases the lock when `npm install` throws", async () => {
    const spawn = vi.fn().mockImplementation((cmd: string) => {
      if (cmd === "npm") throw new Error("ENOTEMPTY");
    });

    const exitCode = await upgradeViaNpmGlobal(spawn);

    expect(exitCode).toBe(1);
    expect(existsSync(lockFile)).toBe(false);
  });

  it("releases the lock when the agent refresh throws", async () => {
    const spawn = vi.fn().mockImplementation((cmd: string) => {
      if (cmd === "hivemind") throw new Error("missing platforms");
    });

    const exitCode = await upgradeViaNpmGlobal(spawn);

    expect(exitCode).toBe(1);
    expect(existsSync(lockFile)).toBe(false);
  });

  it("the lock is NOT acquired for non-upgrade exit paths (up-to-date, registry-fail, npx, local-dev, unknown, dry-run)", async () => {
    // Negative assertion: none of the no-op exit paths may leave a lockfile
    // behind, otherwise a caller that does nothing could block real updaters.
    const spawn = vi.fn();
    const newerAvailable = { latestVersionOverride: "1.3.0", currentVersionOverride: "1.2.3" };
    const noUpgradeScenarios = [
      // up-to-date
      { latestVersionOverride: "1.2.3", currentVersionOverride: "1.2.3" },
      // registry-fail
      { latestVersionOverride: null, currentVersionOverride: "1.2.3" },
      { ...newerAvailable, installKindOverride: { kind: "npx" as const, installDir: "/x" } },
      { ...newerAvailable, installKindOverride: { kind: "local-dev" as const, installDir: "/x" } },
      { ...newerAvailable, installKindOverride: { kind: "unknown" as const, installDir: "/x" } },
      // dry-run
      { ...newerAvailable, installKindOverride: { kind: "npm-global" as const, installDir: "/x" }, dryRun: true },
    ];

    for (const scenario of noUpgradeScenarios) {
      await runUpdate({ ...scenario, lockPathOverride: lockFile, spawn });
      expect(existsSync(lockFile)).toBe(false);
    }
  });
});
Loading