Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ COPY --chown=node --from=builder /home/node/app/node_modules ./node_modules
# Use a non-root user for security
EXPOSE 3000

CMD ["node", "dist/index.js"]
CMD ["node", "dist/cluster.js"]
Comment thread
phvalguima marked this conversation as resolved.
2 changes: 2 additions & 0 deletions eslint.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ export default [
process: "readonly",
fetch: "readonly",
performance: "readonly",
setTimeout: "readonly",
NodeJS: "readonly",
},
},
plugins: {
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
"prepublishOnly": "pnpm run build",
"prepare": "pnpm run build",
"start": "node dist/index.js",
"start:cluster": "node dist/cluster.js",
Comment thread
phvalguima marked this conversation as resolved.
"example": "node examples/_run.js",
"demo:build": "cd demo && npm run build",
"demo:dev": "cd demo && npm run dev",
Expand Down
17 changes: 10 additions & 7 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,27 @@ import { createApp } from "./server.js";
import { logger } from "./lib/logger.js";

const app = createApp();
const port = process.env.PORT || 3000;
const port = parseInt(String(process.env.PORT || 3000), 10);
const highWaterMark = parseInt(process.env.STREAM_HIGH_WATER_MARK || "65536", 10);
const backlog = parseInt(process.env.TCP_BACKLOG || "5000", 10);

// Start server with configurable highWaterMark for SSE streaming backpressure
createServer({ highWaterMark }, app).listen(port, () => {
logger.info({ port, highWaterMark }, "Server started");
// and configurable TCP backlog (SOMAXCONN / listen queue depth)
const server = createServer({ highWaterMark }, app);
server.listen(port, "0.0.0.0", backlog, () => {
logger.info({ port, highWaterMark, backlog, pid: process.pid }, "Server started");
Comment thread
phvalguima marked this conversation as resolved.
Comment thread
phvalguima marked this conversation as resolved.
logger.info({ url: `http://localhost:${port}` }, "Server is running");
});

// Graceful shutdown logging
process.on("SIGINT", () => {
logger.info("Server shutting down (SIGINT)");
process.exit(0);
logger.info({ pid: process.pid }, "Server shutting down (SIGINT)");
server.close(() => process.exit(0));
});

process.on("SIGTERM", () => {
logger.info("Server shutting down (SIGTERM)");
process.exit(0);
logger.info({ pid: process.pid }, "Server shutting down (SIGTERM)");
server.close(() => process.exit(0));
});
Comment thread
phvalguima marked this conversation as resolved.

export default app;
12 changes: 11 additions & 1 deletion src/routes/responses/handleOneTurn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ import { recordError, requiresApproval } from "./utils.js";
import { closeLastOutputItem } from "./closeOutputItem.js";
import { modelCallCounter, modelCallDuration } from "../../lib/metrics.js";

// Shared undici Agent per worker process — avoids creating a new connection pool per request.
// Configurable via UPSTREAM_MAX_CONNECTIONS (connections per origin), UPSTREAM_KEEP_ALIVE_TIMEOUT_MS, and UPSTREAM_CONNECT_TIMEOUT_MS.
const sharedDispatcher = new Agent({
allowH2: true,
connections: parseInt(process.env.UPSTREAM_MAX_CONNECTIONS || "128", 10),
pipelining: 1,
keepAliveTimeout: parseInt(process.env.UPSTREAM_KEEP_ALIVE_TIMEOUT_MS || "30000", 10),
connectTimeout: parseInt(process.env.UPSTREAM_CONNECT_TIMEOUT_MS || "30000", 10),
Comment on lines +26 to +33
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The inline comment says the Agent is configurable via UPSTREAM_MAX_CONNECTIONS and UPSTREAM_KEEP_ALIVE_TIMEOUT_MS, but the code also reads UPSTREAM_CONNECT_TIMEOUT_MS. Update the comment to include the connect-timeout env var (or drop the env-var list) so the documentation matches behavior.

Copilot uses AI. Check for mistakes.
Comment on lines +26 to +33
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Agent options are derived via parseInt(...) without validating the result. If any of these env vars are set to a non-numeric value, parseInt will return NaN and undici may throw or behave unexpectedly at runtime. Consider coercing with Number(...) and falling back to defaults (or throwing a clear startup error) when the parsed value is not a finite integer.

Suggested change
// Shared undici Agent per worker process — avoids creating a new connection pool per request.
// Configurable via UPSTREAM_MAX_CONNECTIONS (connections per origin) and UPSTREAM_KEEP_ALIVE_TIMEOUT_MS.
const sharedDispatcher = new Agent({
allowH2: true,
connections: parseInt(process.env.UPSTREAM_MAX_CONNECTIONS || "128", 10),
pipelining: 1,
keepAliveTimeout: parseInt(process.env.UPSTREAM_KEEP_ALIVE_TIMEOUT_MS || "30000", 10),
connectTimeout: parseInt(process.env.UPSTREAM_CONNECT_TIMEOUT_MS || "30000", 10),
function getFiniteIntegerEnv(name: string, fallback: number): number {
const rawValue = process.env[name];
if (rawValue === undefined) {
return fallback;
}
const parsedValue = Number(rawValue);
return Number.isFinite(parsedValue) && Number.isInteger(parsedValue) ? parsedValue : fallback;
}
// Shared undici Agent per worker process — avoids creating a new connection pool per request.
// Configurable via UPSTREAM_MAX_CONNECTIONS (connections per origin) and UPSTREAM_KEEP_ALIVE_TIMEOUT_MS.
const sharedDispatcher = new Agent({
allowH2: true,
connections: getFiniteIntegerEnv("UPSTREAM_MAX_CONNECTIONS", 128),
pipelining: 1,
keepAliveTimeout: getFiniteIntegerEnv("UPSTREAM_KEEP_ALIVE_TIMEOUT_MS", 30000),
connectTimeout: getFiniteIntegerEnv("UPSTREAM_CONNECT_TIMEOUT_MS", 30000),

Copilot uses AI. Check for mistakes.
});

/*
* Call LLM and stream the response.
*/
Expand Down Expand Up @@ -54,7 +64,7 @@ export async function* handleOneTurnStream(
apiKey: apiKey,
defaultHeaders,
fetchOptions: {
dispatcher: new Agent({ allowH2: true }),
dispatcher: sharedDispatcher,
},
});
const modelCallStart = performance.now();
Expand Down
Loading