Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ COPY --chown=node --from=builder /home/node/app/node_modules ./node_modules
# Use a non-root user for security
EXPOSE 3000

CMD ["node", "dist/index.js"]
CMD ["node", "dist/cluster.js"]
Comment thread
phvalguima marked this conversation as resolved.
2 changes: 2 additions & 0 deletions eslint.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ export default [
process: "readonly",
fetch: "readonly",
performance: "readonly",
setTimeout: "readonly",
NodeJS: "readonly",
},
},
plugins: {
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
"prepublishOnly": "pnpm run build",
"prepare": "pnpm run build",
"start": "node dist/index.js",
"start:cluster": "node dist/cluster.js",
Comment thread
phvalguima marked this conversation as resolved.
"example": "node examples/_run.js",
"demo:build": "cd demo && npm run build",
"demo:dev": "cd demo && npm run dev",
Expand Down
17 changes: 10 additions & 7 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,27 @@ import { createApp } from "./server.js";
import { logger } from "./lib/logger.js";

const app = createApp();
const port = process.env.PORT || 3000;
const port = parseInt(String(process.env.PORT || 3000), 10);
const highWaterMark = parseInt(process.env.STREAM_HIGH_WATER_MARK || "65536", 10);
const backlog = parseInt(process.env.TCP_BACKLOG || "5000", 10);

// Start server with configurable highWaterMark for SSE streaming backpressure
createServer({ highWaterMark }, app).listen(port, () => {
logger.info({ port, highWaterMark }, "Server started");
// and configurable TCP backlog (SOMAXCONN / listen queue depth)
const server = createServer({ highWaterMark }, app);
server.listen(port, "0.0.0.0", backlog, () => {
logger.info({ port, highWaterMark, backlog, pid: process.pid }, "Server started");
Comment thread
phvalguima marked this conversation as resolved.
Comment thread
phvalguima marked this conversation as resolved.
logger.info({ url: `http://localhost:${port}` }, "Server is running");
});

// Graceful shutdown logging
process.on("SIGINT", () => {
logger.info("Server shutting down (SIGINT)");
process.exit(0);
logger.info({ pid: process.pid }, "Server shutting down (SIGINT)");
server.close(() => process.exit(0));
});

process.on("SIGTERM", () => {
logger.info("Server shutting down (SIGTERM)");
process.exit(0);
logger.info({ pid: process.pid }, "Server shutting down (SIGTERM)");
server.close(() => process.exit(0));
});
Comment thread
phvalguima marked this conversation as resolved.

export default app;
12 changes: 11 additions & 1 deletion src/routes/responses/handleOneTurn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ import { recordError, requiresApproval } from "./utils.js";
import { closeLastOutputItem } from "./closeOutputItem.js";
import { modelCallCounter, modelCallDuration } from "../../lib/metrics.js";

// Shared undici Agent per worker process — avoids creating a new connection pool per request.
// Configurable via UPSTREAM_MAX_CONNECTIONS (connections per origin), UPSTREAM_KEEP_ALIVE_TIMEOUT_MS, and UPSTREAM_CONNECT_TIMEOUT_MS.
const sharedDispatcher = new Agent({
allowH2: true,
connections: parseInt(process.env.UPSTREAM_MAX_CONNECTIONS || "128", 10),
pipelining: 1,
keepAliveTimeout: parseInt(process.env.UPSTREAM_KEEP_ALIVE_TIMEOUT_MS || "30000", 10),
connectTimeout: parseInt(process.env.UPSTREAM_CONNECT_TIMEOUT_MS || "30000", 10),
Comment on lines +26 to +33
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The inline comment says the Agent is configurable via UPSTREAM_MAX_CONNECTIONS and UPSTREAM_KEEP_ALIVE_TIMEOUT_MS, but the code also reads UPSTREAM_CONNECT_TIMEOUT_MS. Update the comment to include the connect-timeout env var (or drop the env-var list) so the documentation matches behavior.

Copilot uses AI. Check for mistakes.
Comment on lines +26 to +33
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Agent options are derived via parseInt(...) without validating the result. If any of these env vars are set to a non-numeric value, parseInt will return NaN and undici may throw or behave unexpectedly at runtime. Consider coercing with Number(...) and falling back to defaults (or throwing a clear startup error) when the parsed value is not a finite integer.

Suggested change
// Shared undici Agent per worker process — avoids creating a new connection pool per request.
// Configurable via UPSTREAM_MAX_CONNECTIONS (connections per origin) and UPSTREAM_KEEP_ALIVE_TIMEOUT_MS.
const sharedDispatcher = new Agent({
allowH2: true,
connections: parseInt(process.env.UPSTREAM_MAX_CONNECTIONS || "128", 10),
pipelining: 1,
keepAliveTimeout: parseInt(process.env.UPSTREAM_KEEP_ALIVE_TIMEOUT_MS || "30000", 10),
connectTimeout: parseInt(process.env.UPSTREAM_CONNECT_TIMEOUT_MS || "30000", 10),
function getFiniteIntegerEnv(name: string, fallback: number): number {
const rawValue = process.env[name];
if (rawValue === undefined) {
return fallback;
}
const parsedValue = Number(rawValue);
return Number.isFinite(parsedValue) && Number.isInteger(parsedValue) ? parsedValue : fallback;
}
// Shared undici Agent per worker process — avoids creating a new connection pool per request.
// Configurable via UPSTREAM_MAX_CONNECTIONS (connections per origin) and UPSTREAM_KEEP_ALIVE_TIMEOUT_MS.
const sharedDispatcher = new Agent({
allowH2: true,
connections: getFiniteIntegerEnv("UPSTREAM_MAX_CONNECTIONS", 128),
pipelining: 1,
keepAliveTimeout: getFiniteIntegerEnv("UPSTREAM_KEEP_ALIVE_TIMEOUT_MS", 30000),
connectTimeout: getFiniteIntegerEnv("UPSTREAM_CONNECT_TIMEOUT_MS", 30000),

Copilot uses AI. Check for mistakes.
});

/*
* Call LLM and stream the response.
*/
Expand Down Expand Up @@ -54,7 +64,7 @@ export async function* handleOneTurnStream(
apiKey: apiKey,
defaultHeaders,
fetchOptions: {
dispatcher: new Agent({ allowH2: true }),
dispatcher: sharedDispatcher,
},
});
const modelCallStart = performance.now();
Expand Down
Loading