Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 13 additions & 25 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ RUN set -eux; \
less \
# Network scanning & enumeration (from Kali repos - pre-compiled!)
nmap \
masscan \
naabu \
nikto \
whatweb \
Expand All @@ -99,7 +98,6 @@ RUN set -eux; \
subfinder \
dnsrecon \
dnsenum \
theharvester \
# Web fuzzing & discovery
ffuf \
arjun \
Expand All @@ -113,12 +111,12 @@ RUN set -eux; \
enum4linux \
# Network discovery
hping3 \
arp-scan \
# Utilities
socat \
proxychains4 \
seclists \
hashid \
commix \
libimage-exiftool-perl \
cewl \
# SSL/TLS testing
Expand All @@ -133,11 +131,12 @@ RUN set -eux; \
nodejs \
npm \
# Forensics
steghide \
binwalk \
foremost \
# Document conversion
pandoc \
# Web application security scanner
zaproxy \
&& apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

# ============================================================================
Expand Down Expand Up @@ -188,26 +187,16 @@ RUN set -eux; \
git clone https://github.com/internetwache/GitTools.git /opt/gittools && \
chmod +x /opt/gittools/Dumper/gitdumper.sh /opt/gittools/Extractor/extractor.sh && \
ln -sf /opt/gittools/Dumper/gitdumper.sh /usr/local/bin/gitdumper && \
ln -sf /opt/gittools/Extractor/extractor.sh /usr/local/bin/gitextractor && \
# RSA CTF tool - install dependencies
apt-get update && apt-get install -y --no-install-recommends libgmp-dev libmpfr-dev libmpc-dev && \
apt-get clean && rm -rf /var/lib/apt/lists/* && \
git clone https://github.com/RsaCtfTool/RsaCtfTool.git /opt/rsactftool && \
cd /opt/rsactftool && \
pip3 install --break-system-packages -r requirements.txt && \
ln -sf /opt/rsactftool/RsaCtfTool.py /usr/local/bin/rsactftool || true
ln -sf /opt/gittools/Extractor/extractor.sh /usr/local/bin/gitextractor

# ============================================================================
# Binary Downloads (Platform-aware)
# ============================================================================
RUN set -eux; \
# Linux privilege escalation scripts
mkdir -p /opt/peass && \
wget -qO /opt/peass/linpeas.sh https://github.com/peass-ng/PEASS-ng/releases/latest/download/linpeas.sh && \
chmod +x /opt/peass/linpeas.sh && \
ln -sf /opt/peass/linpeas.sh /usr/local/bin/linpeas && \
# Secret scanning tool
curl -sSfL https://raw.githubusercontent.com/trufflesecurity/trufflehog/main/scripts/install.sh | sh -s -- -b /usr/local/bin && \
# Vulnerability scanner for containers and filesystems
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin && \
# httpx with proper architecture detection
arch=$(uname -m); \
if [ "$arch" = "x86_64" ]; then platform="linux_amd64"; \
Expand Down Expand Up @@ -236,8 +225,8 @@ RUN set -eux; \
# ============================================================================
RUN set -eux; \
go install github.com/projectdiscovery/interactsh/cmd/interactsh-client@latest && \
go install github.com/hahwul/dalfox/v2@latest && \
go install github.com/jpillora/chisel@latest
go install github.com/projectdiscovery/katana/cmd/katana@latest && \
go install github.com/projectdiscovery/cvemap/cmd/cvemap@latest

# ============================================================================
# TestSSL Symlink - Allow access via both 'testssl' and 'testssl.sh'
Expand All @@ -260,7 +249,6 @@ RUN set -eux; \
test -d /usr/share/seclists && \
echo "✓ SecLists installed" && \
which nmap && echo "✓ nmap" && \
which masscan && echo "✓ masscan" && \
which naabu && echo "✓ naabu" && \
which nikto && echo "✓ nikto" && \
which whatweb && echo "✓ whatweb" && \
Expand All @@ -271,7 +259,6 @@ RUN set -eux; \
which subfinder && echo "✓ subfinder" && \
which dnsrecon && echo "✓ dnsrecon" && \
which dnsenum && echo "✓ dnsenum" && \
which theHarvester && echo "✓ theHarvester" && \
which ffuf && echo "✓ ffuf" && \
which arjun && echo "✓ arjun" && \
which gobuster && echo "✓ gobuster" && \
Expand All @@ -284,15 +271,16 @@ RUN set -eux; \
which smbmap && echo "✓ smbmap" && \
which nbtscan && echo "✓ nbtscan" && \
which enum4linux && echo "✓ enum4linux" && \
which commix && echo "✓ commix" && \
which arp-scan && echo "✓ arp-scan" && \
which gospider && echo "✓ gospider" && \
which dalfox && echo "✓ dalfox" && \
which chisel && echo "✓ chisel" && \
which interactsh-client && echo "✓ interactsh-client" && \
which katana && echo "✓ katana" && \
which cvemap && echo "✓ cvemap" && \
which jwt-tool && echo "✓ jwt_tool" && \
which gitdumper && echo "✓ gitdumper" && \
which linpeas && echo "✓ linpeas" && \
which trufflehog && echo "✓ trufflehog" && \
which trivy && echo "✓ trivy" && \
which zaproxy && echo "✓ zaproxy" && \
which httpx && echo "✓ httpx" && \
which go && echo "✓ go" && \
which python3 && echo "✓ python3" && \
Expand Down
9 changes: 6 additions & 3 deletions lib/ai/tools/__tests__/sandbox-capabilities.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
* - 3fde55c: Restored user:root for E2B only
*/

import { buildSandboxCommandOptions } from "../utils/sandbox-command-options";
import {
buildSandboxCommandOptions,
MAX_COMMAND_EXECUTION_TIME,
} from "../utils/sandbox-command-options";
import { isE2BSandbox } from "../utils/sandbox-types";

// Mock E2B sandbox (has jupyterUrl property - this is how isE2BSandbox detects it)
Expand All @@ -35,7 +38,7 @@ describe("Sandbox Capabilities for Network Tools", () => {

expect(options).toHaveProperty("user", "root");
expect(options).toHaveProperty("cwd", "/home/user");
expect(options.timeoutMs).toBe(7 * 60 * 1000);
expect(options.timeoutMs).toBe(MAX_COMMAND_EXECUTION_TIME);
});

it("should NOT include user:root for ConvexSandbox (uses Docker capabilities)", () => {
Expand All @@ -45,7 +48,7 @@ describe("Sandbox Capabilities for Network Tools", () => {

expect(options).not.toHaveProperty("user");
expect(options).not.toHaveProperty("cwd");
expect(options.timeoutMs).toBe(7 * 60 * 1000);
expect(options.timeoutMs).toBe(MAX_COMMAND_EXECUTION_TIME);
});

it("should include handlers when provided", () => {
Expand Down
83 changes: 24 additions & 59 deletions lib/ai/tools/run-terminal-cmd.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ import {
checkCommandGuardrails,
} from "./utils/guardrails";

const MAX_COMMAND_EXECUTION_TIME = 7 * 60 * 1000; // 7 minutes
const STREAM_TIMEOUT_SECONDS = 60;
const DEFAULT_STREAM_TIMEOUT_SECONDS = 60;
const MAX_TIMEOUT_SECONDS = 600;

export const createRunTerminalCmd = (context: ToolContext) => {
const {
Expand All @@ -39,12 +39,6 @@ export const createRunTerminalCmd = (context: ToolContext) => {
const userGuardrailConfig = parseGuardrailConfig(guardrailsConfig);
const effectiveGuardrails = getEffectiveGuardrails(userGuardrailConfig);

// Wait instructions for E2B sandbox (local sandbox uses different commands)
// Note: Code also handles 'while ps -p' loops for robustness, but we only document tail --pid
const waitForProcessInstruction = `To wait for a background process to complete, use \`tail --pid=<pid> -f /dev/null\`. This blocks until the process exits and gets extended timeout (up to ${Math.floor(MAX_COMMAND_EXECUTION_TIME / 1000 / 60)} minutes). Example workflow: Start scan with is_background=true (returns PID 12345) → Wait with \`tail --pid=12345 -f /dev/null\``;

const timeoutWaitInstruction = `If a foreground command times out after ${STREAM_TIMEOUT_SECONDS} seconds but is still running and producing results (you'll see the timeout message), the process continues in the background. To wait for it: 1) Note the PID from the error/timeout message or use \`ps aux | grep <command_name>\` to find it, 2) Use \`tail --pid=<pid> -f /dev/null\` to wait for completion. This is common for long scans like comprehensive nmap, sqlmap, or nuclei scans.`;

return tool({
description: `Execute a command on behalf of the user.
If you have this tool, note that you DO have the ability to run commands directly in the sandbox environment.
Expand All @@ -57,8 +51,6 @@ In using these tools, adhere to the following guidelines:
3. For ANY commands that would require user interaction, ASSUME THE USER IS NOT AVAILABLE TO INTERACT and PASS THE NON-INTERACTIVE FLAGS (e.g. --yes for npx).
4. If the command would use a pager, append \` | cat\` to the command.
5. For commands that are long running/expected to run indefinitely until interruption, please run them in the background. To run jobs in the background, set \`is_background\` to true rather than changing the details of the command. EXCEPTION: Never use background mode if you plan to retrieve the output file immediately afterward.
- ${waitForProcessInstruction}
- ${timeoutWaitInstruction}
6. Dont include any newlines in the command.
7. Handle large outputs and save scan results to files:
- For complex and long-running scans (e.g., nmap, dirb, gobuster), save results to files using appropriate output flags (e.g., -oN for nmap) if the tool supports it, otherwise use redirect with > operator.
Expand Down Expand Up @@ -109,17 +101,32 @@ If you are generating files:
.describe(
"Whether the command should be run in the background. Set to FALSE if you need to retrieve output files immediately after with get_terminal_files. Only use TRUE for indefinite processes where you don't need immediate file access.",
),
timeout: z
.number()
.optional()
.default(DEFAULT_STREAM_TIMEOUT_SECONDS)
.describe(
`Timeout in seconds to wait for command execution. On timeout, command continues running in background. Capped at ${MAX_TIMEOUT_SECONDS} seconds. Defaults to ${DEFAULT_STREAM_TIMEOUT_SECONDS} seconds.`,
),
}),
execute: async (
{
command,
is_background,
timeout,
}: {
command: string;
is_background: boolean;
timeout?: number;
},
{ toolCallId, abortSignal },
) => {
// Calculate effective stream timeout (capped at MAX_TIMEOUT_SECONDS)
// This controls how long we wait for output, not how long the command runs
const effectiveStreamTimeout = Math.min(
timeout ?? DEFAULT_STREAM_TIMEOUT_SECONDS,
MAX_TIMEOUT_SECONDS,
);
// Check guardrails before executing the command
const guardrailResult = checkCommandGuardrails(
command,
Expand Down Expand Up @@ -266,36 +273,10 @@ If you are generating files:
});
}

// For wait commands (tail --pid or while ps -p loops), use MAX_COMMAND_EXECUTION_TIME
// instead of STREAM_TIMEOUT_SECONDS since they're designed to wait for long-running processes
const isTailWait = command.trim().startsWith("tail --pid");
const isWhilePsWait = /while\s+ps\s+-p\s+\d+/.test(command);
const isWaitCommand = isTailWait || isWhilePsWait;
const streamTimeout = isWaitCommand
? Math.floor(MAX_COMMAND_EXECUTION_TIME / 1000)
: STREAM_TIMEOUT_SECONDS;

// Extract PID from wait command for user-friendly messages
let waitingForPid: number | null = null;
if (isWaitCommand) {
// Try tail --pid pattern first
let pidMatch = command.match(/tail\s+--pid[=\s]+(\d+)/);
// If not found, try while ps -p pattern
if (!pidMatch) {
pidMatch = command.match(/while\s+ps\s+-p\s+(\d+)/);
}
if (pidMatch) {
waitingForPid = parseInt(pidMatch[1], 10);
createTerminalWriter(
`Waiting for process ${waitingForPid} to complete...\n`,
);
}
}

handler = createTerminalHandler(
(output) => createTerminalWriter(output),
{
timeoutSeconds: streamTimeout,
timeoutSeconds: effectiveStreamTimeout,
onTimeout: async () => {
if (resolved) {
return;
Expand All @@ -313,21 +294,12 @@ If you are generating files:
processId = await findProcessPid(sandboxInstance, command);
}

// Only show "continues in background" for STREAM_TIMEOUT_SECONDS (60s)
// For MAX_COMMAND_EXECUTION_TIME (10min), the process is killed by e2b
const isContinuingInBackground =
streamTimeout === STREAM_TIMEOUT_SECONDS;

if (isContinuingInBackground) {
createTerminalWriter(
TIMEOUT_MESSAGE(streamTimeout, processId ?? undefined),
);
} else {
// Max execution time reached - process will be killed by e2b
createTerminalWriter(
`\n\nCommand timed out after ${streamTimeout} seconds and was terminated.`,
);
}
createTerminalWriter(
TIMEOUT_MESSAGE(
effectiveStreamTimeout,
processId ?? undefined,
),
);

resolved = true;
const result = handler
Expand Down Expand Up @@ -461,13 +433,6 @@ If you are generating files:
);
}

// Add completion message for tail --pid commands
if (waitingForPid) {
createTerminalWriter(
`Process ${waitingForPid} completed\n`,
);
}

resolve({
result: is_background
? {
Expand Down
20 changes: 2 additions & 18 deletions lib/ai/tools/utils/hybrid-sandbox-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { ConvexSandbox } from "./convex-sandbox";
import { ensureSandboxConnection } from "./sandbox";
import { ConvexHttpClient } from "convex/browser";
import { api } from "@/convex/_generated/api";
import { PREINSTALLED_PENTESTING_TOOLS } from "@/lib/system-prompt";

type SandboxType = Sandbox | ConvexSandbox;

Expand Down Expand Up @@ -326,24 +327,7 @@ Container Environment:
- Mode: Docker container
- Network: Host network (--network host)

Pre-installed Pentesting Tools:
- Network Scanning: nmap, masscan (high-speed port scanner), naabu (port scanner), httpx (HTTP prober), hping3
- Subdomain/DNS: subfinder, dnsrecon, dnsenum, theHarvester (OSINT/email discovery)
- Web Fuzzing: ffuf (fast fuzzer), dirsearch (directory/file discovery), arjun (parameter discovery)
- Web Scanners: nikto (web server scanner), whatweb (web technology identifier), wpscan (WordPress scanner), wapiti (web vulnerability scanner), wafw00f (WAF detection), dalfox (XSS scanner)
- Injection: commix (command injection), sqlmap (SQL injection)
- SSL/TLS Testing: testssl (comprehensive HTTPS/SSL/TLS testing)
- Auth/Bruteforce: hydra (login bruteforcer)
- SMB/NetBIOS: smbclient, smbmap, nbtscan, python3-impacket, enum4linux
- SNMP/Discovery: arp-scan, ike-scan, onesixtyone, snmpcheck, netdiscover
- Web Recon: gospider, subjack
- Privilege Escalation: linpeas (Linux privilege escalation enumeration)
- Tunneling/Pivoting: chisel (fast TCP/UDP tunnel)
- Git/Repository Analysis: gitdumper, gitextractor (dump/extract git repos)
- Secret Scanning: trufflehog (find credentials in git/filesystems)
- Cryptography: rsactftool (RSA cryptography/CTF tool)
- Forensics: steghide, binwalk, foremost (steganography/file carving)
- Utilities: gobuster, socat, proxychains4, nuclei (vulnerability scanner with templates), interactsh-client (OOB interaction), SecLists
${PREINSTALLED_PENTESTING_TOOLS}
</sandbox_environment>`;
}

Expand Down
2 changes: 1 addition & 1 deletion lib/ai/tools/utils/sandbox-command-options.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { AnySandbox } from "@/types";
import { isE2BSandbox } from "./sandbox-types";

const MAX_COMMAND_EXECUTION_TIME = 7 * 60 * 1000; // 7 minutes
export const MAX_COMMAND_EXECUTION_TIME = 10 * 60 * 1000; // 10 minutes

/**
* Build command options for sandbox execution.
Expand Down
41 changes: 20 additions & 21 deletions lib/system-prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,25 @@ const DATE_FORMAT_OPTIONS: Intl.DateTimeFormatOptions = {
// Cache the current date to avoid repeated Date creation
export const currentDateTime = `${new Date().toLocaleDateString("en-US", DATE_FORMAT_OPTIONS)}`;

/**
 * Shared list of pentesting tools advertised to the model as pre-installed in
 * sandbox environments. It is interpolated verbatim into the
 * `<sandbox_environment>` prompt sections, so every name listed here should be
 * a command that actually exists in the image built from docker/Dockerfile.
 *
 * The CVE mapping tool is installed from
 * `github.com/projectdiscovery/cvemap/cmd/cvemap`, and the image verification
 * step checks `which cvemap`, so only `cvemap` is listed here.
 */
export const PREINSTALLED_PENTESTING_TOOLS = `Pre-installed Pentesting Tools:
- Network Scanning: nmap (network mapping/port scanning), naabu (fast port scanner), httpx (HTTP prober)
- Subdomain/DNS: subfinder (subdomain enumeration), dnsrecon, dnsenum
- Web Fuzzing: ffuf (fast fuzzer), dirsearch (directory/file discovery), arjun (parameter discovery)
- Web Scanners: nikto (web server scanner), whatweb (web technology identifier), wpscan (WordPress scanner), wapiti (web vulnerability scanner), wafw00f (WAF detection)
- Injection: sqlmap (SQL injection detection/exploitation)
- Auth/Bruteforce: hydra (login bruteforcer)
- SMB/NetBIOS: smbclient, smbmap, nbtscan, python3-impacket, enum4linux
- Network Discovery: arp-scan
- Web Recon: gospider (web spider/crawler), katana (advanced web crawler)
- Git/Repository Analysis: gitdumper, gitextractor (dump/extract git repos)
- Secret Scanning: trufflehog (find credentials in git/filesystems)
- Vulnerability Assessment: nuclei (vulnerability scanner with templates), trivy (container/dependency scanner), zaproxy (OWASP ZAP), cvemap (CVE vulnerability mapping)
- Forensics: binwalk, foremost (file carving)
- Utilities: gobuster, socat, proxychains4, hashid, libimage-exiftool-perl (exiftool), cewl
- Specialized: jwt_tool (JWT manipulation), interactsh-client (OOB interaction testing), SecLists (/home/user/SecLists or /usr/share/seclists)
- Documents: reportlab, python-docx, openpyxl, python-pptx, pandas, pypandoc, pandoc, odfpy`;
Comment on lines +20 to +37
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

# First, let's check if the Dockerfile exists and examine its structure
find . -name "Dockerfile" -type f | head -5

Repository: hackerai-tech/hackerai

Length of output: 85


🏁 Script executed:

# Look for the docker directory and list its contents
ls -la docker/ 2>/dev/null || echo "No docker directory found"

Repository: hackerai-tech/hackerai

Length of output: 361


🏁 Script executed:

# If Dockerfile is elsewhere, search for it
fd -name "Dockerfile" -type f

Repository: hackerai-tech/hackerai

Length of output: 296


🏁 Script executed:

# Read the Dockerfile to check tool installations
cat -n ./docker/Dockerfile | head -100

Repository: hackerai-tech/hackerai

Length of output: 3609


🏁 Script executed:

# Check specifically for vulnx and cvemap installations
rg "vulnx|cvemap" ./docker/Dockerfile -A 2 -B 2

Repository: hackerai-tech/hackerai

Length of output: 614


🏁 Script executed:

# Check for SecLists installation
rg "SecLists|seclists" ./docker/Dockerfile -A 2 -B 2

Repository: hackerai-tech/hackerai

Length of output: 371


🏁 Script executed:

# Check for vulnx specifically in the entire Dockerfile
rg "vulnx" ./docker/Dockerfile

Repository: hackerai-tech/hackerai

Length of output: 48


🏁 Script executed:

# Get more context from the Dockerfile - read the rest of it
cat -n ./docker/Dockerfile | tail -100

Repository: hackerai-tech/hackerai

Length of output: 5107


🏁 Script executed:

# Check the complete list of Go tools installed
rg "go install" ./docker/Dockerfile

Repository: hackerai-tech/hackerai

Length of output: 294


🏁 Script executed:

# Check for all Python packages installed via pip
rg "pip3 install|pip install" ./docker/Dockerfile -A 3

Repository: hackerai-tech/hackerai

Length of output: 152


🏁 Script executed:

# Verify pypandoc and pandoc installations specifically
rg "pypandoc|pandoc" ./docker/Dockerfile

Repository: hackerai-tech/hackerai

Length of output: 93


Remove `vulnx/` from the "Vulnerability Assessment" entry of the `PREINSTALLED_PENTESTING_TOOLS` list (line 33).

The Dockerfile installs only `cvemap` via `go install` (line 229), not `vulnx`, so the documentation should list `cvemap` alone rather than `vulnx/cvemap`. The documented SecLists path `/usr/share/seclists` matches the Dockerfile installation. All other tools in the list are properly installed and validated in the Dockerfile.

🤖 Prompt for AI Agents
In @lib/system-prompt.ts around lines 20-37, the PREINSTALLED_PENTESTING_TOOLS
constant currently lists "vulnx/cvemap", but the Dockerfile installs only cvemap
(not vulnx). Update the string inside PREINSTALLED_PENTESTING_TOOLS to remove
"vulnx/" so that the entry reads "cvemap (CVE vulnerability mapping)", matching
the tooling that is actually installed. Modify the exported constant definition
(PREINSTALLED_PENTESTING_TOOLS) accordingly so that the docs and the runtime
prompt align with the Dockerfile.


// Template sections for better organization
const getAgentModeInstructions = (mode: ChatMode): string => {
return mode === "agent"
Expand Down Expand Up @@ -45,27 +64,7 @@ Development Environment:
- Node.js 20.19.4 (commands: node, npm)
- Golang 1.24.2 (commands: go)

Pre-installed Pentesting Tools:
- Network Scanning: nmap, masscan (high-speed port scanner), naabu (port scanner), httpx (HTTP prober), hping3
- Subdomain/DNS: subfinder, dnsrecon, dnsenum, theHarvester (OSINT/email discovery)
- Web Fuzzing: ffuf (fast fuzzer), dirsearch (directory/file discovery), arjun (parameter discovery)
- Web Scanners: nikto (web server scanner), whatweb (web technology identifier), wpscan (WordPress scanner), wapiti (web vulnerability scanner), wafw00f (WAF detection), dalfox (XSS scanner)
- Injection: commix (command injection), sqlmap (SQL injection)
- SSL/TLS Testing: testssl (comprehensive HTTPS/SSL/TLS testing)
- Auth/Bruteforce: hydra (login bruteforcer)
- SMB/NetBIOS: smbclient, smbmap, nbtscan, python3-impacket, enum4linux
- SNMP/Discovery: arp-scan, ike-scan, onesixtyone, snmpcheck, netdiscover
- Web Recon: gospider, subjack
- WebDAV: cadaver, davtest
- Privilege Escalation: linpeas (Linux privilege escalation enumeration)
- Tunneling/Pivoting: chisel (fast TCP/UDP tunnel)
- Git/Repository Analysis: gitdumper, gitextractor (dump/extract git repos)
- Secret Scanning: trufflehog (find credentials in git/filesystems)
- Cryptography: rsactftool (RSA cryptography/CTF tool)
- Forensics: steghide, binwalk, foremost (steganography/file carving)
- Utilities: gobuster, socat, proxychains4, hashid, libimage-exiftool-perl (exiftool), cewl
- Specialized: jwt_tool (JWT manipulation), nuclei (vulnerability scanner with templates), interactsh-client (OOB interaction), SecLists (/home/user/SecLists or /usr/share/seclists)
- Documents: reportlab, python-docx, openpyxl, python-pptx, pandas, pypandoc, pandoc, odfpy
${PREINSTALLED_PENTESTING_TOOLS}
</sandbox_environment>`;

const getAgentModeSection = (
Expand Down