Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 160 additions & 3 deletions src/browser/cdp.test.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';

const { MockWebSocket } = vi.hoisted(() => {
class MockWebSocket {
static OPEN = 1;
readyState = 1;
private handlers = new Map<string, Array<(...args: any[]) => void>>();
/** Sent messages (for inspecting CDP commands) */
sentMessages: string[] = [];

constructor(_url: string) {
queueMicrotask(() => this.emit('open'));
Expand All @@ -16,12 +18,19 @@ const { MockWebSocket } = vi.hoisted(() => {
this.handlers.set(event, handlers);
}

send(_message: string): void {}
send(message: string): void {
this.sentMessages.push(message);
}

close(): void {
this.readyState = 3;
}

/** Simulate receiving a CDP message from the browser */
simulateMessage(msg: Record<string, unknown>): void {
this.emit('message', Buffer.from(JSON.stringify(msg)));
}

private emit(event: string, ...args: any[]): void {
for (const handler of this.handlers.get(event) ?? []) {
handler(...args);
Expand All @@ -36,7 +45,7 @@ vi.mock('ws', () => ({
WebSocket: MockWebSocket,
}));

import { CDPBridge } from './cdp.js';
import { CDPBridge, type NetworkCaptureEntry } from './cdp.js';

describe('CDPBridge cookies', () => {
beforeEach(() => {
Expand Down Expand Up @@ -64,3 +73,151 @@ describe('CDPBridge cookies', () => {
]);
});
});

/**
 * CDPPage session-level network capture.
 *
 * `bridge.send` is stubbed so no real CDP command is issued, and CDP events are
 * injected by invoking the listeners registered on the bridge directly.
 */
describe('CDPPage network capture', () => {
  let bridge: CDPBridge;

  /** Deliver a synthetic CDP event to every listener registered on the bridge for `event`. */
  const emitCdpEvent = (event: string, params: Record<string, unknown>): void => {
    const listeners = (bridge as any)._eventListeners.get(event);
    // Fail with a readable assertion (not a TypeError) if capture never subscribed to this event.
    expect(listeners).toBeDefined();
    for (const listener of listeners) {
      listener(params);
    }
  };

  beforeEach(() => {
    vi.stubEnv('OPENCLI_CDP_ENDPOINT', 'ws://127.0.0.1:9222/devtools/page/1');
    bridge = new CDPBridge();
    // Mock send to auto-resolve CDP commands; only getResponseBody needs a payload.
    vi.spyOn(bridge, 'send').mockImplementation(async (method: string) => {
      if (method === 'Network.getResponseBody') return { body: '{"items":[1,2,3]}' };
      return {};
    });
  });

  afterEach(async () => {
    vi.unstubAllEnvs();
    await bridge.close();
  });

  it('startNetworkCapture enables Network domain', async () => {
    const page = await bridge.connect();
    await page.startNetworkCapture!();
    expect(bridge.send).toHaveBeenCalledWith('Network.enable', { maxPostDataLength: 0 });
  });

  it('captures requests via CDP events and returns them via readNetworkCapture', async () => {
    const page = await bridge.connect();
    await page.startNetworkCapture!();

    const requestId = 'req-1';

    // 1. Request sent
    emitCdpEvent('Network.requestWillBeSent', {
      requestId,
      request: { url: 'https://api.example.com/data', method: 'GET', headers: { 'accept': 'application/json' } },
      wallTime: Date.now() / 1000,
    });

    // 2. Response received
    emitCdpEvent('Network.responseReceived', {
      requestId,
      response: { status: 200, mimeType: 'application/json', headers: { 'content-type': 'application/json' }, encodedDataLength: 1234 },
    });

    // 3. Loading finished
    emitCdpEvent('Network.loadingFinished', { requestId });

    // Read captured entries
    const entries = await page.readNetworkCapture!() as NetworkCaptureEntry[];
    expect(entries.length).toBe(1);
    expect(entries[0].url).toBe('https://api.example.com/data');
    expect(entries[0].method).toBe('GET');
    expect(entries[0].status).toBe(200);
    expect(entries[0].responseContentType).toBe('application/json');
    expect(entries[0].size).toBe(1234);
    expect(entries[0].requestHeaders).toEqual({ 'accept': 'application/json' });
  });

  it('readNetworkCapture drains buffer (second read returns empty)', async () => {
    const page = await bridge.connect();
    await page.startNetworkCapture!();

    emitCdpEvent('Network.requestWillBeSent', {
      requestId: 'drain-1',
      request: { url: 'https://api.com/x', method: 'GET', headers: {} },
      wallTime: 1,
    });
    emitCdpEvent('Network.responseReceived', {
      requestId: 'drain-1',
      response: { status: 200, mimeType: 'text/html', headers: {} },
    });

    const first = await page.readNetworkCapture!();
    expect(first.length).toBe(1);

    // Second read should be empty (buffer drained)
    const second = await page.readNetworkCapture!();
    expect(second).toEqual([]);
  });

  it('readNetworkCapture returns empty array when no capture started', async () => {
    const page = await bridge.connect();
    const entries = await page.readNetworkCapture!();
    expect(entries).toEqual([]);
  });

  it('startNetworkCapture is idempotent', async () => {
    const page = await bridge.connect();
    await page.startNetworkCapture!();
    await page.startNetworkCapture!(); // second call should be no-op
    // Network.enable should only be called once (plus Page.enable from connect)
    const networkEnableCalls = (bridge.send as any).mock.calls.filter(
      (c: unknown[]) => c[0] === 'Network.enable'
    );
    expect(networkEnableCalls.length).toBe(1);
  });

  it('skips response body for non-textual content types', async () => {
    const page = await bridge.connect();

    // Override send to track getResponseBody calls
    const getResponseBodyCalls: string[] = [];
    (bridge.send as any).mockImplementation(async (method: string, params?: Record<string, unknown>) => {
      if (method === 'Network.getResponseBody') {
        getResponseBodyCalls.push(String(params?.requestId ?? ''));
        return { body: 'binary data' };
      }
      return {};
    });

    await page.startNetworkCapture!();

    // Simulate an image request through its full lifecycle
    emitCdpEvent('Network.requestWillBeSent', {
      requestId: 'img-1',
      request: { url: 'https://example.com/logo.png', method: 'GET', headers: {} },
      wallTime: Date.now() / 1000,
    });
    emitCdpEvent('Network.responseReceived', {
      requestId: 'img-1',
      response: { status: 200, mimeType: 'image/png', headers: {}, encodedDataLength: 5000 },
    });
    emitCdpEvent('Network.loadingFinished', { requestId: 'img-1' });

    // Should NOT fetch response body for images
    expect(getResponseBodyCalls).not.toContain('img-1');

    const entries = await page.readNetworkCapture!() as NetworkCaptureEntry[];
    expect(entries.length).toBe(1);
    expect(entries[0].responseBody).toBeUndefined();
  });
});
131 changes: 131 additions & 0 deletions src/browser/cdp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,143 @@ export class CDPBridge implements IBrowserFactory {
}
}

/**
 * Entry captured by session-level passive network capture.
 *
 * An entry is created when a response is received for a tracked request; the
 * request-side fields are copied from the earlier request event.
 */
export interface NetworkCaptureEntry {
/** Request URL as reported by the request event. */
url: string;
/** HTTP method as reported by the request event (`'GET'` when the event omits it). */
method: string;
/** HTTP status of the response; left undefined when the response does not carry a numeric status. */
status?: number;
/** Headers sent with the request. */
requestHeaders?: Record<string, string>;
/** Headers returned with the response. */
responseHeaders?: Record<string, string>;
/** Response MIME type, falling back to the `content-type` response header. */
responseContentType?: string;
/**
 * Response body text. Only fetched for textual content types (JSON, text, XML,
 * JavaScript) after loading finishes, and truncated to `MAX_RESPONSE_PREVIEW`
 * characters with a trailing truncation note. Undefined otherwise.
 */
responseBody?: string;
/** Size in bytes (encoded), taken from the response's `encodedDataLength`. */
size?: number;
/** Request time in milliseconds: the event's `wallTime` × 1000, or `Date.now()` when `wallTime` is absent. */
timestamp: number;
}

/**
 * Maximum entries to keep in the capture buffer between reads.
 * Responses that arrive while the buffer is full are dropped.
 */
const MAX_CAPTURE_ENTRIES = 200;
/**
 * Maximum response body preview size, measured in string length (characters).
 * Longer bodies are cut at this length and suffixed with a truncation note.
 */
const MAX_RESPONSE_PREVIEW = 10_000;

class CDPPage extends BasePage {
private _pageEnabled = false;
private _captureEntries: NetworkCaptureEntry[] = [];
private _captureActive = false;
private _captureRequestMap = new Map<string, { index: number; method: string; url: string; headers: Record<string, string>; timestamp: number }>();
private _captureHandlers: Array<{ event: string; handler: (params: unknown) => void }> = [];

constructor(private bridge: CDPBridge) {
super();
}

// ── Session-level passive network capture ──────────────────────────────

/**
 * Begin passive, session-level network capture over CDP.
 *
 * Enables the `Network` domain, then subscribes to request/response events and
 * accumulates one {@link NetworkCaptureEntry} per response (up to
 * `MAX_CAPTURE_ENTRIES`). Calling it again while capture is active is a no-op.
 *
 * @param _pattern - Not used by this implementation.
 * @throws Rethrows a `Network.enable` failure; capture is left inactive so a later call can retry.
 */
async startNetworkCapture(_pattern?: string): Promise<void> {
  if (this._captureActive) return;
  // Claim the flag before the first await so overlapping calls cannot both register handlers.
  this._captureActive = true;
  this._captureEntries = [];
  this._captureRequestMap.clear();

  try {
    await this.bridge.send('Network.enable', { maxPostDataLength: 0 });
  } catch (err) {
    // Without this reset a failed enable would leave capture permanently "active"
    // with no handlers attached, turning every retry into a silent no-op.
    this._captureActive = false;
    throw err;
  }

  // Remember request-side data until the matching response arrives.
  const onRequestWillBeSent = (params: unknown) => {
    const p = params as Record<string, unknown>;
    const requestId = String(p.requestId ?? '');
    const request = p.request as Record<string, unknown> | undefined;
    if (!requestId || !request) return;

    const url = String(request.url ?? '');
    const method = String(request.method ?? 'GET');
    const headers = (request.headers ?? {}) as Record<string, string>;
    // wallTime is multiplied by 1000 to store milliseconds, consistent with the Date.now() fallback.
    const timestamp = typeof p.wallTime === 'number' ? p.wallTime * 1000 : Date.now();

    // index -1 marks "no entry recorded yet".
    this._captureRequestMap.set(requestId, {
      index: -1, method, url, headers, timestamp,
    });
  };

  // Create the entry once response metadata is known.
  const onResponseReceived = (params: unknown) => {
    const p = params as Record<string, unknown>;
    const requestId = String(p.requestId ?? '');
    const response = p.response as Record<string, unknown> | undefined;
    const pending = this._captureRequestMap.get(requestId);
    if (!pending || !response) return;

    // Buffer full: drop the response and stop tracking the request.
    if (this._captureEntries.length >= MAX_CAPTURE_ENTRIES) {
      this._captureRequestMap.delete(requestId);
      return;
    }

    const responseHeaders = (response.headers ?? {}) as Record<string, string>;
    const contentType = String(response.mimeType ?? responseHeaders['content-type'] ?? '');

    const entry: NetworkCaptureEntry = {
      url: pending.url,
      method: pending.method,
      status: typeof response.status === 'number' ? response.status : undefined,
      requestHeaders: pending.headers,
      responseHeaders,
      responseContentType: contentType,
      size: typeof response.encodedDataLength === 'number' ? response.encodedDataLength : undefined,
      timestamp: pending.timestamp,
    };

    // Record where the entry lives so loadingFinished can attach the body to it.
    pending.index = this._captureEntries.length;
    this._captureEntries.push(entry);
  };

  // Attach a body preview (textual responses only) once the response is complete.
  const onLoadingFinished = (params: unknown) => {
    const p = params as Record<string, unknown>;
    const requestId = String(p.requestId ?? '');
    const pending = this._captureRequestMap.get(requestId);
    // The request is finished on every path below; stop tracking it up front.
    this._captureRequestMap.delete(requestId);
    if (!pending || pending.index < 0) return;

    const entry = this._captureEntries[pending.index];
    if (!entry) return;

    // Only fetch response body for JSON/text content types (skip images, fonts, etc.)
    const ct = (entry.responseContentType ?? '').toLowerCase();
    const isTextual = ct.includes('json') || ct.includes('text') || ct.includes('xml') || ct.includes('javascript');
    if (!isTextual) return;

    // Fire-and-forget: the entry object is mutated in place when the body arrives.
    void this.bridge.send('Network.getResponseBody', { requestId }).then((result) => {
      const r = result as Record<string, unknown> | undefined;
      if (r && typeof r.body === 'string') {
        entry.responseBody = r.body.length > MAX_RESPONSE_PREVIEW
          ? r.body.slice(0, MAX_RESPONSE_PREVIEW) + `...[truncated, ${r.body.length - MAX_RESPONSE_PREVIEW} chars omitted]`
          : r.body;
      }
    }).catch(() => { /* response body unavailable */ });
  };

  // Failed or cancelled requests never emit loadingFinished; without this their
  // bookkeeping would linger in the request map until the next read.
  const onLoadingFailed = (params: unknown) => {
    const p = params as Record<string, unknown>;
    this._captureRequestMap.delete(String(p.requestId ?? ''));
  };

  // Register handlers
  this._captureHandlers = [
    { event: 'Network.requestWillBeSent', handler: onRequestWillBeSent },
    { event: 'Network.responseReceived', handler: onResponseReceived },
    { event: 'Network.loadingFinished', handler: onLoadingFinished },
    { event: 'Network.loadingFailed', handler: onLoadingFailed },
  ];
  for (const { event, handler } of this._captureHandlers) {
    this.bridge.on(event, handler);
  }
}

/**
 * Drain the capture buffer.
 *
 * Waits 200 ms, then returns every entry accumulated so far and resets the
 * buffer and the pending-request map, so an immediate second call yields `[]`.
 *
 * @returns The entries captured since the previous read (possibly empty).
 */
async readNetworkCapture(): Promise<unknown[]> {
  // Short grace period so outstanding response-body fetches can land on their entries.
  await new Promise((done) => setTimeout(done, 200));

  // Hand the accumulated entries to the caller and start over with a fresh
  // buffer, matching daemon/extension contract.
  const drained = this._captureEntries;
  this._captureRequestMap.clear();
  this._captureEntries = [];
  return drained;
}

async goto(url: string, options?: { waitUntil?: 'load' | 'none'; settleMs?: number }): Promise<void> {
if (!this._pageEnabled) {
await this.bridge.send('Page.enable');
Expand Down
24 changes: 21 additions & 3 deletions src/explore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ export interface ExploreBundle {
auth: ExploreAuthSummary;
}

/**
 * Best-effort JSON decode.
 *
 * @param str - Text that may or may not be valid JSON.
 * @returns The parsed value when `str` is valid JSON; otherwise `str` itself, unchanged.
 */
function tryParseJson(str: string): unknown {
  let parsed: unknown;
  try {
    parsed = JSON.parse(str);
  } catch {
    // Not JSON: hand the raw text back so the caller still gets the payload.
    return str;
  }
  return parsed;
}

/**
* Parse raw network output from browser page.
* Handles text format: [GET] url => [200]
Expand All @@ -159,8 +163,9 @@ function parseNetworkRequests(raw: unknown): NetworkEntry[] {
method: (e.method ?? 'GET').toUpperCase(),
url: String(e.url ?? e.request?.url ?? e.requestUrl ?? ''),
status: e.status ?? e.statusCode ?? null,
contentType: e.contentType ?? e.response?.contentType ?? '',
responseBody: e.responseBody, requestHeaders: e.requestHeaders,
contentType: e.contentType ?? e.responseContentType ?? e.response?.contentType ?? '',
responseBody: e.responseBody ? (typeof e.responseBody === 'string' ? tryParseJson(e.responseBody) : e.responseBody) : undefined,
requestHeaders: e.requestHeaders,
}));
}
return [];
Expand Down Expand Up @@ -358,6 +363,12 @@ export async function exploreUrl(

return browserSession(opts.BrowserFactory, async (page) => {
return runWithTimeout((async () => {
// Step 0: Start session-level network capture before navigation (if available)
const hasCapture = typeof page.startNetworkCapture === 'function';
if (hasCapture) {
await page.startNetworkCapture!().catch(() => {});
}

// Step 1: Navigate
await page.goto(url);
await page.wait(waitSeconds);
Expand Down Expand Up @@ -394,7 +405,14 @@ export async function exploreUrl(
const metadata = await readPageMetadata(page);

// Step 4: Capture network traffic
const rawNetwork = await page.networkRequests(false);
// Prefer session-level capture (has method/status/headers/body); fallback to performance API
let rawNetwork: unknown;
if (hasCapture) {
rawNetwork = await page.readNetworkCapture!().catch(() => null);
}
if (!rawNetwork || (Array.isArray(rawNetwork) && rawNetwork.length === 0)) {
rawNetwork = await page.networkRequests(false);
}
const networkEntries = parseNetworkRequests(rawNetwork);

// Step 5: For JSON endpoints missing a body, carefully re-fetch in-browser via a pristine iframe
Expand Down
Loading