Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 6 additions & 23 deletions src/analysis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -150,30 +150,13 @@ export function detectAuthFromContent(url: string, body: unknown): string[] {
return indicators;
}

// ── Shared scoring helpers ───────────────────────────────────────────────────

/**
 * Adjust an endpoint score using only the shape of its URL.
 *
 * Rules are applied in order and are independent of each other:
 * - `+3` when the URL contains `/api/` or `/x/`.
 * - `-10` when a `/` is immediately followed by a telemetry word
 *   (track, log, analytics, beacon, pixel, stats, metric), case-insensitive.
 * - `-10` when a `/` is immediately followed by a liveness word
 *   (ping, heartbeat, keep-alive / keepalive), case-insensitive.
 *
 * The word match is a prefix match, so `/login` and `/tracking` are penalised too.
 *
 * @param url - Request URL to inspect.
 * @param score - Score accumulated so far.
 * @returns The score after all matching adjustments.
 */
export function applyUrlScoreAdjustments(url: string, score: number): number {
  const adjustments: Array<[boolean, number]> = [
    [['/api/', '/x/'].some((segment) => url.includes(segment)), 3],
    [/\/(track|log|analytics|beacon|pixel|stats|metric)/i.test(url), -10],
    [/\/(ping|heartbeat|keep.?alive)/i.test(url), -10],
  ];
  // Fold the applicable deltas onto the incoming score, leaving it untouched when a rule does not apply.
  return adjustments.reduce((total, [applies, delta]) => (applies ? total + delta : total), score);
}
// ── Noise filtering ─────────────────────────────────────────────────────────

/** Score an array response based on item count and detected field roles. */
export function scoreArrayResponse(arrayResult: ArrayDiscovery | null): number {
if (!arrayResult) return 0;
let s = 10;
s += Math.min(arrayResult.items.length, 10);
const sample = arrayResult.items[0];
if (sample && typeof sample === 'object') {
const keys = Object.keys(sample as object).map(k => k.toLowerCase());
for (const aliases of Object.values(FIELD_ROLES)) {
if (aliases.some(a => keys.includes(a))) s += 2;
}
}
return s;
const NOISE_URL_PATTERN = /\/(track|log|analytics|beacon|pixel|ping|heartbeat|keep.?alive)\b/i;

/**
 * Report whether a URL looks like tracking/telemetry traffic instead of a business API.
 *
 * A URL counts as noise when a `/` is immediately followed by one of the listed words and
 * that word ends at a word boundary (case-insensitive). `/log?x=1` is therefore noise,
 * while `/login` and `/tracking` are not.
 *
 * @param url - Request URL to classify.
 * @returns `true` when the URL matches the noise pattern.
 */
export function isNoiseUrl(url: string): boolean {
  return url.search(NOISE_URL_PATTERN) !== -1;
}

// ── Query param classification ──────────────────────────────────────────────
Expand Down
55 changes: 34 additions & 21 deletions src/explore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import {
inferStrategy,
detectAuthFromHeaders,
classifyQueryParams,
isNoiseUrl,
} from './analysis.js';

// ── Site name detection ────────────────────────────────────────────────────
Expand Down Expand Up @@ -67,14 +68,14 @@ interface NetworkEntry {

interface AnalyzedEndpoint {
pattern: string; method: string; url: string; status: number | null;
contentType: string; queryParams: string[]; score: number;
contentType: string; queryParams: string[];
hasSearchParam: boolean; hasPaginationParam: boolean; hasLimitParam: boolean;
authIndicators: string[];
responseAnalysis: { itemPath: string | null; itemCount: number; detectedFields: Record<string, string>; sampleFields: string[] } | null;
}

interface InferredCapability {
name: string; description: string; strategy: string; confidence: number;
name: string; description: string; strategy: string;
endpoint: string; itemPath: string | null;
recommendedColumns: string[];
recommendedArgs: Array<{ name: string; type: string; required: boolean; default?: unknown }>;
Expand Down Expand Up @@ -104,7 +105,6 @@ export interface ExploreEndpointArtifact {
url: string;
status: number | null;
contentType: string;
score: number;
queryParams: string[];
itemPath: string | null;
itemCount: number;
Expand Down Expand Up @@ -194,17 +194,29 @@ function isBooleanRecord(value: unknown): value is Record<string, boolean> {
&& Object.values(value as Record<string, unknown>).every(v => typeof v === 'boolean');
}

function scoreEndpoint(ep: { contentType: string; responseAnalysis: AnalyzedEndpoint['responseAnalysis']; pattern: string; status: number | null; hasSearchParam: boolean; hasPaginationParam: boolean; hasLimitParam: boolean }): number {
let s = 0;
if (ep.contentType.includes('json')) s += 10;
if (ep.responseAnalysis) { s += 5; s += Math.min(ep.responseAnalysis.itemCount, 10); s += Object.keys(ep.responseAnalysis.detectedFields).length * 2; }
if (ep.pattern.includes('/api/') || ep.pattern.includes('/x/')) s += 3;
if (ep.hasSearchParam) s += 3;
if (ep.hasPaginationParam) s += 2;
if (ep.hasLimitParam) s += 2;
if (ep.status === 200) s += 2;
if (ep.responseAnalysis && ep.responseAnalysis.itemCount === 0 && ep.contentType.includes('json')) s -= 3;
return s;
/**
 * Rank an analyzed endpoint for ordering; callers that sort by descending key
 * put the most data-rich endpoints first.
 *
 * The key is derived only from fields already present on the endpoint:
 * - `100 + min(itemCount, 50)` when the response analysis found at least one item,
 * - `10` per detected semantic field,
 * - `5` when the pattern contains `/api/` or `/x/`,
 * - `5` when any of the search / pagination / limit flags is set.
 *
 * @param ep - Endpoint whose metadata is inspected; it is not modified.
 * @returns A non-negative sort key.
 */
function endpointSortKey(ep: AnalyzedEndpoint): number {
  const analysis = ep.responseAnalysis;
  const itemCount = analysis?.itemCount ?? 0;
  const fieldCount = Object.keys(analysis?.detectedFields ?? {}).length;

  // List-style responses dominate the ranking; the item bonus is capped so huge lists do not run away.
  const arrayBonus = itemCount > 0 ? 100 + Math.min(itemCount, 50) : 0;
  const fieldBonus = fieldCount * 10;
  const apiPathBonus = ['/api/', '/x/'].some((segment) => ep.pattern.includes(segment)) ? 5 : 0;
  // Any recognised query parameter hints at a parameterized API.
  const paramBonus = ep.hasSearchParam || ep.hasPaginationParam || ep.hasLimitParam ? 5 : 0;

  return arrayBonus + fieldBonus + apiPathBonus + paramBonus;
}

/**
 * Decide whether an endpoint is worth keeping.
 *
 * An endpoint is kept only when its URL is not flagged by `isNoiseUrl` and its
 * content type string contains `json`.
 */
function isUsefulEndpoint(ep: AnalyzedEndpoint): boolean {
  return !isNoiseUrl(ep.url) && ep.contentType.includes('json');
}


Expand All @@ -229,7 +241,7 @@ const INTERACT_FUZZ_JS = interactFuzz.toString();

// ── Analysis helpers (extracted from exploreUrl) ───────────────────────────

/** Filter, deduplicate, and score network endpoints. */
/** Filter and deduplicate network endpoints, keeping only useful structured-data APIs. */
function analyzeEndpoints(networkEntries: NetworkEntry[]): { analyzed: AnalyzedEndpoint[]; totalCount: number } {
const seen = new Map<string, AnalyzedEndpoint>();
for (const entry of networkEntries) {
Expand All @@ -251,13 +263,14 @@ function analyzeEndpoints(networkEntries: NetworkEntry[]): { analyzed: AnalyzedE
hasLimitParam: hasLimit || qp.some(p => LIMIT_PARAMS.has(p)),
authIndicators: detectAuthFromHeaders(entry.requestHeaders),
responseAnalysis: entry.responseBody ? analyzeResponseBody(entry.responseBody) : null,
score: 0,
};
ep.score = scoreEndpoint(ep);
seen.set(key, ep);
}

const analyzed = [...seen.values()].filter(ep => ep.score >= 5).sort((a, b) => b.score - a.score);
// Filter to useful endpoints; deterministic ordering by observable metadata signals
const analyzed = [...seen.values()]
.filter(isUsefulEndpoint)
.sort((a, b) => endpointSortKey(b) - endpointSortKey(a));
return { analyzed, totalCount: seen.size };
}

Expand Down Expand Up @@ -305,7 +318,7 @@ function inferCapabilitiesFromEndpoints(
capabilities.push({
name: capName, description: `${opts.site ?? detectSiteName(opts.url)} ${capName}`,
strategy: storeHint ? 'store-action' : epStrategy,
confidence: Math.min(ep.score / 20, 1.0), endpoint: ep.pattern,
endpoint: ep.pattern,
itemPath: ep.responseAnalysis?.itemPath ?? null,
recommendedColumns: cols.length ? cols : ['title', 'url'],
recommendedArgs: args,
Expand Down Expand Up @@ -337,7 +350,7 @@ async function writeExploreArtifacts(
}, null, 2)),
fs.promises.writeFile(path.join(targetDir, 'endpoints.json'), JSON.stringify(analyzedEndpoints.map(ep => ({
pattern: ep.pattern, method: ep.method, url: ep.url, status: ep.status,
contentType: ep.contentType, score: ep.score, queryParams: ep.queryParams,
contentType: ep.contentType, queryParams: ep.queryParams,
itemPath: ep.responseAnalysis?.itemPath ?? null, itemCount: ep.responseAnalysis?.itemCount ?? 0,
detectedFields: ep.responseAnalysis?.detectedFields ?? {}, authIndicators: ep.authIndicators,
})), null, 2)),
Expand Down Expand Up @@ -485,7 +498,7 @@ export function renderExploreSummary(result: ExploreResult): string {
];
for (const cap of (result.capabilities ?? []).slice(0, 5)) {
const storeInfo = cap.storeHint ? ` → ${cap.storeHint.store}.${cap.storeHint.action}()` : '';
lines.push(` • ${cap.name} (${cap.strategy}, ${(cap.confidence * 100).toFixed(0)}%)${storeInfo}`);
lines.push(` • ${cap.name} (${cap.strategy})${storeInfo}`);
}
const fw = result.framework ?? {};
const fwNames = Object.entries(fw).filter(([, v]) => v).map(([k]) => k);
Expand Down
7 changes: 3 additions & 4 deletions src/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ export interface GenerateCliResult {
};
synthesize: {
candidate_count: number;
candidates: Array<Pick<SynthesizeCandidateSummary, 'name' | 'strategy' | 'confidence'>>;
candidates: Array<Pick<SynthesizeCandidateSummary, 'name' | 'strategy'>>;
};
}

Expand Down Expand Up @@ -71,7 +71,7 @@ function normalizeGoal(goal?: string | null): string | null {
*/
function selectCandidate(candidates: SynthesizeResult['candidates'], goal?: string | null): SynthesizeCandidateSummary | null {
if (!candidates.length) return null;
if (!goal) return candidates[0]; // highest confidence first
if (!goal) return candidates[0];

const normalized = normalizeGoal(goal);
if (normalized) {
Expand Down Expand Up @@ -127,7 +127,6 @@ export async function generateCliFromUrl(opts: GenerateCliOptions): Promise<Gene
candidates: (synthesizeResult.candidates ?? []).map((c) => ({
name: c.name,
strategy: c.strategy,
confidence: c.confidence,
})),
},
};
Expand All @@ -150,7 +149,7 @@ export function renderGenerateSummary(r: GenerateCliResult): string {
];

for (const c of r.synthesize?.candidates ?? []) {
lines.push(` • ${c.name} (${c.strategy}, ${((c.confidence ?? 0) * 100).toFixed(0)}%)`);
lines.push(` • ${c.name} (${c.strategy})`);
}

const fw = r.explore?.framework ?? {};
Expand Down
45 changes: 13 additions & 32 deletions src/record.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ import {
inferStrategy,
detectAuthFromContent,
classifyQueryParams,
applyUrlScoreAdjustments,
scoreArrayResponse,
isNoiseUrl,
} from './analysis.js';

// ── Types ──────────────────────────────────────────────────────────────────
Expand Down Expand Up @@ -64,7 +63,6 @@ type RecordedCandidateKind = 'read' | 'write';
export interface RecordedCandidate {
kind: RecordedCandidateKind;
req: RecordedRequest;
score: number;
arrayResult: ReturnType<typeof findArrayPath> | null;
}

Expand All @@ -75,18 +73,11 @@ interface GeneratedRecordedCandidate {
yaml: unknown;
}

/** Keep the stronger candidate when multiple recordings share one bucket. */
function preferRecordedCandidate(current: RecordedCandidate, next: RecordedCandidate): RecordedCandidate {
if (next.score > current.score) return next;
if (next.score < current.score) return current;
/**
 * Resolve a collision between two recordings that share one bucket.
 *
 * The incoming candidate always replaces the stored one, so the most recently
 * seen recording (the fresher data) is the one that survives.
 *
 * @param _current - Candidate already stored for the bucket; intentionally ignored.
 * @param next - Candidate seen later for the same bucket.
 * @returns `next`, unchanged.
 */
function preferRecordedCandidate(_current: RecordedCandidate, next: RecordedCandidate): RecordedCandidate {
  // Last one wins.
  return next;
}

/**
 * Run the shared URL-based score adjustments against a recorded request.
 *
 * @param req - Recorded request; only its `url` is read.
 * @param score - Score computed so far.
 * @returns Whatever `applyUrlScoreAdjustments` returns for that URL and score.
 */
function applyCommonEndpointScoreAdjustments(req: RecordedRequest, score: number): number {
  const { url } = req;
  return applyUrlScoreAdjustments(url, score);
}

/** Build a candidate-level dedupe key. */
function getRecordedCandidateKey(candidate: RecordedCandidate): string {
return `${candidate.kind} ${getRecordedRequestKey(candidate.req)}`;
Expand Down Expand Up @@ -327,10 +318,6 @@ function generateReadRecordedJs(): string {

// ── Analysis helpers ───────────────────────────────────────────────────────

/**
 * Score a read request: start from the array-response score, then apply the
 * shared endpoint adjustments for the request.
 *
 * @param req - Recorded request being scored.
 * @param arrayResult - Array discovered in the response body, or `null` when none was found.
 * @returns The adjusted score.
 */
function scoreRequest(req: RecordedRequest, arrayResult: ReturnType<typeof findArrayPath> | null): number {
  const baseScore = scoreArrayResponse(arrayResult);
  return applyCommonEndpointScoreAdjustments(req, baseScore);
}

/** Check whether one recorded request is safe to treat as a write candidate. */
function isWriteCandidate(req: RecordedRequest): boolean {
return ['POST', 'PUT', 'PATCH'].includes(req.method)
Expand All @@ -343,24 +330,18 @@ function isWriteCandidate(req: RecordedRequest): boolean {
&& !Array.isArray(req.responseBody);
}

/**
 * Score a replayable write request.
 *
 * Every write starts from a fixed base of 6 and then receives the shared endpoint
 * adjustments, which is how tracking and heartbeat traffic stays suppressed.
 *
 * @param req - Recorded write request being scored.
 * @returns The adjusted score.
 */
function scoreWriteRequest(req: RecordedRequest): number {
  const baseWriteScore = 6;
  return applyCommonEndpointScoreAdjustments(req, baseWriteScore);
}

/** Analyze recorded requests into read and write candidates. */
/** Analyze recorded requests into read and write candidates, filtering out noise. */
export function analyzeRecordedRequests(requests: RecordedRequest[]): { candidates: RecordedCandidate[] } {
const candidates: RecordedCandidate[] = [];
for (const req of requests) {
if (isNoiseUrl(req.url)) continue;
const arrayResult = findArrayPath(req.responseBody);
if (isWriteCandidate(req)) {
const score = scoreWriteRequest(req);
if (score > 0) candidates.push({ kind: 'write', req, score, arrayResult: null });
candidates.push({ kind: 'write', req, arrayResult: null });
continue;
}
if (arrayResult) {
const score = scoreRequest(req, arrayResult);
if (score > 0) candidates.push({ kind: 'read', req, score, arrayResult });
candidates.push({ kind: 'read', req, arrayResult });
}
}
return { candidates };
Expand Down Expand Up @@ -532,9 +513,9 @@ export function generateRecordedCandidates(
deduped.set(key, current ? preferRecordedCandidate(current, candidate) : candidate);
}

// Sort reads by array item count (richer data first), then take top 5
const selected = [...deduped.values()]
.filter((candidate) => candidate.kind === 'read' ? candidate.score >= 8 : candidate.score >= 6)
.sort((a, b) => b.score - a.score)
.sort((a, b) => (b.arrayResult?.items.length ?? 0) - (a.arrayResult?.items.length ?? 0))
.slice(0, 5);

const usedNames = new Set<string>();
Expand Down Expand Up @@ -741,14 +722,14 @@ function analyzeAndWrite(
const candidates: RecordResult['candidates'] = [];
const usedNames = new Set<string>();

console.log(chalk.bold('\n Captured endpoints (scored):\n'));
console.log(chalk.bold('\n Captured endpoints:\n'));

for (const entry of analysis.candidates.sort((a, b) => b.score - a.score).slice(0, 8)) {
for (const entry of analysis.candidates.sort((a, b) => (b.arrayResult?.items.length ?? 0) - (a.arrayResult?.items.length ?? 0)).slice(0, 8)) {
const itemCount = entry.arrayResult?.items.length ?? 0;
const strategy = entry.kind === 'write'
? 'cookie'
: inferStrategy(detectAuthFromContent(entry.req.url, entry.req.responseBody));
const marker = entry.score >= 15 ? chalk.green('★') : entry.score >= 8 ? chalk.yellow('◆') : chalk.dim('·');
const marker = entry.kind === 'write' ? chalk.magenta('✎') : itemCount > 5 ? chalk.green('★') : chalk.dim('·');
console.log(
` ${marker} ${chalk.white(urlToPattern(entry.req.url))}` +
chalk.dim(` [${strategy}]`) +
Expand Down Expand Up @@ -777,7 +758,7 @@ function analyzeAndWrite(
}

if (candidates.length === 0) {
console.log(chalk.yellow(' No high-confidence candidates found.'));
console.log(chalk.yellow(' No candidates found.'));
console.log(chalk.dim(' Tip: make sure you triggered JSON API calls (open lists, search, scroll).'));
}

Expand Down
14 changes: 8 additions & 6 deletions src/synthesize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ export interface SynthesizeCapability {
name: string;
description: string;
strategy: string;
confidence?: number;
endpoint?: string;
itemPath?: string | null;
recommendedColumns?: string[];
Expand Down Expand Up @@ -67,7 +66,6 @@ export interface SynthesizeCandidateSummary {
name: string;
path: string;
strategy: string;
confidence?: number;
}

export interface SynthesizeResult {
Expand Down Expand Up @@ -98,7 +96,6 @@ export function synthesizeFromExplore(

const site = bundle.manifest.site;
const capabilities = (bundle.capabilities ?? [])
.sort((a, b) => (b.confidence ?? 0) - (a.confidence ?? 0))
.slice(0, opts.top ?? 3);
const candidates: SynthesizeCandidateSummary[] = [];

Expand All @@ -108,7 +105,7 @@ export function synthesizeFromExplore(
const candidate = buildCandidateYaml(site, bundle.manifest, cap, endpoint);
const filePath = path.join(targetDir, `${candidate.name}.yaml`);
fs.writeFileSync(filePath, yaml.dump(candidate.yaml, { sortKeys: false, lineWidth: 120 }));
candidates.push({ name: candidate.name, path: filePath, strategy: cap.strategy, confidence: cap.confidence });
candidates.push({ name: candidate.name, path: filePath, strategy: cap.strategy });
}

const index = { site, target_url: bundle.manifest.target_url, generated_from: exploreDir, candidate_count: candidates.length, candidates };
Expand All @@ -119,7 +116,7 @@ export function synthesizeFromExplore(

export function renderSynthesizeSummary(result: SynthesizeResult): string {
const lines = ['opencli synthesize: OK', `Site: ${result.site}`, `Source: ${result.explore_dir}`, `Candidates: ${result.candidate_count}`];
for (const c of result.candidates ?? []) lines.push(` • ${c.name} (${c.strategy}, ${((c.confidence ?? 0) * 100).toFixed(0)}% confidence) → ${c.path}`);
for (const c of result.candidates ?? []) lines.push(` • ${c.name} (${c.strategy}) → ${c.path}`);
return lines.join('\n');
}

Expand Down Expand Up @@ -147,7 +144,12 @@ function chooseEndpoint(cap: SynthesizeCapability, endpoints: ExploreEndpointArt
const match = endpoints.find((endpoint) => endpoint.pattern === endpointPattern || endpoint.url?.includes(endpointPattern));
if (match) return match;
}
return [...endpoints].sort((a, b) => (b.score ?? 0) - (a.score ?? 0))[0];
// Fallback: prefer endpoint with most data (item count + detected fields)
return [...endpoints].sort((a, b) => {
const aKey = (a.itemCount ?? 0) * 10 + Object.keys(a.detectedFields ?? {}).length;
const bKey = (b.itemCount ?? 0) * 10 + Object.keys(b.detectedFields ?? {}).length;
return bKey - aKey;
})[0];
}

// ── URL templating ─────────────────────────────────────────────────────────
Expand Down