Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ docs/.vitepress/cache
# Database files
*.db
autoresearch/results/
autoresearch-results.tsv
2 changes: 1 addition & 1 deletion autoresearch/commands/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ async function modify(ctx: ModifyContext, config: AutoResearchConfig): Promise<s
`claude -p --dangerously-skip-permissions --allowedTools "Bash(npm:*),Bash(npx:*),Bash(git:*),Read,Edit,Write,Glob,Grep" --output-format text --no-session-persistence "${prompt.replace(/"/g, '\\"')}"`,
{
cwd: ROOT,
timeout: 180_000,
timeout: 300_000,
encoding: 'utf-8',
stdio: ['pipe', 'pipe', 'pipe'],
env: process.env,
Expand Down
249 changes: 249 additions & 0 deletions autoresearch/eval-save.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
#!/usr/bin/env npx tsx
/**
* Layer 4: Save as CLI Testing — "Save as CLI" Pipeline
*
* Tests the full operate init → write adapter → operate verify flow.
* Validates that browser exploration can be crystallized into reusable CLI adapters.
*
* Usage:
* npx tsx autoresearch/eval-save.ts # Run all tasks
* npx tsx autoresearch/eval-save.ts --task hn-top # Run single task
*/

import { execSync } from 'node:child_process';
import { readFileSync, writeFileSync, mkdirSync, readdirSync, existsSync, rmSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { homedir } from 'node:os';

// ES modules have no built-in __dirname; derive this script's directory from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Task definitions (parsed as a JSON array of SaveTask), stored next to this script.
const TASKS_FILE = join(__dirname, 'save-tasks.json');
// Directory that receives one save-NNN.json report per run.
const RESULTS_DIR = join(__dirname, 'results');
// Per-user adapter directory: test adapters are created at and removed from <USER_CLIS_DIR>/<site>/<command>.ts.
const USER_CLIS_DIR = join(homedir(), '.opencli', 'clis');

/** One entry of save-tasks.json: a single init → write → verify scenario. */
interface SaveTask {
  /** Task identifier; matched against `--task <name>` and printed in reports. */
  name: string;
  /** Site segment of the adapter id; the adapter file lives in the `<site>/` folder of the user CLI directory. */
  site: string;
  /** Command segment of the adapter id; the adapter file is named `<command>.ts`. */
  command: string;
  /** Inline adapter code (simple tasks). Ignored when `adapterFile` is set. */
  adapter?: string;
  /** Path to adapter file relative to autoresearch/ dir (complex tasks — avoids JSON escape issues) */
  adapterFile?: string;
  /** Rule used to decide whether the verify output counts as a pass. */
  judge: JudgeCriteria;
  /** `'test'` puts the task in the test set; tasks without it are counted as train. */
  set?: 'test';
  /** Free-form note; not read by this script. */
  note?: string;
}

/**
 * Pass/fail rule applied to the text printed by `operate verify`:
 * - `contains`: the output includes `value` (case-insensitive).
 * - `arrayMinLength`: the output is a JSON array with at least `minLength` items,
 *   or table text with at least `minLength` data rows.
 * - `nonEmpty`: the trimmed output is not empty and not the literal `null` / `undefined`.
 * - `matchesPattern`: the output matches the regular expression whose source is `pattern`.
 */
type JudgeCriteria =
  | { type: 'contains'; value: string }
  | { type: 'arrayMinLength'; minLength: number }
  | { type: 'nonEmpty' }
  | { type: 'matchesPattern'; pattern: string };

/** Outcome of running one SaveTask; serialized as-is into the results file. */
interface TaskResult {
  /** Name of the task this result belongs to. */
  name: string;
  /** Pipeline phase recorded for this result; `judge` means every phase ran. */
  phase: 'init' | 'write' | 'verify' | 'judge';
  /** Whether the task passed. */
  passed: boolean;
  /** Elapsed time in milliseconds, measured with Date.now(). */
  duration: number;
  /** Truncated failure description; absent when the task passed. */
  error?: string;
  /** Which score bucket (train or test) the task is counted in. */
  set: 'train' | 'test';
}

/**
 * Decide whether command output satisfies a judge rule.
 *
 * @param criteria - The rule to apply (see JudgeCriteria).
 * @param output - Text captured from the verify command.
 * @returns `true` when the rule is satisfied; `false` otherwise, including
 *          when evaluating the rule throws (e.g. an invalid regex source).
 */
function judge(criteria: JudgeCriteria, output: string): boolean {
  try {
    if (criteria.type === 'contains') {
      // Case-insensitive substring match.
      const haystack = output.toLowerCase();
      const needle = criteria.value.toLowerCase();
      return haystack.includes(needle);
    }

    if (criteria.type === 'arrayMinLength') {
      // Preferred form: the output is a JSON array.
      let parsed: unknown;
      try {
        parsed = JSON.parse(output);
      } catch {
        parsed = undefined; // not JSON — fall back to counting table rows
      }
      if (Array.isArray(parsed)) {
        return parsed.length >= criteria.minLength;
      }

      // Table text: ignore blank lines and lines that begin with a box-drawing border.
      const borderStarts = ['─', '┌', '└', '├'];
      let rowCount = 0;
      for (const line of output.split('\n')) {
        if (!line.trim()) continue;
        if (borderStarts.some(ch => line.startsWith(ch))) continue;
        rowCount += 1;
      }
      // The first remaining line is the header row, so it is not counted as data.
      const dataRows = rowCount > 1 ? rowCount - 1 : 0;
      return dataRows >= criteria.minLength;
    }

    if (criteria.type === 'nonEmpty') {
      const trimmed = output.trim();
      return !(trimmed.length === 0 || trimmed === 'null' || trimmed === 'undefined');
    }

    if (criteria.type === 'matchesPattern') {
      const pattern = new RegExp(criteria.pattern);
      return pattern.test(output);
    }

    // Unknown rule type (e.g. a typo in the tasks file).
    return false;
  } catch {
    return false;
  }
}

// Parent of this script's directory; used as the working directory for spawned commands.
const PROJECT_ROOT = join(__dirname, '..');

/**
 * Execute a shell command from PROJECT_ROOT and return its trimmed output.
 *
 * A leading `opencli ` is rewritten to `node dist/main.js ` so the evaluation
 * runs against the local build rather than a globally installed opencli.
 * A failing command (non-zero exit, timeout) is not raised to the caller:
 * whatever stdout was captured is returned, else stderr, else an empty string.
 *
 * @param cmd - Shell command line to execute.
 * @param timeout - Time limit in milliseconds handed to execSync.
 * @returns Trimmed stdout on success; captured stdout/stderr (or '') on failure.
 */
function runCommand(cmd: string, timeout = 30000): string {
  const resolvedCmd = cmd.replace(/^opencli /, `node dist/main.js `);

  let stdout: string;
  try {
    stdout = execSync(resolvedCmd, {
      cwd: PROJECT_ROOT,
      timeout,
      encoding: 'utf-8',
      env: process.env,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch (err: any) {
    // execSync attaches the captured streams to the error it throws.
    return err.stdout?.trim() || err.stderr?.trim() || '';
  }
  return stdout.trim();
}

/**
 * Remove the adapter file for `<site>/<command>` from the user CLI directory,
 * then remove the site folder if that leaves it empty.
 *
 * Best effort: any filesystem error is ignored.
 *
 * @param site - Site folder name under USER_CLIS_DIR.
 * @param command - Adapter base name (file is `<command>.ts`).
 */
function cleanupAdapter(site: string, command: string): void {
  const siteFolder = join(USER_CLIS_DIR, site);
  const adapterFile = join(siteFolder, `${command}.ts`);

  try {
    if (existsSync(adapterFile)) {
      rmSync(adapterFile);
    }

    if (!existsSync(siteFolder)) return;

    // Only delete the site folder when nothing else lives in it.
    const leftovers = readdirSync(siteFolder);
    if (leftovers.length !== 0) return;
    rmSync(siteFolder, { recursive: true });
  } catch {
    /* best effort */
  }
}

/**
 * Run one save task through the init → write → verify → judge pipeline.
 *
 * The adapter file for the task is removed before the run (leftovers from a
 * previous run) and again afterwards, whether the task passed or not.
 *
 * @param task - Task definition from the tasks file.
 * @returns The task outcome. On failure, `phase` names the phase that was
 *          running when the task failed and `error` holds a truncated reason.
 */
function runTask(task: SaveTask): TaskResult {
  const start = Date.now();
  const { site, command } = task;
  const adapterPath = join(USER_CLIS_DIR, site, `${command}.ts`);
  const set: TaskResult['set'] = task.set === 'test' ? 'test' : 'train';

  // Phase currently executing; read by `finish` so every result, including
  // one produced from the catch block, reports where the pipeline actually was.
  let phase: TaskResult['phase'] = 'init';

  const finish = (passed: boolean, error?: string): TaskResult => ({
    name: task.name,
    phase,
    passed,
    duration: Date.now() - start,
    error,
    set,
  });

  // Cleanup any leftover from previous runs
  cleanupAdapter(site, command);

  try {
    // Phase 1: init — create scaffold
    const initOutput = runCommand(`opencli operate init ${site}/${command}`);
    if (!existsSync(adapterPath)) {
      return finish(false, `init failed: file not created. Output: ${initOutput.slice(0, 100)}`);
    }

    // Phase 2: write — overwrite scaffold with real adapter code.
    // When the task supplies neither adapterFile nor adapter, the scaffold is verified as-is.
    phase = 'write';
    if (task.adapterFile) {
      // Read from file (complex adapters — avoids JSON string escape issues)
      const code = readFileSync(join(__dirname, task.adapterFile), 'utf-8');
      writeFileSync(adapterPath, code, 'utf-8');
    } else if (task.adapter) {
      writeFileSync(adapterPath, task.adapter, 'utf-8');
    }

    // Phase 3: verify — run the adapter via operate verify
    phase = 'verify';
    const verifyOutput = runCommand(
      `opencli operate verify ${site}/${command}`,
      45000, // longer timeout for network calls
    );
    if (verifyOutput.includes('✗ Adapter failed')) {
      return finish(false, `verify failed: ${verifyOutput.slice(0, 200)}`);
    }

    // Phase 4: judge — check output quality
    phase = 'judge';
    const passed = judge(task.judge, verifyOutput);
    return finish(passed, passed ? undefined : `Judge failed on output: ${verifyOutput.slice(0, 150)}`);
  } catch (err: unknown) {
    // runCommand does not throw, so this is normally a filesystem error from the write phase.
    const message = err instanceof Error ? err.message : String(err);
    return finish(false, message.slice(0, 150));
  } finally {
    // Always cleanup test adapters
    cleanupAdapter(site, command);
  }
}

/**
 * CLI entry point: run all save tasks (or the one named by `--task <name>`),
 * print a per-task line and a summary, write a JSON report into RESULTS_DIR,
 * and finish with a `SCORE=<passed>/<total>` line on stdout.
 *
 * Exits with code 1 when `--task` has no value, when the named task does not
 * exist, or when the tasks file defines no tasks.
 */
function main(): void {
  const args = process.argv.slice(2);

  // Resolve the optional --task filter; a flag without a value is an error
  // rather than a silent fall-back to running every task.
  let singleTask: string | null = null;
  const taskFlagIndex = args.indexOf('--task');
  if (taskFlagIndex !== -1) {
    const value = args[taskFlagIndex + 1];
    if (!value || value.startsWith('--')) {
      console.error('Missing task name after --task.');
      process.exit(1);
    }
    singleTask = value;
  }

  const allTasks: SaveTask[] = JSON.parse(readFileSync(TASKS_FILE, 'utf-8'));
  const tasks = singleTask ? allTasks.filter(t => t.name === singleTask) : allTasks;

  if (tasks.length === 0) {
    console.error(singleTask ? `Task "${singleTask}" not found.` : `No tasks defined in ${TASKS_FILE}.`);
    process.exit(1);
  }

  console.log(`\n🧪 Layer 4: Save as CLI — ${tasks.length} tasks\n`);

  const results: TaskResult[] = [];

  for (let i = 0; i < tasks.length; i++) {
    const task = tasks[i];
    process.stdout.write(` [${i + 1}/${tasks.length}] ${task.name}...`);

    const result = runTask(task);
    results.push(result);

    const icon = result.passed ? '✓' : '✗';
    const phase = result.passed ? '' : ` (${result.phase})`;
    console.log(` ${icon}${phase} (${(result.duration / 1000).toFixed(1)}s)`);
  }

  // Summary
  const trainResults = results.filter(r => r.set === 'train');
  const testResults = results.filter(r => r.set === 'test');
  const totalPassed = results.filter(r => r.passed).length;
  const trainPassed = trainResults.filter(r => r.passed).length;
  const testPassed = testResults.filter(r => r.passed).length;
  const totalDuration = results.reduce((s, r) => s + r.duration, 0);

  console.log(`\n${'─'.repeat(50)}`);
  console.log(` Score: ${totalPassed}/${results.length} (train: ${trainPassed}/${trainResults.length}, test: ${testPassed}/${testResults.length})`);
  console.log(` Time: ${Math.round(totalDuration / 1000)}s`);

  const failures = results.filter(r => !r.passed);
  if (failures.length > 0) {
    console.log(`\n Failures:`);
    for (const f of failures) {
      console.log(` ✗ ${f.name} [${f.phase}]: ${f.error ?? 'unknown'}`);
    }
  }
  console.log('');

  // Save result. The round number continues from the highest existing
  // save-NNN.json, so a deleted earlier report never causes a later one to be
  // overwritten (counting files would reuse an existing number in that case).
  mkdirSync(RESULTS_DIR, { recursive: true });
  const lastRound = readdirSync(RESULTS_DIR).reduce((highest, file) => {
    const match = /^save-(\d+)\.json$/.exec(file);
    return match ? Math.max(highest, Number(match[1])) : highest;
  }, 0);
  const roundNum = String(lastRound + 1).padStart(3, '0');
  const resultPath = join(RESULTS_DIR, `save-${roundNum}.json`);
  writeFileSync(resultPath, JSON.stringify({
    timestamp: new Date().toISOString(),
    score: `${totalPassed}/${results.length}`,
    trainScore: `${trainPassed}/${trainResults.length}`,
    testScore: `${testPassed}/${testResults.length}`,
    duration: `${Math.round(totalDuration / 1000)}s`,
    tasks: results,
  }, null, 2), 'utf-8');
  console.log(` Results saved to: ${resultPath}`);
  // Keep this as the final stdout line: the save-reliability preset reads the score via `tail -1`.
  console.log(`\nSCORE=${totalPassed}/${results.length}`);
}

main();
3 changes: 3 additions & 0 deletions autoresearch/presets/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,21 @@ export { skillQuality } from './skill-quality.js';
export { v2exReliability } from './v2ex-reliability.js';
export { zhihuReliability } from './zhihu-reliability.js';
export { combinedReliability } from './combined-reliability.js';
export { saveReliability } from './save-reliability.js';

import type { AutoResearchConfig } from '../config.js';
import { operateReliability } from './operate-reliability.js';
import { skillQuality } from './skill-quality.js';
import { v2exReliability } from './v2ex-reliability.js';
import { zhihuReliability } from './zhihu-reliability.js';
import { combinedReliability } from './combined-reliability.js';
import { saveReliability } from './save-reliability.js';

/**
 * Built-in presets keyed by preset name.
 * NOTE(review): how the keys are consumed (e.g. a CLI argument) is not visible
 * in this file — confirm before renaming a key.
 */
export const PRESETS: Record<string, AutoResearchConfig> = {
  'operate-reliability': operateReliability,
  'skill-quality': skillQuality,
  'v2ex-reliability': v2exReliability,
  'zhihu-reliability': zhihuReliability,
  'combined': combinedReliability,
  'save-reliability': saveReliability,
};
26 changes: 26 additions & 0 deletions autoresearch/presets/save-reliability.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/**
* Preset: Save as CLI Reliability
*
* Optimizes the "Save as CLI" pipeline: operate init → write adapter → run.
* Covers PUBLIC (no auth) and COOKIE (browser session) strategies.
* Metric: number of passing save-tasks.
*/

import type { AutoResearchConfig } from '../config.js';

// NOTE(review): the exact meaning of each field is defined by AutoResearchConfig
// in ../config.js, which is not visible here; the comments below describe only
// what the values themselves contain.
export const saveReliability: AutoResearchConfig = {
  goal: 'Increase "Save as CLI" pipeline pass rate to 100%. The flow is: operate init creates a scaffold, user writes adapter code, opencli discovers and runs it. Covers both PUBLIC (fetch API) and COOKIE (browser session) strategies. Focus on: init template correctness, user CLI discovery, adapter loading, verify command robustness, and browser session handling.',
  // Source files, the operate skill doc, and the save task/adapter fixtures.
  scope: [
    'src/cli.ts',
    'src/discovery.ts',
    'src/registry.ts',
    'skills/opencli-operate/SKILL.md',
    'autoresearch/save-tasks.json',
    'autoresearch/save-adapters/*.ts',
  ],
  metric: 'pass_count',
  direction: 'higher',
  // eval-save.ts prints "SCORE=<passed>/<total>" as its last line; `tail -1` keeps only that line.
  verify: 'npx tsx autoresearch/eval-save.ts 2>&1 | tail -1',
  guard: 'npm run build',
  minDelta: 1,
};
11 changes: 11 additions & 0 deletions autoresearch/run-save.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Layer 4 ("Save as CLI") runner.
# Exercises the full save pipeline: operate init → write adapter → operate verify.
# All arguments are forwarded to eval-save.ts (e.g. --task <name>).
set -euo pipefail

# Run from the parent of this script's directory so the relative path below resolves.
cd "$(dirname "$0")/.."

# Banner: two text lines followed by one blank line.
printf '%s\n' \
  "=== Layer 4: Save as CLI ===" \
  "Testing: init → write → verify pipeline" \
  ""

npx tsx autoresearch/eval-save.ts "$@"
64 changes: 64 additions & 0 deletions autoresearch/save-adapters/xhs-explore-deep.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { cli, Strategy } from '@jackwener/opencli/registry';

/** Raw note fields as returned by the in-page extraction script. */
interface ScrapedNote {
  title: string;
  author: string;
  likesRaw: string;
  url: string;
}

/**
 * Convert a like-count label into an integer.
 *
 * Plain digits (optionally with thousands separators) are parsed as-is, and a
 * label without any digits yields 0. Abbreviated labels are scaled by their
 * unit, so "1.2万" becomes 12000 instead of the 12 that stripping non-digits
 * would produce.
 * NOTE(review): assumes abbreviated counts use 万/w (×10,000) or 千/k (×1,000) —
 * confirm against the live page.
 */
function parseLikeCount(raw: string): number {
  const match = /([0-9]+(?:\.[0-9]+)?)\s*([万wWkK千])?/.exec(raw.replace(/,/g, ''));
  if (!match) return 0;
  const unit = match[2] ?? '';
  let scale = 1;
  if (unit === '万' || unit === 'w' || unit === 'W') scale = 10000;
  else if (unit === '千' || unit === 'k' || unit === 'K') scale = 1000;
  return Math.round(parseFloat(match[1]) * scale);
}

// Adapter: scrape the Xiaohongshu explore feed, de-duplicate notes by id,
// and return the top `limit` notes ordered by like count (descending).
cli({
  site: 'test-xhs',
  name: 'explore-deep',
  description: '小红书探索页深度提取 + 去重 + 按互动排序',
  domain: 'www.xiaohongshu.com',
  strategy: Strategy.COOKIE,
  browser: true,
  args: [
    { name: 'limit', type: 'int', default: 15, help: 'Number of items' },
  ],
  columns: ['rank', 'title', 'author', 'likes', 'url'],
  func: async (page, kwargs) => {
    const limit = kwargs.limit ?? 15;
    // Step 1: Navigate to explore page
    await page.goto('https://www.xiaohongshu.com/explore');
    // Step 2: Wait for initial content via MutationObserver.
    // Resolves true once a note card exists, or false after 8s; either way extraction proceeds.
    await page.evaluate(`new Promise(function(resolve) {
      var check = function() { return document.querySelectorAll('section.note-item').length > 0; };
      if (check()) return resolve(true);
      var observer = new MutationObserver(function(m, obs) { if (check()) { obs.disconnect(); resolve(true); } });
      observer.observe(document.body, { childList: true, subtree: true });
      setTimeout(function() { observer.disconnect(); resolve(false); }, 8000);
    })`);
    // Step 3: Multi-round adaptive scroll (early stop when no new content,
    // or once twice the requested number of cards is loaded)
    let prevCount = 0;
    for (let round = 0; round < 5; round++) {
      await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
      await page.wait(1.5);
      const count = await page.evaluate('document.querySelectorAll("section.note-item").length') as number;
      if (count >= limit * 2 || count === prevCount) break;
      prevCount = count;
    }
    // Step 4: Extract with noteId deduplication. The like label is returned as raw
    // text and parsed outside the page by parseLikeCount.
    const result = await page.evaluate(`(function() {
      var seen = {};
      var items = [];
      document.querySelectorAll('section.note-item').forEach(function(el) {
        var linkEl = el.querySelector('a[href]');
        var href = linkEl ? linkEl.getAttribute('href') || '' : '';
        var m = href.match(/explore\\/([a-f0-9]+)/);
        var noteId = m ? m[1] : '';
        if (!noteId || seen[noteId]) return;
        seen[noteId] = true;
        var titleEl = el.querySelector('.title span') || el.querySelector('a.title');
        var authorEl = el.querySelector('.author-wrapper .name') || el.querySelector('.author .name');
        var likesEl = el.querySelector('.like-wrapper .count') || el.querySelector('.interact-container .count');
        var title = (titleEl ? titleEl.textContent || '' : '').trim();
        var author = (authorEl ? authorEl.textContent || '' : '').trim();
        var likesRaw = (likesEl ? likesEl.textContent || '0' : '0').trim();
        items.push({ title: title, author: author, likesRaw: likesRaw, url: 'https://www.xiaohongshu.com/explore/' + noteId });
      });
      return items;
    })()`);
    // Step 5: Parse like counts and sort descending. A non-array result
    // (e.g. the evaluate call returned nothing) is treated as no notes.
    const notes = (Array.isArray(result) ? result : []) as ScrapedNote[];
    const sorted = notes
      .map((note) => ({ ...note, likes: parseLikeCount(String(note.likesRaw ?? '')) }))
      .sort((a, b) => b.likes - a.likes);
    // Step 6: Slice and format
    return sorted.slice(0, limit).map((item, i) => ({
      rank: i + 1, title: item.title, author: item.author, likes: String(item.likes), url: item.url,
    }));
  },
});
Loading