jackwener · jackwener · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026
diff --git a/autoresearch/command-specs.json b/autoresearch/command-specs.json
@@ -0,0 +1,48 @@
+{
+  "version": 1,
+  "kind": "command_incident",
+  "specs": [
+    {
+      "name": "weibo-hot-smoke",
+      "command": "opencli weibo hot --limit 5 -f json",
+      "safety": "read_only",
+      "prerequisites": {},
+      "verify": [
+        { "type": "exitCode", "expected": 0 },
+        { "type": "stdoutContains", "value": "[" },
+        { "type": "jsonField", "path": "[0].title", "matcher": "nonEmpty" }
+      ],
+      "repairScope": ["clis/weibo/**/*.ts", "clis/weibo/**/*.yaml"],
+      "forbidden": ["autoresearch/**", "tests/**"]
+    },
+    {
+      "name": "xiaohongshu-search-smoke",
+      "command": "opencli xiaohongshu search 美食 --limit 3 -f json",
+      "safety": "read_only",
+      "prerequisites": {},
+      "verify": [
+        { "type": "exitCode", "expected": 0 },
+        { "type": "stdoutContains", "value": "[" },
+        { "type": "jsonField", "path": "length", "matcher": "gte", "value": "1" }
+      ],
+      "repairScope": ["clis/xiaohongshu/**/*.ts", "clis/xiaohongshu/**/*.yaml"],
+      "forbidden": ["autoresearch/**", "tests/**"]
+    },
+    {
+      "name": "twitter-reply-fill-smoke",
+      "command": "opencli twitter reply https://x.com/elonmusk/status/1909633658498916828 'opencli smoke test'",
+      "safety": "fill_only",
+      "prerequisites": {
+        "auth": ["twitter"]
+      },
+      "verify": [
+        { "type": "exitCode", "expected": 0 },
+        { "type": "stdoutContains", "value": "dry_run" },
+        { "type": "jsonField", "path": "[0].status", "matcher": "contains", "value": "dry_run" }
+      ],
+      "cleanup": [],
+      "repairScope": ["clis/twitter/reply.ts", "clis/twitter/*.yaml"],
+      "forbidden": ["autoresearch/**", "tests/**"]
+    }
+  ]
+}
diff --git a/autoresearch/commands/fix.ts b/autoresearch/commands/fix.ts
@@ -15,7 +15,9 @@
 import { execSync } from 'node:child_process';
 import { join, dirname } from 'node:path';
 import { fileURLToPath } from 'node:url';
+import { readFileSync } from 'node:fs';
 import { parseArgs } from '../config.js';
+import type { CommandSpecsFile } from '../config.js';
 import { Engine, type ModifyContext } from '../engine.js';
 
 const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -77,10 +79,143 @@ function detectBrokenState(): { verify: string; errors: number; description: str
   return null; // all clean
 }
 
+/** Look up `specName` in ../command-specs.json and return `{ config, spec }`: the incident-mode engine config for that spec plus the spec itself. Logs an error and exits the process with code 1 if no spec has that name. */
+function buildIncidentConfig(specName: string, maxIterations: number) {
+  const specsFile: CommandSpecsFile = JSON.parse( // result is only type-annotated as CommandSpecsFile; no runtime shape validation happens here
+    readFileSync(join(__dirname, '..', 'command-specs.json'), 'utf-8')
+  );
+  const spec = specsFile.specs.find(s => s.name === specName);
+  if (!spec) {
+    console.error(`Spec "${specName}" not found in command-specs.json`);
+    process.exit(1);
+  }
+
+  // The metric is the REGRESSIONS=N count (direction: lower, goal: 0) rather than SCORE=X/Y,
+  // so that infra/precondition failures don't pollute the metric.
+  // The verify command greps for the REGRESSIONS= line to extract only the regression count.
+  return {
+    config: {
+      goal: `Fix command regression: ${spec.command}`,
+      scope: [...spec.repairScope, 'src/**/*.ts'], // the spec's own repair globs plus all .ts files under src/
+      metric: 'regression_count',
+      direction: 'lower' as const,
+      verify: `npx tsx autoresearch/eval-cli.ts --spec ${specName} 2>&1 | grep "^REGRESSIONS=" | tail -1`, // keeps only the last line beginning with REGRESSIONS= from eval-cli's combined stdout/stderr; NOTE(review): specName is interpolated into this shell string unquoted
+      guard: 'npm run build && npm test', // presumably a build+test gate run by the Engine — confirm in engine.ts
+      iterations: maxIterations,
+      minDelta: 1,
+    },
+    spec,
+  };
+}
+
+function buildIncidentPrompt(specName: string, ctx: ModifyContext): string { // builds the prompt text for one fix attempt on the named spec, embedding ctx.currentMetric and ctx.stuckHint
+  const specsFile: CommandSpecsFile = JSON.parse( // command-specs.json is re-read and re-parsed on every call
+    readFileSync(join(__dirname, '..', 'command-specs.json'), 'utf-8')
+  );
+  const spec = specsFile.specs.find(s => s.name === specName);
+  if (!spec) return 'Fix the failing command.'; // generic fallback prompt when no spec has this name
+
+  const forbidden = spec.forbidden.length > 0
+    ? `Do NOT modify: ${spec.forbidden.join(', ')}`
+    : ''; // an empty forbidden list leaves a blank line in the prompt
+
+  return `Command \`${spec.command}\` is failing (regression).
+
+Current regression count: ${ctx.currentMetric}. Goal: 0 regressions.
+
+The command implementation is at: ${spec.repairScope.join(', ')}
+Read the adapter code, understand why the command fails against the live site, and fix it.
+
+Common causes:
+- Site updated DOM selectors
+- URL pattern changed
+- Response format changed
+- Auth/cookie handling broke
+
+${forbidden}
+Fix ONE issue at a time.
+
+${ctx.stuckHint ? `STUCK HINT: ${ctx.stuckHint}` : ''}`;
+}
+
 async function main() {
   const args = parseArgs(process.argv.slice(2));
   const maxIterations = args.iterations ?? 20;
+  const mode = args.mode ?? 'repo';
+  const specName = args.spec;
+
+  if (mode === 'incident') {
+    if (!specName) {
+      console.error('Incident mode requires --spec <name>');
+      process.exit(1);
+    }
+
+    console.log(`\n🔧 AutoResearch Fix — Incident Mode: ${specName}\n`);
+
+    // Pre-flight: run eval-cli once to check if spec has actual regressions
+    const preflight = exec(`npx tsx autoresearch/eval-cli.ts --spec ${specName} 2>&1`);
+    const regressionsMatch = preflight.output.match(/REGRESSIONS=(\d+)/);
+    const regressionCount = regressionsMatch ? parseInt(regressionsMatch[1], 10) : 0;
+
+    if (regressionCount === 0) {
+      // Check if it's because of infra/precondition (exit code 2) or actually passing
+      if (preflight.output.includes('failed_infrastructure')) {
+        console.log('  ⚡ Cannot run: infrastructure failure (browser bridge not connected?)');
+        console.log('  Fix the infrastructure issue first, then retry.\n');
+        process.exit(1);
+      }
+      if (preflight.output.includes('failed_precondition')) {
+        console.log('  ⊘ Cannot run: prerequisite not met (auth/env missing?)');
+        console.log('  Ensure prerequisites are satisfied, then retry.\n');
+        process.exit(1);
+      }
+      console.log('  ✓ Spec already passing — nothing to fix!\n');
+      return;
+    }
+
+    console.log(`  Found: ${regressionCount} regression(s)`);
+
+    const { config } = buildIncidentConfig(specName, maxIterations);
+
+    console.log(`  Command spec: ${specName}`);
+    console.log(`  Verify: ${config.verify}`);
+    console.log(`  Scope: ${config.scope.join(', ')}\n`);
+
+    const logPath = join(ROOT, 'autoresearch-results.tsv');
+    const engine = new Engine(config, logPath, {
+      modify: async (ctx: ModifyContext) => {
+        const prompt = buildIncidentPrompt(specName, ctx);
+        try {
+          // Pass prompt via stdin to avoid shell metacharacter expansion
+          const result = execSync(
+            'claude -p --dangerously-skip-permissions --allowedTools "Bash(npm:*),Bash(npx:*),Read,Edit,Write,Glob,Grep" --output-format text --no-session-persistence',
+            { cwd: ROOT, timeout: 180_000, encoding: 'utf-8', input: prompt, stdio: ['pipe', 'pipe', 'pipe'] }
+          ).trim();
+          const lines = result.split('\n').filter(l => l.trim());
+          return lines[lines.length - 1]?.trim()?.slice(0, 120) || 'incident fix attempt';
+        } catch {
+          return null;
+        }
+      },
+      onStatus: (msg) => console.log(msg),
+    });
+
+    try {
+      const results = await engine.run();
+      const finalMetric = results[results.length - 1]?.metric ?? regressionCount;
+      if (finalMetric === 0) {
+        console.log(`\n✅ Command spec "${specName}" — all regressions fixed!\n`);
+      } else {
+        console.log(`\n⚠ Command spec "${specName}" — ${finalMetric} regression(s) remaining after ${maxIterations} iterations.\n`);
+      }
+    } catch (err: any) {
+      console.error(`\n❌ ${err.message}`);
+      process.exit(1);
+    }
+    return;
+  }
 
+  // ── Repo mode (default, existing behavior) ──
   console.log('\n🔧 AutoResearch Fix — Detecting broken state...\n');
 
   const broken = detectBrokenState();

diff --git a/autoresearch/config.ts b/autoresearch/config.ts
@@ -43,8 +43,8 @@ export interface IterationResult {
 }
 
 /** Parse CLI args into a partial config (missing fields filled by preset or prompts) */
-export function parseArgs(argv: string[]): Partial<AutoResearchConfig> & { preset?: string; task?: string } {
-  const config: Partial<AutoResearchConfig> & { preset?: string; task?: string } = {};
+export function parseArgs(argv: string[]): Partial<AutoResearchConfig> & { preset?: string; task?: string; mode?: string; spec?: string } {
+  const config: Partial<AutoResearchConfig> & { preset?: string; task?: string; mode?: string; spec?: string } = {};
   for (let i = 0; i < argv.length; i++) {
     const arg = argv[i];
     const next = argv[i + 1];
@@ -59,17 +59,69 @@ export function parseArgs(argv: string[]): Partial<AutoResearchConfig> & { prese
       case '--iterations': config.iterations = parseInt(next, 10); i++; break;
       case '--min-delta': config.minDelta = parseFloat(next); i++; break;
       case '--task': config.task = next; i++; break;
+      case '--mode': config.mode = next; i++; break;
+      case '--spec': config.spec = next; i++; break;
     }
   }
   return config;
 }
 
+/* ── Command Incident Spec (v1) ── */
+
+export type VerifyCheck = // one entry of a spec's `verify` array, discriminated by `type`; how each check is evaluated is not shown in this type definition
+  | { type: 'exitCode'; expected: number }
+  | { type: 'stdoutContains'; value: string }
+  | { type: 'jsonField'; path: string; matcher: 'nonEmpty' | 'contains' | 'gte' | 'matches'; value?: string } // `value` is a string even for `gte` (command-specs.json uses "1"); path examples there: "[0].title", "length"
+  | { type: 'pageEval'; js: string; matcher: 'contains' | 'truthy' | 'equals'; value?: string };
+
+export interface CommandIncidentSpec { // one entry of the `specs` array in command-specs.json
+  name: string; // identifier that fix.ts matches against the --spec argument
+  command: string; // command line under test; fix.ts embeds it in the engine goal and in the fix prompt
+  safety: 'read_only' | 'fill_only' | 'publish'; // NOTE(review): how each level is enforced is not visible here — confirm in the evaluator
+  prerequisites?: {
+    auth?: string[]; // e.g. ["twitter"] in command-specs.json
+    env?: Record<string, string>;
+    browserProfile?: string;
+  };
+  setup?: string[];
+  verify: VerifyCheck[];
+  cleanup?: string[];
+  repairScope: string[]; // glob patterns; fix.ts adds them to the engine scope and lists them in the prompt
+  forbidden: string[]; // glob patterns; fix.ts lists them in the prompt as "Do NOT modify" — no other enforcement is visible here
+}
+
+export interface CommandSpecsFile { // top-level shape of command-specs.json as read by fix.ts
+  version: number; // 1 in the current command-specs.json
+  kind: 'command_incident';
+  specs: CommandIncidentSpec[];
+}
+
+export type SpecClassification = // outcome label for one spec run
+  | 'passed'
+  | 'failed_regression'
+  | 'failed_precondition' // fix.ts searches eval-cli output for this literal and reports a prerequisite problem
+  | 'failed_infrastructure' // fix.ts searches eval-cli output for this literal and reports an infrastructure problem
+  | 'skipped';
+
+export interface SpecResult { // result record for one spec run, tagged with its classification
+  name: string;
+  classification: SpecClassification;
+  duration: number; // units are not specified here — TODO confirm (presumably milliseconds)
+  failedChecks?: string[];
+  stdout?: string;
+  stderr?: string;
+  exitCode?: number;
+}
+
 /** Extract a number from command output using common patterns */
 export function extractMetric(output: string): number | null {
   // Try: last line that looks like a number
   const lines = output.trim().split('\n');
   for (let i = lines.length - 1; i >= 0; i--) {
     const line = lines[i].trim();
+    // Match REGRESSIONS=N → N (for incident mode)
+    const regMatch = line.match(/REGRESSIONS[=:]\s*(\d+)/i);
+    if (regMatch) return parseFloat(regMatch[1]);
     // Match standalone numbers: "56", "95.2", "SCORE=56/59" → 56
     const scoreMatch = line.match(/SCORE[=:]\s*(\d+)/i);
     if (scoreMatch) return parseFloat(scoreMatch[1]);