Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions src/sdk/analyze.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { describe, it, expect } from 'vitest';
import { filterOutOfRangeFindings } from './analyze.js';
import type { Finding } from '../types/index.js';

/**
 * Build a test finding anchored to a single line of `file.ts`.
 *
 * @param startLine - Line the finding's location points at; also embedded in the title.
 * @param id - Identifier for the finding; defaults to `f-<startLine>`.
 * @returns A medium-severity, high-confidence finding located at `startLine`.
 */
function makeFinding(startLine: number, id = `f-${startLine}`): Finding {
  const location = { path: 'file.ts', startLine };
  const finding: Finding = {
    id,
    severity: 'medium',
    confidence: 'high',
    title: `Finding at line ${startLine}`,
    description: 'test',
    location,
  };
  return finding;
}

/**
 * Build a test finding that has no `location`, i.e. one that is not tied to
 * any specific line.
 *
 * @param id - Identifier for the finding; defaults to `'general'`.
 * @returns A low-severity finding without a location.
 */
function makeGeneralFinding(id = 'general'): Finding {
  const finding: Finding = {
    id,
    severity: 'low',
    title: 'General finding',
    description: 'no location',
  };
  return finding;
}

// Unit tests for filterOutOfRangeFindings: findings whose startLine lies
// inside the inclusive hunk range (or that have no location) are kept,
// everything else is reported as dropped.
describe('filterOutOfRangeFindings', () => {
  // Inclusive line range shared by every case below.
  const hunkRange = { start: 10, end: 20 };

  it('preserves finding within hunk range', () => {
    const input = [makeFinding(15)];
    const result = filterOutOfRangeFindings(input, hunkRange);
    expect(result.filtered).toEqual(input);
    expect(result.dropped).toEqual([]);
  });

  it('preserves findings at range boundaries', () => {
    // Both bounds are inclusive, so lines 10 and 20 must survive.
    const atStart = makeFinding(10, 'at-start');
    const atEnd = makeFinding(20, 'at-end');
    const result = filterOutOfRangeFindings([atStart, atEnd], hunkRange);
    expect(result.filtered).toHaveLength(2);
    expect(result.dropped).toEqual([]);
  });

  it('drops finding below hunk start', () => {
    const input = [makeFinding(5)];
    const result = filterOutOfRangeFindings(input, hunkRange);
    expect(result.filtered).toEqual([]);
    expect(result.dropped).toEqual(input);
  });

  it('drops finding above hunk end', () => {
    const input = [makeFinding(25)];
    const result = filterOutOfRangeFindings(input, hunkRange);
    expect(result.filtered).toEqual([]);
    expect(result.dropped).toEqual(input);
  });

  it('preserves finding with no location', () => {
    // General findings carry no location and are never range-checked.
    const input = [makeGeneralFinding()];
    const result = filterOutOfRangeFindings(input, hunkRange);
    expect(result.filtered).toEqual(input);
    expect(result.dropped).toEqual([]);
  });

  it('filters mixed set correctly', () => {
    const inside = makeFinding(15, 'in-range');
    const tooLow = makeFinding(3, 'below');
    const tooHigh = makeFinding(50, 'above');
    const unanchored = makeGeneralFinding('general');

    const result = filterOutOfRangeFindings(
      [inside, tooLow, tooHigh, unanchored],
      hunkRange
    );

    // Relative input order is expected to be preserved in both outputs.
    expect(result.filtered).toEqual([inside, unanchored]);
    expect(result.dropped).toEqual([tooLow, tooHigh]);
  });

  it('returns empty arrays for empty input', () => {
    const result = filterOutOfRangeFindings([], hunkRange);
    expect(result.filtered).toEqual([]);
    expect(result.dropped).toEqual([]);
  });
});
62 changes: 53 additions & 9 deletions src/sdk/analyze.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { query, type SDKResultMessage } from '@anthropic-ai/claude-agent-sdk';
import type { SkillDefinition } from '../config/schema.js';
import type { Finding, RetryConfig } from '../types/index.js';
import type { HunkWithContext } from '../diff/index.js';
import { getHunkLineRange, type HunkWithContext } from '../diff/index.js';
import { Sentry, emitExtractionMetrics, emitRetryMetric, emitDedupMetrics } from '../sentry.js';
import { SkillRunnerError, WardenAuthenticationError, isRetryableError, isAuthenticationError, isAuthenticationErrorMessage } from './errors.js';
import { DEFAULT_RETRY_CONFIG, calculateRetryDelay, sleep } from './retry.js';
Expand Down Expand Up @@ -79,6 +79,33 @@ async function parseHunkOutput(
};
}

/**
 * Partition findings by whether they are anchored inside a hunk.
 *
 * A finding is kept when it has no `location` (a general finding) or when its
 * `location.startLine` lies within `hunkRange`, both bounds inclusive. Every
 * other finding is reported as dropped. Only `startLine` is inspected.
 * Relative input order is preserved in both output arrays, and the input
 * array itself is not modified.
 *
 * @param findings - Findings to partition.
 * @param hunkRange - Inclusive line range of the hunk.
 * @returns `filtered` (findings to keep) and `dropped` (out-of-range findings).
 */
export function filterOutOfRangeFindings(
  findings: Finding[],
  hunkRange: { start: number; end: number }
): { filtered: Finding[]; dropped: Finding[] } {
  const { start, end } = hunkRange;
  const filtered: Finding[] = [];
  const dropped: Finding[] = [];

  for (const candidate of findings) {
    const anchor = candidate.location;
    // Findings without a location are general and always kept.
    const keep = !anchor || (anchor.startLine >= start && anchor.startLine <= end);
    (keep ? filtered : dropped).push(candidate);
  }

  return { filtered, dropped };
}

/** Buffered data for a single SDK turn, flushed into gen_ai.chat child spans. */
interface TurnData {
toolUses: { id: string; name: string }[];
Expand Down Expand Up @@ -325,7 +352,7 @@ async function analyzeHunk(
callbacks?: HunkAnalysisCallbacks,
prContext?: PRPromptContext
): Promise<HunkAnalysisResult> {
const lineRange = callbacks?.lineRange ?? getHunkLineRange(hunkCtx);
const lineRange = callbacks?.lineRange ?? formatHunkLineRange(hunkCtx);

return Sentry.startSpan(
{
Expand Down Expand Up @@ -419,13 +446,30 @@ async function analyzeHunk(

const parseResult = await parseHunkOutput(resultMessage, hunkCtx.filename, apiKey);

// Filter findings outside hunk line range (defense-in-depth)
const hunkRange = getHunkLineRange(hunkCtx.hunk);
const { filtered: filteredFindings, dropped } = filterOutOfRangeFindings(parseResult.findings, hunkRange);
if (dropped.length > 0) {
Sentry.addBreadcrumb({
category: 'finding.out_of_range',
message: `Dropped ${dropped.length} finding(s) outside hunk range ${hunkRange.start}-${hunkRange.end}`,
level: 'warning',
data: {
skill: skill.name,
filename: hunkCtx.filename,
hunkRange,
droppedLines: dropped.map((f) => f.location?.startLine),
},
});
}

// Emit extraction metrics
emitExtractionMetrics(skill.name, parseResult.extractionMethod, parseResult.findings.length);
emitExtractionMetrics(skill.name, parseResult.extractionMethod, filteredFindings.length);

// Notify about extraction result (debug mode)
callbacks?.onExtractionResult?.(
callbacks.lineRange,
parseResult.findings.length,
filteredFindings.length,
parseResult.extractionMethod
);

Expand All @@ -439,10 +483,10 @@ async function analyzeHunk(
}

span.setAttribute('hunk.failed', false);
span.setAttribute('finding.count', parseResult.findings.length);
span.setAttribute('finding.count', filteredFindings.length);

return {
findings: parseResult.findings,
findings: filteredFindings,
usage: aggregateUsage(accumulatedUsage),
failed: false,
extractionFailed: parseResult.extractionFailed,
Expand Down Expand Up @@ -528,9 +572,9 @@ async function analyzeHunk(
}

/**
* Get line range string for a hunk.
* Format a hunk's line range as a display string (e.g. "10-20" or "10").
*/
function getHunkLineRange(hunk: HunkWithContext): string {
function formatHunkLineRange(hunk: HunkWithContext): string {
const start = hunk.hunk.newStart;
const end = start + hunk.hunk.newCount - 1;
return start === end ? `${start}` : `${start}-${end}`;
Expand Down Expand Up @@ -578,7 +622,7 @@ export async function analyzeFile(
for (const [hunkIndex, hunk] of file.hunks.entries()) {
if (abortController?.signal.aborted) break;

const lineRange = getHunkLineRange(hunk);
const lineRange = formatHunkLineRange(hunk);
callbacks?.onHunkStart?.(hunkIndex + 1, file.hunks.length, lineRange);

const hunkCallbacks: HunkAnalysisCallbacks | undefined = callbacks
Expand Down
5 changes: 4 additions & 1 deletion src/sdk/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ You are a code analysis agent for Warden. You evaluate code changes against spec
You have access to these tools to gather context:
- **Read**: Check related files to understand context
- **Grep**: Search for patterns to trace data flow or find related code

Use these tools to gather context that helps you evaluate changes within the hunk. All findings must still reference lines within the hunk being analyzed.
</tools>`,

`<skill_instructions>
Expand Down Expand Up @@ -76,12 +78,13 @@ Requirements:
- Return ONLY valid JSON starting with {"findings":
- "findings" array can be empty if no issues found
- "location.path" is auto-filled from context - just provide startLine (and optionally endLine). Omit location entirely for general findings not about a specific line.
- "location.startLine" MUST be within the hunk line range (shown in the "## Hunk" header). If the issue originates in surrounding code, anchor to the nearest changed line in the hunk and note the actual location in the description.
- "confidence" reflects how certain you are this is a real issue given the codebase context
- "suggestedFix" is optional - only include when you can provide a complete, correct fix **to the file being analyzed**. Omit suggestedFix if:
- The fix would be incomplete or you're uncertain about the correct solution
- The fix requires changes to a different file or a new file (describe the fix in the description field instead)
- Keep descriptions SHORT (1-2 sentences max) - avoid lengthy explanations
- Be concise - focus only on the changes shown
- Focus your analysis on the code changes in the hunk. Surrounding context and tool results are for understanding only -- all findings must reference lines within the hunk range.
</output_format>`,
];

Expand Down