getsentry · dcramer · Feb 17, 2026 · Feb 17, 2026 · Feb 17, 2026
diff --git a/src/sdk/analyze.test.ts b/src/sdk/analyze.test.ts
@@ -0,0 +1,80 @@
+import { describe, it, expect } from 'vitest';
+import { filterOutOfRangeFindings } from './analyze.js';
+import type { Finding } from '../types/index.js';
+
+/**
+ * Builds a located test finding for `file.ts`; `id` defaults to `f-<startLine>`.
+ */
+function makeFinding(startLine: number, id = `f-${startLine}`): Finding {
+  const severity = 'medium';
+  const confidence = 'high';
+  const title = `Finding at line ${startLine}`;
+  const location = { path: 'file.ts', startLine };
+  return { id, severity, confidence, title, description: 'test', location };
+}
+
+/** Builds a test finding that has no `location`, i.e. a general finding. */
+function makeGeneralFinding(id = 'general'): Finding {
+  const severity = 'low';
+  const title = 'General finding';
+  const description = 'no location';
+
+  return { id, severity, title, description };
+}
+
+describe('filterOutOfRangeFindings', () => {
+  const hunkRange = { start: 10, end: 20 }; // shared fixture; both bounds count as in range
+
+  it('preserves finding within hunk range', () => {
+    const findings = [makeFinding(15)];
+    const { filtered, dropped } = filterOutOfRangeFindings(findings, hunkRange);
+    expect(filtered).toEqual(findings);
+    expect(dropped).toEqual([]);
+  });
+
+  it('preserves findings at range boundaries', () => {
+    const findings = [makeFinding(10, 'at-start'), makeFinding(20, 'at-end')];
+    const { filtered, dropped } = filterOutOfRangeFindings(findings, hunkRange);
+    expect(filtered).toEqual(findings); // exact elements and order, not just the count
+    expect(dropped).toEqual([]);
+  });
+
+  it('drops finding below hunk start', () => {
+    const findings = [makeFinding(5)];
+    const { filtered, dropped } = filterOutOfRangeFindings(findings, hunkRange);
+    expect(filtered).toEqual([]);
+    expect(dropped).toEqual(findings);
+  });
+
+  it('drops finding above hunk end', () => {
+    const findings = [makeFinding(25)];
+    const { filtered, dropped } = filterOutOfRangeFindings(findings, hunkRange);
+    expect(filtered).toEqual([]);
+    expect(dropped).toEqual(findings);
+  });
+
+  it('preserves finding with no location', () => {
+    const findings = [makeGeneralFinding()];
+    const { filtered, dropped } = filterOutOfRangeFindings(findings, hunkRange);
+    expect(filtered).toEqual(findings);
+    expect(dropped).toEqual([]);
+  });
+
+  it('filters mixed set correctly', () => {
+    const inRange = makeFinding(15, 'in-range');
+    const belowRange = makeFinding(3, 'below');
+    const aboveRange = makeFinding(50, 'above');
+    const general = makeGeneralFinding('general');
+    const findings = [inRange, belowRange, aboveRange, general];
+
+    const { filtered, dropped } = filterOutOfRangeFindings(findings, hunkRange);
+    expect(filtered).toEqual([inRange, general]); // input order is kept within each bucket
+    expect(dropped).toEqual([belowRange, aboveRange]);
+  });
+
+  it('returns empty arrays for empty input', () => {
+    const { filtered, dropped } = filterOutOfRangeFindings([], hunkRange);
+    expect(filtered).toEqual([]);
+    expect(dropped).toEqual([]);
+  });
+});
diff --git a/src/sdk/analyze.ts b/src/sdk/analyze.ts
@@ -1,7 +1,7 @@
 import { query, type SDKResultMessage } from '@anthropic-ai/claude-agent-sdk';
 import type { SkillDefinition } from '../config/schema.js';
 import type { Finding, RetryConfig } from '../types/index.js';
-import type { HunkWithContext } from '../diff/index.js';
+import { getHunkLineRange, type HunkWithContext } from '../diff/index.js';
 import { Sentry, emitExtractionMetrics, emitRetryMetric, emitDedupMetrics } from '../sentry.js';
 import { SkillRunnerError, WardenAuthenticationError, isRetryableError, isAuthenticationError, isAuthenticationErrorMessage } from './errors.js';
 import { DEFAULT_RETRY_CONFIG, calculateRetryDelay, sleep } from './retry.js';
@@ -79,6 +79,33 @@ async function parseHunkOutput(
   };
 }
 
+/**
+ * Partition findings by whether `location.startLine` lies inside `hunkRange`.
+ * Both bounds are inclusive, and input order is preserved in each output array.
+ * A finding without a `location` is a general finding and is always kept.
+ *
+ * @param findings - Findings to partition; the array itself is not modified.
+ * @param hunkRange - Inclusive start and end line numbers of the hunk.
+ * @returns `filtered` (kept findings) and `dropped` (findings outside the range).
+ */
+export function filterOutOfRangeFindings(
+  findings: Finding[],
+  hunkRange: { start: number; end: number }
+): { filtered: Finding[]; dropped: Finding[] } {
+  const { start, end } = hunkRange;
+  const kept: Finding[] = [];
+  const rejected: Finding[] = [];
+
+  for (const candidate of findings) {
+    const { location } = candidate;
+    // Written as a positive range test so a non-numeric startLine is still dropped.
+    const inRange = !location || (location.startLine >= start && location.startLine <= end);
+    (inRange ? kept : rejected).push(candidate);
+  }
+
+  return { filtered: kept, dropped: rejected };
+}
+
 /** Buffered data for a single SDK turn, flushed into gen_ai.chat child spans. */
 interface TurnData {
   toolUses: { id: string; name: string }[];
@@ -325,7 +352,7 @@ async function analyzeHunk(
   callbacks?: HunkAnalysisCallbacks,
   prContext?: PRPromptContext
 ): Promise<HunkAnalysisResult> {
-  const lineRange = callbacks?.lineRange ?? getHunkLineRange(hunkCtx);
+  const lineRange = callbacks?.lineRange ?? formatHunkLineRange(hunkCtx);
 
   return Sentry.startSpan(
     {
@@ -419,13 +446,30 @@ async function analyzeHunk(
 
           const parseResult = await parseHunkOutput(resultMessage, hunkCtx.filename, apiKey);
 
+          // Filter findings outside hunk line range (defense-in-depth)
+          const hunkRange = getHunkLineRange(hunkCtx.hunk);
+          const { filtered: filteredFindings, dropped } = filterOutOfRangeFindings(parseResult.findings, hunkRange);
+          if (dropped.length > 0) {
+            Sentry.addBreadcrumb({
+              category: 'finding.out_of_range',
+              message: `Dropped ${dropped.length} finding(s) outside hunk range ${hunkRange.start}-${hunkRange.end}`,
+              level: 'warning',
+              data: {
+                skill: skill.name,
+                filename: hunkCtx.filename,
+                hunkRange,
+                droppedLines: dropped.map((f) => f.location?.startLine),
+              },
+            });
+          }
+
           // Emit extraction metrics
-          emitExtractionMetrics(skill.name, parseResult.extractionMethod, parseResult.findings.length);
+          emitExtractionMetrics(skill.name, parseResult.extractionMethod, filteredFindings.length);
 
           // Notify about extraction result (debug mode)
           callbacks?.onExtractionResult?.(
             callbacks.lineRange,
-            parseResult.findings.length,
+            filteredFindings.length,
             parseResult.extractionMethod
           );
 
@@ -439,10 +483,10 @@ async function analyzeHunk(
           }
 
           span.setAttribute('hunk.failed', false);
-          span.setAttribute('finding.count', parseResult.findings.length);
+          span.setAttribute('finding.count', filteredFindings.length);
 
           return {
-            findings: parseResult.findings,
+            findings: filteredFindings,
             usage: aggregateUsage(accumulatedUsage),
             failed: false,
             extractionFailed: parseResult.extractionFailed,
@@ -528,9 +572,9 @@ async function analyzeHunk(
 }
 
 /**
- * Get line range string for a hunk.
+ * Format a hunk's line range as a display string (e.g. "10-20" or "10").
  */
-function getHunkLineRange(hunk: HunkWithContext): string {
+function formatHunkLineRange(hunk: HunkWithContext): string {
   const start = hunk.hunk.newStart;
   const end = start + hunk.hunk.newCount - 1;
   return start === end ? `${start}` : `${start}-${end}`;
@@ -578,7 +622,7 @@ export async function analyzeFile(
       for (const [hunkIndex, hunk] of file.hunks.entries()) {
         if (abortController?.signal.aborted) break;
 
-        const lineRange = getHunkLineRange(hunk);
+        const lineRange = formatHunkLineRange(hunk);
         callbacks?.onHunkStart?.(hunkIndex + 1, file.hunks.length, lineRange);
 
         const hunkCallbacks: HunkAnalysisCallbacks | undefined = callbacks

diff --git a/src/sdk/prompt.ts b/src/sdk/prompt.ts
@@ -36,6 +36,8 @@ You are a code analysis agent for Warden. You evaluate code changes against spec
 You have access to these tools to gather context:
 - **Read**: Check related files to understand context
 - **Grep**: Search for patterns to trace data flow or find related code
+
+Use these tools to gather context that helps you evaluate changes within the hunk. All findings must still reference lines within the hunk being analyzed.
 </tools>`,
 
     `<skill_instructions>
@@ -76,12 +78,13 @@ Requirements:
 - Return ONLY valid JSON starting with {"findings":
 - "findings" array can be empty if no issues found
 - "location.path" is auto-filled from context - just provide startLine (and optionally endLine). Omit location entirely for general findings not about a specific line.
+- "location.startLine" MUST be within the hunk line range (shown in the "## Hunk" header). If the issue originates in surrounding code, anchor to the nearest changed line in the hunk and note the actual location in the description.
 - "confidence" reflects how certain you are this is a real issue given the codebase context
 - "suggestedFix" is optional - only include when you can provide a complete, correct fix **to the file being analyzed**. Omit suggestedFix if:
   - The fix would be incomplete or you're uncertain about the correct solution
   - The fix requires changes to a different file or a new file (describe the fix in the description field instead)
 - Keep descriptions SHORT (1-2 sentences max) - avoid lengthy explanations
-- Be concise - focus only on the changes shown
+- Focus your analysis on the code changes in the hunk. Surrounding context and tool results are for understanding only -- all findings must reference lines within the hunk range.
 </output_format>`,
   ];