Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .changeset/fix-thinking-block-handling.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
"@centralinc/browseragent": patch
---

Fix thinking block handling to prevent 400 errors when using extended thinking

**Problem:** Using `thinkingBudget` caused 400 errors from Anthropic's API with the message: "Expected thinking or redacted_thinking, but found text. When thinking is enabled, a final assistant message must start with a thinking block."

**Root Cause:** The `BetaThinkingBlock` type incorrectly defined `thinking` as a config object instead of a string containing the actual thinking content.

**Changes:**
- Fixed `BetaThinkingBlock` type: `thinking` is now correctly typed as `string`
- Added `BetaRedactedThinkingBlock` type for handling redacted thinking responses
- Updated `responseToParams` to properly parse both `thinking` and `redacted_thinking` blocks
- Added explicit block ordering when constructing assistant messages to ensure thinking blocks always come first (API requirement)
- Added test examples for extended thinking validation

**Usage:** Extended thinking now works correctly across multi-turn conversations:

```typescript
const result = await agent.execute(
"Complex task requiring reasoning",
undefined,
{
thinkingBudget: 4096,
maxTokens: 16384,
}
);
```
111 changes: 111 additions & 0 deletions examples/test-extended-thinking-multi-turn.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/**
* Test Extended Thinking with Multi-Turn Tool Use
*
* This test exercises the thinking block handling across multiple tool calls,
* which is where the original 400 error would occur (issue #12).
*
* The agent needs to:
* 1. Navigate to a page (tool use)
* 2. Take screenshot and analyze (tool use)
* 3. Click on something (tool use)
* 4. Verify the result (tool use)
*
* Each turn with thinking enabled requires proper thinking block ordering.
*/

import { config } from "dotenv";
config({ path: "./examples/.env" });

import { chromium } from "playwright";
import { ComputerUseAgent } from "../index";
import { SimpleLogger } from "../utils/logger";

/**
 * Runs the multi-turn extended thinking scenario against a live browser.
 *
 * Launches Chromium, opens Wikipedia's main page, and asks the agent to carry
 * out a multi-step task with `thinkingBudget` set. On success the result is
 * printed; on failure diagnostic hints are printed and the process exit code
 * is set to 1.
 *
 * Failures after the browser is launched set `process.exitCode` instead of
 * calling `process.exit()`: `process.exit()` terminates immediately and would
 * skip the `finally` block that closes the browser.
 */
async function main() {
  console.log("=== Extended Thinking Multi-Turn Test ===\n");

  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) {
    console.error("ERROR: ANTHROPIC_API_KEY environment variable is required");
    // No resources have been acquired yet, so an immediate exit is safe here.
    process.exit(1);
  }

  console.log("1. Launching browser...");
  const browser = await chromium.launch({ headless: false });

  // Everything after launch is wrapped so the browser is closed even when
  // navigation or agent construction throws.
  try {
    const context = await browser.newContext({
      viewport: { width: 1280, height: 800 },
    });
    const page = await context.newPage();
    console.log(" ✓ Browser launched\n");

    console.log("2. Navigating to Wikipedia...");
    await page.goto("https://en.wikipedia.org/wiki/Main_Page");
    await page.waitForLoadState("networkidle");
    console.log(" ✓ Page loaded\n");

    console.log("3. Creating agent with extended thinking...");
    const logger = new SimpleLogger();
    const agent = new ComputerUseAgent({
      apiKey,
      page,
      logger,
    });
    console.log(" ✓ Agent created\n");

    console.log("4. Executing multi-step task with thinkingBudget...\n");
    console.log(" Task: Find and click the 'Random article' link, then report the article title\n");
    console.log("=".repeat(70));

    const startTime = Date.now();

    try {
      const result = await agent.execute(
        `You are on Wikipedia's main page. Please do the following:
1. First, take a screenshot to see the current page
2. Find and click on the "Random article" link (it's usually in the left sidebar)
3. Wait for the new page to load
4. Take another screenshot
5. Tell me the title of the random article you landed on

Be thorough in your reasoning.`,
        undefined,
        {
          thinkingBudget: 4096,
          maxTokens: 16384,
        }
      );

      const elapsed = ((Date.now() - startTime) / 1000).toFixed(2);

      console.log("=".repeat(70));
      console.log(`\n5. Result (completed in ${elapsed}s):\n`);
      console.log(result);
      console.log("\n✅ Multi-turn extended thinking test PASSED!");
    } catch (error) {
      console.log("=".repeat(70));
      console.error("\n❌ Multi-turn extended thinking test FAILED!");
      console.error("\nError details:");
      if (error instanceof Error) {
        console.error(` Message: ${error.message}`);

        // "redacted_thinking" contains "thinking", so one substring check
        // covers both block types named in the API error.
        if (error.message.includes("thinking")) {
          console.error("\n 🔴 This is a THINKING BLOCK HANDLING issue!");
          console.error(" The fix in issue #12 may not be complete.");
        }

        if (error.message.includes("400")) {
          console.error("\n 🔴 400 error from Anthropic API");
          console.error(" This usually indicates improper message construction.");
        }
      } else {
        console.error(error);
      }
      // Report failure without terminating, so the cleanup below still runs.
      process.exitCode = 1;
    }
  } finally {
    console.log("\n6. Closing browser...");
    await browser.close();
    console.log(" ✓ Browser closed");
  }
}

// Run the script. An unexpected rejection is logged and reported through a
// non-zero exit code; a bare `.catch(console.error)` would leave the status at 0.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
82 changes: 82 additions & 0 deletions examples/test-extended-thinking.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/**
* Test Extended Thinking / thinkingBudget functionality
*
* This example verifies that extended thinking works correctly
* without causing 400 errors from improper thinking block handling.
*/

import { config } from "dotenv";
config({ path: "./examples/.env" });
import { chromium } from "playwright";
import { ComputerUseAgent } from "../index";
import { SimpleLogger } from "../utils/logger";

const logger = new SimpleLogger();

/**
 * Runs the single-turn extended thinking check against a live browser.
 *
 * Launches Chromium, opens https://example.com, and asks the agent to
 * describe the page with `thinkingBudget` set. On success the result is
 * printed; on failure diagnostic hints are printed and the process exit code
 * is set to 1.
 *
 * Failures after the browser is launched set `process.exitCode` instead of
 * calling `process.exit()`: `process.exit()` terminates immediately and would
 * skip the `finally` block that closes the browser.
 */
async function main() {
  console.log("=== Extended Thinking Test ===\n");

  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) {
    console.error("ERROR: ANTHROPIC_API_KEY environment variable is required");
    // No resources have been acquired yet, so an immediate exit is safe here.
    process.exit(1);
  }

  console.log("1. Launching browser...");
  const browser = await chromium.launch({ headless: false });

  // Everything after launch is wrapped so the browser is closed even when
  // navigation or agent construction throws.
  try {
    const context = await browser.newContext({
      viewport: { width: 1280, height: 800 },
    });
    const page = await context.newPage();
    console.log(" ✓ Browser launched\n");

    console.log("2. Navigating to test page...");
    await page.goto("https://example.com");
    console.log(" ✓ Page loaded\n");

    console.log("3. Creating agent with extended thinking enabled...");
    const agent = new ComputerUseAgent({
      apiKey,
      page,
      logger,
    });
    console.log(" ✓ Agent created\n");

    console.log("4. Executing task with thinkingBudget...\n");
    console.log("=".repeat(60));

    try {
      const result = await agent.execute(
        "Look at this page and tell me: What is the title of the page and what is the main heading? Provide a brief summary.",
        undefined,
        {
          thinkingBudget: 2048,
          maxTokens: 8192,
        }
      );

      console.log("=".repeat(60));
      console.log("\n5. Result:\n");
      console.log(result);
      console.log("\n✅ Extended thinking test PASSED!");
    } catch (error) {
      console.log("=".repeat(60));
      console.error("\n❌ Extended thinking test FAILED!");
      console.error("\nError details:");
      if (error instanceof Error) {
        console.error(` Message: ${error.message}`);
        if (error.message.includes("thinking")) {
          console.error("\n This appears to be a thinking block handling issue.");
          console.error(" The fix may not be complete.");
        }
      } else {
        console.error(error);
      }
      // Report failure without terminating, so the cleanup below still runs.
      process.exitCode = 1;
    }
  } finally {
    console.log("\n6. Closing browser...");
    await browser.close();
    console.log(" ✓ Browser closed");
  }
}

// Run the script. An unexpected rejection is logged and reported through a
// non-zero exit code; a bare `.catch(console.error)` would leave the status at 0.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
16 changes: 15 additions & 1 deletion loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,23 @@ ${capabilityDocs}`,
// Log LLM response
logger.llmResponse(response.stop_reason ?? "unknown", stepIndex, loggableContent);

// Ensure proper block ordering for extended thinking:
// thinking/redacted_thinking blocks must come first in assistant messages
const orderedContent = [...responseParams].sort((a, b) => {
const order: Record<string, number> = {
thinking: 0,
redacted_thinking: 1,
text: 2,
tool_use: 3,
};
const aOrder = order[a.type] ?? 99;
const bOrder = order[b.type] ?? 99;
return aOrder - bOrder;
});

messages.push({
role: "assistant",
content: responseParams,
content: orderedContent,
});

if (response.stop_reason === "end_turn") {
Expand Down
19 changes: 10 additions & 9 deletions types/beta.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@ export interface BetaToolUseBlock {

export interface BetaThinkingBlock {
type: "thinking";
thinking:
| {
type: "enabled";
budget_tokens: number;
}
| {
type: "disabled";
};
signature?: string;
thinking: string;
signature: string;
id?: string;
cache_control?: { type: "ephemeral" };
}

/**
 * Content block tagged `"redacted_thinking"`: the counterpart to
 * `BetaThinkingBlock` for thinking content that was redacted.
 *
 * NOTE(review): `data` is presumably an opaque payload that must be sent back
 * to the API unmodified on later turns — confirm against Anthropic's extended
 * thinking documentation.
 */
export interface BetaRedactedThinkingBlock {
// Discriminant that distinguishes this block from other content block types.
type: "redacted_thinking";
// Redacted payload, carried as a string.
data: string;
// Optional block identifier.
id?: string;
// Optional cache-control marker; only the "ephemeral" type is allowed.
cache_control?: { type: "ephemeral" };
}
Expand All @@ -74,4 +74,5 @@ export type BetaLocalContentBlock =
| BetaImageBlock
| BetaToolUseBlock
| BetaThinkingBlock
| BetaRedactedThinkingBlock
| BetaToolResultBlock;
13 changes: 11 additions & 2 deletions utils/message-processing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,17 @@ export function responseToParams(response: BetaMessage): BetaContentBlock[] {
};
}
if (block.type === "thinking") {
const { thinking, signature, ...rest } = block;
return { ...rest, thinking, signature: signature || "" };
return {
type: "thinking" as const,
thinking: block.thinking,
signature: block.signature,
};
}
if (block.type === "redacted_thinking") {
return {
type: "redacted_thinking" as const,
data: block.data,
};
}
return block as BetaContentBlock;
});
Expand Down