
Commit 656beb8

fix(anthropic): token limits for streaming with tool calls (#3084)
* remove for bedrock since they handle on their own
* fix
* fix inference config reference
* add to docs
* make it min between max tokens
1 parent f7c3de0 commit 656beb8

File tree (5 files changed, +41 -21 lines changed):

* .cursor/commands/council.md
* apps/docs/content/docs/en/blocks/agent.mdx
* apps/sim/providers/anthropic/index.ts
* apps/sim/providers/bedrock/index.ts
* apps/sim/providers/models.ts

.cursor/commands/council.md

Lines changed: 7 additions & 0 deletions
```diff
@@ -0,0 +1,7 @@
+Based on the given area of interest, please:
+
+1. Dig around the codebase in terms of that given area of interest, gather general information such as keywords and architecture overview.
+2. Spawn off n=10 (unless specified otherwise) task agents to dig deeper into the codebase in terms of that given area of interest, some of them should be out of the box for variance.
+3. Once the task agents are done, use the information to do what the user wants.
+
+If user is in plan mode, use the information to create the plan.
```

apps/docs/content/docs/en/blocks/agent.mdx

Lines changed: 1 addition & 1 deletion
```diff
@@ -58,7 +58,7 @@ Controls response randomness and creativity:
 
 ### Max Output Tokens
 
-Controls the maximum length of the model's response. For Anthropic models, Sim uses reliable defaults: streaming executions use the model's full capacity (e.g. 64,000 tokens for Claude 4.5), while non-streaming executions default to 8,192 to avoid timeout issues. For long-form content generation via API, explicitly set a higher value.
+Controls the maximum length of the model's response. For Anthropic models, Sim uses reliable defaults: streaming executions use the model's full capacity (e.g. 64,000 tokens for Claude 4.5), while non-streaming executions default to 8,192 to avoid timeout issues. When using tools with Anthropic models, intermediate tool-calling requests use a capped limit of 8,192 tokens to avoid SDK timeout errors, regardless of your configured max tokens—the final streaming response uses your full configured limit. This only affects Anthropic's direct API; AWS Bedrock handles this automatically. For long-form content generation via API, explicitly set a higher value.
 
 ### API Key
```
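To make the documented behavior concrete, here is a minimal TypeScript sketch of the capping rule described in that paragraph. The function name, constants, and signature are illustrative only, not Sim's actual API:

```ts
// Illustrative constants; Sim resolves the real limits per model.
const NON_STREAMING_LIMIT = 8192 // cap for intermediate tool-calling requests
const STREAMING_MAX = 64000 // e.g. Claude 4.5 full streaming capacity

// Hypothetical helper mirroring the documented rule.
function effectiveMaxTokens(configured: number | undefined, isFinalStream: boolean): number {
  if (isFinalStream) {
    // The final streaming response honors the full configured limit.
    return configured ?? STREAMING_MAX
  }
  // Intermediate tool-calling requests are capped at 8,192 tokens,
  // but a lower user-configured value is still respected.
  return configured ? Math.min(configured, NON_STREAMING_LIMIT) : NON_STREAMING_LIMIT
}

console.log(effectiveMaxTokens(64000, false)) // 8192: intermediate call is capped
console.log(effectiveMaxTokens(64000, true)) // 64000: final stream uses the configured limit
console.log(effectiveMaxTokens(4096, false)) // 4096: lower values pass through
```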

apps/sim/providers/anthropic/index.ts

Lines changed: 22 additions & 6 deletions
```diff
@@ -302,13 +302,21 @@ export const anthropicProvider: ProviderConfig = {
     const providerStartTime = Date.now()
     const providerStartTimeISO = new Date(providerStartTime).toISOString()
 
+    // Cap intermediate calls at non-streaming limit to avoid SDK timeout errors,
+    // but allow users to set lower values if desired
+    const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
+    const nonStreamingMaxTokens = request.maxTokens
+      ? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
+      : nonStreamingLimit
+    const intermediatePayload = { ...payload, max_tokens: nonStreamingMaxTokens }
+
     try {
       const initialCallTime = Date.now()
-      const originalToolChoice = payload.tool_choice
+      const originalToolChoice = intermediatePayload.tool_choice
       const forcedTools = preparedTools?.forcedTools || []
       let usedForcedTools: string[] = []
 
-      let currentResponse = await anthropic.messages.create(payload)
+      let currentResponse = await anthropic.messages.create(intermediatePayload)
       const firstResponseTime = Date.now() - initialCallTime
 
       let content = ''
@@ -491,7 +499,7 @@ export const anthropicProvider: ProviderConfig = {
         toolsTime += thisToolsTime
 
         const nextPayload = {
-          ...payload,
+          ...intermediatePayload,
           messages: currentMessages,
         }
 
@@ -674,13 +682,21 @@ export const anthropicProvider: ProviderConfig = {
     const providerStartTime = Date.now()
     const providerStartTimeISO = new Date(providerStartTime).toISOString()
 
+    // Cap intermediate calls at non-streaming limit to avoid SDK timeout errors,
+    // but allow users to set lower values if desired
+    const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
+    const toolLoopMaxTokens = request.maxTokens
+      ? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
+      : nonStreamingLimit
+    const toolLoopPayload = { ...payload, max_tokens: toolLoopMaxTokens }
+
     try {
       const initialCallTime = Date.now()
-      const originalToolChoice = payload.tool_choice
+      const originalToolChoice = toolLoopPayload.tool_choice
       const forcedTools = preparedTools?.forcedTools || []
       let usedForcedTools: string[] = []
 
-      let currentResponse = await anthropic.messages.create(payload)
+      let currentResponse = await anthropic.messages.create(toolLoopPayload)
       const firstResponseTime = Date.now() - initialCallTime
 
       let content = ''
@@ -867,7 +883,7 @@ export const anthropicProvider: ProviderConfig = {
         toolsTime += thisToolsTime
 
         const nextPayload = {
-          ...payload,
+          ...toolLoopPayload,
           messages: currentMessages,
         }
```
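Both hunks in this file apply the same pattern: compute the capped limit once, build an intermediate payload, and reuse that payload for the initial request and every follow-up in the tool loop. A self-contained sketch of the pattern, assuming the @anthropic-ai/sdk client and a fixed 8,192-token non-streaming limit (Sim resolves the real limit per model via getMaxOutputTokensForModel):

```ts
import Anthropic from '@anthropic-ai/sdk'

const anthropic = new Anthropic() // reads ANTHROPIC_API_KEY from the environment

// Sketch only: the limit is hard-coded here, while Sim derives it per model.
const NON_STREAMING_LIMIT = 8192

async function createWithCappedTokens(
  payload: Anthropic.MessageCreateParamsNonStreaming,
  requestedMaxTokens?: number
) {
  // Cap intermediate calls at the non-streaming limit to avoid SDK timeout
  // errors, but allow users to set lower values if desired.
  const maxTokens = requestedMaxTokens
    ? Math.min(requestedMaxTokens, NON_STREAMING_LIMIT)
    : NON_STREAMING_LIMIT
  const intermediatePayload = { ...payload, max_tokens: maxTokens }

  // The tool loop reuses intermediatePayload for follow-up requests, e.g.
  // anthropic.messages.create({ ...intermediatePayload, messages: nextMessages }),
  // so the cap applies to every intermediate round trip.
  return anthropic.messages.create(intermediatePayload)
}
```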

apps/sim/providers/bedrock/index.ts

Lines changed: 5 additions & 9 deletions
```diff
@@ -20,11 +20,7 @@ import {
   generateToolUseId,
   getBedrockInferenceProfileId,
 } from '@/providers/bedrock/utils'
-import {
-  getMaxOutputTokensForModel,
-  getProviderDefaultModel,
-  getProviderModels,
-} from '@/providers/models'
+import { getProviderDefaultModel, getProviderModels } from '@/providers/models'
 import type {
   ProviderConfig,
   ProviderRequest,
@@ -261,11 +257,11 @@ export const bedrockProvider: ProviderConfig = {
 
     const systemPromptWithSchema = systemContent
 
-    const inferenceConfig = {
+    const inferenceConfig: { temperature: number; maxTokens?: number } = {
       temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
-      maxTokens:
-        Number.parseInt(String(request.maxTokens)) ||
-        getMaxOutputTokensForModel(request.model, request.stream ?? false),
+    }
+    if (request.maxTokens != null) {
+      inferenceConfig.maxTokens = Number.parseInt(String(request.maxTokens))
     }
 
     const shouldStreamToolCalls = request.streamToolCalls ?? false
```
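The net effect on the Bedrock side: maxTokens is now only sent when the user configured one; otherwise the field is omitted and Bedrock applies the model's own default. A minimal sketch of that construction, with a simplified request shape standing in for Sim's ProviderRequest type:

```ts
// Simplified stand-in for Sim's ProviderRequest; only the relevant fields.
interface ProviderRequestLike {
  temperature?: number | string
  maxTokens?: number | string
}

function buildInferenceConfig(request: ProviderRequestLike) {
  const inferenceConfig: { temperature: number; maxTokens?: number } = {
    temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
  }
  // Omit maxTokens entirely when unset so Bedrock falls back to the
  // model's own default instead of a limit computed on our side.
  if (request.maxTokens != null) {
    inferenceConfig.maxTokens = Number.parseInt(String(request.maxTokens))
  }
  return inferenceConfig
}

console.log(buildInferenceConfig({})) // { temperature: 0.7 } (no maxTokens key)
console.log(buildInferenceConfig({ maxTokens: '4096' })) // { temperature: 0.7, maxTokens: 4096 }
```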

apps/sim/providers/models.ts

Lines changed: 6 additions & 5 deletions
```diff
@@ -34,10 +34,15 @@ export interface ModelCapabilities {
   toolUsageControl?: boolean
   computerUse?: boolean
   nativeStructuredOutputs?: boolean
+  /**
+   * Max output tokens configuration for Anthropic SDK's streaming timeout workaround.
+   * The Anthropic SDK throws an error for non-streaming requests that may take >10 minutes.
+   * This only applies to direct Anthropic API calls, not Bedrock (which uses AWS SDK).
+   */
   maxOutputTokens?: {
     /** Maximum tokens for streaming requests */
     max: number
-    /** Safe default for non-streaming requests (to avoid timeout issues) */
+    /** Safe default for non-streaming requests (to avoid Anthropic SDK timeout errors) */
     default: number
   }
   reasoningEffort?: {
@@ -1709,7 +1714,6 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
     },
     contextWindow: 200000,
   },
@@ -1723,7 +1727,6 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
     },
     contextWindow: 200000,
   },
@@ -1737,7 +1740,6 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
     },
     contextWindow: 200000,
   },
@@ -1751,7 +1753,6 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
     },
     contextWindow: 200000,
   },
```
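For reference, a hypothetical re-implementation of the contract getMaxOutputTokensForModel appears to follow, given the maxOutputTokens capability shape defined above. The lookup and the fallback value are assumptions, not Sim's actual code:

```ts
// Mirrors the capability shape from the ModelCapabilities interface above.
interface ModelCapabilitiesLike {
  maxOutputTokens?: { max: number; default: number }
}

// Assumed fallback for models without an explicit maxOutputTokens entry.
const FALLBACK_MAX_TOKENS = 8192

function resolveMaxOutputTokens(capabilities: ModelCapabilitiesLike, streaming: boolean): number {
  const config = capabilities.maxOutputTokens
  if (!config) return FALLBACK_MAX_TOKENS
  // Streaming requests may use the model's full capacity; non-streaming
  // requests use the safe default that stays under the SDK's timeout guard.
  return streaming ? config.max : config.default
}

const claude = { maxOutputTokens: { max: 64000, default: 8192 } }
console.log(resolveMaxOutputTokens(claude, true)) // 64000
console.log(resolveMaxOutputTokens(claude, false)) // 8192
```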
