42 changes: 42 additions & 0 deletions src/api/providers/__tests__/vercel-ai-gateway.spec.ts
@@ -58,6 +58,18 @@ vi.mock("../fetchers/modelCache", () => ({
cacheReadsPrice: 0.25,
description: "GPT-4o",
},
"anthropic/claude-opus-4.6": {
maxTokens: 128000,
contextWindow: 1000000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: true,
inputPrice: 5,
outputPrice: 25,
cacheWritesPrice: 6.25,
cacheReadsPrice: 0.5,
description: "Claude Opus 4.6",
},
})
}),
getModelsFromCache: vi.fn().mockReturnValue(undefined),
@@ -232,6 +244,36 @@ describe("VercelAiGatewayHandler", () => {
)
})

it("passes gateway reasoning effort when enabled", async () => {
mockStreamText.mockReturnValue(createMockStreamResult())

const handler = new VercelAiGatewayHandler({
...mockOptions,
vercelAiGatewayModelId: "anthropic/claude-opus-4.6",
enableReasoningEffort: true,
reasoningEffort: "high",
})

await handler.createMessage("test", []).next()

expect(mockStreamText).toHaveBeenCalledWith(
expect.objectContaining({
reasoning: {
enabled: true,
effort: "high",
},
providerOptions: {
anthropic: {
thinking: {
type: "enabled",
budgetTokens: 102400,
},
},
},
}),
)
})

it("sets correct maxOutputTokens", async () => {
mockStreamText.mockReturnValue(createMockStreamResult())

15 changes: 15 additions & 0 deletions src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts
@@ -237,6 +237,21 @@ describe("Vercel AI Gateway Fetchers", () => {
)
})

it("sets supportsReasoningEffort when model tags include reasoning", () => {
const reasoningModel = {
...baseModel,
id: "anthropic/claude-opus-4.6",
tags: ["tool-use", "reasoning", "vision"],
}

const result = parseVercelAiGatewayModel({
id: reasoningModel.id,
model: reasoningModel,
})

expect(result.supportsReasoningEffort).toBe(true)
})

it("handles missing cache pricing", () => {
const modelNoCachePricing = {
...baseModel,
10 changes: 10 additions & 0 deletions src/api/providers/fetchers/vercel-ai-gateway.ts
@@ -33,6 +33,7 @@ const vercelAiGatewayModelSchema = z.object({
context_window: z.number(),
max_tokens: z.number(),
type: z.string(),
tags: z.array(z.string()).optional(),
pricing: vercelAiGatewayPricingSchema,
})

@@ -91,6 +92,14 @@ export async function getVercelAiGatewayModels(options?: ApiHandlerOptions): Pro
*/

export const parseVercelAiGatewayModel = ({ id, model }: { id: string; model: VercelAiGatewayModel }): ModelInfo => {
const tags = model.tags ?? []
const supportsReasoningEffort =
tags.includes("reasoning") ||
id.startsWith("openai/o") ||
id.startsWith("openai/gpt-5") ||
id.startsWith("openai/gpt-oss") ||
id.startsWith("xai/grok-3-mini")

const cacheWritesPrice = model.pricing?.input_cache_write
? parseApiPrice(model.pricing?.input_cache_write)
: undefined
@@ -105,6 +114,7 @@ export const parseVercelAiGatewayModel = ({ id, model }: { id: string; model: Ve
maxTokens: model.max_tokens,
contextWindow: model.context_window,
supportsImages,
...(supportsReasoningEffort ? { supportsReasoningEffort: true } : {}),
supportsPromptCache,
inputPrice: parseApiPrice(model.pricing?.input),
outputPrice: parseApiPrice(model.pricing?.output),
86 changes: 74 additions & 12 deletions src/api/providers/vercel-ai-gateway.ts
@@ -21,6 +21,8 @@ import {
} from "../transform/ai-sdk"
import { applyToolCacheOptions } from "../transform/cache-breakpoints"
import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
import { getModelParams } from "../transform/model-params"
import type { OpenAiReasoningParams } from "../transform/reasoning"

import { DEFAULT_HEADERS } from "./constants"
import { BaseProvider } from "./base-provider"
@@ -29,6 +31,15 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
import type { RooMessage } from "../../core/task-persistence/rooMessage"
import { sanitizeMessagesForProvider } from "../transform/sanitize-messages"

type ModelSelection = {
id: string
info: ModelInfo
maxTokens?: number
temperature?: number
reasoning?: OpenAiReasoningParams
reasoningBudget?: number
}

/**
* Vercel AI Gateway provider using the built-in AI SDK gateway support.
* Uses `createGateway` from the `ai` package to communicate with the
@@ -50,20 +61,41 @@ export class VercelAiGatewayHandler extends BaseProvider implements SingleComple
})
}

override getModel(): { id: string; info: ModelInfo } {
override getModel(): ModelSelection {
const id = this.options.vercelAiGatewayModelId ?? vercelAiGatewayDefaultModelId
const resolveModel = (modelInfo: ModelInfo) => ({
id,
info: modelInfo,
...getModelParams({
format: "openai",
modelId: id,
model: modelInfo,
settings: this.options,
defaultTemperature: VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE,
}),
})

if (this.models[id]) {
return { id, info: this.models[id] }
return resolveModel(this.models[id])
}

const cachedModels = getModelsFromCache(this.name)
if (cachedModels?.[id]) {
this.models = cachedModels
return { id, info: cachedModels[id] }
return resolveModel(cachedModels[id])
}

return { id: vercelAiGatewayDefaultModelId, info: vercelAiGatewayDefaultModelInfo }
return {
id: vercelAiGatewayDefaultModelId,
info: vercelAiGatewayDefaultModelInfo,
...getModelParams({
format: "openai",
modelId: vercelAiGatewayDefaultModelId,
model: vercelAiGatewayDefaultModelInfo,
settings: this.options,
defaultTemperature: VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE,
}),
}
}

public async fetchModel() {
@@ -115,7 +147,7 @@ export class VercelAiGatewayHandler extends BaseProvider implements SingleComple
messages: RooMessage[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const { id: modelId, info } = await this.fetchModel()
const { id: modelId, info, temperature, reasoning, reasoningBudget } = await this.fetchModel()
const languageModel = this.getLanguageModel(modelId)

const aiSdkMessages = sanitizeMessagesForProvider(messages)
Expand All @@ -124,18 +156,33 @@ export class VercelAiGatewayHandler extends BaseProvider implements SingleComple
const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined
applyToolCacheOptions(aiSdkTools as Parameters<typeof applyToolCacheOptions>[0], metadata?.toolProviderOptions)

const temperature = this.supportsTemperature(modelId)
? (this.options.modelTemperature ?? VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE)
const resolvedTemperature = this.supportsTemperature(modelId)
? (this.options.modelTemperature ?? temperature ?? VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE)
: undefined

const reasoningConfig = reasoning ? { enabled: true, effort: reasoning.reasoning_effort } : undefined
const anthropicProviderOptions =
modelId.startsWith("anthropic/") && reasoningConfig
? {
anthropic: {
thinking: {
type: "enabled" as const,
budgetTokens: reasoningBudget ?? Math.floor((info.maxTokens ?? 0) * 0.8),
},
},
}
: undefined
Comment on lines +164 to +174
Contributor

Anthropic requires temperature = 1.0 when extended thinking is enabled. The existing getModelParams enforces this for budget-based models (via shouldUseReasoningBudget), but Gateway Anthropic models only have supportsReasoningEffort: true (they lack supportsReasoningBudget), so the budget path is never entered and the temperature stays at 0.7 (VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE). Unless the Vercel AI Gateway transparently overrides the temperature for Anthropic thinking requests, the API will reject the call. The same applies to the identical block in completePrompt. Consider forcing resolvedTemperature = 1.0 when anthropicProviderOptions is defined and no explicit user temperature is set.
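
A minimal sketch of that guard, reusing the names already in this diff (whether the gateway itself normalizes temperature for thinking requests is an assumption to verify):

	// Sketch only: keep the existing resolution, then force 1.0 for Anthropic
	// extended thinking unless the user explicitly set a temperature.
	// (anthropicProviderOptions would need to be computed before this point.)
	const baseTemperature = this.supportsTemperature(modelId)
		? (this.options.modelTemperature ?? temperature ?? VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE)
		: undefined
	const resolvedTemperature =
		anthropicProviderOptions && this.options.modelTemperature == null
			? 1.0 // Anthropic rejects thinking requests whose temperature is not 1
			: baseTemperature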



const result = streamText({
model: languageModel,
system: systemPrompt || undefined,
messages: aiSdkMessages,
temperature,
temperature: resolvedTemperature,
maxOutputTokens: info.maxTokens ?? undefined,
tools: aiSdkTools,
toolChoice: mapToolChoice(metadata?.tool_choice),
...(reasoningConfig ? { reasoning: reasoningConfig } : {}),
...(anthropicProviderOptions ? { providerOptions: anthropicProviderOptions } : {}),
})

try {
@@ -170,19 +217,34 @@ export class VercelAiGatewayHandler extends BaseProvider implements SingleComple
}

async completePrompt(prompt: string): Promise<string> {
const { id: modelId, info } = await this.fetchModel()
const { id: modelId, info, temperature, reasoning, reasoningBudget } = await this.fetchModel()
const languageModel = this.getLanguageModel(modelId)

const temperature = this.supportsTemperature(modelId)
? (this.options.modelTemperature ?? VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE)
const resolvedTemperature = this.supportsTemperature(modelId)
? (this.options.modelTemperature ?? temperature ?? VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE)
: undefined

const reasoningConfig = reasoning ? { enabled: true, effort: reasoning.reasoning_effort } : undefined
const anthropicProviderOptions =
modelId.startsWith("anthropic/") && reasoningConfig
? {
anthropic: {
thinking: {
type: "enabled" as const,
budgetTokens: reasoningBudget ?? Math.floor((info.maxTokens ?? 0) * 0.8),
},
},
}
: undefined
Comment on lines +227 to +238
Contributor

This reasoningConfig + anthropicProviderOptions block is identical to the one in createMessage (lines 163-174). Extracting both into a small private helper (e.g. buildReasoningOptions(modelId, info, reasoning, reasoningBudget)) would eliminate the duplication and ensure future fixes (like the temperature override) only need to be applied once.
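
A sketch of the suggested extraction, assuming it becomes a private method on the handler (the name buildReasoningOptions comes from this comment, not from the PR):

	// Sketch only: mirrors the two duplicated inline blocks.
	private buildReasoningOptions(
		modelId: string,
		info: ModelInfo,
		reasoning?: OpenAiReasoningParams,
		reasoningBudget?: number,
	) {
		const reasoningConfig = reasoning ? { enabled: true, effort: reasoning.reasoning_effort } : undefined
		const anthropicProviderOptions =
			modelId.startsWith("anthropic/") && reasoningConfig
				? {
						anthropic: {
							thinking: {
								type: "enabled" as const,
								// Same fallback as the inline blocks: 80% of the model's maxTokens.
								budgetTokens: reasoningBudget ?? Math.floor((info.maxTokens ?? 0) * 0.8),
							},
						},
					}
				: undefined
		return { reasoningConfig, anthropicProviderOptions }
	}

Both call sites would then reduce to const { reasoningConfig, anthropicProviderOptions } = this.buildReasoningOptions(modelId, info, reasoning, reasoningBudget).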



try {
const { text } = await generateText({
model: languageModel,
prompt,
maxOutputTokens: info.maxTokens ?? undefined,
temperature,
temperature: resolvedTemperature,
...(reasoningConfig ? { reasoning: reasoningConfig } : {}),
...(anthropicProviderOptions ? { providerOptions: anthropicProviderOptions } : {}),
})

return text