@@ -4,12 +4,61 @@
*--------------------------------------------------------------------------------------------*/

import Anthropic from '@anthropic-ai/sdk';
import * as ai from 'ai';
import * as vscode from 'vscode';
import { getAllModelDefinitions } from '../../modelDefinitions.js';
import { createModelInfo, markDefaultModel } from '../../modelResolutionHelpers.js';
import { ModelCapabilities } from '../base/modelProviderTypes.js';
import { ModelProviderLogger } from '../base/modelProviderLogger.js';

/**
 * Checks if an error is a rate limit error (HTTP 429) from the native Anthropic SDK
 * and throws a user-friendly error with retry-after information if available.
 *
 * @param error - The error to check
 * @param providerName - The name of the provider for the error message prefix
 * @returns true if the error was handled (and thrown), false otherwise
 */

[Review comment] should this and the vercel version of this still return boolean? the true case isn't returned for either. Maybe we can just not return anything?

export function handleNativeSdkRateLimitError(error: unknown, providerName: string): boolean {
	if (error instanceof Anthropic.APIError && error.status === 429) {
		const retryAfter = error.headers?.get('retry-after');
		if (retryAfter) {
			throw new Error(`[${providerName}] Rate limit exceeded. Please retry after ${retryAfter} seconds.`);
		}
		throw new Error(`[${providerName}] Rate limit exceeded. Please try again later.`);
	}
	return false;
}

[Review comment on lines +25 to +28] should we add some logging here and in the vercel version below so the error is logged in output as well?
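
To make the intended call pattern concrete, here is a rough usage sketch; `client` and `params` are placeholders for a configured Anthropic client and request options, not part of this change:

```ts
try {
	const stream = client.messages.stream(params);
	await stream.done();
} catch (error) {
	// Throws a user-facing rate-limit message on HTTP 429; for any other
	// error the helper returns false and we rethrow the original.
	handleNativeSdkRateLimitError(error, 'anthropic');
	throw error;
}
```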

/**
 * Checks if an error is a rate limit error (HTTP 429) from the Vercel AI SDK
 * and throws a user-friendly error with retry-after information if available.
 *
 * Handles both direct APICallError and RetryError (which wraps multiple attempts).
 * When the SDK exhausts retries, it throws a RetryError containing the lastError.
 *
 * @param error - The error to check
 * @param providerName - The name of the provider for the error message prefix
 * @returns true if the error was handled (and thrown), false otherwise
 */
export function handleVercelSdkRateLimitError(error: unknown, providerName: string): boolean {
	// Check for RetryError first - the Vercel SDK wraps retried errors in this type
	// when maxRetries is exceeded
	let apiError: unknown = error;
	if (ai.RetryError.isInstance(error) && error.lastError) {
		apiError = error.lastError;
	}

	if (ai.APICallError.isInstance(apiError) && apiError.statusCode === 429) {
		const retryAfter = apiError.responseHeaders?.['retry-after'];
		if (retryAfter) {
			throw new Error(`[${providerName}] Rate limit exceeded. Please retry after ${retryAfter} seconds.`);
		}
		throw new Error(`[${providerName}] Rate limit exceeded. Please try again later.`);
	}
	return false;
}
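
A matching sketch for the Vercel path, assuming a configured `model` value and using `generateText` from the `ai` package purely for illustration:

```ts
import { generateText } from 'ai';

try {
	// Once maxRetries is exhausted, the SDK throws a RetryError whose
	// lastError is the final APICallError from the provider.
	await generateText({ model, prompt: 'Hello', maxRetries: 2 });
} catch (error) {
	// Unwraps a RetryError, rethrows a friendly message on a 429,
	// and returns false for anything else.
	handleVercelSdkRateLimitError(error, 'anthropic');
	throw error;
}
```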

export const DEFAULT_ANTHROPIC_MODEL_NAME = 'Claude Sonnet 4';
export const DEFAULT_ANTHROPIC_MODEL_MATCH = 'claude-sonnet-4';

@@ -18,7 +18,8 @@ import {
	DEFAULT_ANTHROPIC_MODEL_NAME,
	DEFAULT_ANTHROPIC_MODEL_MATCH,
	fetchAnthropicModelsFromApi,
-	getAnthropicModelsFromConfig
+	getAnthropicModelsFromConfig,
+	handleNativeSdkRateLimitError
} from './anthropicModelUtils.js';

// Re-export for consumers that import from this file
@@ -229,14 +230,18 @@ export class AnthropicModelProvider extends ModelProvider implements positron.ai
		} catch (error) {
			if (error instanceof Anthropic.APIError) {
				this.logger.warn(`Error in messages.stream [${stream.request_id}]: ${error.message}`);

				// Check for rate limit error with retry-after header
				handleNativeSdkRateLimitError(error, this.providerName);

				let data: any;
				try {
					data = JSON.parse(error.message);
				} catch {
					// Ignore JSON parse errors.
				}
				if (data?.error?.type === 'overloaded_error') {
-					throw new Error(`[${this.providerName}] Anthropic's API is temporarily overloaded.`);
+					throw new Error(`[${this.providerName}] API is temporarily overloaded.`);
				}
			} else if (error instanceof Anthropic.AnthropicError) {
				this.logger.warn(`Error in messages.stream [${stream.request_id}]: ${error.message}`);
@@ -277,7 +282,7 @@ export class AnthropicModelProvider extends ModelProvider implements positron.ai
			try {
				const data = JSON.parse(error.message);
				if (data?.error?.type === 'overloaded_error') {
-					return `Anthropic's API is temporarily overloaded.`;
+					return `API is temporarily overloaded.`;
				}
			} catch { /* ignore */ }
		} else if (error instanceof Anthropic.AnthropicError) {
@@ -13,7 +13,8 @@ import {
	DEFAULT_ANTHROPIC_MODEL_NAME,
	DEFAULT_ANTHROPIC_MODEL_MATCH,
	fetchAnthropicModelsFromApi,
-	getAnthropicModelsFromConfig
+	getAnthropicModelsFromConfig,
+	handleVercelSdkRateLimitError
} from './anthropicModelUtils.js';

/**
@@ -151,4 +152,19 @@ export class AnthropicAIModelProvider extends VercelModelProvider implements pos
			{ toolResultExperimentalContent, anthropicCacheBreakpoint: true }
		);
	}

	/**
	 * Handles Anthropic-specific errors during stream processing.
	 *
	 * Checks for rate limit errors (429) and extracts the retry-after header
	 * to provide a more helpful error message to the user.
	 *
	 * @param error - The error that occurred during streaming
	 * @throws A transformed error with retry information if rate limited
	 */
	protected override handleStreamError(error: unknown): never {
		// Check for rate limit error with retry-after header
		handleVercelSdkRateLimitError(error, this.providerName);
		throw error;
	}
}
@@ -198,7 +198,26 @@ export abstract class VercelModelProvider extends ModelProvider {
			abortSignal: signal,
		});

-		await this.handleStreamResponse(result, model, progress, token, requestId);
+		try {
+			await this.handleStreamResponse(result, model, progress, token, requestId);
+		} catch (error) {
+			// Allow subclasses to handle provider-specific errors
+			this.handleStreamError(error);
+		}
	}

	/**
	 * Handles errors that occur during stream processing.
	 *
	 * Subclasses can override this method to handle provider-specific errors
	 * (e.g., rate limiting with retry-after headers). The default implementation
	 * simply re-throws the error.
	 *
	 * @param error - The error that occurred during streaming
	 * @throws The original error or a transformed error with additional context
	 */
	protected handleStreamError(error: unknown): never {
		throw error;
	}
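
One reading of why this hook is typed `never` rather than `void` (an inference, not something the PR states): it tells the compiler the call always throws, so implementations and call sites type-check without dead-code returns. A toy sketch:

```ts
// Because `fail` is typed `never`, the compiler knows execution cannot
// continue past the call, so `assertPositive` needs no trailing return.
function fail(error: unknown): never {
	throw error;
}

function assertPositive(n: number): number {
	if (n > 0) {
		return n;
	}
	fail(new Error('expected a positive number'));
	// Unreachable: control-flow analysis accepts the missing return.
}
```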

/**
@@ -387,6 +406,45 @@ export abstract class VercelModelProvider extends ModelProvider {
		this.logger.info(`[vercel]: End request ${requestId}; usage: ${tokens.inputTokens} input tokens (+${tokens.cachedTokens} cached), ${tokens.outputTokens} output tokens`);
	}

	/**
	 * Parses provider errors to extract user-friendly messages.
	 *
	 * Overrides the base implementation to handle Vercel AI SDK-specific errors,
	 * particularly RetryError, which wraps the actual API error when maxRetries
	 * is exceeded.
	 *
	 * @param error - The error object from the provider
	 * @returns A user-friendly error message, or undefined if not specifically handled
	 */
	override async parseProviderError(error: any): Promise<string | undefined> {
		// Handle RetryError - the Vercel SDK wraps retried errors in this type
		// when maxRetries is exceeded. Extract the lastError for processing.
		if (ai.RetryError.isInstance(error) && error.lastError) {
			if (ai.APICallError.isInstance(error.lastError)) {
				const lastError = error.lastError;

				// Check for rate limit error (429) with retry-after header
				if (lastError.statusCode === 429) {
					const retryAfter = lastError.responseHeaders?.['retry-after'];
					if (retryAfter) {
						return `Rate limit exceeded. Please retry after ${retryAfter} seconds.`;
					}
					return 'Rate limit exceeded. Please try again later.';
				}

				// Try to get the message from the parsed data on the lastError
				const errorData = lastError.data as { error?: { message?: string } } | undefined;
				if (errorData?.error?.message) {
					return errorData.error.message;
				}
			}
			// Delegate to base class with the unwrapped error
			return super.parseProviderError(error.lastError);
		}

		return super.parseProviderError(error);
	}
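
To see the unwrapping end to end, here is a hedged unit-test sketch. The `APICallError` and `RetryError` constructor options shown are assumptions that may differ between AI SDK versions, and `provider` stands in for a concrete `VercelModelProvider` instance:

```ts
import { APICallError, RetryError } from 'ai';

const apiError = new APICallError({
	message: 'Rate limited',
	url: 'https://api.anthropic.com/v1/messages',
	requestBodyValues: {},
	statusCode: 429,
	responseHeaders: { 'retry-after': '30' },
});
const retryError = new RetryError({
	message: 'Failed after 2 attempts',
	reason: 'maxRetriesExceeded',
	errors: [apiError, apiError],
});

// Expected: 'Rate limit exceeded. Please retry after 30 seconds.'
console.log(await provider.parseProviderError(retryError));
```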

	/**
	 * Retrieves models from user configuration.
	 *
@@ -12,6 +12,7 @@ import { deleteConfiguration, ModelConfig, SecretStorage } from '../../config';
import { DEFAULT_MAX_TOKEN_OUTPUT } from '../../constants';
import { log, recordRequestTokenUsage, recordTokenUsage } from '../../extension.js';
import { isCacheControlOptions, toAnthropicMessages, toAnthropicSystem, toAnthropicToolChoice, toAnthropicTools, toTokenUsage } from '../anthropic/anthropicProvider.js';
import { handleNativeSdkRateLimitError, handleVercelSdkRateLimitError } from '../anthropic/anthropicModelUtils.js';
import { VercelModelProvider } from '../base/vercelModelProvider.js';

export const DEFAULT_POSITAI_MODEL_NAME = 'Claude Sonnet 4.5';
@@ -416,14 +417,18 @@ export class PositModelProvider extends VercelModelProvider {
		} catch (error) {
			if (error instanceof Anthropic.APIError) {
				this.logger.warn(`Error in messages.stream [${stream.request_id}]: ${error.message}`);

				// Check for rate limit error with retry-after header
				handleNativeSdkRateLimitError(error, this.providerName);

				let data: any;
				try {
					data = JSON.parse(error.message);
				} catch {
					// Ignore JSON parse errors.
				}
				if (data?.error?.type === 'overloaded_error') {
-					throw new Error(`API is temporarily overloaded.`);
+					throw new Error(`[${this.providerName}] API is temporarily overloaded.`);
				}
			} else if (error instanceof Anthropic.AnthropicError) {
				this.logger.warn(`Error in messages.stream [${stream.request_id}]: ${error.message}`);
@@ -497,6 +502,21 @@ export class PositModelProvider extends VercelModelProvider {
		progress.report(new vscode.LanguageModelTextPart(textDelta));
	}

	/**
	 * Handles Posit AI-specific errors during stream processing (Vercel SDK path).
	 *
	 * Checks for rate limit errors (429) and extracts the retry-after header
	 * to provide a more helpful error message to the user.
	 *
	 * @param error - The error that occurred during streaming
	 * @throws A transformed error with retry information if rate limited
	 */
	protected override handleStreamError(error: unknown): never {
		// Check for rate limit error with retry-after header
		handleVercelSdkRateLimitError(error, this.providerName);
		throw error;
	}

	/**
	 * Retrieves models from configuration.
	 * Overrides base implementation to use Posit AI specific default model matching.
extensions/positron-assistant/src/test/anthropic.test.ts (66 additions, 1 deletion)

@@ -13,7 +13,12 @@ import { EMPTY_TOOL_RESULT_PLACEHOLDER, languageModelCacheBreakpointPart } from
import { DEFAULT_MODEL_CAPABILITIES } from '../constants.js';
import Anthropic from '@anthropic-ai/sdk';
import { MessageStream } from '@anthropic-ai/sdk/lib/MessageStream.js';
-import { mock } from './utils.js';
+import {
+	mock,
+	createNativeRateLimitError,
+	assertRateLimitErrorWithRetry,
+	assertRateLimitErrorWithoutRetry
+} from './utils.js';
import * as modelDefinitionsModule from '../modelDefinitions.js';
import * as helpersModule from '../modelResolutionHelpers.js';

@@ -839,4 +844,64 @@ suite('AnthropicModelProvider', () => {
			});
		});
	});

	suite('Rate limit error handling', () => {
		test('throws error with retry-after when rate limited with header', async () => {
			const rateLimitError = createNativeRateLimitError('30');

			// Configure mock to reject with rate limit error
			mockClient.messages.stream.returns(mock<MessageStream>({
				on: () => mock<MessageStream>({}),
				abort: () => { },
				done: () => Promise.reject(rateLimitError),
				finalMessage: () => Promise.resolve(mock<Anthropic.Message>({})),
				request_id: 'test-request-id'
			}));

			const messages = [vscode.LanguageModelChatMessage.User('Test message')];

			await assert.rejects(
				() => model.provideLanguageModelChatResponse(
					mockModelInfo,
					messages,
					{ requestInitiator: 'test', toolMode: vscode.LanguageModelChatToolMode.Auto },
					progress,
					cancellationToken
				),
				(error: Error) => {
					assertRateLimitErrorWithRetry(error, '30');
					return true;
				}
			);
		});

		test('throws error without retry-after when rate limited without header', async () => {
			const rateLimitError = createNativeRateLimitError();

			// Configure mock to reject with rate limit error
			mockClient.messages.stream.returns(mock<MessageStream>({
				on: () => mock<MessageStream>({}),
				abort: () => { },
				done: () => Promise.reject(rateLimitError),
				finalMessage: () => Promise.resolve(mock<Anthropic.Message>({})),
				request_id: 'test-request-id'
			}));

			const messages = [vscode.LanguageModelChatMessage.User('Test message')];

			await assert.rejects(
				() => model.provideLanguageModelChatResponse(
					mockModelInfo,
					messages,
					{ requestInitiator: 'test', toolMode: vscode.LanguageModelChatToolMode.Auto },
					progress,
					cancellationToken
				),
				(error: Error) => {
					assertRateLimitErrorWithoutRetry(error);
					return true;
				}
			);
		});
	});
});
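
These tests rely on `createNativeRateLimitError` and the two assertion helpers imported from `./utils.js`, whose bodies are not shown in this diff. A plausible sketch of those helpers, assuming the `Anthropic.APIError` constructor takes `(status, error, message, headers)` as in current SDK versions; the real implementations may differ:

```ts
import assert from 'assert';
import Anthropic from '@anthropic-ai/sdk';

// Builds a 429 APIError, optionally carrying a retry-after header.
export function createNativeRateLimitError(retryAfter?: string): Anthropic.APIError {
	const headers = new Headers(retryAfter ? { 'retry-after': retryAfter } : {});
	return new Anthropic.APIError(
		429,
		{ error: { type: 'rate_limit_error', message: 'Rate limited' } },
		'Rate limited',
		headers
	);
}

export function assertRateLimitErrorWithRetry(error: Error, retryAfter: string): void {
	assert.ok(error.message.includes(`retry after ${retryAfter} seconds`), error.message);
}

export function assertRateLimitErrorWithoutRetry(error: Error): void {
	assert.ok(error.message.includes('try again later'), error.message);
}
```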