Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 103 additions & 13 deletions src/managers/common/nativePythonFinder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,56 @@ const RESOLVE_TIMEOUT_MS = 30_000; // 30 seconds for single resolve
// Restart/recovery constants
const MAX_RESTART_ATTEMPTS = 3;
const RESTART_BACKOFF_BASE_MS = 1_000; // 1 second base, exponential: 1s, 2s, 4s
const MAX_CONFIGURE_TIMEOUTS_BEFORE_KILL = 2; // Kill on the 2nd consecutive timeout

/**
 * Computes the configure timeout with exponential backoff.
 *
 * Each consecutive timeout doubles the budget, so a slow-but-alive PET
 * process gets progressively more time before being declared hung.
 *
 * @param retryCount Number of consecutive configure timeouts so far
 * @returns Timeout in milliseconds: 30s, 60s, 120s, ... capped at REFRESH_TIMEOUT_MS
 */
export function getConfigureTimeoutMs(retryCount: number): number {
    const backedOff = CONFIGURE_TIMEOUT_MS * 2 ** retryCount;
    return backedOff > REFRESH_TIMEOUT_MS ? REFRESH_TIMEOUT_MS : backedOff;
}

/**
 * Encapsulates the configure retry state machine.
 *
 * Counts consecutive configure timeouts and decides whether the PET
 * process should be killed. The streak is cleared on success, on
 * non-timeout errors, and automatically once a kill is requested, so
 * only uninterrupted runs of timeouts count toward the kill threshold.
 */
export class ConfigureRetryState {
    // Number of configure timeouts seen since the last success/reset.
    private consecutiveTimeouts = 0;

    /** Number of consecutive configure timeouts observed so far. */
    get timeoutCount(): number {
        return this.consecutiveTimeouts;
    }

    /** Returns the timeout duration for the current attempt (with exponential backoff). */
    getTimeoutMs(): number {
        return getConfigureTimeoutMs(this.consecutiveTimeouts);
    }

    /** Call after a successful configure. Resets the timeout counter. */
    onSuccess(): void {
        this.consecutiveTimeouts = 0;
    }

    /**
     * Call after a configure timeout. Increments the counter and returns
     * whether the process should be killed (true = kill, false = let it continue).
     * When the kill threshold is reached, the counter resets so a fresh
     * retry cycle begins after the restart.
     */
    onTimeout(): boolean {
        this.consecutiveTimeouts += 1;
        if (this.consecutiveTimeouts < MAX_CONFIGURE_TIMEOUTS_BEFORE_KILL) {
            return false; // Let PET continue
        }
        this.consecutiveTimeouts = 0;
        return true; // Kill the process
    }

    /** Call after a non-timeout error or process restart. Resets the counter. */
    reset(): void {
        this.consecutiveTimeouts = 0;
    }
}

export async function getNativePythonToolsPath(): Promise<string> {
const envsExt = getExtension(ENVS_EXTENSION_ID);
Expand Down Expand Up @@ -119,14 +169,27 @@ interface RefreshOptions {
searchPaths?: string[];
}

/**
 * Error thrown when a JSON-RPC request times out.
 */
export class RpcTimeoutError extends Error {
    /** The RPC method whose request timed out. */
    public readonly method: string;

    constructor(method: string, timeoutMs: number) {
        super(`Request '${method}' timed out after ${timeoutMs}ms`);
        this.method = method;
        // Use the runtime constructor name so subclasses report themselves correctly.
        this.name = this.constructor.name;
    }
}

/**
* Wraps a JSON-RPC sendRequest call with a timeout.
* @param connection The JSON-RPC connection
* @param method The RPC method name
* @param params The parameters to send
* @param timeoutMs Timeout in milliseconds
* @returns The result of the request
* @throws Error if the request times out
* @throws RpcTimeoutError if the request times out
*/
async function sendRequestWithTimeout<T>(
connection: rpc.MessageConnection,
Expand All @@ -138,7 +201,7 @@ async function sendRequestWithTimeout<T>(
const timeoutPromise = new Promise<never>((_, reject) => {
const timer = setTimeout(() => {
cts.cancel();
reject(new Error(`Request '${method}' timed out after ${timeoutMs}ms`));
reject(new RpcTimeoutError(method, timeoutMs));
}, timeoutMs);
// Clear timeout if the CancellationTokenSource is disposed
cts.token.onCancellationRequested(() => clearTimeout(timer));
Expand All @@ -161,6 +224,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
private startFailed: boolean = false;
private restartAttempts: number = 0;
private isRestarting: boolean = false;
private readonly configureRetry = new ConfigureRetryState();

constructor(
private readonly outputChannel: LogOutputChannel,
Expand Down Expand Up @@ -192,8 +256,9 @@ class NativePythonFinderImpl implements NativePythonFinder {
this.restartAttempts = 0;
return environment;
} catch (ex) {
// On timeout, kill the hung process so next request triggers restart
if (ex instanceof Error && ex.message.includes('timed out')) {
// On resolve timeout (not configure — configure handles its own timeout),
// kill the hung process so next request triggers restart
if (ex instanceof RpcTimeoutError && ex.method !== 'configure') {
this.outputChannel.warn('[pet] Resolve request timed out, killing hung process for restart');
this.killProcess();
this.processExited = true;
Expand Down Expand Up @@ -260,6 +325,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
this.processExited = false;
this.startFailed = false;
this.lastConfiguration = undefined; // Force reconfiguration
this.configureRetry.reset();

// Start fresh
this.connection = this.start();
Expand Down Expand Up @@ -544,8 +610,9 @@ class NativePythonFinderImpl implements NativePythonFinder {
// Reset restart attempts on successful refresh
this.restartAttempts = 0;
} catch (ex) {
// On timeout, kill the hung process so next request triggers restart
if (ex instanceof Error && ex.message.includes('timed out')) {
// On refresh timeout (not configure — configure handles its own timeout),
// kill the hung process so next request triggers restart
if (ex instanceof RpcTimeoutError && ex.method !== 'configure') {
this.outputChannel.warn('[pet] Request timed out, killing hung process for restart');
this.killProcess();
this.processExited = true;
Expand Down Expand Up @@ -583,17 +650,40 @@ class NativePythonFinderImpl implements NativePythonFinder {
return;
}
this.outputChannel.info('[pet] configure: Sending configuration update:', JSON.stringify(options));
// Exponential backoff: 30s, 60s on retry. Capped at REFRESH_TIMEOUT_MS.
const timeoutMs = this.configureRetry.getTimeoutMs();
if (this.configureRetry.timeoutCount > 0) {
this.outputChannel.info(
`[pet] configure: Using extended timeout of ${timeoutMs}ms (retry ${this.configureRetry.timeoutCount})`,
);
}
try {
await sendRequestWithTimeout(this.connection, 'configure', options, timeoutMs);
// Only cache after success so failed/timed-out calls will retry
this.lastConfiguration = options;
await sendRequestWithTimeout(this.connection, 'configure', options, CONFIGURE_TIMEOUT_MS);
this.configureRetry.onSuccess();
} catch (ex) {
// On timeout, kill the hung process so next request triggers restart
if (ex instanceof Error && ex.message.includes('timed out')) {
this.outputChannel.warn('[pet] Configure request timed out, killing hung process for restart');
this.killProcess();
this.processExited = true;
// Clear cached config so the next call retries instead of short-circuiting via configurationEquals
this.lastConfiguration = undefined;
if (ex instanceof RpcTimeoutError) {
const shouldKill = this.configureRetry.onTimeout();
if (shouldKill) {
this.outputChannel.error(
'[pet] Configure timed out on consecutive attempts, killing hung process for restart',
);
this.killProcess();
this.processExited = true;
} else {
this.outputChannel.warn(
`[pet] Configure request timed out (attempt ${this.configureRetry.timeoutCount}/${MAX_CONFIGURE_TIMEOUTS_BEFORE_KILL}), ` +
'will retry on next request without killing process',
);
}
} else {
// Non-timeout errors reset the counter so only consecutive timeouts are counted
this.configureRetry.reset();
this.outputChannel.error('[pet] configure: Configuration error', ex);
}
this.outputChannel.error('[pet] configure: Configuration error', ex);
throw ex;
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import assert from 'node:assert';
import {
ConfigureRetryState,
RpcTimeoutError,
getConfigureTimeoutMs,
} from '../../../managers/common/nativePythonFinder';

suite('RpcTimeoutError', () => {
    // Canonical instance used by most assertions below.
    const makeConfigureError = () => new RpcTimeoutError('configure', 30000);

    test('has correct name property', () => {
        assert.strictEqual(makeConfigureError().name, 'RpcTimeoutError');
    });

    test('has correct method property', () => {
        assert.strictEqual(makeConfigureError().method, 'configure');
    });

    test('message includes method name and timeout', () => {
        const err = new RpcTimeoutError('resolve', 5000);
        assert.strictEqual(err.message, "Request 'resolve' timed out after 5000ms");
    });

    test('is instanceof Error', () => {
        const err = makeConfigureError();
        assert.ok(err instanceof Error);
        assert.ok(err instanceof RpcTimeoutError);
    });
});

suite('getConfigureTimeoutMs', () => {
    // Expected base and cap (mirror CONFIGURE_TIMEOUT_MS / REFRESH_TIMEOUT_MS).
    const BASE_MS = 30_000;
    const CAP_MS = 120_000;

    test('returns base timeout (30s) on first attempt', () => {
        assert.strictEqual(getConfigureTimeoutMs(0), BASE_MS);
    });

    test('doubles timeout on first retry (60s)', () => {
        assert.strictEqual(getConfigureTimeoutMs(1), BASE_MS * 2);
    });

    test('doubles again on second retry (120s)', () => {
        assert.strictEqual(getConfigureTimeoutMs(2), BASE_MS * 4);
    });

    test('caps at REFRESH_TIMEOUT_MS (120s) for higher retries', () => {
        // 30_000 * 2^3 = 240_000 would exceed the cap, so it is clamped.
        for (const retries of [3, 10]) {
            assert.strictEqual(getConfigureTimeoutMs(retries), CAP_MS);
        }
    });
});

suite('ConfigureRetryState', () => {
    let retryState: ConfigureRetryState;

    // Fresh state machine per test so counts never leak between cases.
    setup(() => {
        retryState = new ConfigureRetryState();
    });

    test('initial timeout count is 0', () => {
        assert.strictEqual(retryState.timeoutCount, 0);
    });

    test('initial timeout is base (30s)', () => {
        assert.strictEqual(retryState.getTimeoutMs(), 30_000);
    });

    test('onSuccess resets timeout count', () => {
        retryState.onTimeout(); // count = 1
        retryState.onSuccess();
        assert.strictEqual(retryState.timeoutCount, 0);
        assert.strictEqual(retryState.getTimeoutMs(), 30_000);
    });

    test('first timeout does not kill (returns false)', () => {
        assert.strictEqual(retryState.onTimeout(), false);
        assert.strictEqual(retryState.timeoutCount, 1);
    });

    test('first timeout increases next timeout to 60s', () => {
        retryState.onTimeout();
        assert.strictEqual(retryState.getTimeoutMs(), 60_000);
    });

    test('second consecutive timeout kills (returns true)', () => {
        retryState.onTimeout(); // count = 1
        // count = 2 → kill → counter resets to 0
        assert.strictEqual(retryState.onTimeout(), true);
        assert.strictEqual(retryState.timeoutCount, 0); // Reset after kill
    });

    test('non-timeout error resets counter via reset()', () => {
        retryState.onTimeout(); // count = 1
        retryState.reset(); // simulates non-timeout error
        assert.strictEqual(retryState.timeoutCount, 0);
        // Next timeout should not trigger kill
        assert.strictEqual(retryState.onTimeout(), false);
    });

    test('interleaved non-timeout error prevents premature kill', () => {
        retryState.onTimeout(); // count = 1
        retryState.reset(); // non-timeout error resets
        retryState.onTimeout(); // count = 1 again (not 2)
        assert.strictEqual(retryState.timeoutCount, 1);
        // Still shouldn't kill — only 1 consecutive timeout
        assert.strictEqual(retryState.getTimeoutMs(), 60_000);
    });

    test('reset after kill allows fresh retry cycle', () => {
        retryState.onTimeout();
        retryState.onTimeout(); // kill → reset
        // Counter was reset by onTimeout when it returned true
        assert.strictEqual(retryState.timeoutCount, 0);
        assert.strictEqual(retryState.getTimeoutMs(), 30_000);
        // First timeout of new cycle should not kill
        assert.strictEqual(retryState.onTimeout(), false);
    });
});
Loading