Skip to content

Commit c16e671

Browse files
committed
feat: implement timeout handling for JSON-RPC requests in NativePythonFinder
1 parent 149dc8e commit c16e671

File tree

1 file changed

+227
-19
lines changed

1 file changed

+227
-19
lines changed

src/managers/common/nativePythonFinder.ts

Lines changed: 227 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
import { ChildProcess } from 'child_process';
12
import * as fs from 'fs-extra';
23
import * as path from 'path';
34
import { PassThrough } from 'stream';
4-
import { Disposable, ExtensionContext, LogOutputChannel, Uri } from 'vscode';
5+
import { CancellationTokenSource, Disposable, ExtensionContext, LogOutputChannel, Uri } from 'vscode';
56
import * as rpc from 'vscode-jsonrpc/node';
67
import { PythonProjectApi } from '../../api';
78
import { spawnProcess } from '../../common/childProcess.apis';
@@ -14,6 +15,15 @@ import { createRunningWorkerPool, WorkerPool } from '../../common/utils/workerPo
1415
import { getConfiguration, getWorkspaceFolders } from '../../common/workspace.apis';
1516
import { noop } from './utils';
1617

18+
// Timeout constants for JSON-RPC requests (in milliseconds)
19+
const CONFIGURE_TIMEOUT_MS = 30_000; // 30 seconds for configuration
20+
const REFRESH_TIMEOUT_MS = 120_000; // 2 minutes for full refresh
21+
const RESOLVE_TIMEOUT_MS = 30_000; // 30 seconds for single resolve
22+
23+
// Restart/recovery constants
24+
const MAX_RESTART_ATTEMPTS = 3;
25+
const RESTART_BACKOFF_BASE_MS = 1_000; // 1 second base, exponential: 1s, 2s, 4s
26+
1727
export async function getNativePythonToolsPath(): Promise<string> {
1828
const envsExt = getExtension(ENVS_EXTENSION_ID);
1929
if (envsExt) {
@@ -104,11 +114,48 @@ interface RefreshOptions {
104114
searchPaths?: string[];
105115
}
106116

117+
/**
118+
* Wraps a JSON-RPC sendRequest call with a timeout.
119+
* @param connection The JSON-RPC connection
120+
* @param method The RPC method name
121+
* @param params The parameters to send
122+
* @param timeoutMs Timeout in milliseconds
123+
* @returns The result of the request
124+
* @throws Error if the request times out
125+
*/
126+
async function sendRequestWithTimeout<T>(
127+
connection: rpc.MessageConnection,
128+
method: string,
129+
params: unknown,
130+
timeoutMs: number,
131+
): Promise<T> {
132+
const cts = new CancellationTokenSource();
133+
const timeoutPromise = new Promise<never>((_, reject) => {
134+
const timer = setTimeout(() => {
135+
cts.cancel();
136+
reject(new Error(`Request '${method}' timed out after ${timeoutMs}ms`));
137+
}, timeoutMs);
138+
// Clear timeout if the CancellationTokenSource is disposed
139+
cts.token.onCancellationRequested(() => clearTimeout(timer));
140+
});
141+
142+
try {
143+
return await Promise.race([connection.sendRequest<T>(method, params, cts.token), timeoutPromise]);
144+
} finally {
145+
cts.dispose();
146+
}
147+
}
148+
107149
class NativePythonFinderImpl implements NativePythonFinder {
108-
private readonly connection: rpc.MessageConnection;
150+
private connection: rpc.MessageConnection;
109151
private readonly pool: WorkerPool<NativePythonEnvironmentKind | Uri[] | undefined, NativeInfo[]>;
110152
private cache: Map<string, NativeInfo[]> = new Map();
111-
private readonly startDisposables: Disposable[] = [];
153+
private startDisposables: Disposable[] = [];
154+
private proc: ChildProcess | undefined;
155+
private processExited: boolean = false;
156+
private startFailed: boolean = false;
157+
private restartAttempts: number = 0;
158+
private isRestarting: boolean = false;
112159

113160
constructor(
114161
private readonly outputChannel: LogOutputChannel,
@@ -125,13 +172,123 @@ class NativePythonFinderImpl implements NativePythonFinder {
125172
}
126173

127174
public async resolve(executable: string): Promise<NativeEnvInfo> {
128-
await this.configure();
129-
const environment = await this.connection.sendRequest<NativeEnvInfo>('resolve', {
130-
executable,
131-
});
175+
await this.ensureProcessRunning();
176+
try {
177+
await this.configure();
178+
const environment = await sendRequestWithTimeout<NativeEnvInfo>(
179+
this.connection,
180+
'resolve',
181+
{ executable },
182+
RESOLVE_TIMEOUT_MS,
183+
);
184+
185+
this.outputChannel.info(`Resolved Python Environment ${environment.executable}`);
186+
// Reset restart attempts on successful request
187+
this.restartAttempts = 0;
188+
return environment;
189+
} catch (ex) {
190+
// On timeout, kill the hung process so next request triggers restart
191+
if (ex instanceof Error && ex.message.includes('timed out')) {
192+
this.outputChannel.warn('[pet] Resolve request timed out, killing hung process for restart');
193+
this.killProcess();
194+
this.processExited = true;
195+
}
196+
throw ex;
197+
}
198+
}
199+
200+
/**
201+
* Ensures the PET process is running. If it has exited or failed, attempts to restart
202+
* with exponential backoff up to MAX_RESTART_ATTEMPTS times.
203+
* @throws Error if the process cannot be started after all retry attempts
204+
*/
205+
private async ensureProcessRunning(): Promise<void> {
206+
// Process is running fine
207+
if (!this.startFailed && !this.processExited) {
208+
return;
209+
}
210+
211+
// Already in the process of restarting (prevent recursive restarts)
212+
if (this.isRestarting) {
213+
throw new Error('Python Environment Tools (PET) is currently restarting. Please try again.');
214+
}
215+
216+
// Check if we've exceeded max restart attempts
217+
if (this.restartAttempts >= MAX_RESTART_ATTEMPTS) {
218+
throw new Error(
219+
`Python Environment Tools (PET) failed after ${MAX_RESTART_ATTEMPTS} restart attempts. ` +
220+
'Please reload the window or check the output channel for details.',
221+
);
222+
}
223+
224+
// Attempt restart with exponential backoff
225+
await this.restart();
226+
}
227+
228+
/**
229+
* Kills the current PET process (if running) and starts a fresh one.
230+
* Implements exponential backoff between restart attempts.
231+
*/
232+
private async restart(): Promise<void> {
233+
this.isRestarting = true;
234+
this.restartAttempts++;
235+
236+
const backoffMs = RESTART_BACKOFF_BASE_MS * Math.pow(2, this.restartAttempts - 1);
237+
this.outputChannel.warn(
238+
`[pet] Restarting Python Environment Tools (attempt ${this.restartAttempts}/${MAX_RESTART_ATTEMPTS}, ` +
239+
`waiting ${backoffMs}ms)`,
240+
);
241+
242+
try {
243+
// Kill existing process if still running
244+
this.killProcess();
245+
246+
// Dispose existing connection and streams
247+
this.startDisposables.forEach((d) => d.dispose());
248+
this.startDisposables = [];
249+
250+
// Wait with exponential backoff before restarting
251+
await new Promise((resolve) => setTimeout(resolve, backoffMs));
132252

133-
this.outputChannel.info(`Resolved Python Environment ${environment.executable}`);
134-
return environment;
253+
// Reset state flags
254+
this.processExited = false;
255+
this.startFailed = false;
256+
this.lastConfiguration = undefined; // Force reconfiguration
257+
258+
// Start fresh
259+
this.connection = this.start();
260+
261+
this.outputChannel.info('[pet] Python Environment Tools restarted successfully');
262+
263+
// Reset restart attempts on successful start (process didn't immediately fail)
264+
// We'll reset this only after a successful request completes
265+
} catch (ex) {
266+
this.outputChannel.error('[pet] Failed to restart Python Environment Tools:', ex);
267+
throw ex;
268+
} finally {
269+
this.isRestarting = false;
270+
}
271+
}
272+
273+
/**
274+
* Attempts to kill the PET process. Used during restart and timeout recovery.
275+
*/
276+
private killProcess(): void {
277+
if (this.proc && this.proc.exitCode === null) {
278+
try {
279+
this.outputChannel.info('[pet] Killing hung/crashed PET process');
280+
this.proc.kill('SIGTERM');
281+
// Give it a moment to terminate gracefully, then force kill
282+
setTimeout(() => {
283+
if (this.proc && this.proc.exitCode === null) {
284+
this.proc.kill('SIGKILL');
285+
}
286+
}, 500);
287+
} catch (ex) {
288+
this.outputChannel.error('[pet] Error killing process:', ex);
289+
}
290+
}
291+
this.proc = undefined;
135292
}
136293

137294
public async refresh(hardRefresh: boolean, options?: NativePythonEnvironmentKind | Uri[]): Promise<NativeInfo[]> {
@@ -224,12 +381,35 @@ class NativePythonFinderImpl implements NativePythonFinder {
224381
// we have got the exit event.
225382
const readable = new PassThrough();
226383
const writable = new PassThrough();
384+
227385
try {
228-
const proc = spawnProcess(this.toolPath, ['server'], { env: process.env, stdio: 'pipe' });
229-
proc.stdout.pipe(readable, { end: false });
230-
proc.stderr.on('data', (data) => this.outputChannel.error(`[pet] ${data.toString()}`));
231-
writable.pipe(proc.stdin, { end: false });
386+
this.proc = spawnProcess(this.toolPath, ['server'], { env: process.env, stdio: 'pipe' });
387+
388+
if (!this.proc.stdout || !this.proc.stderr || !this.proc.stdin) {
389+
throw new Error('Failed to create stdio streams for PET process');
390+
}
232391

392+
this.proc.stdout.pipe(readable, { end: false });
393+
this.proc.stderr.on('data', (data) => this.outputChannel.error(`[pet] ${data.toString()}`));
394+
writable.pipe(this.proc.stdin, { end: false });
395+
396+
// Handle process exit - mark as exited so pending requests fail fast
397+
this.proc.on('exit', (code, signal) => {
398+
this.processExited = true;
399+
if (code !== 0) {
400+
this.outputChannel.error(
401+
`[pet] Python Environment Tools exited unexpectedly with code ${code}, signal ${signal}`,
402+
);
403+
}
404+
});
405+
406+
// Handle process errors (e.g., ENOENT if executable not found)
407+
this.proc.on('error', (err) => {
408+
this.processExited = true;
409+
this.outputChannel.error('[pet] Process error:', err);
410+
});
411+
412+
const proc = this.proc;
233413
this.startDisposables.push({
234414
dispose: () => {
235415
try {
@@ -242,7 +422,11 @@ class NativePythonFinderImpl implements NativePythonFinder {
242422
},
243423
});
244424
} catch (ex) {
425+
// Mark start as failed so all subsequent requests fail immediately
426+
this.startFailed = true;
245427
this.outputChannel.error(`[pet] Error starting Python Finder ${this.toolPath} server`, ex);
428+
// Don't continue - throw so caller knows spawn failed
429+
throw ex;
246430
}
247431
const connection = rpc.createMessageConnection(
248432
new rpc.StreamMessageReader(readable),
@@ -287,6 +471,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
287471
}
288472

289473
private async doRefresh(options?: NativePythonEnvironmentKind | Uri[]): Promise<NativeInfo[]> {
474+
await this.ensureProcessRunning();
290475
const disposables: Disposable[] = [];
291476
const unresolved: Promise<void>[] = [];
292477
const nativeInfo: NativeInfo[] = [];
@@ -298,10 +483,12 @@ class NativePythonFinderImpl implements NativePythonFinder {
298483
this.outputChannel.info(`Discovered env: ${data.executable || data.prefix}`);
299484
if (data.executable && (!data.version || !data.prefix)) {
300485
unresolved.push(
301-
this.connection
302-
.sendRequest<NativeEnvInfo>('resolve', {
303-
executable: data.executable,
304-
})
486+
sendRequestWithTimeout<NativeEnvInfo>(
487+
this.connection,
488+
'resolve',
489+
{ executable: data.executable },
490+
RESOLVE_TIMEOUT_MS,
491+
)
305492
.then((environment: NativeEnvInfo) => {
306493
this.outputChannel.info(
307494
`Resolved environment during PET refresh: ${environment.executable}`,
@@ -321,9 +508,23 @@ class NativePythonFinderImpl implements NativePythonFinder {
321508
nativeInfo.push(data);
322509
}),
323510
);
324-
await this.connection.sendRequest<{ duration: number }>('refresh', refreshOptions);
511+
await sendRequestWithTimeout<{ duration: number }>(
512+
this.connection,
513+
'refresh',
514+
refreshOptions,
515+
REFRESH_TIMEOUT_MS,
516+
);
325517
await Promise.all(unresolved);
518+
519+
// Reset restart attempts on successful refresh
520+
this.restartAttempts = 0;
326521
} catch (ex) {
522+
// On timeout, kill the hung process so next request triggers restart
523+
if (ex instanceof Error && ex.message.includes('timed out')) {
524+
this.outputChannel.warn('[pet] Request timed out, killing hung process for restart');
525+
this.killProcess();
526+
this.processExited = true;
527+
}
327528
this.outputChannel.error('[pet] Error refreshing', ex);
328529
throw ex;
329530
} finally {
@@ -358,9 +559,16 @@ class NativePythonFinderImpl implements NativePythonFinder {
358559
this.outputChannel.info('[pet] configure: Sending configuration update:', JSON.stringify(options));
359560
try {
360561
this.lastConfiguration = options;
361-
await this.connection.sendRequest('configure', options);
562+
await sendRequestWithTimeout(this.connection, 'configure', options, CONFIGURE_TIMEOUT_MS);
362563
} catch (ex) {
564+
// On timeout, kill the hung process so next request triggers restart
565+
if (ex instanceof Error && ex.message.includes('timed out')) {
566+
this.outputChannel.warn('[pet] Configure request timed out, killing hung process for restart');
567+
this.killProcess();
568+
this.processExited = true;
569+
}
363570
this.outputChannel.error('[pet] configure: Configuration error', ex);
571+
throw ex;
364572
}
365573
}
366574
}

0 commit comments

Comments
 (0)