1+ import { ChildProcess } from 'child_process' ;
12import * as fs from 'fs-extra' ;
23import * as path from 'path' ;
34import { PassThrough } from 'stream' ;
4- import { Disposable , ExtensionContext , LogOutputChannel , Uri } from 'vscode' ;
5+ import { CancellationTokenSource , Disposable , ExtensionContext , LogOutputChannel , Uri } from 'vscode' ;
56import * as rpc from 'vscode-jsonrpc/node' ;
67import { PythonProjectApi } from '../../api' ;
78import { spawnProcess } from '../../common/childProcess.apis' ;
@@ -14,6 +15,15 @@ import { createRunningWorkerPool, WorkerPool } from '../../common/utils/workerPo
1415import { getConfiguration , getWorkspaceFolders } from '../../common/workspace.apis' ;
1516import { noop } from './utils' ;
1617
18+ // Timeout constants for JSON-RPC requests (in milliseconds)
19+ const CONFIGURE_TIMEOUT_MS = 30_000 ; // 30 seconds for configuration
20+ const REFRESH_TIMEOUT_MS = 120_000 ; // 2 minutes for full refresh
21+ const RESOLVE_TIMEOUT_MS = 30_000 ; // 30 seconds for single resolve
22+
23+ // Restart/recovery constants
24+ const MAX_RESTART_ATTEMPTS = 3 ;
25+ const RESTART_BACKOFF_BASE_MS = 1_000 ; // 1 second base, exponential: 1s, 2s, 4s
26+
1727export async function getNativePythonToolsPath ( ) : Promise < string > {
1828 const envsExt = getExtension ( ENVS_EXTENSION_ID ) ;
1929 if ( envsExt ) {
@@ -104,11 +114,48 @@ interface RefreshOptions {
104114 searchPaths ?: string [ ] ;
105115}
106116
117+ /**
118+ * Wraps a JSON-RPC sendRequest call with a timeout.
119+ * @param connection The JSON-RPC connection
120+ * @param method The RPC method name
121+ * @param params The parameters to send
122+ * @param timeoutMs Timeout in milliseconds
123+ * @returns The result of the request
124+ * @throws Error if the request times out
125+ */
126+ async function sendRequestWithTimeout < T > (
127+ connection : rpc . MessageConnection ,
128+ method : string ,
129+ params : unknown ,
130+ timeoutMs : number ,
131+ ) : Promise < T > {
132+ const cts = new CancellationTokenSource ( ) ;
133+ const timeoutPromise = new Promise < never > ( ( _ , reject ) => {
134+ const timer = setTimeout ( ( ) => {
135+ cts . cancel ( ) ;
136+ reject ( new Error ( `Request '${ method } ' timed out after ${ timeoutMs } ms` ) ) ;
137+ } , timeoutMs ) ;
138+ // Clear timeout if the CancellationTokenSource is disposed
139+ cts . token . onCancellationRequested ( ( ) => clearTimeout ( timer ) ) ;
140+ } ) ;
141+
142+ try {
143+ return await Promise . race ( [ connection . sendRequest < T > ( method , params , cts . token ) , timeoutPromise ] ) ;
144+ } finally {
145+ cts . dispose ( ) ;
146+ }
147+ }
148+
107149class NativePythonFinderImpl implements NativePythonFinder {
108- private readonly connection : rpc . MessageConnection ;
150+ private connection : rpc . MessageConnection ;
109151 private readonly pool : WorkerPool < NativePythonEnvironmentKind | Uri [ ] | undefined , NativeInfo [ ] > ;
110152 private cache : Map < string , NativeInfo [ ] > = new Map ( ) ;
111- private readonly startDisposables : Disposable [ ] = [ ] ;
153+ private startDisposables : Disposable [ ] = [ ] ;
154+ private proc : ChildProcess | undefined ;
155+ private processExited : boolean = false ;
156+ private startFailed : boolean = false ;
157+ private restartAttempts : number = 0 ;
158+ private isRestarting : boolean = false ;
112159
113160 constructor (
114161 private readonly outputChannel : LogOutputChannel ,
@@ -125,13 +172,123 @@ class NativePythonFinderImpl implements NativePythonFinder {
125172 }
126173
127174 public async resolve ( executable : string ) : Promise < NativeEnvInfo > {
128- await this . configure ( ) ;
129- const environment = await this . connection . sendRequest < NativeEnvInfo > ( 'resolve' , {
130- executable,
131- } ) ;
175+ await this . ensureProcessRunning ( ) ;
176+ try {
177+ await this . configure ( ) ;
178+ const environment = await sendRequestWithTimeout < NativeEnvInfo > (
179+ this . connection ,
180+ 'resolve' ,
181+ { executable } ,
182+ RESOLVE_TIMEOUT_MS ,
183+ ) ;
184+
185+ this . outputChannel . info ( `Resolved Python Environment ${ environment . executable } ` ) ;
186+ // Reset restart attempts on successful request
187+ this . restartAttempts = 0 ;
188+ return environment ;
189+ } catch ( ex ) {
190+ // On timeout, kill the hung process so next request triggers restart
191+ if ( ex instanceof Error && ex . message . includes ( 'timed out' ) ) {
192+ this . outputChannel . warn ( '[pet] Resolve request timed out, killing hung process for restart' ) ;
193+ this . killProcess ( ) ;
194+ this . processExited = true ;
195+ }
196+ throw ex ;
197+ }
198+ }
199+
200+ /**
201+ * Ensures the PET process is running. If it has exited or failed, attempts to restart
202+ * with exponential backoff up to MAX_RESTART_ATTEMPTS times.
203+ * @throws Error if the process cannot be started after all retry attempts
204+ */
205+ private async ensureProcessRunning ( ) : Promise < void > {
206+ // Process is running fine
207+ if ( ! this . startFailed && ! this . processExited ) {
208+ return ;
209+ }
210+
211+ // Already in the process of restarting (prevent recursive restarts)
212+ if ( this . isRestarting ) {
213+ throw new Error ( 'Python Environment Tools (PET) is currently restarting. Please try again.' ) ;
214+ }
215+
216+ // Check if we've exceeded max restart attempts
217+ if ( this . restartAttempts >= MAX_RESTART_ATTEMPTS ) {
218+ throw new Error (
219+ `Python Environment Tools (PET) failed after ${ MAX_RESTART_ATTEMPTS } restart attempts. ` +
220+ 'Please reload the window or check the output channel for details.' ,
221+ ) ;
222+ }
223+
224+ // Attempt restart with exponential backoff
225+ await this . restart ( ) ;
226+ }
227+
228+ /**
229+ * Kills the current PET process (if running) and starts a fresh one.
230+ * Implements exponential backoff between restart attempts.
231+ */
232+ private async restart ( ) : Promise < void > {
233+ this . isRestarting = true ;
234+ this . restartAttempts ++ ;
235+
236+ const backoffMs = RESTART_BACKOFF_BASE_MS * Math . pow ( 2 , this . restartAttempts - 1 ) ;
237+ this . outputChannel . warn (
238+ `[pet] Restarting Python Environment Tools (attempt ${ this . restartAttempts } /${ MAX_RESTART_ATTEMPTS } , ` +
239+ `waiting ${ backoffMs } ms)` ,
240+ ) ;
241+
242+ try {
243+ // Kill existing process if still running
244+ this . killProcess ( ) ;
245+
246+ // Dispose existing connection and streams
247+ this . startDisposables . forEach ( ( d ) => d . dispose ( ) ) ;
248+ this . startDisposables = [ ] ;
249+
250+ // Wait with exponential backoff before restarting
251+ await new Promise ( ( resolve ) => setTimeout ( resolve , backoffMs ) ) ;
132252
133- this . outputChannel . info ( `Resolved Python Environment ${ environment . executable } ` ) ;
134- return environment ;
253+ // Reset state flags
254+ this . processExited = false ;
255+ this . startFailed = false ;
256+ this . lastConfiguration = undefined ; // Force reconfiguration
257+
258+ // Start fresh
259+ this . connection = this . start ( ) ;
260+
261+ this . outputChannel . info ( '[pet] Python Environment Tools restarted successfully' ) ;
262+
263+ // Reset restart attempts on successful start (process didn't immediately fail)
264+ // We'll reset this only after a successful request completes
265+ } catch ( ex ) {
266+ this . outputChannel . error ( '[pet] Failed to restart Python Environment Tools:' , ex ) ;
267+ throw ex ;
268+ } finally {
269+ this . isRestarting = false ;
270+ }
271+ }
272+
273+ /**
274+ * Attempts to kill the PET process. Used during restart and timeout recovery.
275+ */
276+ private killProcess ( ) : void {
277+ if ( this . proc && this . proc . exitCode === null ) {
278+ try {
279+ this . outputChannel . info ( '[pet] Killing hung/crashed PET process' ) ;
280+ this . proc . kill ( 'SIGTERM' ) ;
281+ // Give it a moment to terminate gracefully, then force kill
282+ setTimeout ( ( ) => {
283+ if ( this . proc && this . proc . exitCode === null ) {
284+ this . proc . kill ( 'SIGKILL' ) ;
285+ }
286+ } , 500 ) ;
287+ } catch ( ex ) {
288+ this . outputChannel . error ( '[pet] Error killing process:' , ex ) ;
289+ }
290+ }
291+ this . proc = undefined ;
135292 }
136293
137294 public async refresh ( hardRefresh : boolean , options ?: NativePythonEnvironmentKind | Uri [ ] ) : Promise < NativeInfo [ ] > {
@@ -224,12 +381,35 @@ class NativePythonFinderImpl implements NativePythonFinder {
224381 // we have got the exit event.
225382 const readable = new PassThrough ( ) ;
226383 const writable = new PassThrough ( ) ;
384+
227385 try {
228- const proc = spawnProcess ( this . toolPath , [ 'server' ] , { env : process . env , stdio : 'pipe' } ) ;
229- proc . stdout . pipe ( readable , { end : false } ) ;
230- proc . stderr . on ( 'data' , ( data ) => this . outputChannel . error ( `[pet] ${ data . toString ( ) } ` ) ) ;
231- writable . pipe ( proc . stdin , { end : false } ) ;
386+ this . proc = spawnProcess ( this . toolPath , [ 'server' ] , { env : process . env , stdio : 'pipe' } ) ;
387+
388+ if ( ! this . proc . stdout || ! this . proc . stderr || ! this . proc . stdin ) {
389+ throw new Error ( 'Failed to create stdio streams for PET process' ) ;
390+ }
232391
392+ this . proc . stdout . pipe ( readable , { end : false } ) ;
393+ this . proc . stderr . on ( 'data' , ( data ) => this . outputChannel . error ( `[pet] ${ data . toString ( ) } ` ) ) ;
394+ writable . pipe ( this . proc . stdin , { end : false } ) ;
395+
396+ // Handle process exit - mark as exited so pending requests fail fast
397+ this . proc . on ( 'exit' , ( code , signal ) => {
398+ this . processExited = true ;
399+ if ( code !== 0 ) {
400+ this . outputChannel . error (
401+ `[pet] Python Environment Tools exited unexpectedly with code ${ code } , signal ${ signal } ` ,
402+ ) ;
403+ }
404+ } ) ;
405+
406+ // Handle process errors (e.g., ENOENT if executable not found)
407+ this . proc . on ( 'error' , ( err ) => {
408+ this . processExited = true ;
409+ this . outputChannel . error ( '[pet] Process error:' , err ) ;
410+ } ) ;
411+
412+ const proc = this . proc ;
233413 this . startDisposables . push ( {
234414 dispose : ( ) => {
235415 try {
@@ -242,7 +422,11 @@ class NativePythonFinderImpl implements NativePythonFinder {
242422 } ,
243423 } ) ;
244424 } catch ( ex ) {
425+ // Mark start as failed so all subsequent requests fail immediately
426+ this . startFailed = true ;
245427 this . outputChannel . error ( `[pet] Error starting Python Finder ${ this . toolPath } server` , ex ) ;
428+ // Don't continue - throw so caller knows spawn failed
429+ throw ex ;
246430 }
247431 const connection = rpc . createMessageConnection (
248432 new rpc . StreamMessageReader ( readable ) ,
@@ -287,6 +471,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
287471 }
288472
289473 private async doRefresh ( options ?: NativePythonEnvironmentKind | Uri [ ] ) : Promise < NativeInfo [ ] > {
474+ await this . ensureProcessRunning ( ) ;
290475 const disposables : Disposable [ ] = [ ] ;
291476 const unresolved : Promise < void > [ ] = [ ] ;
292477 const nativeInfo : NativeInfo [ ] = [ ] ;
@@ -298,10 +483,12 @@ class NativePythonFinderImpl implements NativePythonFinder {
298483 this . outputChannel . info ( `Discovered env: ${ data . executable || data . prefix } ` ) ;
299484 if ( data . executable && ( ! data . version || ! data . prefix ) ) {
300485 unresolved . push (
301- this . connection
302- . sendRequest < NativeEnvInfo > ( 'resolve' , {
303- executable : data . executable ,
304- } )
486+ sendRequestWithTimeout < NativeEnvInfo > (
487+ this . connection ,
488+ 'resolve' ,
489+ { executable : data . executable } ,
490+ RESOLVE_TIMEOUT_MS ,
491+ )
305492 . then ( ( environment : NativeEnvInfo ) => {
306493 this . outputChannel . info (
307494 `Resolved environment during PET refresh: ${ environment . executable } ` ,
@@ -321,9 +508,23 @@ class NativePythonFinderImpl implements NativePythonFinder {
321508 nativeInfo . push ( data ) ;
322509 } ) ,
323510 ) ;
324- await this . connection . sendRequest < { duration : number } > ( 'refresh' , refreshOptions ) ;
511+ await sendRequestWithTimeout < { duration : number } > (
512+ this . connection ,
513+ 'refresh' ,
514+ refreshOptions ,
515+ REFRESH_TIMEOUT_MS ,
516+ ) ;
325517 await Promise . all ( unresolved ) ;
518+
519+ // Reset restart attempts on successful refresh
520+ this . restartAttempts = 0 ;
326521 } catch ( ex ) {
522+ // On timeout, kill the hung process so next request triggers restart
523+ if ( ex instanceof Error && ex . message . includes ( 'timed out' ) ) {
524+ this . outputChannel . warn ( '[pet] Request timed out, killing hung process for restart' ) ;
525+ this . killProcess ( ) ;
526+ this . processExited = true ;
527+ }
327528 this . outputChannel . error ( '[pet] Error refreshing' , ex ) ;
328529 throw ex ;
329530 } finally {
@@ -358,9 +559,16 @@ class NativePythonFinderImpl implements NativePythonFinder {
358559 this . outputChannel . info ( '[pet] configure: Sending configuration update:' , JSON . stringify ( options ) ) ;
359560 try {
360561 this . lastConfiguration = options ;
361- await this . connection . sendRequest ( 'configure' , options ) ;
562+ await sendRequestWithTimeout ( this . connection , 'configure' , options , CONFIGURE_TIMEOUT_MS ) ;
362563 } catch ( ex ) {
564+ // On timeout, kill the hung process so next request triggers restart
565+ if ( ex instanceof Error && ex . message . includes ( 'timed out' ) ) {
566+ this . outputChannel . warn ( '[pet] Configure request timed out, killing hung process for restart' ) ;
567+ this . killProcess ( ) ;
568+ this . processExited = true ;
569+ }
363570 this . outputChannel . error ( '[pet] configure: Configuration error' , ex ) ;
571+ throw ex ;
364572 }
365573 }
366574}
0 commit comments