Skip to content

Commit d18dc18

Browse files
authored
Add network info cleanup and missing file retry logic (#706)
* Clean up old `<pid>.json` files in the network info directory on monitor start (files older than 1 hour) * Search for a new SSH process after 5 consecutive failures to read the network info file (similar to existing stale file behavior) Closes #51
1 parent 4e215a0 commit d18dc18

File tree

2 files changed

+194
-27
lines changed

2 files changed

+194
-27
lines changed

src/remote/sshProcess.ts

Lines changed: 98 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ export interface SshProcessMonitorOptions {
3939
remoteSshExtensionId: string;
4040
}
4141

42+
// 1 hour cleanup threshold for old network info files
43+
const CLEANUP_MAX_AGE_MS = 60 * 60 * 1000;
44+
4245
/**
4346
* Monitors the SSH process for a Coder workspace connection and displays
4447
* network status in the VS Code status bar.
@@ -70,6 +73,50 @@ export class SshProcessMonitor implements vscode.Disposable {
7073
private pendingTimeout: NodeJS.Timeout | undefined;
7174
private lastStaleSearchTime = 0;
7275

76+
/**
77+
* Cleans up network info files older than the specified age.
78+
*/
79+
private static async cleanupOldNetworkFiles(
80+
networkInfoPath: string,
81+
maxAgeMs: number,
82+
logger: Logger,
83+
): Promise<void> {
84+
try {
85+
const files = await fs.readdir(networkInfoPath);
86+
const now = Date.now();
87+
88+
const deletedFiles: string[] = [];
89+
for (const file of files) {
90+
if (!file.endsWith(".json")) {
91+
continue;
92+
}
93+
94+
const filePath = path.join(networkInfoPath, file);
95+
try {
96+
const stats = await fs.stat(filePath);
97+
const ageMs = now - stats.mtime.getTime();
98+
99+
if (ageMs > maxAgeMs) {
100+
await fs.unlink(filePath);
101+
deletedFiles.push(file);
102+
}
103+
} catch (error) {
104+
if ((error as NodeJS.ErrnoException).code !== "ENOENT") {
105+
logger.debug(`Failed to clean up network info file ${file}`, error);
106+
}
107+
}
108+
}
109+
110+
if (deletedFiles.length > 0) {
111+
logger.debug(
112+
`Cleaned up ${deletedFiles.length} old network info file(s): ${deletedFiles.join(", ")}`,
113+
);
114+
}
115+
} catch {
116+
// Directory may not exist yet, ignore
117+
}
118+
}
119+
73120
private constructor(options: SshProcessMonitorOptions) {
74121
this.options = {
75122
...options,
@@ -91,6 +138,16 @@ export class SshProcessMonitor implements vscode.Disposable {
91138
*/
92139
public static start(options: SshProcessMonitorOptions): SshProcessMonitor {
93140
const monitor = new SshProcessMonitor(options);
141+
142+
// Clean up old network info files (non-blocking, fire-and-forget)
143+
SshProcessMonitor.cleanupOldNetworkFiles(
144+
options.networkInfoPath,
145+
CLEANUP_MAX_AGE_MS,
146+
options.logger,
147+
).catch(() => {
148+
// Ignore cleanup errors - they shouldn't affect monitoring
149+
});
150+
94151
monitor.searchForProcess().catch((err) => {
95152
options.logger.error("Error in SSH process monitor", err);
96153
});
@@ -284,48 +341,62 @@ export class SshProcessMonitor implements vscode.Disposable {
284341

285342
/**
286343
* Monitors network info and updates the status bar.
287-
* Checks file mtime to detect stale connections and trigger reconnection search.
344+
* Searches for a new process if the file is stale or unreadable.
288345
*/
289346
private async monitorNetwork(): Promise<void> {
290347
const { networkInfoPath, networkPollInterval, logger } = this.options;
291348
const staleThreshold = networkPollInterval * 5;
349+
const maxReadFailures = 5;
350+
let readFailures = 0;
292351

293352
while (!this.disposed && this.currentPid !== undefined) {
294-
const networkInfoFile = path.join(
295-
networkInfoPath,
296-
`${this.currentPid}.json`,
297-
);
353+
const filePath = path.join(networkInfoPath, `${this.currentPid}.json`);
354+
let search: { needed: true; reason: string } | { needed: false } = {
355+
needed: false,
356+
};
298357

299358
try {
300-
const stats = await fs.stat(networkInfoFile);
359+
const stats = await fs.stat(filePath);
301360
const ageMs = Date.now() - stats.mtime.getTime();
361+
readFailures = 0;
302362

303363
if (ageMs > staleThreshold) {
304-
// Prevent tight loop: if we just searched due to stale, wait before searching again
305-
const timeSinceLastSearch = Date.now() - this.lastStaleSearchTime;
306-
if (timeSinceLastSearch < staleThreshold) {
307-
await this.delay(staleThreshold - timeSinceLastSearch);
308-
continue;
309-
}
310-
311-
logger.debug(
312-
`Network info stale (${Math.round(ageMs / 1000)}s old), searching for new SSH process`,
313-
);
314-
315-
// searchForProcess will update PID if a different process is found
316-
this.lastStaleSearchTime = Date.now();
317-
await this.searchForProcess();
318-
return;
364+
search = {
365+
needed: true,
366+
reason: `Network info stale (${Math.round(ageMs / 1000)}s old)`,
367+
};
368+
} else {
369+
const content = await fs.readFile(filePath, "utf8");
370+
const network = JSON.parse(content) as NetworkInfo;
371+
const isStale = ageMs > networkPollInterval * 2;
372+
this.updateStatusBar(network, isStale);
319373
}
320-
321-
const content = await fs.readFile(networkInfoFile, "utf8");
322-
const network = JSON.parse(content) as NetworkInfo;
323-
const isStale = ageMs > this.options.networkPollInterval * 2;
324-
this.updateStatusBar(network, isStale);
325374
} catch (error) {
375+
readFailures++;
326376
logger.debug(
327-
`Failed to read network info: ${(error as Error).message}`,
377+
`Failed to read network info (attempt ${readFailures}): ${(error as Error).message}`,
328378
);
379+
if (readFailures >= maxReadFailures) {
380+
search = {
381+
needed: true,
382+
reason: `Network info missing for ${readFailures} attempts`,
383+
};
384+
}
385+
}
386+
387+
// Search for new process if needed (with throttling)
388+
if (search.needed) {
389+
const timeSinceLastSearch = Date.now() - this.lastStaleSearchTime;
390+
if (timeSinceLastSearch < staleThreshold) {
391+
await this.delay(staleThreshold - timeSinceLastSearch);
392+
continue;
393+
}
394+
395+
logger.debug(`${search.reason}, searching for new SSH process`);
396+
// searchForProcess will update PID if a different process is found
397+
this.lastStaleSearchTime = Date.now();
398+
await this.searchForProcess();
399+
return;
329400
}
330401

331402
await this.delay(networkPollInterval);

test/unit/remote/sshProcess.test.ts

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,102 @@ describe("SshProcessMonitor", () => {
423423
});
424424
});
425425

426+
describe("cleanup old network files", () => {
427+
const setOldMtime = (filePath: string) => {
428+
// Default cleanup is 1 hour; set mtime to 2 hours ago to mark as old
429+
const TWO_HOURS_AGO = Date.now() - 2 * 60 * 60 * 1000;
430+
vol.utimesSync(filePath, TWO_HOURS_AGO / 1000, TWO_HOURS_AGO / 1000);
431+
};
432+
433+
it("deletes old .json files but preserves recent and non-.json files", async () => {
434+
vol.fromJSON({
435+
"/logs/ms-vscode-remote.remote-ssh/1-Remote - SSH.log":
436+
"-> socksPort 12345 ->",
437+
"/network/old.json": "{}",
438+
"/network/recent.json": "{}",
439+
"/network/old.log": "{}",
440+
});
441+
setOldMtime("/network/old.json");
442+
setOldMtime("/network/old.log");
443+
444+
createMonitor({
445+
codeLogDir: "/logs/window1",
446+
networkInfoPath: "/network",
447+
});
448+
449+
await vi.waitFor(() => {
450+
const files = vol.readdirSync("/network");
451+
expect(files).toHaveLength(2);
452+
expect(files).toContain("old.log");
453+
expect(files).toContain("recent.json");
454+
});
455+
});
456+
457+
it("does not throw when network directory is missing or empty", () => {
458+
vol.fromJSON({
459+
"/logs/ms-vscode-remote.remote-ssh/1-Remote - SSH.log":
460+
"-> socksPort 12345 ->",
461+
});
462+
vol.mkdirSync("/empty-network", { recursive: true });
463+
464+
expect(() =>
465+
createMonitor({
466+
codeLogDir: "/logs/window1",
467+
networkInfoPath: "/nonexistent",
468+
}),
469+
).not.toThrow();
470+
471+
expect(() =>
472+
createMonitor({
473+
codeLogDir: "/logs/window1",
474+
networkInfoPath: "/empty-network",
475+
}),
476+
).not.toThrow();
477+
});
478+
});
479+
480+
describe("missing file retry logic", () => {
481+
beforeEach(() => vi.useFakeTimers());
482+
afterEach(() => vi.useRealTimers());
483+
484+
it("searches for new process after consecutive file read failures", async () => {
485+
vol.fromJSON({
486+
"/logs/ms-vscode-remote.remote-ssh/1-Remote - SSH.log":
487+
"-> socksPort 12345 ->",
488+
"/network/789.json": "{}",
489+
});
490+
// Set mtime far into the future so 789.json is always considered fresh
491+
const FRESH_MTIME = Date.now() + 1_000_000;
492+
vol.utimesSync(
493+
"/network/789.json",
494+
FRESH_MTIME / 1000,
495+
FRESH_MTIME / 1000,
496+
);
497+
498+
vi.mocked(find)
499+
.mockResolvedValueOnce([{ pid: 123, ppid: 1, name: "ssh", cmd: "ssh" }])
500+
.mockResolvedValueOnce([{ pid: 456, ppid: 1, name: "ssh", cmd: "ssh" }])
501+
.mockResolvedValueOnce([{ pid: 789, ppid: 1, name: "ssh", cmd: "ssh" }])
502+
// This will not be found since `789.json` is found and is not stale!
503+
.mockResolvedValue([{ pid: 999, ppid: 1, name: "ssh", cmd: "ssh" }]);
504+
505+
const pollInterval = 10;
506+
const monitor = createMonitor({
507+
codeLogDir: "/logs/window1",
508+
networkInfoPath: "/network",
509+
networkPollInterval: pollInterval,
510+
});
511+
512+
const pids: (number | undefined)[] = [];
513+
monitor.onPidChange((pid) => pids.push(pid));
514+
515+
// Advance enough time for the monitor to cycle through PIDs 123, 456, and find 789
516+
await vi.advanceTimersByTimeAsync(pollInterval * 100);
517+
518+
expect(pids).toEqual([123, 456, 789]);
519+
});
520+
});
521+
426522
describe("dispose", () => {
427523
it("disposes status bar", () => {
428524
const monitor = createMonitor();

0 commit comments

Comments
 (0)