Skip to content

Commit 8b60bbc

Browse files
fix: Try locating packages using importlib (#196)
Changes the logic for listing packages to use importlib which turns out to be much faster. https://ampcode.com/threads/T-20d560ec-6811-4993-b63e-8935fa3d07f5#message-122-block-0 Adds an "integration" test around the Python script so that we can be more confident that it works. The test has both global and virtualenv packages, and both are correctly located. Also tested against `rich` and looks like the results are unchanged. The code could probably be cleaned up a bit, but I'm prioritizing speed so that we can cut a release.
1 parent b9c2453 commit 8b60bbc

File tree

8 files changed

+340
-16
lines changed

8 files changed

+340
-16
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ Project is primarily an addition to Pyright. At this time, there are no substant
66

77
## Pre-requisites
88

9+
scip-python only supports Python 3.10+.
10+
911
```
1012
$ # Install scip-python
1113
$ npm install -g @sourcegraph/scip-python

packages/pyright-scip/AGENT.md renamed to packages/pyright-scip/AGENTS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
- `npm run check-snapshots` - Check snapshot tests
99
- `npm run update-snapshots` - Update snapshot tests
1010

11+
After making changes to the codebase, run tests with:
12+
1. `npm run build-agent` - Build the development version
13+
2. `npm run check-snapshots` - Run all tests including unit tests
14+
1115
### Building
1216

1317
- `npm run webpack` - Development build

packages/pyright-scip/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# scip-python CHANGELOG
22

3+
## v0.6.6
4+
5+
- Changes package listing to use Python's `importlib` instead of
6+
`pip show` by default. On versions of pip 24.0 and older, `pip show`
7+
is much slower. This also avoids parsing of unstructured data
8+
returned by pip in favor of JSON.
9+
310
## v0.6.5
411

512
- Fixes a crash when `pip show` returns more than 1MB of data. (#151)

packages/pyright-scip/package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/pyright-scip/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@sourcegraph/scip-python",
3-
"version": "0.6.5",
3+
"version": "0.6.6",
44
"description": "SCIP indexer for Python",
55
"main": "index.js",
66
"scripts": {

packages/pyright-scip/src/virtualenv/environment.ts

Lines changed: 151 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import * as fs from 'fs';
22
import * as child_process from 'child_process';
33
import * as os from 'os';
4+
import * as path from 'path';
45
import PythonPackage from './PythonPackage';
56
import PythonEnvironment from './PythonEnvironment';
67
import { withStatus } from 'src/status';
@@ -14,6 +15,11 @@ interface PipInformation {
1415
version: string;
1516
}
1617

18+
type PipBulkShowResult =
19+
| { success: true; data: string[] }
20+
| { success: false; error: 'timeout'; message: string }
21+
| { success: false; error: 'other'; message: string; code?: number };
22+
1723
let pipCommand: string | undefined;
1824
let getPipCommand = () => {
1925
if (pipCommand === undefined) {
@@ -22,21 +28,37 @@ let getPipCommand = () => {
2228
} else if (commandExistsSync('pip')) {
2329
pipCommand = 'pip';
2430
} else {
25-
throw new Error('Could not find valid pip command');
31+
throw new Error(`Could not find valid pip command. Searched PATH: ${process.env.PATH}`);
2632
}
2733
}
2834

2935
return pipCommand;
3036
};
3137

32-
function spawnSyncWithRetry(command: string, args: string[]): child_process.SpawnSyncReturns<string> {
38+
let pythonCommand: string | undefined;
39+
let getPythonCommand = () => {
40+
if (pythonCommand === undefined) {
41+
if (commandExistsSync('python3')) {
42+
pythonCommand = 'python3';
43+
} else if (commandExistsSync('python')) {
44+
pythonCommand = 'python';
45+
} else {
46+
throw new Error(`Could not find valid python command. Searched PATH: ${process.env.PATH}`);
47+
}
48+
}
49+
50+
return pythonCommand;
51+
};
52+
53+
function spawnSyncWithRetry(command: string, args: string[], timeout?: number): child_process.SpawnSyncReturns<string> {
3354
let maxBuffer = 1 * 1024 * 1024; // Start with 1MB (original default)
3455
const maxMemory = os.totalmem() * 0.1; // Don't use more than 10% of total system memory
3556

3657
while (true) {
3758
const result = child_process.spawnSync(command, args, {
3859
encoding: 'utf8',
3960
maxBuffer: maxBuffer,
61+
timeout: timeout, // Will be undefined if not provided, which is fine
4062
});
4163

4264
const error = result.error as NodeJS.ErrnoException | null;
@@ -57,6 +79,67 @@ function spawnSyncWithRetry(command: string, args: string[]): child_process.Spaw
5779
}
5880
}
5981

82+
// Utility function for temporary directory cleanup
83+
function cleanupTempDirectory(tempDir: string): void {
84+
try {
85+
fs.rmSync(tempDir, { recursive: true, force: true });
86+
} catch (error) {
87+
console.warn(`Warning: Failed to cleanup temp directory ${tempDir}: ${error}`);
88+
}
89+
}
90+
91+
// Helper function to validate and warn about missing packages
92+
function validatePackageResults(results: PythonPackage[], requestedNames: string[]): PythonPackage[] {
93+
if (results.length !== requestedNames.length) {
94+
const foundNames = new Set(results.map((pkg) => pkg.name));
95+
const missingNames = requestedNames.filter((name) => !foundNames.has(name));
96+
console.warn(`Warning: Could not find package information for: ${missingNames.join(', ')}`);
97+
}
98+
return results;
99+
}
100+
101+
function generatePackageInfoScript(): string {
102+
return `#!/usr/bin/env python3
103+
import sys
104+
import json
105+
import importlib.metadata
106+
107+
def get_package_info(package_names):
108+
results = []
109+
package_set = set(package_names) # Use set for faster lookup
110+
111+
for dist in importlib.metadata.distributions():
112+
if dist.name in package_set:
113+
files = []
114+
115+
# Get files for this package
116+
if dist.files:
117+
for file_path in dist.files:
118+
file_str = str(file_path)
119+
120+
# Skip cached or out-of-project files
121+
if file_str.startswith('..') or '__pycache__' in file_str:
122+
continue
123+
124+
# Only include .py and .pyi files
125+
if file_str.endswith(('.py', '.pyi')):
126+
files.append(file_str)
127+
128+
results.append({
129+
'name': dist.name,
130+
'version': dist.version,
131+
'files': files
132+
})
133+
134+
return results
135+
136+
if __name__ == '__main__':
137+
package_names = set(sys.argv[1:])
138+
package_info = get_package_info(package_names)
139+
json.dump(package_info, sys.stdout)
140+
`;
141+
}
142+
60143
function pipList(): PipInformation[] {
61144
const result = spawnSyncWithRetry(getPipCommand(), ['list', '--format=json']);
62145

@@ -70,19 +153,75 @@ function pipList(): PipInformation[] {
70153
// pipBulkShow returns the results of 'pip show', one for each package.
71154
//
72155
// It doesn't cross-check if the length of the output matches that of the input.
73-
function pipBulkShow(names: string[]): string[] {
156+
function pipBulkShow(names: string[]): PipBulkShowResult {
74157
// FIXME: The performance of this scales with the number of packages that
75158
// are installed in the Python distribution, not just the number of packages
76159
// that are requested. If 10K packages are installed, this can take several
77160
// minutes. However, it's not super obvious if there is a more performant
78161
// way to do this without hand-rolling the functionality ourselves.
79-
const result = spawnSyncWithRetry(getPipCommand(), ['show', '-f', ...names]);
162+
const result = spawnSyncWithRetry(getPipCommand(), ['show', '-f', ...names], 60000); // 1 minute timeout
80163

81164
if (result.status !== 0) {
82-
throw new Error(`pip show failed with code ${result.status}: ${result.stderr}`);
165+
const error = result.error as NodeJS.ErrnoException | null;
166+
if (result.signal === 'SIGTERM' || (error && error.code === 'ETIMEDOUT')) {
167+
return {
168+
success: false,
169+
error: 'timeout',
170+
message: 'pip show timed out after 1 minute.',
171+
};
172+
}
173+
return {
174+
success: false,
175+
error: 'other',
176+
message: `pip show failed: ${result.stderr}`,
177+
code: result.status ?? undefined,
178+
};
83179
}
84180

85-
return result.stdout.split('\n---').filter((pkg) => pkg.trim());
181+
return {
182+
success: true,
183+
data: result.stdout.split('\n---').filter((pkg) => pkg.trim()),
184+
};
185+
}
186+
187+
// Get package information by running a short Python script.
188+
// If we fail to run that, attempt to use `pip show`.
189+
function gatherPackageData(packageNames: string[]): PythonPackage[] {
190+
// First try the new importlib.metadata approach
191+
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'scip-python-'));
192+
try {
193+
const scriptPath = path.join(tempDir, 'get_packages.py');
194+
const scriptContent = generatePackageInfoScript();
195+
196+
fs.writeFileSync(scriptPath, scriptContent, { mode: 0o755 });
197+
198+
const result = spawnSyncWithRetry(getPythonCommand(), [scriptPath, ...packageNames]);
199+
200+
if (result.status === 0) {
201+
const packageData = JSON.parse(result.stdout);
202+
const packages = packageData.map((pkg: any) => new PythonPackage(pkg.name, pkg.version, pkg.files));
203+
return validatePackageResults(packages, packageNames);
204+
} else {
205+
console.warn(`Python script failed with code ${result.status}: ${result.stderr}`);
206+
console.warn('Falling back to pip show approach');
207+
}
208+
} catch (error) {
209+
console.warn(`Failed to use importlib.metadata approach: ${error}`);
210+
console.warn('Falling back to pip show approach');
211+
} finally {
212+
cleanupTempDirectory(tempDir);
213+
}
214+
215+
// Fallback to original pip show approach
216+
const bulkResult = pipBulkShow(packageNames);
217+
if (!bulkResult.success) {
218+
console.warn(`Warning: Package discovery failed - ${bulkResult.message}`);
219+
console.warn('Navigation to external packages may not work correctly.');
220+
return [];
221+
}
222+
223+
const pipResults = bulkResult.data.map((shown) => PythonPackage.fromPipShow(shown));
224+
return validatePackageResults(pipResults, packageNames);
86225
}
87226

88227
export default function getEnvironment(
@@ -101,13 +240,13 @@ export default function getEnvironment(
101240
return withStatus('Evaluating python environment dependencies', (progress) => {
102241
const listed = pipList();
103242

104-
progress.message('Gathering environment information from `pip`');
105-
const bulk = pipBulkShow(listed.map((item) => item.name));
243+
progress.message('Gathering environment information');
244+
const packageNames = listed.map((item) => item.name);
245+
const info = gatherPackageData(packageNames);
106246

107-
progress.message('Analyzing dependencies');
108-
const info = bulk.map((shown) => {
109-
return PythonPackage.fromPipShow(shown);
110-
});
111247
return new PythonEnvironment(projectFiles, projectVersion, info);
112248
});
113249
}
250+
251+
// Export for testing purposes
252+
export { gatherPackageData };

0 commit comments

Comments
 (0)