Skip to content

Commit

Permalink
Cache glob resolutions in import graph (#13413)
Browse files Browse the repository at this point in the history
## Summary

These are often repeated; caching the resolutions can have a huge
impact.
  • Loading branch information
charliermarsh authored Sep 20, 2024
1 parent 4e935f7 commit 770b276
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 37 deletions.
110 changes: 76 additions & 34 deletions crates/ruff/src/commands/analyze_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ use ruff_linter::{warn_user, warn_user_once};
use ruff_python_ast::{PySourceType, SourceType};
use ruff_workspace::resolver::{python_files_in_path, ResolvedFile};
use rustc_hash::FxHashMap;
use std::path::Path;
use std::sync::Arc;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};

/// Generate an import map.
pub(crate) fn analyze_graph(
Expand Down Expand Up @@ -51,7 +51,7 @@ pub(crate) fn analyze_graph(
.map(|(path, package)| (path.to_path_buf(), package.map(Path::to_path_buf)))
.collect::<FxHashMap<_, _>>();

// Create a database for each source root.
// Create a database from the source roots.
let db = ModuleDb::from_src_roots(
package_roots
.values()
Expand All @@ -61,8 +61,11 @@ pub(crate) fn analyze_graph(
.filter_map(|path| SystemPathBuf::from_path_buf(path).ok()),
)?;

// Create a cache for resolved globs.
let glob_resolver = Arc::new(Mutex::new(GlobResolver::default()));

// Collect and resolve the imports for each file.
let result = Arc::new(std::sync::Mutex::new(Vec::new()));
let result = Arc::new(Mutex::new(Vec::new()));
let inner_result = Arc::clone(&result);

rayon::scope(move |scope| {
Expand Down Expand Up @@ -111,6 +114,7 @@ pub(crate) fn analyze_graph(
let db = db.snapshot();
let root = root.clone();
let result = inner_result.clone();
let glob_resolver = glob_resolver.clone();
scope.spawn(move |_| {
// Identify any imports via static analysis.
let mut imports =
Expand All @@ -120,38 +124,12 @@ pub(crate) fn analyze_graph(
ModuleImports::default()
});

debug!("Discovered {} imports for {}", imports.len(), path);

// Append any imports that were statically defined in the configuration.
if let Some((root, globs)) = include_dependencies {
match globwalk::GlobWalkerBuilder::from_patterns(root, &globs)
.file_type(globwalk::FileType::FILE)
.build()
{
Ok(walker) => {
for entry in walker {
let entry = match entry {
Ok(entry) => entry,
Err(err) => {
warn!("Failed to read glob entry: {err}");
continue;
}
};
let path = match SystemPathBuf::from_path_buf(entry.into_path()) {
Ok(path) => path,
Err(err) => {
warn!(
"Failed to convert path to system path: {}",
err.display()
);
continue;
}
};
imports.insert(path);
}
}
Err(err) => {
warn!("Failed to read glob walker: {err}");
}
}
let mut glob_resolver = glob_resolver.lock().unwrap();
imports.extend(glob_resolver.resolve(root, globs));
}

// Convert the path (and imports) to be relative to the working directory.
Expand Down Expand Up @@ -180,3 +158,67 @@ pub(crate) fn analyze_graph(

Ok(ExitStatus::Success)
}

/// A resolver for glob sets.
#[derive(Default, Debug)]
struct GlobResolver {
cache: GlobCache,
}

impl GlobResolver {
/// Resolve a set of globs, anchored at a given root.
fn resolve(&mut self, root: PathBuf, globs: Vec<String>) -> Vec<SystemPathBuf> {
if let Some(cached) = self.cache.get(&root, &globs) {
return cached.clone();
}

let walker = match globwalk::GlobWalkerBuilder::from_patterns(&root, &globs)
.file_type(globwalk::FileType::FILE)
.build()
{
Ok(walker) => walker,
Err(err) => {
warn!("Failed to read glob walker: {err}");
return Vec::new();
}
};

let mut paths = Vec::new();
for entry in walker {
let entry = match entry {
Ok(entry) => entry,
Err(err) => {
warn!("Failed to read glob entry: {err}");
continue;
}
};
let path = match SystemPathBuf::from_path_buf(entry.into_path()) {
Ok(path) => path,
Err(err) => {
warn!("Failed to convert path to system path: {}", err.display());
continue;
}
};
paths.push(path);
}

self.cache.insert(root, globs, paths.clone());
paths
}
}

/// A cache for resolved globs.
#[derive(Default, Debug)]
struct GlobCache(FxHashMap<PathBuf, FxHashMap<Vec<String>, Vec<SystemPathBuf>>>);

impl GlobCache {
/// Insert a resolved glob.
fn insert(&mut self, root: PathBuf, globs: Vec<String>, paths: Vec<SystemPathBuf>) {
self.0.entry(root).or_default().insert(globs, paths);
}

/// Get a resolved glob.
fn get(&self, root: &Path, globs: &[String]) -> Option<&Vec<SystemPathBuf>> {
self.0.get(root).and_then(|map| map.get(globs))
}
}
7 changes: 5 additions & 2 deletions crates/ruff/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ pub fn run(
Command::Check(args) => check(args, global_options),
Command::Format(args) => format(args, global_options),
Command::Server(args) => server(args),
Command::Analyze(AnalyzeCommand::Graph(args)) => graph_build(args, global_options),
Command::Analyze(AnalyzeCommand::Graph(args)) => analyze_graph(args, global_options),
}
}

Expand All @@ -202,7 +202,10 @@ fn format(args: FormatCommand, global_options: GlobalConfigArgs) -> Result<ExitS
}
}

fn graph_build(args: AnalyzeGraphCommand, global_options: GlobalConfigArgs) -> Result<ExitStatus> {
fn analyze_graph(
args: AnalyzeGraphCommand,
global_options: GlobalConfigArgs,
) -> Result<ExitStatus> {
let (cli, config_arguments) = args.partition(global_options)?;

commands::analyze_graph::analyze_graph(cli, &config_arguments)
Expand Down
3 changes: 2 additions & 1 deletion crates/ruff/tests/analyze_graph.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Tests the interaction of the `analyze graph` command.

#![cfg(not(target_family = "wasm"))]
#![cfg(not(target_arch = "wasm32"))]
#![cfg(not(windows))]

use assert_fs::prelude::*;
use std::process::Command;
Expand Down
5 changes: 5 additions & 0 deletions crates/ruff_graph/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ impl ModuleImports {
self.0.insert(path);
}

/// Extend the module imports with additional file paths.
pub fn extend(&mut self, paths: impl IntoIterator<Item = SystemPathBuf>) {
self.0.extend(paths);
}

/// Returns `true` if the module imports are empty.
pub fn is_empty(&self) -> bool {
self.0.is_empty()
Expand Down

0 comments on commit 770b276

Please sign in to comment.