From 770b276c213e712e6ec5e65752475542543681d4 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 19 Sep 2024 22:24:06 -0400 Subject: [PATCH] Cache glob resolutions in import graph (#13413) ## Summary These are often repeated; caching the resolutions can have a huge impact. --- crates/ruff/src/commands/analyze_graph.rs | 110 +++++++++++++++------- crates/ruff/src/lib.rs | 7 +- crates/ruff/tests/analyze_graph.rs | 3 +- crates/ruff_graph/src/lib.rs | 5 + 4 files changed, 88 insertions(+), 37 deletions(-) diff --git a/crates/ruff/src/commands/analyze_graph.rs b/crates/ruff/src/commands/analyze_graph.rs index 9fb138553b27f..d12d1be23231a 100644 --- a/crates/ruff/src/commands/analyze_graph.rs +++ b/crates/ruff/src/commands/analyze_graph.rs @@ -10,8 +10,8 @@ use ruff_linter::{warn_user, warn_user_once}; use ruff_python_ast::{PySourceType, SourceType}; use ruff_workspace::resolver::{python_files_in_path, ResolvedFile}; use rustc_hash::FxHashMap; -use std::path::Path; -use std::sync::Arc; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, Mutex}; /// Generate an import map. pub(crate) fn analyze_graph( @@ -51,7 +51,7 @@ pub(crate) fn analyze_graph( .map(|(path, package)| (path.to_path_buf(), package.map(Path::to_path_buf))) .collect::>(); - // Create a database for each source root. + // Create a database from the source roots. let db = ModuleDb::from_src_roots( package_roots .values() @@ -61,8 +61,11 @@ pub(crate) fn analyze_graph( .filter_map(|path| SystemPathBuf::from_path_buf(path).ok()), )?; + // Create a cache for resolved globs. + let glob_resolver = Arc::new(Mutex::new(GlobResolver::default())); + // Collect and resolve the imports for each file. - let result = Arc::new(std::sync::Mutex::new(Vec::new())); + let result = Arc::new(Mutex::new(Vec::new())); let inner_result = Arc::clone(&result); rayon::scope(move |scope| { @@ -111,6 +114,7 @@ pub(crate) fn analyze_graph( let db = db.snapshot(); let root = root.clone(); let result = inner_result.clone(); + let glob_resolver = glob_resolver.clone(); scope.spawn(move |_| { // Identify any imports via static analysis. let mut imports = @@ -120,38 +124,12 @@ pub(crate) fn analyze_graph( ModuleImports::default() }); + debug!("Discovered {} imports for {}", imports.len(), path); + // Append any imports that were statically defined in the configuration. if let Some((root, globs)) = include_dependencies { - match globwalk::GlobWalkerBuilder::from_patterns(root, &globs) - .file_type(globwalk::FileType::FILE) - .build() - { - Ok(walker) => { - for entry in walker { - let entry = match entry { - Ok(entry) => entry, - Err(err) => { - warn!("Failed to read glob entry: {err}"); - continue; - } - }; - let path = match SystemPathBuf::from_path_buf(entry.into_path()) { - Ok(path) => path, - Err(err) => { - warn!( - "Failed to convert path to system path: {}", - err.display() - ); - continue; - } - }; - imports.insert(path); - } - } - Err(err) => { - warn!("Failed to read glob walker: {err}"); - } - } + let mut glob_resolver = glob_resolver.lock().unwrap(); + imports.extend(glob_resolver.resolve(root, globs)); } // Convert the path (and imports) to be relative to the working directory. @@ -180,3 +158,67 @@ pub(crate) fn analyze_graph( Ok(ExitStatus::Success) } + +/// A resolver for glob sets. +#[derive(Default, Debug)] +struct GlobResolver { + cache: GlobCache, +} + +impl GlobResolver { + /// Resolve a set of globs, anchored at a given root. + fn resolve(&mut self, root: PathBuf, globs: Vec) -> Vec { + if let Some(cached) = self.cache.get(&root, &globs) { + return cached.clone(); + } + + let walker = match globwalk::GlobWalkerBuilder::from_patterns(&root, &globs) + .file_type(globwalk::FileType::FILE) + .build() + { + Ok(walker) => walker, + Err(err) => { + warn!("Failed to read glob walker: {err}"); + return Vec::new(); + } + }; + + let mut paths = Vec::new(); + for entry in walker { + let entry = match entry { + Ok(entry) => entry, + Err(err) => { + warn!("Failed to read glob entry: {err}"); + continue; + } + }; + let path = match SystemPathBuf::from_path_buf(entry.into_path()) { + Ok(path) => path, + Err(err) => { + warn!("Failed to convert path to system path: {}", err.display()); + continue; + } + }; + paths.push(path); + } + + self.cache.insert(root, globs, paths.clone()); + paths + } +} + +/// A cache for resolved globs. +#[derive(Default, Debug)] +struct GlobCache(FxHashMap, Vec>>); + +impl GlobCache { + /// Insert a resolved glob. + fn insert(&mut self, root: PathBuf, globs: Vec, paths: Vec) { + self.0.entry(root).or_default().insert(globs, paths); + } + + /// Get a resolved glob. + fn get(&self, root: &Path, globs: &[String]) -> Option<&Vec> { + self.0.get(root).and_then(|map| map.get(globs)) + } +} diff --git a/crates/ruff/src/lib.rs b/crates/ruff/src/lib.rs index bda58d4a8a833..b88ae264e8e82 100644 --- a/crates/ruff/src/lib.rs +++ b/crates/ruff/src/lib.rs @@ -188,7 +188,7 @@ pub fn run( Command::Check(args) => check(args, global_options), Command::Format(args) => format(args, global_options), Command::Server(args) => server(args), - Command::Analyze(AnalyzeCommand::Graph(args)) => graph_build(args, global_options), + Command::Analyze(AnalyzeCommand::Graph(args)) => analyze_graph(args, global_options), } } @@ -202,7 +202,10 @@ fn format(args: FormatCommand, global_options: GlobalConfigArgs) -> Result Result { +fn analyze_graph( + args: AnalyzeGraphCommand, + global_options: GlobalConfigArgs, +) -> Result { let (cli, config_arguments) = args.partition(global_options)?; commands::analyze_graph::analyze_graph(cli, &config_arguments) diff --git a/crates/ruff/tests/analyze_graph.rs b/crates/ruff/tests/analyze_graph.rs index 81901eefc1fac..33c0b123667bc 100644 --- a/crates/ruff/tests/analyze_graph.rs +++ b/crates/ruff/tests/analyze_graph.rs @@ -1,6 +1,7 @@ //! Tests the interaction of the `analyze graph` command. -#![cfg(not(target_family = "wasm"))] +#![cfg(not(target_arch = "wasm32"))] +#![cfg(not(windows))] use assert_fs::prelude::*; use std::process::Command; diff --git a/crates/ruff_graph/src/lib.rs b/crates/ruff_graph/src/lib.rs index 3d6f92c7ef3a5..e8cb83416d952 100644 --- a/crates/ruff_graph/src/lib.rs +++ b/crates/ruff_graph/src/lib.rs @@ -26,6 +26,11 @@ impl ModuleImports { self.0.insert(path); } + /// Extend the module imports with additional file paths. + pub fn extend(&mut self, paths: impl IntoIterator) { + self.0.extend(paths); + } + /// Returns `true` if the module imports are empty. pub fn is_empty(&self) -> bool { self.0.is_empty()