From aedb7fb129d8c12e73655ba2821a1aa7d43b7f9e Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Fri, 12 Jul 2024 11:24:58 +0200 Subject: [PATCH] Basic `walk_directories` implementation --- crates/ruff_db/Cargo.toml | 2 +- crates/ruff_db/src/system.rs | 24 +- crates/ruff_db/src/system/memory_fs.rs | 364 ++++++++++++++++++-- crates/ruff_db/src/system/os.rs | 342 +++++++++++++++++- crates/ruff_db/src/system/test.rs | 24 +- crates/ruff_db/src/system/walk_directory.rs | 315 +++++++++++++++++ 6 files changed, 1010 insertions(+), 61 deletions(-) create mode 100644 crates/ruff_db/src/system/walk_directory.rs diff --git a/crates/ruff_db/Cargo.toml b/crates/ruff_db/Cargo.toml index 7113f276f32a69..bbaf27ace212b6 100644 --- a/crates/ruff_db/Cargo.toml +++ b/crates/ruff_db/Cargo.toml @@ -31,4 +31,4 @@ insta = { workspace = true } tempfile = { workspace = true } [features] -os = [] +os = ["ignore"] diff --git a/crates/ruff_db/src/system.rs b/crates/ruff_db/src/system.rs index bbba692d413d33..89b8e72b832a21 100644 --- a/crates/ruff_db/src/system.rs +++ b/crates/ruff_db/src/system.rs @@ -1,8 +1,10 @@ +use std::fmt::Debug; + pub use memory_fs::MemoryFileSystem; #[cfg(feature = "os")] pub use os::OsSystem; -use std::fmt::Debug; pub use test::{DbWithTestSystem, TestSystem}; +use walk_directory::WalkDirectoryBuilder; use crate::file_revision::FileRevision; @@ -13,6 +15,7 @@ mod memory_fs; mod os; mod path; mod test; +pub mod walk_directory; pub type Result = std::io::Result; @@ -85,6 +88,9 @@ pub trait System: Debug { path: &SystemPath, ) -> Result> + 'a>>; + /// Recursively walks the content of `path`. + fn walk_directory(&self, path: &SystemPath) -> WalkDirectoryBuilder; + fn as_any(&self) -> &dyn std::any::Any; } @@ -130,14 +136,14 @@ impl FileType { } } -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] pub struct DirectoryEntry { path: SystemPathBuf, - file_type: Result, + file_type: FileType, } impl DirectoryEntry { - pub fn new(path: SystemPathBuf, file_type: Result) -> Self { + pub fn new(path: SystemPathBuf, file_type: FileType) -> Self { Self { path, file_type } } @@ -145,13 +151,7 @@ impl DirectoryEntry { &self.path } - pub fn file_type(&self) -> &Result { - &self.file_type - } -} - -impl PartialEq for DirectoryEntry { - fn eq(&self, other: &Self) -> bool { - self.path == other.path + pub fn file_type(&self) -> FileType { + self.file_type } } diff --git a/crates/ruff_db/src/system/memory_fs.rs b/crates/ruff_db/src/system/memory_fs.rs index 3d06bfc807229f..0b14d6369fad02 100644 --- a/crates/ruff_db/src/system/memory_fs.rs +++ b/crates/ruff_db/src/system/memory_fs.rs @@ -1,10 +1,18 @@ use std::collections::BTreeMap; +use std::iter::FusedIterator; use std::sync::{Arc, RwLock, RwLockWriteGuard}; use camino::{Utf8Path, Utf8PathBuf}; use filetime::FileTime; -use crate::system::{DirectoryEntry, FileType, Metadata, Result, SystemPath, SystemPathBuf}; +use crate::system::{ + walk_directory, DirectoryEntry, FileType, Metadata, Result, SystemPath, SystemPathBuf, +}; + +use super::walk_directory::{ + DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration, WalkDirectoryVisitor, + WalkDirectoryVisitorBuilder, WalkState, +}; /// File system that stores all content in memory. /// @@ -157,6 +165,11 @@ impl MemoryFileSystem { Ok(()) } + /// Returns a builder for walking the directory tree of `path`. + pub fn walk_directory(&self, path: impl AsRef) -> WalkDirectoryBuilder { + WalkDirectoryBuilder::new(path, MemoryWalker { fs: self.clone() }) + } + pub fn remove_file(&self, path: impl AsRef) -> Result<()> { fn remove_file(fs: &MemoryFileSystem, path: &SystemPath) -> Result<()> { let mut by_path = fs.inner.by_path.write().unwrap(); @@ -238,32 +251,30 @@ impl MemoryFileSystem { normalized.into_utf8_path_buf() } - pub fn read_directory( - &self, - path: impl AsRef, - ) -> Result> + '_> { + pub fn read_directory(&self, path: impl AsRef) -> Result { let by_path = self.inner.by_path.read().unwrap(); let normalized = self.normalize_path(path.as_ref()); let entry = by_path.get(&normalized).ok_or_else(not_found)?; if entry.is_file() { return Err(not_a_directory()); }; - Ok(by_path - .range(normalized.clone()..) - .skip(1) - .take_while(|(path, _)| path.starts_with(&normalized)) - .filter_map(|(path, entry)| { - if path.parent()? == normalized { - Some(Ok(DirectoryEntry { - path: SystemPathBuf::from_utf8_path_buf(path.to_owned()), - file_type: Ok(entry.file_type()), - })) - } else { - None - } - }) - .collect::>() - .into_iter()) + + let mut collected = Vec::new(); + + for (path, entry) in by_path.range(normalized.clone()..).skip(1) { + if !path.starts_with(&normalized) { + break; + } + + if path.parent() == Some(&normalized) { + collected.push(Ok(DirectoryEntry { + path: SystemPathBuf::from_utf8_path_buf(path.to_owned()), + file_type: entry.file_type(), + })); + } + } + + Ok(ReadDirectory::new(collected)) } } @@ -379,11 +390,181 @@ fn get_or_create_file<'a>( } } +#[derive(Debug)] +pub struct ReadDirectory { + entries: std::vec::IntoIter>, +} + +impl ReadDirectory { + fn new(entries: Vec>) -> Self { + Self { + entries: entries.into_iter(), + } + } +} + +impl Iterator for ReadDirectory { + type Item = std::io::Result; + + fn next(&mut self) -> Option { + self.entries.next() + } +} + +impl FusedIterator for ReadDirectory {} + +#[derive(Debug)] +struct MemoryWalker { + fs: MemoryFileSystem, +} + +impl MemoryWalker { + fn visit_entry( + &self, + visitor: &mut dyn WalkDirectoryVisitor, + entry: walk_directory::DirectoryEntry, + queue: &mut Vec, + ) -> WalkState { + if entry.file_type().is_directory() { + let path = entry.path.clone(); + let depth = entry.depth; + + let state = visitor.visit(Ok(entry)); + + if matches!(state, WalkState::Continue) { + queue.push(WalkerState::Nested { + path, + depth: depth + 1, + }); + } + + state + } else { + visitor.visit(Ok(entry)) + } + } +} + +impl DirectoryWalker for MemoryWalker { + fn walk( + &self, + builder: &mut dyn WalkDirectoryVisitorBuilder, + configuration: WalkDirectoryConfiguration, + ) { + let WalkDirectoryConfiguration { + paths, + ignore_hidden, + standard_filters: _, + } = configuration; + + let mut visitor = builder.build(); + let mut queue: Vec<_> = paths + .into_iter() + .map(|path| WalkerState::Start { path }) + .collect(); + + while let Some(state) = queue.pop() { + let (path, depth) = match state { + WalkerState::Start { path } => { + match self.fs.metadata(&path) { + Ok(metadata) => { + let entry = walk_directory::DirectoryEntry { + file_type: metadata.file_type, + depth: 0, + path, + }; + + if self.visit_entry(&mut *visitor, entry, &mut queue) == WalkState::Quit + { + return; + } + } + Err(error) => { + visitor.visit(Err(walk_directory::Error { + depth: Some(0), + kind: walk_directory::ErrorKind::Io { + path: Some(path), + err: error, + }, + })); + } + } + + continue; + } + WalkerState::Nested { path, depth } => (path, depth), + }; + + // Use `read_directory` here instead of locking `by_path` to avoid deadlocks + // when the `visitor` calls any file system operations. + let entries = match self.fs.read_directory(&path) { + Ok(entries) => entries, + Err(error) => { + visitor.visit(Err(walk_directory::Error { + depth: Some(depth), + kind: walk_directory::ErrorKind::Io { + path: Some(path), + err: error, + }, + })); + continue; + } + }; + + for entry in entries { + match entry { + Ok(entry) => { + if ignore_hidden + && entry + .path + .file_name() + .is_some_and(|name| name.starts_with('.')) + { + continue; + } + + let entry = walk_directory::DirectoryEntry { + file_type: entry.file_type, + depth, + path: entry.path, + }; + + if self.visit_entry(&mut *visitor, entry, &mut queue) == WalkState::Quit { + return; + } + } + + Err(error) => { + visitor.visit(Err(walk_directory::Error { + depth: Some(depth), + kind: walk_directory::ErrorKind::Io { + path: None, + err: error, + }, + })); + } + } + } + } + } +} + +#[derive(Debug)] +enum WalkerState { + /// An entry path that was directly provided to the walker. Always has depth 0. + Start { path: SystemPathBuf }, + + /// Traverse into the directory with the given path at the given depth. + Nested { path: SystemPathBuf, depth: usize }, +} + #[cfg(test)] mod tests { use std::io::ErrorKind; use std::time::Duration; + use crate::system::walk_directory::tests::DirectoryEntryToString; + use crate::system::walk_directory::WalkState; use crate::system::{ DirectoryEntry, FileType, MemoryFileSystem, Result, SystemPath, SystemPathBuf, }; @@ -659,9 +840,9 @@ mod tests { .map(Result::unwrap) .collect(); let expected_contents = vec![ - DirectoryEntry::new(SystemPathBuf::from("/a/bar.py"), Ok(FileType::File)), - DirectoryEntry::new(SystemPathBuf::from("/a/baz.pyi"), Ok(FileType::File)), - DirectoryEntry::new(SystemPathBuf::from("/a/foo"), Ok(FileType::Directory)), + DirectoryEntry::new(SystemPathBuf::from("/a/bar.py"), FileType::File), + DirectoryEntry::new(SystemPathBuf::from("/a/baz.pyi"), FileType::File), + DirectoryEntry::new(SystemPathBuf::from("/a/foo"), FileType::Directory), ]; assert_eq!(contents, expected_contents) } @@ -684,4 +865,139 @@ mod tests { assert_eq!(error.kind(), std::io::ErrorKind::Other); assert!(error.to_string().contains("Not a directory")); } + + #[test] + fn walk_directory() -> std::io::Result<()> { + let root = SystemPath::new("/src"); + let system = MemoryFileSystem::with_current_directory(root); + + system.write_files([ + (root.join("foo.py"), "print('foo')"), + (root.join("a/bar.py"), "print('bar')"), + (root.join("a/baz.py"), "print('baz')"), + (root.join("a/b/c.py"), "print('c')"), + ])?; + + let writer = DirectoryEntryToString::new(root.to_path_buf()); + + system.walk_directory(root).run(|| { + Box::new(|entry| { + writer.write_entry(entry); + + WalkState::Continue + }) + }); + + assert_eq!( + writer.into_string(), + r#"{ + "": ( + Directory, + 0, + ), + "a": ( + Directory, + 1, + ), + "a/b": ( + Directory, + 2, + ), + "a/b/c.py": ( + File, + 3, + ), + "a/bar.py": ( + File, + 2, + ), + "a/baz.py": ( + File, + 2, + ), + "foo.py": ( + File, + 1, + ), +}"# + ); + + Ok(()) + } + + #[test] + fn walk_directory_hidden() -> std::io::Result<()> { + let root = SystemPath::new("/src"); + let system = MemoryFileSystem::with_current_directory(root); + + system.write_files([ + (root.join("foo.py"), "print('foo')"), + (root.join("a/bar.py"), "print('bar')"), + (root.join("a/.baz.py"), "print('baz')"), + ])?; + + let writer = DirectoryEntryToString::new(root.to_path_buf()); + + system.walk_directory(root).run(|| { + Box::new(|entry| { + writer.write_entry(entry); + + WalkState::Continue + }) + }); + + assert_eq!( + writer.into_string(), + r#"{ + "": ( + Directory, + 0, + ), + "a": ( + Directory, + 1, + ), + "a/bar.py": ( + File, + 2, + ), + "foo.py": ( + File, + 1, + ), +}"# + ); + + Ok(()) + } + + #[test] + fn walk_directory_file() -> std::io::Result<()> { + let root = SystemPath::new("/src"); + let system = MemoryFileSystem::with_current_directory(root); + + system.write_file(root.join("foo.py"), "print('foo')")?; + + let writer = DirectoryEntryToString::new(root.to_path_buf()); + + system.walk_directory(root.join("foo.py")).run(|| { + Box::new(|entry| { + writer.write_entry(entry); + + WalkState::Continue + }) + }); + + assert_eq!( + writer.into_string(), + r#"{ + "foo.py": ( + File, + 0, + ), +}"# + ); + + Ok(()) + } } diff --git a/crates/ruff_db/src/system/os.rs b/crates/ruff_db/src/system/os.rs index 93e7d12d1996b7..2b7dd397da03c4 100644 --- a/crates/ruff_db/src/system/os.rs +++ b/crates/ruff_db/src/system/os.rs @@ -2,9 +2,15 @@ use crate::system::{ DirectoryEntry, FileType, Metadata, Result, System, SystemPath, SystemPathBuf, }; use filetime::FileTime; -use std::any::Any; use std::sync::Arc; +use std::{any::Any, path::PathBuf}; +use super::walk_directory::{ + self, DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration, + WalkDirectoryVisitorBuilder, WalkState, +}; + +/// A system implementation that uses the OS file system. #[derive(Default, Debug)] pub struct OsSystem { inner: Arc, @@ -67,6 +73,10 @@ impl System for OsSystem { &self.inner.cwd } + fn walk_directory(&self, path: &SystemPath) -> WalkDirectoryBuilder { + WalkDirectoryBuilder::new(path, OsDirectoryWalker {}) + } + fn as_any(&self) -> &dyn Any { self } @@ -75,11 +85,154 @@ impl System for OsSystem { &self, path: &SystemPath, ) -> Result>>> { - Ok(Box::new( - path.as_utf8_path() - .read_dir_utf8()? - .map(|res| res.map(DirectoryEntry::from)), - )) + Ok(Box::new(path.as_utf8_path().read_dir_utf8()?.map(|res| { + let res = res?; + + let file_type = res.file_type()?; + Ok(DirectoryEntry { + path: SystemPathBuf::from_utf8_path_buf(res.into_path()), + file_type: file_type.into(), + }) + }))) + } +} + +#[derive(Debug)] +struct OsDirectoryWalker; + +impl DirectoryWalker for OsDirectoryWalker { + fn walk( + &self, + visitor_builder: &mut dyn WalkDirectoryVisitorBuilder, + configuration: WalkDirectoryConfiguration, + ) { + let WalkDirectoryConfiguration { + paths, + ignore_hidden: hidden, + standard_filters, + } = configuration; + + let Some((first, additional)) = paths.split_first() else { + return; + }; + + let mut builder = ignore::WalkBuilder::new(first.as_std_path()); + + builder.standard_filters(standard_filters); + builder.hidden(hidden); + + for additional_path in additional { + builder.add(additional_path.as_std_path()); + } + + builder.threads( + std::thread::available_parallelism() + .map_or(1, std::num::NonZeroUsize::get) + .min(12), + ); + + builder.build_parallel().run(|| { + let mut visitor = visitor_builder.build(); + + Box::new(move |entry| { + match entry { + Ok(entry) => { + // SAFETY: WalkDirectoryVisitor doesn't support walking stdin. + let file_type = entry.file_type().unwrap(); + let depth = entry.depth(); + + // If there's an error related to a git ignore file, warn. + // Errors related to traversing the file system are reported alongside the entry. + if let Some(error) = entry.error() { + tracing::warn!("{error}"); + } + + match SystemPathBuf::from_path_buf(entry.into_path()) { + Ok(path) => { + let directory_entry = walk_directory::DirectoryEntry { + path, + file_type: FileType::from(file_type), + depth, + }; + + visitor.visit(Ok(directory_entry)).into() + } + Err(path) => { + visitor.visit(Err(walk_directory::Error { + depth: Some(depth), + kind: walk_directory::ErrorKind::NonUtf8Path { path }, + })); + + // Skip the entire directory because all the paths won't be UTF-8 paths. + ignore::WalkState::Skip + } + } + } + Err(error) => match ignore_to_walk_directory_error(error, None, None) { + Ok(error) => visitor.visit(Err(error)).into(), + Err(error) => { + // This should only be reached when the error is an ignore related error + // (which, should not be reported here but the `ignore` crate doesn't distinguish between ignore and IO errors). + // Let's log the error to at least make it visible. + tracing::warn!("Failed to traverse directory: {error}."); + ignore::WalkState::Continue + } + }, + } + }) + }); + } +} + +#[cold] +fn ignore_to_walk_directory_error( + error: ignore::Error, + path: Option, + depth: Option, +) -> std::result::Result { + use ignore::Error; + + match error { + Error::WithPath { path, err } => ignore_to_walk_directory_error(*err, Some(path), depth), + Error::WithDepth { err, depth } => ignore_to_walk_directory_error(*err, path, Some(depth)), + Error::WithLineNumber { err, .. } => ignore_to_walk_directory_error(*err, path, depth), + Error::Loop { child, ancestor } => { + match ( + SystemPathBuf::from_path_buf(child), + SystemPathBuf::from_path_buf(ancestor), + ) { + (Ok(child), Ok(ancestor)) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::Loop { child, ancestor }, + }), + (Err(child), _) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::NonUtf8Path { path: child }, + }), + // We should never reach this because we should never traverse into a non UTF8 path but handle it anyway. + (_, Err(ancestor)) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::NonUtf8Path { path: ancestor }, + }), + } + } + + Error::Io(err) => match path.map(SystemPathBuf::from_path_buf).transpose() { + Ok(path) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::Io { path, err }, + }), + Err(path) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::NonUtf8Path { path }, + }), + }, + + // Ignore related errors, we warn about them but we don't abort iteration because of them. + error @ (Error::Glob { .. } + | Error::UnrecognizedFileType(_) + | Error::InvalidDefinition + | Error::Partial(..)) => Err(error), } } @@ -95,21 +248,22 @@ impl From for FileType { } } -impl From for DirectoryEntry { - fn from(value: camino::Utf8DirEntry) -> Self { - let file_type = value.file_type().map(FileType::from); - Self { - path: SystemPathBuf::from_utf8_path_buf(value.into_path()), - file_type, +impl From for ignore::WalkState { + fn from(value: WalkState) -> Self { + match value { + WalkState::Continue => ignore::WalkState::Continue, + WalkState::Skip => ignore::WalkState::Skip, + WalkState::Quit => ignore::WalkState::Quit, } } } #[cfg(test)] mod tests { - use tempfile::TempDir; - use super::*; + use crate::system::walk_directory::tests::DirectoryEntryToString; + use crate::system::DirectoryEntry; + use tempfile::TempDir; #[test] fn read_directory() { @@ -132,9 +286,9 @@ mod tests { sorted_contents.sort_by(|a, b| a.path.cmp(&b.path)); let expected_contents = vec![ - DirectoryEntry::new(tempdir_path.join("a/bar.py"), Ok(FileType::File)), - DirectoryEntry::new(tempdir_path.join("a/baz.pyi"), Ok(FileType::File)), - DirectoryEntry::new(tempdir_path.join("a/foo"), Ok(FileType::Directory)), + DirectoryEntry::new(tempdir_path.join("a/bar.py"), FileType::File), + DirectoryEntry::new(tempdir_path.join("a/baz.pyi"), FileType::File), + DirectoryEntry::new(tempdir_path.join("a/foo"), FileType::Directory), ]; assert_eq!(sorted_contents, expected_contents) } @@ -169,4 +323,158 @@ mod tests { assert!(error.to_string().contains("Not a directory")); } } + + #[test] + fn walk_directory() -> std::io::Result<()> { + let tempdir = TempDir::new()?; + + let root = tempdir.path(); + std::fs::create_dir_all(root.join("a/b"))?; + std::fs::write(root.join("foo.py"), "print('foo')")?; + std::fs::write(root.join("a/bar.py"), "print('bar')")?; + std::fs::write(root.join("a/baz.py"), "print('baz')")?; + std::fs::write(root.join("a/b/c.py"), "print('c')")?; + + let root_sys = SystemPath::from_std_path(root).unwrap(); + let system = OsSystem::new(root_sys); + + let writer = DirectoryEntryToString::new(root_sys.to_path_buf()); + + system.walk_directory(root_sys).run(|| { + Box::new(|entry| { + writer.write_entry(entry); + + WalkState::Continue + }) + }); + + assert_eq!( + writer.into_string(), + r#"{ + "": ( + Directory, + 0, + ), + "a": ( + Directory, + 1, + ), + "a/b": ( + Directory, + 2, + ), + "a/b/c.py": ( + File, + 3, + ), + "a/bar.py": ( + File, + 2, + ), + "a/baz.py": ( + File, + 2, + ), + "foo.py": ( + File, + 1, + ), +}"# + ); + + Ok(()) + } + + #[test] + fn walk_directory_ignore() -> std::io::Result<()> { + let tempdir = TempDir::new()?; + + let root = tempdir.path(); + std::fs::create_dir_all(root.join("a/b"))?; + std::fs::write(root.join("foo.py"), "print('foo')\n")?; + std::fs::write(root.join("a/bar.py"), "print('bar')\n")?; + std::fs::write(root.join("a/baz.py"), "print('baz')\n")?; + + // Exclude the `b` directory. + std::fs::write(root.join("a/.ignore"), "b/\n")?; + std::fs::write(root.join("a/b/c.py"), "print('c')\n")?; + + let root_sys = SystemPath::from_std_path(root).unwrap(); + let system = OsSystem::new(root_sys); + + let writer = DirectoryEntryToString::new(root_sys.to_path_buf()); + + system + .walk_directory(root_sys) + .standard_filters(true) + .run(|| { + Box::new(|entry| { + writer.write_entry(entry); + WalkState::Continue + }) + }); + + assert_eq!( + writer.into_string(), + r#"{ + "": ( + Directory, + 0, + ), + "a": ( + Directory, + 1, + ), + "a/bar.py": ( + File, + 2, + ), + "a/baz.py": ( + File, + 2, + ), + "foo.py": ( + File, + 1, + ), +}"# + ); + + Ok(()) + } + + #[test] + fn walk_directory_file() -> std::io::Result<()> { + let tempdir = TempDir::new()?; + + let root = tempdir.path(); + std::fs::write(root.join("foo.py"), "print('foo')\n")?; + + let root_sys = SystemPath::from_std_path(root).unwrap(); + let system = OsSystem::new(root_sys); + + let writer = DirectoryEntryToString::new(root_sys.to_path_buf()); + + system + .walk_directory(&root_sys.join("foo.py")) + .standard_filters(true) + .run(|| { + Box::new(|entry| { + writer.write_entry(entry); + WalkState::Continue + }) + }); + + assert_eq!( + writer.into_string(), + r#"{ + "foo.py": ( + File, + 0, + ), +}"# + ); + + Ok(()) + } } diff --git a/crates/ruff_db/src/system/test.rs b/crates/ruff_db/src/system/test.rs index b62f60af5e65c1..b48c3cdcbfdd24 100644 --- a/crates/ruff_db/src/system/test.rs +++ b/crates/ruff_db/src/system/test.rs @@ -2,8 +2,11 @@ use crate::files::File; use crate::system::{DirectoryEntry, MemoryFileSystem, Metadata, Result, System, SystemPath}; use crate::Db; use std::any::Any; +use std::panic::RefUnwindSafe; use std::sync::Arc; +use super::walk_directory::WalkDirectoryBuilder; + /// System implementation intended for testing. /// /// It uses a memory-file system by default, but can be switched to the real file system for tests @@ -37,7 +40,7 @@ impl TestSystem { fn us_system(&mut self, system: S) where - S: System + Send + Sync + 'static, + S: System + Send + Sync + RefUnwindSafe + 'static, { self.inner = TestSystemInner::System(Arc::new(system)); } @@ -61,28 +64,35 @@ impl System for TestSystem { fn path_exists(&self, path: &SystemPath) -> bool { match &self.inner { TestSystemInner::Stub(fs) => fs.exists(path), - TestSystemInner::System(fs) => fs.path_exists(path), + TestSystemInner::System(system) => system.path_exists(path), } } fn is_directory(&self, path: &SystemPath) -> bool { match &self.inner { TestSystemInner::Stub(fs) => fs.is_directory(path), - TestSystemInner::System(fs) => fs.is_directory(path), + TestSystemInner::System(system) => system.is_directory(path), } } fn is_file(&self, path: &SystemPath) -> bool { match &self.inner { TestSystemInner::Stub(fs) => fs.is_file(path), - TestSystemInner::System(fs) => fs.is_file(path), + TestSystemInner::System(system) => system.is_file(path), } } fn current_directory(&self) -> &SystemPath { match &self.inner { TestSystemInner::Stub(fs) => fs.current_directory(), - TestSystemInner::System(fs) => fs.current_directory(), + TestSystemInner::System(system) => system.current_directory(), + } + } + + fn walk_directory(&self, path: &SystemPath) -> WalkDirectoryBuilder { + match &self.inner { + TestSystemInner::Stub(fs) => fs.walk_directory(path), + TestSystemInner::System(system) => system.walk_directory(path), } } @@ -155,7 +165,7 @@ pub trait DbWithTestSystem: Db + Sized { /// Note that any files written to the memory file system won't be copied over. fn use_system(&mut self, os: S) where - S: System + Send + Sync + 'static, + S: System + Send + Sync + RefUnwindSafe + 'static, { self.test_system_mut().us_system(os); } @@ -172,7 +182,7 @@ pub trait DbWithTestSystem: Db + Sized { #[derive(Debug)] enum TestSystemInner { Stub(MemoryFileSystem), - System(Arc), + System(Arc), } impl TestSystemInner { diff --git a/crates/ruff_db/src/system/walk_directory.rs b/crates/ruff_db/src/system/walk_directory.rs new file mode 100644 index 00000000000000..e53961dd67ed28 --- /dev/null +++ b/crates/ruff_db/src/system/walk_directory.rs @@ -0,0 +1,315 @@ +use crate::system::SystemPathBuf; +use std::fmt::{Display, Formatter}; +use std::path::PathBuf; + +use super::{FileType, SystemPath}; + +/// A builder for constructing a directory recursive traversal. +pub struct WalkDirectoryBuilder { + /// The implementation that does the directory walking. + walker: Box, + + /// The paths that should be walked. + paths: Vec, + + ignore_hidden: bool, + + standard_filters: bool, +} + +impl WalkDirectoryBuilder { + pub fn new(path: impl AsRef, walker: W) -> Self + where + W: DirectoryWalker + 'static, + { + Self { + walker: Box::new(walker), + paths: vec![path.as_ref().to_path_buf()], + ignore_hidden: true, + standard_filters: true, + } + } + + /// Adds an additional path that should be traversed recursively. + /// + /// Using the same walker to traverse multiple paths can be more efficient than creating a new walker for each path. + #[allow(clippy::should_implement_trait)] + pub fn add(mut self, path: impl AsRef) -> Self { + self.paths.push(path.as_ref().to_path_buf()); + self + } + + /// Whether hidden files should be ignored. + /// + /// The definition of what a hidden file depends on the [`System`](super::System) and can be platform dependent. + /// + /// This is enabled by default. + pub fn ignore_hidden(mut self, hidden: bool) -> Self { + self.ignore_hidden = hidden; + self + } + + /// Whether files that are commonly ignored according to the conventions on [`System`](super::System) should be ignored. + /// + /// This includes ignoring hidden files. + /// + /// Defaults to `true`. + pub fn standard_filters(mut self, standard_filters: bool) -> Self { + self.standard_filters = standard_filters; + self.ignore_hidden = true; + self + } + + /// Runs the directory traversal and calls `builder` to create a visitor for each thread + /// that does the visiting. It's up to the actual walker to decide how many threads to use. + pub fn run<'s, F>(self, builder: F) + where + F: FnMut() -> FnVisitor<'s>, + { + let configuration = WalkDirectoryConfiguration { + paths: self.paths, + ignore_hidden: self.ignore_hidden, + standard_filters: self.standard_filters, + }; + + self.walker.walk(&mut FnBuilder { builder }, configuration); + } +} + +/// Concrete walker that performs the directory walking. +pub trait DirectoryWalker { + fn walk( + &self, + builder: &mut dyn WalkDirectoryVisitorBuilder, + configuration: WalkDirectoryConfiguration, + ); +} + +/// Creates a visitor for each thread that does the visiting. +pub trait WalkDirectoryVisitorBuilder<'s> { + fn build(&mut self) -> Box; +} + +/// Visitor handling the individual directory entries. +pub trait WalkDirectoryVisitor: Send { + fn visit(&mut self, entry: std::result::Result) -> WalkState; +} + +struct FnBuilder { + builder: F, +} + +impl<'s, F> WalkDirectoryVisitorBuilder<'s> for FnBuilder +where + F: FnMut() -> FnVisitor<'s>, +{ + fn build(&mut self) -> Box { + let visitor = (self.builder)(); + Box::new(FnVisitorImpl(visitor)) + } +} + +type FnVisitor<'s> = + Box) -> WalkState + Send + 's>; + +struct FnVisitorImpl<'s>(FnVisitor<'s>); + +impl WalkDirectoryVisitor for FnVisitorImpl<'_> { + fn visit(&mut self, entry: std::result::Result) -> WalkState { + (self.0)(entry) + } +} + +pub struct WalkDirectoryConfiguration { + pub paths: Vec, + pub ignore_hidden: bool, + pub standard_filters: bool, +} + +/// An entry in a directory. +#[derive(Debug, Clone)] +pub struct DirectoryEntry { + pub(super) path: SystemPathBuf, + pub(super) file_type: FileType, + pub(super) depth: usize, +} + +impl DirectoryEntry { + /// The full path that this entry represents. + pub fn path(&self) -> &SystemPath { + &self.path + } + + /// The full path that this entry represents. + /// Analogous to [`DirectoryEntry::path`], but moves ownership of the path. + pub fn into_path(self) -> SystemPathBuf { + self.path + } + + /// Return the file type for the file that this entry points to. + /// + /// This entry doesn't have a file type if it corresponds to stdin. + pub fn file_type(&self) -> FileType { + self.file_type + } + + /// Return the file name of this entry. + /// + /// If this entry has no file name (e.g., `/`), then the full path is + /// returned. + pub fn file_name(&self) -> &str { + self.path.file_name().unwrap_or(self.path.as_str()) + } + + /// Returns the depth at which this entry was created relative to the root. + pub fn depth(&self) -> usize { + self.depth + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum WalkState { + /// Continue walking as normal + Continue, + + /// If the entry given is a directory, don't decent into it. + /// In all other cases, this has no effect. + Skip, + + /// Quit the entire iterator as soon as possible. + /// + /// Note: This is an inherently asynchronous action. It's possible + /// for more entries to be yielded even after instructing the iterator to quit. + Quit, +} + +pub struct Error { + pub(super) depth: Option, + pub(super) kind: ErrorKind, +} + +impl Error { + pub fn depth(&self) -> Option { + self.depth + } + + pub fn kind(&self) -> &ErrorKind { + &self.kind + } +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self.kind { + ErrorKind::Loop { ancestor, child } => { + write!( + f, + "File system loop found: {child} points to an ancestor {ancestor}", + ) + } + ErrorKind::Io { + path: Some(path), + err, + } => { + write!(f, "IO error for operation on {}: {}", path, err) + } + ErrorKind::Io { path: None, err } => err.fmt(f), + ErrorKind::NonUtf8Path { path } => { + write!(f, "Non-UTF8 path: {}", path.display()) + } + } + } +} + +impl std::fmt::Debug for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self, f) + } +} + +impl std::error::Error for Error {} + +#[derive(Debug)] +pub enum ErrorKind { + /// An error that occurs when a file loop is detected when traversing + /// symbolic links. + Loop { + ancestor: SystemPathBuf, + child: SystemPathBuf, + }, + + /// An error that occurs when doing I/O + Io { + path: Option, + err: std::io::Error, + }, + + /// A path is not a valid UTF-8 path. + NonUtf8Path { path: PathBuf }, +} + +#[cfg(test)] +pub(super) mod tests { + use crate::system::walk_directory::{DirectoryEntry, Error}; + use crate::system::{FileType, SystemPathBuf}; + use std::collections::BTreeMap; + + pub(crate) struct DirectoryEntryToString { + root_path: SystemPathBuf, + inner: std::sync::Mutex, + } + + impl DirectoryEntryToString { + pub(crate) fn new(root_path: SystemPathBuf) -> Self { + Self { + root_path, + inner: std::sync::Mutex::new(DirectoryEntryToStringInner::default()), + } + } + + pub(crate) fn write_entry(&self, entry: Result) { + let mut inner = self.inner.lock().unwrap(); + let DirectoryEntryToStringInner { errors, visited } = &mut *inner; + + match entry { + Ok(entry) => { + let relative_path = entry + .path() + .strip_prefix(&self.root_path) + .unwrap_or(entry.path()); + + let unix_path = relative_path + .components() + .map(|component| component.as_str()) + .collect::>() + .join("/"); + + visited.insert(unix_path, (entry.file_type, entry.depth)); + } + Err(error) => { + errors.push_str(&error.to_string()); + errors.push('\n'); + } + } + } + + pub(crate) fn into_string(self) -> String { + use std::fmt::Write; + + let inner = self.inner.into_inner().unwrap(); + let mut output = format!("{paths:#?}", paths = inner.visited); + + if !inner.errors.is_empty() { + writeln!(output, "\n\n{errors}", errors = inner.errors).unwrap(); + } + output + } + } + + #[derive(Default)] + struct DirectoryEntryToStringInner { + errors: String, + /// Stores the visisted path. The key is the relative path to the root, using `/` as path separator. + visited: BTreeMap, + } +}