From 6febd96dfe4a6b6f1829e50c517b77ee0ac8bc12 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Fri, 12 Jul 2024 13:31:05 +0100 Subject: [PATCH] [red-knot] Add a `read_directory()` method to the `ruff_db::system::System` trait (#12289) --- Cargo.lock | 1 + crates/ruff_db/Cargo.toml | 1 + crates/ruff_db/src/system.rs | 54 +++++++++++++++ crates/ruff_db/src/system/memory_fs.rs | 76 ++++++++++++++++++++- crates/ruff_db/src/system/os.rs | 91 +++++++++++++++++++++++++- crates/ruff_db/src/system/path.rs | 6 ++ crates/ruff_db/src/system/test.rs | 14 +++- 7 files changed, 239 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1b44c28f03db0..9d64570b9a718 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2102,6 +2102,7 @@ dependencies = [ "ruff_text_size", "rustc-hash 2.0.0", "salsa", + "tempfile", "tracing", "zip", ] diff --git a/crates/ruff_db/Cargo.toml b/crates/ruff_db/Cargo.toml index 1cfb7e88062a3..12c4436f59e3f 100644 --- a/crates/ruff_db/Cargo.toml +++ b/crates/ruff_db/Cargo.toml @@ -27,3 +27,4 @@ zip = { workspace = true } [dev-dependencies] insta = { workspace = true } +tempfile = { workspace = true } diff --git a/crates/ruff_db/src/system.rs b/crates/ruff_db/src/system.rs index 3816dd2723d80..80250fd3fb3e3 100644 --- a/crates/ruff_db/src/system.rs +++ b/crates/ruff_db/src/system.rs @@ -54,6 +54,34 @@ pub trait System { /// Returns the current working directory fn current_directory(&self) -> &SystemPath; + /// Iterate over the contents of the directory at `path`. + /// + /// The returned iterator must have the following properties: + /// - It only iterates over the top level of the directory, + /// i.e., it does not recurse into subdirectories. + /// - It skips the current and parent directories (`.` and `..` + /// respectively). + /// - The iterator yields `std::io::Result` instances. + /// For each instance, an `Err` variant may signify that the path + /// of the entry was not valid UTF8, in which case it should be an + /// [`std::io::Error`] with the ErrorKind set to + /// [`std::io::ErrorKind::InvalidData`] and the payload set to a + /// [`camino::FromPathBufError`]. It may also indicate that + /// "some sort of intermittent IO error occurred during iteration" + /// (language taken from the [`std::fs::read_dir`] documentation). + /// + /// # Errors + /// Returns an error: + /// - if `path` does not exist in the system, + /// - if `path` does not point to a directory, + /// - if the process does not have sufficient permissions to + /// view the contents of the directory at `path` + /// - May also return an error in some other situations as well. + fn read_directory<'a>( + &'a self, + path: &SystemPath, + ) -> Result> + 'a>>; + fn as_any(&self) -> &dyn std::any::Any; } @@ -98,3 +126,29 @@ impl FileType { matches!(self, FileType::Symlink) } } + +#[derive(Debug)] +pub struct DirectoryEntry { + path: SystemPathBuf, + file_type: Result, +} + +impl DirectoryEntry { + pub fn new(path: SystemPathBuf, file_type: Result) -> Self { + Self { path, file_type } + } + + pub fn path(&self) -> &SystemPath { + &self.path + } + + pub fn file_type(&self) -> &Result { + &self.file_type + } +} + +impl PartialEq for DirectoryEntry { + fn eq(&self, other: &Self) -> bool { + self.path == other.path + } +} diff --git a/crates/ruff_db/src/system/memory_fs.rs b/crates/ruff_db/src/system/memory_fs.rs index 03ff19bb780ce..3d06bfc807229 100644 --- a/crates/ruff_db/src/system/memory_fs.rs +++ b/crates/ruff_db/src/system/memory_fs.rs @@ -4,7 +4,7 @@ use std::sync::{Arc, RwLock, RwLockWriteGuard}; use camino::{Utf8Path, Utf8PathBuf}; use filetime::FileTime; -use crate::system::{FileType, Metadata, Result, SystemPath, SystemPathBuf}; +use crate::system::{DirectoryEntry, FileType, Metadata, Result, SystemPath, SystemPathBuf}; /// File system that stores all content in memory. /// @@ -237,6 +237,34 @@ impl MemoryFileSystem { let normalized = SystemPath::absolute(path, &self.inner.cwd); normalized.into_utf8_path_buf() } + + pub fn read_directory( + &self, + path: impl AsRef, + ) -> Result> + '_> { + let by_path = self.inner.by_path.read().unwrap(); + let normalized = self.normalize_path(path.as_ref()); + let entry = by_path.get(&normalized).ok_or_else(not_found)?; + if entry.is_file() { + return Err(not_a_directory()); + }; + Ok(by_path + .range(normalized.clone()..) + .skip(1) + .take_while(|(path, _)| path.starts_with(&normalized)) + .filter_map(|(path, entry)| { + if path.parent()? == normalized { + Some(Ok(DirectoryEntry { + path: SystemPathBuf::from_utf8_path_buf(path.to_owned()), + file_type: Ok(entry.file_type()), + })) + } else { + None + } + }) + .collect::>() + .into_iter()) + } } impl Default for MemoryFileSystem { @@ -268,6 +296,13 @@ impl Entry { const fn is_file(&self) -> bool { matches!(self, Entry::File(_)) } + + const fn file_type(&self) -> FileType { + match self { + Self::File(_) => FileType::File, + Self::Directory(_) => FileType::Directory, + } + } } #[derive(Debug)] @@ -349,7 +384,9 @@ mod tests { use std::io::ErrorKind; use std::time::Duration; - use crate::system::{MemoryFileSystem, Result, SystemPath}; + use crate::system::{ + DirectoryEntry, FileType, MemoryFileSystem, Result, SystemPath, SystemPathBuf, + }; /// Creates a file system with the given files. /// @@ -612,4 +649,39 @@ mod tests { let error = fs.remove_directory("a").unwrap_err(); assert_eq!(error.kind(), ErrorKind::Other); } + + #[test] + fn read_directory() { + let fs = with_files(["b.ts", "a/bar.py", "d.rs", "a/foo/bar.py", "a/baz.pyi"]); + let contents: Vec = fs + .read_directory("a") + .unwrap() + .map(Result::unwrap) + .collect(); + let expected_contents = vec![ + DirectoryEntry::new(SystemPathBuf::from("/a/bar.py"), Ok(FileType::File)), + DirectoryEntry::new(SystemPathBuf::from("/a/baz.pyi"), Ok(FileType::File)), + DirectoryEntry::new(SystemPathBuf::from("/a/foo"), Ok(FileType::Directory)), + ]; + assert_eq!(contents, expected_contents) + } + + #[test] + fn read_directory_nonexistent() { + let fs = MemoryFileSystem::new(); + let Err(error) = fs.read_directory("doesnt_exist") else { + panic!("Expected this to fail"); + }; + assert_eq!(error.kind(), std::io::ErrorKind::NotFound); + } + + #[test] + fn read_directory_on_file() { + let fs = with_files(["a.py"]); + let Err(error) = fs.read_directory("a.py") else { + panic!("Expected this to fail"); + }; + assert_eq!(error.kind(), std::io::ErrorKind::Other); + assert!(error.to_string().contains("Not a directory")); + } } diff --git a/crates/ruff_db/src/system/os.rs b/crates/ruff_db/src/system/os.rs index 40165c97c8836..93e7d12d1996b 100644 --- a/crates/ruff_db/src/system/os.rs +++ b/crates/ruff_db/src/system/os.rs @@ -1,4 +1,6 @@ -use crate::system::{FileType, Metadata, Result, System, SystemPath, SystemPathBuf}; +use crate::system::{ + DirectoryEntry, FileType, Metadata, Result, System, SystemPath, SystemPathBuf, +}; use filetime::FileTime; use std::any::Any; use std::sync::Arc; @@ -68,6 +70,17 @@ impl System for OsSystem { fn as_any(&self) -> &dyn Any { self } + + fn read_directory( + &self, + path: &SystemPath, + ) -> Result>>> { + Ok(Box::new( + path.as_utf8_path() + .read_dir_utf8()? + .map(|res| res.map(DirectoryEntry::from)), + )) + } } impl From for FileType { @@ -81,3 +94,79 @@ impl From for FileType { } } } + +impl From for DirectoryEntry { + fn from(value: camino::Utf8DirEntry) -> Self { + let file_type = value.file_type().map(FileType::from); + Self { + path: SystemPathBuf::from_utf8_path_buf(value.into_path()), + file_type, + } + } +} + +#[cfg(test)] +mod tests { + use tempfile::TempDir; + + use super::*; + + #[test] + fn read_directory() { + let tempdir = TempDir::new().unwrap(); + let tempdir_path = tempdir.path(); + std::fs::create_dir_all(tempdir_path.join("a/foo")).unwrap(); + let files = &["b.ts", "a/bar.py", "d.rs", "a/foo/bar.py", "a/baz.pyi"]; + for path in files { + std::fs::File::create(tempdir_path.join(path)).unwrap(); + } + + let tempdir_path = SystemPath::from_std_path(tempdir_path).unwrap(); + let fs = OsSystem::new(tempdir_path); + + let mut sorted_contents: Vec = fs + .read_directory(&tempdir_path.join("a")) + .unwrap() + .map(Result::unwrap) + .collect(); + sorted_contents.sort_by(|a, b| a.path.cmp(&b.path)); + + let expected_contents = vec![ + DirectoryEntry::new(tempdir_path.join("a/bar.py"), Ok(FileType::File)), + DirectoryEntry::new(tempdir_path.join("a/baz.pyi"), Ok(FileType::File)), + DirectoryEntry::new(tempdir_path.join("a/foo"), Ok(FileType::Directory)), + ]; + assert_eq!(sorted_contents, expected_contents) + } + + #[test] + fn read_directory_nonexistent() { + let fs = OsSystem::new(""); + let result = fs.read_directory(SystemPath::new("doesnt_exist")); + assert!(result.is_err_and(|error| error.kind() == std::io::ErrorKind::NotFound)); + } + + #[test] + fn read_directory_on_file() { + let tempdir = TempDir::new().unwrap(); + let tempdir_path = tempdir.path(); + std::fs::File::create(tempdir_path.join("a.py")).unwrap(); + + let tempdir_path = SystemPath::from_std_path(tempdir_path).unwrap(); + let fs = OsSystem::new(tempdir_path); + let result = fs.read_directory(&tempdir_path.join("a.py")); + let Err(error) = result else { + panic!("Expected the read_dir() call to fail!"); + }; + + // We can't assert the error kind here because it's apparently an unstable feature! + // https://github.com/rust-lang/rust/issues/86442 + // assert_eq!(error.kind(), std::io::ErrorKind::NotADirectory); + + // We can't even assert the error message on all platforms, as it's different on Windows, + // where the message is "The directory name is invalid" rather than "Not a directory". + if cfg!(unix) { + assert!(error.to_string().contains("Not a directory")); + } + } +} diff --git a/crates/ruff_db/src/system/path.rs b/crates/ruff_db/src/system/path.rs index 82bd9c2afceb9..993bb13d0273a 100644 --- a/crates/ruff_db/src/system/path.rs +++ b/crates/ruff_db/src/system/path.rs @@ -303,6 +303,12 @@ impl SystemPath { self.0.as_std_path() } + /// Returns the [`Utf8Path`] for the file. + #[inline] + pub fn as_utf8_path(&self) -> &Utf8Path { + &self.0 + } + pub fn from_std_path(path: &Path) -> Option<&SystemPath> { Some(SystemPath::new(Utf8Path::from_path(path)?)) } diff --git a/crates/ruff_db/src/system/test.rs b/crates/ruff_db/src/system/test.rs index f8a9267b573fd..38c5dad7ce8dc 100644 --- a/crates/ruff_db/src/system/test.rs +++ b/crates/ruff_db/src/system/test.rs @@ -1,5 +1,7 @@ use crate::files::File; -use crate::system::{MemoryFileSystem, Metadata, OsSystem, System, SystemPath}; +use crate::system::{ + DirectoryEntry, MemoryFileSystem, Metadata, OsSystem, Result, System, SystemPath, +}; use crate::Db; use std::any::Any; @@ -85,6 +87,16 @@ impl System for TestSystem { fn as_any(&self) -> &dyn Any { self } + + fn read_directory<'a>( + &'a self, + path: &SystemPath, + ) -> Result> + 'a>> { + match &self.inner { + TestFileSystem::Os(fs) => fs.read_directory(path), + TestFileSystem::Stub(fs) => Ok(Box::new(fs.read_directory(path)?)), + } + } } /// Extension trait for databases that use [`TestSystem`].