From 85ae02d62e4595e6826c11a5f48cab00a88a974f Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 16 Jul 2024 08:40:10 +0200 Subject: [PATCH] [red-knot] Add `walk_directories` to `System` (#12297) --- Cargo.lock | 1 + crates/red_knot/Cargo.toml | 2 +- crates/red_knot_module_resolver/Cargo.toml | 2 + .../red_knot_module_resolver/src/resolver.rs | 2 +- crates/ruff_db/Cargo.toml | 4 + crates/ruff_db/src/system.rs | 30 +- crates/ruff_db/src/system/memory_fs.rs | 351 +++++++++++++++++- crates/ruff_db/src/system/os.rs | 348 ++++++++++++++++- crates/ruff_db/src/system/test.rs | 75 ++-- crates/ruff_db/src/system/walk_directory.rs | 318 ++++++++++++++++ 10 files changed, 1060 insertions(+), 73 deletions(-) create mode 100644 crates/ruff_db/src/system/walk_directory.rs diff --git a/Cargo.lock b/Cargo.lock index 018df705ac..0198e7bb53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2085,6 +2085,7 @@ dependencies = [ "countme", "dashmap 6.0.1", "filetime", + "ignore", "insta", "ruff_python_ast", "ruff_python_parser", diff --git a/crates/red_knot/Cargo.toml b/crates/red_knot/Cargo.toml index 716db345cd..2d36f4d4b0 100644 --- a/crates/red_knot/Cargo.toml +++ b/crates/red_knot/Cargo.toml @@ -15,7 +15,7 @@ license.workspace = true red_knot_module_resolver = { workspace = true } red_knot_python_semantic = { workspace = true } -ruff_db = { workspace = true } +ruff_db = { workspace = true, features = ["os"] } ruff_python_ast = { workspace = true } anyhow = { workspace = true } diff --git a/crates/red_knot_module_resolver/Cargo.toml b/crates/red_knot_module_resolver/Cargo.toml index a6761665d6..2681630e3f 100644 --- a/crates/red_knot_module_resolver/Cargo.toml +++ b/crates/red_knot_module_resolver/Cargo.toml @@ -28,6 +28,8 @@ walkdir = { workspace = true } zip = { workspace = true } [dev-dependencies] +ruff_db = { workspace = true, features = ["os"] } + anyhow = { workspace = true } insta = { workspace = true } tempfile = { workspace = true } diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 71b8078711..79ffed13ae 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -881,7 +881,7 @@ mod tests { let temp_dir = tempfile::tempdir()?; let root = SystemPath::from_std_path(temp_dir.path()).unwrap(); - db.use_os_system(OsSystem::new(root)); + db.use_system(OsSystem::new(root)); let src = root.join("src"); let site_packages = root.join("site-packages"); diff --git a/crates/ruff_db/Cargo.toml b/crates/ruff_db/Cargo.toml index 12c4436f59..bbaf27ace2 100644 --- a/crates/ruff_db/Cargo.toml +++ b/crates/ruff_db/Cargo.toml @@ -20,6 +20,7 @@ camino = { workspace = true } countme = { workspace = true } dashmap = { workspace = true } filetime = { workspace = true } +ignore = { workspace = true, optional = true } salsa = { workspace = true } tracing = { workspace = true } rustc-hash = { workspace = true } @@ -28,3 +29,6 @@ zip = { workspace = true } [dev-dependencies] insta = { workspace = true } tempfile = { workspace = true } + +[features] +os = ["ignore"] diff --git a/crates/ruff_db/src/system.rs b/crates/ruff_db/src/system.rs index 80250fd3fb..168ee1ebe1 100644 --- a/crates/ruff_db/src/system.rs +++ b/crates/ruff_db/src/system.rs @@ -1,15 +1,21 @@ +use std::fmt::Debug; + pub use memory_fs::MemoryFileSystem; +#[cfg(feature = "os")] pub use os::OsSystem; pub use test::{DbWithTestSystem, TestSystem}; +use walk_directory::WalkDirectoryBuilder; use crate::file_revision::FileRevision; pub use self::path::{SystemPath, SystemPathBuf}; mod memory_fs; +#[cfg(feature = "os")] mod os; mod path; mod test; +pub mod walk_directory; pub type Result = std::io::Result; @@ -27,7 +33,7 @@ pub type Result = std::io::Result; /// * File watching isn't supported. /// /// Abstracting the system also enables tests to use a more efficient in-memory file system. -pub trait System { +pub trait System: Debug { /// Reads the metadata of the file or directory at `path`. fn path_metadata(&self, path: &SystemPath) -> Result; @@ -82,6 +88,12 @@ pub trait System { path: &SystemPath, ) -> Result> + 'a>>; + /// Recursively walks the content of `path`. + /// + /// It is allowed to pass a `path` that points to a file. In this case, the walker + /// yields a single entry for that file. + fn walk_directory(&self, path: &SystemPath) -> WalkDirectoryBuilder; + fn as_any(&self) -> &dyn std::any::Any; } @@ -127,14 +139,14 @@ impl FileType { } } -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] pub struct DirectoryEntry { path: SystemPathBuf, - file_type: Result, + file_type: FileType, } impl DirectoryEntry { - pub fn new(path: SystemPathBuf, file_type: Result) -> Self { + pub fn new(path: SystemPathBuf, file_type: FileType) -> Self { Self { path, file_type } } @@ -142,13 +154,7 @@ impl DirectoryEntry { &self.path } - pub fn file_type(&self) -> &Result { - &self.file_type - } -} - -impl PartialEq for DirectoryEntry { - fn eq(&self, other: &Self) -> bool { - self.path == other.path + pub fn file_type(&self) -> FileType { + self.file_type } } diff --git a/crates/ruff_db/src/system/memory_fs.rs b/crates/ruff_db/src/system/memory_fs.rs index 3d06bfc807..3c48690e8f 100644 --- a/crates/ruff_db/src/system/memory_fs.rs +++ b/crates/ruff_db/src/system/memory_fs.rs @@ -1,10 +1,18 @@ use std::collections::BTreeMap; +use std::iter::FusedIterator; use std::sync::{Arc, RwLock, RwLockWriteGuard}; use camino::{Utf8Path, Utf8PathBuf}; use filetime::FileTime; -use crate::system::{DirectoryEntry, FileType, Metadata, Result, SystemPath, SystemPathBuf}; +use crate::system::{ + walk_directory, DirectoryEntry, FileType, Metadata, Result, SystemPath, SystemPathBuf, +}; + +use super::walk_directory::{ + DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration, WalkDirectoryVisitor, + WalkDirectoryVisitorBuilder, WalkState, +}; /// File system that stores all content in memory. /// @@ -157,6 +165,14 @@ impl MemoryFileSystem { Ok(()) } + /// Returns a builder for walking the directory tree of `path`. + /// + /// The only files that are ignored when setting `WalkDirectoryBuilder::standard_filters` + /// are hidden files (files with a name starting with a `.`). + pub fn walk_directory(&self, path: impl AsRef) -> WalkDirectoryBuilder { + WalkDirectoryBuilder::new(path, MemoryWalker { fs: self.clone() }) + } + pub fn remove_file(&self, path: impl AsRef) -> Result<()> { fn remove_file(fs: &MemoryFileSystem, path: &SystemPath) -> Result<()> { let mut by_path = fs.inner.by_path.write().unwrap(); @@ -238,17 +254,18 @@ impl MemoryFileSystem { normalized.into_utf8_path_buf() } - pub fn read_directory( - &self, - path: impl AsRef, - ) -> Result> + '_> { + pub fn read_directory(&self, path: impl AsRef) -> Result { let by_path = self.inner.by_path.read().unwrap(); let normalized = self.normalize_path(path.as_ref()); let entry = by_path.get(&normalized).ok_or_else(not_found)?; if entry.is_file() { return Err(not_a_directory()); }; - Ok(by_path + + // Collect the entries into a vector to avoid deadlocks when the + // consumer calls into other file system methods while iterating over the + // directory entries. + let collected = by_path .range(normalized.clone()..) .skip(1) .take_while(|(path, _)| path.starts_with(&normalized)) @@ -256,14 +273,15 @@ impl MemoryFileSystem { if path.parent()? == normalized { Some(Ok(DirectoryEntry { path: SystemPathBuf::from_utf8_path_buf(path.to_owned()), - file_type: Ok(entry.file_type()), + file_type: entry.file_type(), })) } else { None } }) - .collect::>() - .into_iter()) + .collect(); + + Ok(ReadDirectory::new(collected)) } } @@ -379,11 +397,185 @@ fn get_or_create_file<'a>( } } +#[derive(Debug)] +pub struct ReadDirectory { + entries: std::vec::IntoIter>, +} + +impl ReadDirectory { + fn new(entries: Vec>) -> Self { + Self { + entries: entries.into_iter(), + } + } +} + +impl Iterator for ReadDirectory { + type Item = std::io::Result; + + fn next(&mut self) -> Option { + self.entries.next() + } +} + +impl FusedIterator for ReadDirectory {} + +/// Recursively walks a directory in the memory file system. +#[derive(Debug)] +struct MemoryWalker { + fs: MemoryFileSystem, +} + +impl MemoryWalker { + fn visit_entry( + &self, + visitor: &mut dyn WalkDirectoryVisitor, + entry: walk_directory::DirectoryEntry, + queue: &mut Vec, + ignore_hidden: bool, + ) -> WalkState { + if entry.file_type().is_directory() { + let path = entry.path.clone(); + let depth = entry.depth; + + let state = visitor.visit(Ok(entry)); + + if matches!(state, WalkState::Continue) { + queue.push(WalkerState::Nested { + path, + depth: depth + 1, + }); + } + + state + } else if ignore_hidden + && entry + .path + .file_name() + .is_some_and(|name| name.starts_with('.')) + { + WalkState::Skip + } else { + visitor.visit(Ok(entry)) + } + } +} + +impl DirectoryWalker for MemoryWalker { + fn walk( + &self, + builder: &mut dyn WalkDirectoryVisitorBuilder, + configuration: WalkDirectoryConfiguration, + ) { + let WalkDirectoryConfiguration { + paths, + ignore_hidden, + standard_filters: _, + } = configuration; + + let mut visitor = builder.build(); + let mut queue: Vec<_> = paths + .into_iter() + .map(|path| WalkerState::Start { path }) + .collect(); + + while let Some(state) = queue.pop() { + let (path, depth) = match state { + WalkerState::Start { path } => { + match self.fs.metadata(&path) { + Ok(metadata) => { + let entry = walk_directory::DirectoryEntry { + file_type: metadata.file_type, + depth: 0, + path, + }; + + if self.visit_entry(&mut *visitor, entry, &mut queue, ignore_hidden) + == WalkState::Quit + { + return; + } + } + Err(error) => { + visitor.visit(Err(walk_directory::Error { + depth: Some(0), + kind: walk_directory::ErrorKind::Io { + path: Some(path), + err: error, + }, + })); + } + } + + continue; + } + WalkerState::Nested { path, depth } => (path, depth), + }; + + // Use `read_directory` here instead of locking `by_path` to avoid deadlocks + // when the `visitor` calls any file system operations. + let entries = match self.fs.read_directory(&path) { + Ok(entries) => entries, + Err(error) => { + visitor.visit(Err(walk_directory::Error { + depth: Some(depth), + kind: walk_directory::ErrorKind::Io { + path: Some(path), + err: error, + }, + })); + + continue; + } + }; + + for entry in entries { + match entry { + Ok(entry) => { + let entry = walk_directory::DirectoryEntry { + file_type: entry.file_type, + depth, + path: entry.path, + }; + + if self.visit_entry(&mut *visitor, entry, &mut queue, ignore_hidden) + == WalkState::Quit + { + return; + } + } + + Err(error) => { + visitor.visit(Err(walk_directory::Error { + depth: Some(depth), + kind: walk_directory::ErrorKind::Io { + path: None, + err: error, + }, + })); + } + } + } + } + } +} + +#[derive(Debug)] +enum WalkerState { + /// An entry path that was directly provided to the walker. Always has depth 0. + Start { path: SystemPathBuf }, + + /// Traverse into the directory with the given path at the given depth. + Nested { path: SystemPathBuf, depth: usize }, +} + #[cfg(test)] mod tests { use std::io::ErrorKind; use std::time::Duration; + use crate::system::walk_directory::tests::DirectoryEntryToString; + use crate::system::walk_directory::WalkState; use crate::system::{ DirectoryEntry, FileType, MemoryFileSystem, Result, SystemPath, SystemPathBuf, }; @@ -659,9 +851,9 @@ mod tests { .map(Result::unwrap) .collect(); let expected_contents = vec![ - DirectoryEntry::new(SystemPathBuf::from("/a/bar.py"), Ok(FileType::File)), - DirectoryEntry::new(SystemPathBuf::from("/a/baz.pyi"), Ok(FileType::File)), - DirectoryEntry::new(SystemPathBuf::from("/a/foo"), Ok(FileType::Directory)), + DirectoryEntry::new(SystemPathBuf::from("/a/bar.py"), FileType::File), + DirectoryEntry::new(SystemPathBuf::from("/a/baz.pyi"), FileType::File), + DirectoryEntry::new(SystemPathBuf::from("/a/foo"), FileType::Directory), ]; assert_eq!(contents, expected_contents) } @@ -684,4 +876,139 @@ mod tests { assert_eq!(error.kind(), std::io::ErrorKind::Other); assert!(error.to_string().contains("Not a directory")); } + + #[test] + fn walk_directory() -> std::io::Result<()> { + let root = SystemPath::new("/src"); + let system = MemoryFileSystem::with_current_directory(root); + + system.write_files([ + (root.join("foo.py"), "print('foo')"), + (root.join("a/bar.py"), "print('bar')"), + (root.join("a/baz.py"), "print('baz')"), + (root.join("a/b/c.py"), "print('c')"), + ])?; + + let writer = DirectoryEntryToString::new(root.to_path_buf()); + + system.walk_directory(root).run(|| { + Box::new(|entry| { + writer.write_entry(entry); + + WalkState::Continue + }) + }); + + assert_eq!( + writer.to_string(), + r#"{ + "": ( + Directory, + 0, + ), + "a": ( + Directory, + 1, + ), + "a/b": ( + Directory, + 2, + ), + "a/b/c.py": ( + File, + 3, + ), + "a/bar.py": ( + File, + 2, + ), + "a/baz.py": ( + File, + 2, + ), + "foo.py": ( + File, + 1, + ), +}"# + ); + + Ok(()) + } + + #[test] + fn walk_directory_hidden() -> std::io::Result<()> { + let root = SystemPath::new("/src"); + let system = MemoryFileSystem::with_current_directory(root); + + system.write_files([ + (root.join("foo.py"), "print('foo')"), + (root.join("a/bar.py"), "print('bar')"), + (root.join("a/.baz.py"), "print('baz')"), + ])?; + + let writer = DirectoryEntryToString::new(root.to_path_buf()); + + system.walk_directory(root).run(|| { + Box::new(|entry| { + writer.write_entry(entry); + + WalkState::Continue + }) + }); + + assert_eq!( + writer.to_string(), + r#"{ + "": ( + Directory, + 0, + ), + "a": ( + Directory, + 1, + ), + "a/bar.py": ( + File, + 2, + ), + "foo.py": ( + File, + 1, + ), +}"# + ); + + Ok(()) + } + + #[test] + fn walk_directory_file() -> std::io::Result<()> { + let root = SystemPath::new("/src"); + let system = MemoryFileSystem::with_current_directory(root); + + system.write_file(root.join("foo.py"), "print('foo')")?; + + let writer = DirectoryEntryToString::new(root.to_path_buf()); + + system.walk_directory(root.join("foo.py")).run(|| { + Box::new(|entry| { + writer.write_entry(entry); + + WalkState::Continue + }) + }); + + assert_eq!( + writer.to_string(), + r#"{ + "foo.py": ( + File, + 0, + ), +}"# + ); + + Ok(()) + } } diff --git a/crates/ruff_db/src/system/os.rs b/crates/ruff_db/src/system/os.rs index 93e7d12d19..5f7882623c 100644 --- a/crates/ruff_db/src/system/os.rs +++ b/crates/ruff_db/src/system/os.rs @@ -2,9 +2,15 @@ use crate::system::{ DirectoryEntry, FileType, Metadata, Result, System, SystemPath, SystemPathBuf, }; use filetime::FileTime; -use std::any::Any; use std::sync::Arc; +use std::{any::Any, path::PathBuf}; +use super::walk_directory::{ + self, DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration, + WalkDirectoryVisitorBuilder, WalkState, +}; + +/// A system implementation that uses the OS file system. #[derive(Default, Debug)] pub struct OsSystem { inner: Arc, @@ -67,6 +73,14 @@ impl System for OsSystem { &self.inner.cwd } + /// Creates a builder to recursively walk `path`. + /// + /// The walker ignores files according to [`ignore::WalkBuilder::standard_filters`] + /// when setting [`WalkDirectoryBuilder::standard_filters`] to true. + fn walk_directory(&self, path: &SystemPath) -> WalkDirectoryBuilder { + WalkDirectoryBuilder::new(path, OsDirectoryWalker {}) + } + fn as_any(&self) -> &dyn Any { self } @@ -75,11 +89,156 @@ impl System for OsSystem { &self, path: &SystemPath, ) -> Result>>> { - Ok(Box::new( - path.as_utf8_path() - .read_dir_utf8()? - .map(|res| res.map(DirectoryEntry::from)), - )) + Ok(Box::new(path.as_utf8_path().read_dir_utf8()?.map(|res| { + let res = res?; + + let file_type = res.file_type()?; + Ok(DirectoryEntry { + path: SystemPathBuf::from_utf8_path_buf(res.into_path()), + file_type: file_type.into(), + }) + }))) + } +} + +#[derive(Debug)] +struct OsDirectoryWalker; + +impl DirectoryWalker for OsDirectoryWalker { + fn walk( + &self, + visitor_builder: &mut dyn WalkDirectoryVisitorBuilder, + configuration: WalkDirectoryConfiguration, + ) { + let WalkDirectoryConfiguration { + paths, + ignore_hidden: hidden, + standard_filters, + } = configuration; + + let Some((first, additional)) = paths.split_first() else { + return; + }; + + let mut builder = ignore::WalkBuilder::new(first.as_std_path()); + + builder.standard_filters(standard_filters); + builder.hidden(hidden); + + for additional_path in additional { + builder.add(additional_path.as_std_path()); + } + + builder.threads( + std::thread::available_parallelism() + .map_or(1, std::num::NonZeroUsize::get) + .min(12), + ); + + builder.build_parallel().run(|| { + let mut visitor = visitor_builder.build(); + + Box::new(move |entry| { + match entry { + Ok(entry) => { + // SAFETY: The walkdir crate supports `stdin` files and `file_type` can be `None` for these files. + // We don't make use of this feature, which is why unwrapping here is ok. + let file_type = entry.file_type().unwrap(); + let depth = entry.depth(); + + // `walkdir` reports errors related to parsing ignore files as part of the entry. + // These aren't fatal for us. We should keep going even if an ignore file contains a syntax error. + // But we log the error here for better visibility (same as ripgrep, Ruff ignores it) + if let Some(error) = entry.error() { + tracing::warn!("{error}"); + } + + match SystemPathBuf::from_path_buf(entry.into_path()) { + Ok(path) => { + let directory_entry = walk_directory::DirectoryEntry { + path, + file_type: file_type.into(), + depth, + }; + + visitor.visit(Ok(directory_entry)).into() + } + Err(path) => { + visitor.visit(Err(walk_directory::Error { + depth: Some(depth), + kind: walk_directory::ErrorKind::NonUtf8Path { path }, + })); + + // Skip the entire directory because all the paths won't be UTF-8 paths. + ignore::WalkState::Skip + } + } + } + Err(error) => match ignore_to_walk_directory_error(error, None, None) { + Ok(error) => visitor.visit(Err(error)).into(), + Err(error) => { + // This should only be reached when the error is a `.ignore` file related error + // (which, should not be reported here but the `ignore` crate doesn't distinguish between ignore and IO errors). + // Let's log the error to at least make it visible. + tracing::warn!("Failed to traverse directory: {error}."); + ignore::WalkState::Continue + } + }, + } + }) + }); + } +} + +#[cold] +fn ignore_to_walk_directory_error( + error: ignore::Error, + path: Option, + depth: Option, +) -> std::result::Result { + use ignore::Error; + + match error { + Error::WithPath { path, err } => ignore_to_walk_directory_error(*err, Some(path), depth), + Error::WithDepth { err, depth } => ignore_to_walk_directory_error(*err, path, Some(depth)), + Error::WithLineNumber { err, .. } => ignore_to_walk_directory_error(*err, path, depth), + Error::Loop { child, ancestor } => { + match ( + SystemPathBuf::from_path_buf(child), + SystemPathBuf::from_path_buf(ancestor), + ) { + (Ok(child), Ok(ancestor)) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::Loop { child, ancestor }, + }), + (Err(child), _) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::NonUtf8Path { path: child }, + }), + // We should never reach this because we should never traverse into a non UTF8 path but handle it anyway. + (_, Err(ancestor)) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::NonUtf8Path { path: ancestor }, + }), + } + } + + Error::Io(err) => match path.map(SystemPathBuf::from_path_buf).transpose() { + Ok(path) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::Io { path, err }, + }), + Err(path) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::NonUtf8Path { path }, + }), + }, + + // Ignore related errors, we warn about them but we don't abort iteration because of them. + error @ (Error::Glob { .. } + | Error::UnrecognizedFileType(_) + | Error::InvalidDefinition + | Error::Partial(..)) => Err(error), } } @@ -95,21 +254,22 @@ impl From for FileType { } } -impl From for DirectoryEntry { - fn from(value: camino::Utf8DirEntry) -> Self { - let file_type = value.file_type().map(FileType::from); - Self { - path: SystemPathBuf::from_utf8_path_buf(value.into_path()), - file_type, +impl From for ignore::WalkState { + fn from(value: WalkState) -> Self { + match value { + WalkState::Continue => ignore::WalkState::Continue, + WalkState::Skip => ignore::WalkState::Skip, + WalkState::Quit => ignore::WalkState::Quit, } } } #[cfg(test)] mod tests { - use tempfile::TempDir; - use super::*; + use crate::system::walk_directory::tests::DirectoryEntryToString; + use crate::system::DirectoryEntry; + use tempfile::TempDir; #[test] fn read_directory() { @@ -132,9 +292,9 @@ mod tests { sorted_contents.sort_by(|a, b| a.path.cmp(&b.path)); let expected_contents = vec![ - DirectoryEntry::new(tempdir_path.join("a/bar.py"), Ok(FileType::File)), - DirectoryEntry::new(tempdir_path.join("a/baz.pyi"), Ok(FileType::File)), - DirectoryEntry::new(tempdir_path.join("a/foo"), Ok(FileType::Directory)), + DirectoryEntry::new(tempdir_path.join("a/bar.py"), FileType::File), + DirectoryEntry::new(tempdir_path.join("a/baz.pyi"), FileType::File), + DirectoryEntry::new(tempdir_path.join("a/foo"), FileType::Directory), ]; assert_eq!(sorted_contents, expected_contents) } @@ -169,4 +329,158 @@ mod tests { assert!(error.to_string().contains("Not a directory")); } } + + #[test] + fn walk_directory() -> std::io::Result<()> { + let tempdir = TempDir::new()?; + + let root = tempdir.path(); + std::fs::create_dir_all(root.join("a/b"))?; + std::fs::write(root.join("foo.py"), "print('foo')")?; + std::fs::write(root.join("a/bar.py"), "print('bar')")?; + std::fs::write(root.join("a/baz.py"), "print('baz')")?; + std::fs::write(root.join("a/b/c.py"), "print('c')")?; + + let root_sys = SystemPath::from_std_path(root).unwrap(); + let system = OsSystem::new(root_sys); + + let writer = DirectoryEntryToString::new(root_sys.to_path_buf()); + + system.walk_directory(root_sys).run(|| { + Box::new(|entry| { + writer.write_entry(entry); + + WalkState::Continue + }) + }); + + assert_eq!( + writer.to_string(), + r#"{ + "": ( + Directory, + 0, + ), + "a": ( + Directory, + 1, + ), + "a/b": ( + Directory, + 2, + ), + "a/b/c.py": ( + File, + 3, + ), + "a/bar.py": ( + File, + 2, + ), + "a/baz.py": ( + File, + 2, + ), + "foo.py": ( + File, + 1, + ), +}"# + ); + + Ok(()) + } + + #[test] + fn walk_directory_ignore() -> std::io::Result<()> { + let tempdir = TempDir::new()?; + + let root = tempdir.path(); + std::fs::create_dir_all(root.join("a/b"))?; + std::fs::write(root.join("foo.py"), "print('foo')\n")?; + std::fs::write(root.join("a/bar.py"), "print('bar')\n")?; + std::fs::write(root.join("a/baz.py"), "print('baz')\n")?; + + // Exclude the `b` directory. + std::fs::write(root.join("a/.ignore"), "b/\n")?; + std::fs::write(root.join("a/b/c.py"), "print('c')\n")?; + + let root_sys = SystemPath::from_std_path(root).unwrap(); + let system = OsSystem::new(root_sys); + + let writer = DirectoryEntryToString::new(root_sys.to_path_buf()); + + system + .walk_directory(root_sys) + .standard_filters(true) + .run(|| { + Box::new(|entry| { + writer.write_entry(entry); + WalkState::Continue + }) + }); + + assert_eq!( + writer.to_string(), + r#"{ + "": ( + Directory, + 0, + ), + "a": ( + Directory, + 1, + ), + "a/bar.py": ( + File, + 2, + ), + "a/baz.py": ( + File, + 2, + ), + "foo.py": ( + File, + 1, + ), +}"# + ); + + Ok(()) + } + + #[test] + fn walk_directory_file() -> std::io::Result<()> { + let tempdir = TempDir::new()?; + + let root = tempdir.path(); + std::fs::write(root.join("foo.py"), "print('foo')\n")?; + + let root_sys = SystemPath::from_std_path(root).unwrap(); + let system = OsSystem::new(root_sys); + + let writer = DirectoryEntryToString::new(root_sys.to_path_buf()); + + system + .walk_directory(&root_sys.join("foo.py")) + .standard_filters(true) + .run(|| { + Box::new(|entry| { + writer.write_entry(entry); + WalkState::Continue + }) + }); + + assert_eq!( + writer.to_string(), + r#"{ + "foo.py": ( + File, + 0, + ), +}"# + ); + + Ok(()) + } } diff --git a/crates/ruff_db/src/system/test.rs b/crates/ruff_db/src/system/test.rs index 38c5dad7ce..d59e92d905 100644 --- a/crates/ruff_db/src/system/test.rs +++ b/crates/ruff_db/src/system/test.rs @@ -1,9 +1,11 @@ use crate::files::File; -use crate::system::{ - DirectoryEntry, MemoryFileSystem, Metadata, OsSystem, Result, System, SystemPath, -}; +use crate::system::{DirectoryEntry, MemoryFileSystem, Metadata, Result, System, SystemPath}; use crate::Db; use std::any::Any; +use std::panic::RefUnwindSafe; +use std::sync::Arc; + +use super::walk_directory::WalkDirectoryBuilder; /// System implementation intended for testing. /// @@ -14,7 +16,7 @@ use std::any::Any; /// Don't use this system for production code. It's intended for testing only. #[derive(Default, Debug)] pub struct TestSystem { - inner: TestFileSystem, + inner: TestSystemInner, } impl TestSystem { @@ -29,58 +31,68 @@ impl TestSystem { /// ## Panics /// If this test db isn't using a memory file system. pub fn memory_file_system(&self) -> &MemoryFileSystem { - if let TestFileSystem::Stub(fs) = &self.inner { + if let TestSystemInner::Stub(fs) = &self.inner { fs } else { panic!("The test db is not using a memory file system"); } } - fn use_os_system(&mut self, os: OsSystem) { - self.inner = TestFileSystem::Os(os); + fn use_system(&mut self, system: S) + where + S: System + Send + Sync + RefUnwindSafe + 'static, + { + self.inner = TestSystemInner::System(Arc::new(system)); } } impl System for TestSystem { fn path_metadata(&self, path: &SystemPath) -> crate::system::Result { match &self.inner { - TestFileSystem::Stub(fs) => fs.metadata(path), - TestFileSystem::Os(fs) => fs.path_metadata(path), + TestSystemInner::Stub(fs) => fs.metadata(path), + TestSystemInner::System(fs) => fs.path_metadata(path), } } fn read_to_string(&self, path: &SystemPath) -> crate::system::Result { match &self.inner { - TestFileSystem::Stub(fs) => fs.read_to_string(path), - TestFileSystem::Os(fs) => fs.read_to_string(path), + TestSystemInner::Stub(fs) => fs.read_to_string(path), + TestSystemInner::System(fs) => fs.read_to_string(path), } } fn path_exists(&self, path: &SystemPath) -> bool { match &self.inner { - TestFileSystem::Stub(fs) => fs.exists(path), - TestFileSystem::Os(fs) => fs.path_exists(path), + TestSystemInner::Stub(fs) => fs.exists(path), + TestSystemInner::System(system) => system.path_exists(path), } } fn is_directory(&self, path: &SystemPath) -> bool { match &self.inner { - TestFileSystem::Stub(fs) => fs.is_directory(path), - TestFileSystem::Os(fs) => fs.is_directory(path), + TestSystemInner::Stub(fs) => fs.is_directory(path), + TestSystemInner::System(system) => system.is_directory(path), } } fn is_file(&self, path: &SystemPath) -> bool { match &self.inner { - TestFileSystem::Stub(fs) => fs.is_file(path), - TestFileSystem::Os(fs) => fs.is_file(path), + TestSystemInner::Stub(fs) => fs.is_file(path), + TestSystemInner::System(system) => system.is_file(path), } } fn current_directory(&self) -> &SystemPath { match &self.inner { - TestFileSystem::Stub(fs) => fs.current_directory(), - TestFileSystem::Os(fs) => fs.current_directory(), + TestSystemInner::Stub(fs) => fs.current_directory(), + TestSystemInner::System(system) => system.current_directory(), + } + } + + fn walk_directory(&self, path: &SystemPath) -> WalkDirectoryBuilder { + match &self.inner { + TestSystemInner::Stub(fs) => fs.walk_directory(path), + TestSystemInner::System(system) => system.walk_directory(path), } } @@ -93,8 +105,8 @@ impl System for TestSystem { path: &SystemPath, ) -> Result> + 'a>> { match &self.inner { - TestFileSystem::Os(fs) => fs.read_directory(path), - TestFileSystem::Stub(fs) => Ok(Box::new(fs.read_directory(path)?)), + TestSystemInner::System(fs) => fs.read_directory(path), + TestSystemInner::Stub(fs) => Ok(Box::new(fs.read_directory(path)?)), } } } @@ -146,13 +158,16 @@ pub trait DbWithTestSystem: Db + Sized { Ok(()) } - /// Uses the real file system instead of the memory file system. + /// Uses the given system instead of the testing system. /// /// This useful for testing advanced file system features like permissions, symlinks, etc. /// /// Note that any files written to the memory file system won't be copied over. - fn use_os_system(&mut self, os: OsSystem) { - self.test_system_mut().use_os_system(os); + fn use_system(&mut self, os: S) + where + S: System + Send + Sync + RefUnwindSafe + 'static, + { + self.test_system_mut().use_system(os); } /// Returns the memory file system. @@ -165,21 +180,21 @@ pub trait DbWithTestSystem: Db + Sized { } #[derive(Debug)] -enum TestFileSystem { +enum TestSystemInner { Stub(MemoryFileSystem), - Os(OsSystem), + System(Arc), } -impl TestFileSystem { +impl TestSystemInner { fn snapshot(&self) -> Self { match self { - Self::Stub(fs) => Self::Stub(fs.snapshot()), - Self::Os(fs) => Self::Os(fs.snapshot()), + Self::Stub(system) => Self::Stub(system.snapshot()), + Self::System(system) => Self::System(Arc::clone(system)), } } } -impl Default for TestFileSystem { +impl Default for TestSystemInner { fn default() -> Self { Self::Stub(MemoryFileSystem::default()) } diff --git a/crates/ruff_db/src/system/walk_directory.rs b/crates/ruff_db/src/system/walk_directory.rs new file mode 100644 index 0000000000..7932ccfae7 --- /dev/null +++ b/crates/ruff_db/src/system/walk_directory.rs @@ -0,0 +1,318 @@ +use crate::system::SystemPathBuf; +use std::fmt::{Display, Formatter}; +use std::path::PathBuf; + +use super::{FileType, SystemPath}; + +/// A builder for constructing a directory recursive traversal. +pub struct WalkDirectoryBuilder { + /// The implementation that does the directory walking. + walker: Box, + + /// The paths that should be walked. + paths: Vec, + + ignore_hidden: bool, + + standard_filters: bool, +} + +impl WalkDirectoryBuilder { + pub fn new(path: impl AsRef, walker: W) -> Self + where + W: DirectoryWalker + 'static, + { + Self { + walker: Box::new(walker), + paths: vec![path.as_ref().to_path_buf()], + ignore_hidden: true, + standard_filters: true, + } + } + + /// Adds a path that should be traversed recursively. + /// + /// Each additional path is traversed recursively. + /// This should be preferred over building multiple + /// walkers since it enables reusing resources. + #[allow(clippy::should_implement_trait)] + pub fn add(mut self, path: impl AsRef) -> Self { + self.paths.push(path.as_ref().to_path_buf()); + self + } + + /// Whether hidden files should be ignored. + /// + /// The definition of what a hidden file depends on the [`System`](super::System) and can be platform-dependent. + /// + /// This is enabled by default. + pub fn ignore_hidden(mut self, hidden: bool) -> Self { + self.ignore_hidden = hidden; + self + } + + /// Enables all the standard ignore filters. + /// + /// This toggles, as a group, all the filters that are enabled by default: + /// * [`hidden`](Self::ignore_hidden) + /// * Any [`System`](super::System) specific filters according (e.g., respecting `.ignore`, `.gitignore`, files). + /// + /// Defaults to `true`. + pub fn standard_filters(mut self, standard_filters: bool) -> Self { + self.standard_filters = standard_filters; + self.ignore_hidden = standard_filters; + + self + } + + /// Runs the directory traversal and calls the passed `builder` to create visitors + /// that do the visiting. The walker may run multiple threads to visit the directories. + pub fn run<'s, F>(self, builder: F) + where + F: FnMut() -> FnVisitor<'s>, + { + self.visit(&mut FnBuilder { builder }); + } + + /// Runs the directory traversal and calls the passed `builder` to create visitors + /// that do the visiting. The walker may run multiple threads to visit the directories. + pub fn visit(self, builder: &mut dyn WalkDirectoryVisitorBuilder) { + let configuration = WalkDirectoryConfiguration { + paths: self.paths, + ignore_hidden: self.ignore_hidden, + standard_filters: self.standard_filters, + }; + + self.walker.walk(builder, configuration); + } +} + +/// Concrete walker that performs the directory walking. +pub trait DirectoryWalker { + fn walk( + &self, + builder: &mut dyn WalkDirectoryVisitorBuilder, + configuration: WalkDirectoryConfiguration, + ); +} + +/// Creates a visitor for each thread that does the visiting. +pub trait WalkDirectoryVisitorBuilder<'s> { + fn build(&mut self) -> Box; +} + +/// Visitor handling the individual directory entries. +pub trait WalkDirectoryVisitor: Send { + fn visit(&mut self, entry: std::result::Result) -> WalkState; +} + +struct FnBuilder { + builder: F, +} + +impl<'s, F> WalkDirectoryVisitorBuilder<'s> for FnBuilder +where + F: FnMut() -> FnVisitor<'s>, +{ + fn build(&mut self) -> Box { + let visitor = (self.builder)(); + Box::new(FnVisitorImpl(visitor)) + } +} + +type FnVisitor<'s> = + Box) -> WalkState + Send + 's>; + +struct FnVisitorImpl<'s>(FnVisitor<'s>); + +impl WalkDirectoryVisitor for FnVisitorImpl<'_> { + fn visit(&mut self, entry: std::result::Result) -> WalkState { + (self.0)(entry) + } +} + +pub struct WalkDirectoryConfiguration { + pub paths: Vec, + pub ignore_hidden: bool, + pub standard_filters: bool, +} + +/// An entry in a directory. +#[derive(Debug, Clone)] +pub struct DirectoryEntry { + pub(super) path: SystemPathBuf, + pub(super) file_type: FileType, + pub(super) depth: usize, +} + +impl DirectoryEntry { + /// The full path that this entry represents. + pub fn path(&self) -> &SystemPath { + &self.path + } + + /// The full path that this entry represents. + /// Analogous to [`DirectoryEntry::path`], but moves ownership of the path. + pub fn into_path(self) -> SystemPathBuf { + self.path + } + + /// Return the file type for the file that this entry points to. + pub fn file_type(&self) -> FileType { + self.file_type + } + + /// Returns the depth at which this entry was created relative to the root. + pub fn depth(&self) -> usize { + self.depth + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum WalkState { + /// Continue walking as normal + Continue, + + /// If the entry given is a directory, don't descend into it. + /// In all other cases, this has no effect. + Skip, + + /// Quit the entire iterator as soon as possible. + /// + /// Note: This is an inherently asynchronous action. It's possible + /// for more entries to be yielded even after instructing the iterator to quit. + Quit, +} + +pub struct Error { + pub(super) depth: Option, + pub(super) kind: ErrorKind, +} + +impl Error { + pub fn depth(&self) -> Option { + self.depth + } + + pub fn kind(&self) -> &ErrorKind { + &self.kind + } +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self.kind { + ErrorKind::Loop { ancestor, child } => { + write!( + f, + "File system loop found: {child} points to an ancestor {ancestor}", + ) + } + ErrorKind::Io { + path: Some(path), + err, + } => { + write!(f, "IO error for operation on {}: {}", path, err) + } + ErrorKind::Io { path: None, err } => err.fmt(f), + ErrorKind::NonUtf8Path { path } => { + write!(f, "Non-UTF8 path: {}", path.display()) + } + } + } +} + +impl std::fmt::Debug for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self, f) + } +} + +impl std::error::Error for Error {} + +#[derive(Debug)] +pub enum ErrorKind { + /// An error that occurs when a file loop is detected when traversing + /// symbolic links. + Loop { + ancestor: SystemPathBuf, + child: SystemPathBuf, + }, + + /// An error that occurs when doing I/O + Io { + path: Option, + err: std::io::Error, + }, + + /// A path is not a valid UTF-8 path. + NonUtf8Path { path: PathBuf }, +} + +#[cfg(test)] +pub(super) mod tests { + use crate::system::walk_directory::{DirectoryEntry, Error}; + use crate::system::{FileType, SystemPathBuf}; + use std::collections::BTreeMap; + + /// Test helper that creates a visual representation of the visited directory entries. + pub(crate) struct DirectoryEntryToString { + root_path: SystemPathBuf, + inner: std::sync::Mutex, + } + + impl DirectoryEntryToString { + pub(crate) fn new(root_path: SystemPathBuf) -> Self { + Self { + root_path, + inner: std::sync::Mutex::new(DirectoryEntryToStringInner::default()), + } + } + + pub(crate) fn write_entry(&self, entry: Result) { + let mut inner = self.inner.lock().unwrap(); + let DirectoryEntryToStringInner { errors, visited } = &mut *inner; + + match entry { + Ok(entry) => { + let relative_path = entry + .path() + .strip_prefix(&self.root_path) + .unwrap_or(entry.path()); + + let unix_path = relative_path + .components() + .map(|component| component.as_str()) + .collect::>() + .join("/"); + + visited.insert(unix_path, (entry.file_type, entry.depth)); + } + Err(error) => { + errors.push_str(&error.to_string()); + errors.push('\n'); + } + } + } + } + + impl std::fmt::Display for DirectoryEntryToString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let inner = self.inner.lock().unwrap(); + write!(f, "{paths:#?}", paths = inner.visited)?; + + if !inner.errors.is_empty() { + writeln!(f, "\n\n{errors}", errors = inner.errors).unwrap(); + } + + Ok(()) + } + } + + #[derive(Default)] + struct DirectoryEntryToStringInner { + errors: String, + /// Stores the visited path. The key is the relative path to the root, using `/` as path separator. + visited: BTreeMap, + } +}