[red-knot] Add walk_directories to System (#12297)

This commit is contained in:
Micha Reiser 2024-07-16 08:40:10 +02:00 committed by GitHub
parent 9a817a2922
commit 85ae02d62e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 1060 additions and 73 deletions

1
Cargo.lock generated
View file

@ -2085,6 +2085,7 @@ dependencies = [
"countme", "countme",
"dashmap 6.0.1", "dashmap 6.0.1",
"filetime", "filetime",
"ignore",
"insta", "insta",
"ruff_python_ast", "ruff_python_ast",
"ruff_python_parser", "ruff_python_parser",

View file

@ -15,7 +15,7 @@ license.workspace = true
red_knot_module_resolver = { workspace = true } red_knot_module_resolver = { workspace = true }
red_knot_python_semantic = { workspace = true } red_knot_python_semantic = { workspace = true }
ruff_db = { workspace = true } ruff_db = { workspace = true, features = ["os"] }
ruff_python_ast = { workspace = true } ruff_python_ast = { workspace = true }
anyhow = { workspace = true } anyhow = { workspace = true }

View file

@ -28,6 +28,8 @@ walkdir = { workspace = true }
zip = { workspace = true } zip = { workspace = true }
[dev-dependencies] [dev-dependencies]
ruff_db = { workspace = true, features = ["os"] }
anyhow = { workspace = true } anyhow = { workspace = true }
insta = { workspace = true } insta = { workspace = true }
tempfile = { workspace = true } tempfile = { workspace = true }

View file

@ -881,7 +881,7 @@ mod tests {
let temp_dir = tempfile::tempdir()?; let temp_dir = tempfile::tempdir()?;
let root = SystemPath::from_std_path(temp_dir.path()).unwrap(); let root = SystemPath::from_std_path(temp_dir.path()).unwrap();
db.use_os_system(OsSystem::new(root)); db.use_system(OsSystem::new(root));
let src = root.join("src"); let src = root.join("src");
let site_packages = root.join("site-packages"); let site_packages = root.join("site-packages");

View file

@ -20,6 +20,7 @@ camino = { workspace = true }
countme = { workspace = true } countme = { workspace = true }
dashmap = { workspace = true } dashmap = { workspace = true }
filetime = { workspace = true } filetime = { workspace = true }
ignore = { workspace = true, optional = true }
salsa = { workspace = true } salsa = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }
rustc-hash = { workspace = true } rustc-hash = { workspace = true }
@ -28,3 +29,6 @@ zip = { workspace = true }
[dev-dependencies] [dev-dependencies]
insta = { workspace = true } insta = { workspace = true }
tempfile = { workspace = true } tempfile = { workspace = true }
[features]
os = ["ignore"]

View file

@ -1,15 +1,21 @@
use std::fmt::Debug;
pub use memory_fs::MemoryFileSystem; pub use memory_fs::MemoryFileSystem;
#[cfg(feature = "os")]
pub use os::OsSystem; pub use os::OsSystem;
pub use test::{DbWithTestSystem, TestSystem}; pub use test::{DbWithTestSystem, TestSystem};
use walk_directory::WalkDirectoryBuilder;
use crate::file_revision::FileRevision; use crate::file_revision::FileRevision;
pub use self::path::{SystemPath, SystemPathBuf}; pub use self::path::{SystemPath, SystemPathBuf};
mod memory_fs; mod memory_fs;
#[cfg(feature = "os")]
mod os; mod os;
mod path; mod path;
mod test; mod test;
pub mod walk_directory;
pub type Result<T> = std::io::Result<T>; pub type Result<T> = std::io::Result<T>;
@ -27,7 +33,7 @@ pub type Result<T> = std::io::Result<T>;
/// * File watching isn't supported. /// * File watching isn't supported.
/// ///
/// Abstracting the system also enables tests to use a more efficient in-memory file system. /// Abstracting the system also enables tests to use a more efficient in-memory file system.
pub trait System { pub trait System: Debug {
/// Reads the metadata of the file or directory at `path`. /// Reads the metadata of the file or directory at `path`.
fn path_metadata(&self, path: &SystemPath) -> Result<Metadata>; fn path_metadata(&self, path: &SystemPath) -> Result<Metadata>;
@ -82,6 +88,12 @@ pub trait System {
path: &SystemPath, path: &SystemPath,
) -> Result<Box<dyn Iterator<Item = Result<DirectoryEntry>> + 'a>>; ) -> Result<Box<dyn Iterator<Item = Result<DirectoryEntry>> + 'a>>;
/// Recursively walks the content of `path`.
///
/// It is allowed to pass a `path` that points to a file. In this case, the walker
/// yields a single entry for that file.
fn walk_directory(&self, path: &SystemPath) -> WalkDirectoryBuilder;
fn as_any(&self) -> &dyn std::any::Any; fn as_any(&self) -> &dyn std::any::Any;
} }
@ -127,14 +139,14 @@ impl FileType {
} }
} }
#[derive(Debug)] #[derive(Debug, PartialEq, Eq)]
pub struct DirectoryEntry { pub struct DirectoryEntry {
path: SystemPathBuf, path: SystemPathBuf,
file_type: Result<FileType>, file_type: FileType,
} }
impl DirectoryEntry { impl DirectoryEntry {
pub fn new(path: SystemPathBuf, file_type: Result<FileType>) -> Self { pub fn new(path: SystemPathBuf, file_type: FileType) -> Self {
Self { path, file_type } Self { path, file_type }
} }
@ -142,13 +154,7 @@ impl DirectoryEntry {
&self.path &self.path
} }
pub fn file_type(&self) -> &Result<FileType> { pub fn file_type(&self) -> FileType {
&self.file_type self.file_type
}
}
impl PartialEq for DirectoryEntry {
fn eq(&self, other: &Self) -> bool {
self.path == other.path
} }
} }

View file

@ -1,10 +1,18 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::iter::FusedIterator;
use std::sync::{Arc, RwLock, RwLockWriteGuard}; use std::sync::{Arc, RwLock, RwLockWriteGuard};
use camino::{Utf8Path, Utf8PathBuf}; use camino::{Utf8Path, Utf8PathBuf};
use filetime::FileTime; use filetime::FileTime;
use crate::system::{DirectoryEntry, FileType, Metadata, Result, SystemPath, SystemPathBuf}; use crate::system::{
walk_directory, DirectoryEntry, FileType, Metadata, Result, SystemPath, SystemPathBuf,
};
use super::walk_directory::{
DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration, WalkDirectoryVisitor,
WalkDirectoryVisitorBuilder, WalkState,
};
/// File system that stores all content in memory. /// File system that stores all content in memory.
/// ///
@ -157,6 +165,14 @@ impl MemoryFileSystem {
Ok(()) Ok(())
} }
/// Creates a [`WalkDirectoryBuilder`] that recursively walks `path` in this memory file system.
///
/// The in-memory walker does not evaluate any ignore files. The only entries it filters out
/// when `WalkDirectoryBuilder::standard_filters` is enabled are hidden files
/// (files with a name starting with a `.`).
pub fn walk_directory(&self, path: impl AsRef<SystemPath>) -> WalkDirectoryBuilder {
    // The walker owns its own handle to the file system so that it can outlive `&self`.
    let walker = MemoryWalker { fs: self.clone() };
    WalkDirectoryBuilder::new(path, walker)
}
pub fn remove_file(&self, path: impl AsRef<SystemPath>) -> Result<()> { pub fn remove_file(&self, path: impl AsRef<SystemPath>) -> Result<()> {
fn remove_file(fs: &MemoryFileSystem, path: &SystemPath) -> Result<()> { fn remove_file(fs: &MemoryFileSystem, path: &SystemPath) -> Result<()> {
let mut by_path = fs.inner.by_path.write().unwrap(); let mut by_path = fs.inner.by_path.write().unwrap();
@ -238,17 +254,18 @@ impl MemoryFileSystem {
normalized.into_utf8_path_buf() normalized.into_utf8_path_buf()
} }
pub fn read_directory( pub fn read_directory(&self, path: impl AsRef<SystemPath>) -> Result<ReadDirectory> {
&self,
path: impl AsRef<SystemPath>,
) -> Result<impl Iterator<Item = Result<DirectoryEntry>> + '_> {
let by_path = self.inner.by_path.read().unwrap(); let by_path = self.inner.by_path.read().unwrap();
let normalized = self.normalize_path(path.as_ref()); let normalized = self.normalize_path(path.as_ref());
let entry = by_path.get(&normalized).ok_or_else(not_found)?; let entry = by_path.get(&normalized).ok_or_else(not_found)?;
if entry.is_file() { if entry.is_file() {
return Err(not_a_directory()); return Err(not_a_directory());
}; };
Ok(by_path
// Collect the entries into a vector to avoid deadlocks when the
// consumer calls into other file system methods while iterating over the
// directory entries.
let collected = by_path
.range(normalized.clone()..) .range(normalized.clone()..)
.skip(1) .skip(1)
.take_while(|(path, _)| path.starts_with(&normalized)) .take_while(|(path, _)| path.starts_with(&normalized))
@ -256,14 +273,15 @@ impl MemoryFileSystem {
if path.parent()? == normalized { if path.parent()? == normalized {
Some(Ok(DirectoryEntry { Some(Ok(DirectoryEntry {
path: SystemPathBuf::from_utf8_path_buf(path.to_owned()), path: SystemPathBuf::from_utf8_path_buf(path.to_owned()),
file_type: Ok(entry.file_type()), file_type: entry.file_type(),
})) }))
} else { } else {
None None
} }
}) })
.collect::<Vec<_>>() .collect();
.into_iter())
Ok(ReadDirectory::new(collected))
} }
} }
@ -379,11 +397,185 @@ fn get_or_create_file<'a>(
} }
} }
/// Iterator over the entries of a single directory of the [`MemoryFileSystem`].
///
/// The entries are collected eagerly when the iterator is created, so iterating
/// never holds a lock on the file system.
#[derive(Debug)]
pub struct ReadDirectory {
    entries: std::vec::IntoIter<Result<DirectoryEntry>>,
}

impl ReadDirectory {
    /// Creates an iterator that yields `entries` in order.
    fn new(entries: Vec<Result<DirectoryEntry>>) -> Self {
        Self {
            entries: entries.into_iter(),
        }
    }
}

impl Iterator for ReadDirectory {
    type Item = std::io::Result<DirectoryEntry>;

    fn next(&mut self) -> Option<Self::Item> {
        self.entries.next()
    }

    /// Forwards the exact remaining length of the underlying vector so that
    /// consumers (e.g. `collect`) can preallocate.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.entries.size_hint()
    }
}

impl FusedIterator for ReadDirectory {}
/// Walker that recursively traverses directories of a [`MemoryFileSystem`].
#[derive(Debug)]
struct MemoryWalker {
    fs: MemoryFileSystem,
}

impl MemoryWalker {
    /// Hands `entry` to `visitor` and returns the state the walk should continue with.
    ///
    /// * Directories are always passed to the visitor. If the visitor answers with
    ///   [`WalkState::Continue`], the directory is pushed onto `queue` so that its
    ///   content is visited later (one level deeper).
    /// * Non-directories whose name starts with a `.` are skipped without calling the
    ///   visitor when `ignore_hidden` is set.
    /// * All other entries are passed to the visitor.
    fn visit_entry(
        &self,
        visitor: &mut dyn WalkDirectoryVisitor,
        entry: walk_directory::DirectoryEntry,
        queue: &mut Vec<WalkerState>,
        ignore_hidden: bool,
    ) -> WalkState {
        if !entry.file_type().is_directory() {
            let skip_hidden = ignore_hidden
                && entry
                    .path
                    .file_name()
                    .is_some_and(|name| name.starts_with('.'));

            return if skip_hidden {
                WalkState::Skip
            } else {
                visitor.visit(Ok(entry))
            };
        }

        // Remember where to descend before the entry is moved into the visitor.
        let nested = WalkerState::Nested {
            path: entry.path.clone(),
            depth: entry.depth + 1,
        };

        let state = visitor.visit(Ok(entry));

        if matches!(state, WalkState::Continue) {
            queue.push(nested);
        }

        state
    }
}
impl DirectoryWalker for MemoryWalker {
fn walk(
&self,
builder: &mut dyn WalkDirectoryVisitorBuilder,
configuration: WalkDirectoryConfiguration,
) {
let WalkDirectoryConfiguration {
paths,
ignore_hidden,
standard_filters: _,
} = configuration;
let mut visitor = builder.build();
let mut queue: Vec<_> = paths
.into_iter()
.map(|path| WalkerState::Start { path })
.collect();
while let Some(state) = queue.pop() {
let (path, depth) = match state {
WalkerState::Start { path } => {
match self.fs.metadata(&path) {
Ok(metadata) => {
let entry = walk_directory::DirectoryEntry {
file_type: metadata.file_type,
depth: 0,
path,
};
if self.visit_entry(&mut *visitor, entry, &mut queue, ignore_hidden)
== WalkState::Quit
{
return;
}
}
Err(error) => {
visitor.visit(Err(walk_directory::Error {
depth: Some(0),
kind: walk_directory::ErrorKind::Io {
path: Some(path),
err: error,
},
}));
}
}
continue;
}
WalkerState::Nested { path, depth } => (path, depth),
};
// Use `read_directory` here instead of locking `by_path` to avoid deadlocks
// when the `visitor` calls any file system operations.
let entries = match self.fs.read_directory(&path) {
Ok(entries) => entries,
Err(error) => {
visitor.visit(Err(walk_directory::Error {
depth: Some(depth),
kind: walk_directory::ErrorKind::Io {
path: Some(path),
err: error,
},
}));
continue;
}
};
for entry in entries {
match entry {
Ok(entry) => {
let entry = walk_directory::DirectoryEntry {
file_type: entry.file_type,
depth,
path: entry.path,
};
if self.visit_entry(&mut *visitor, entry, &mut queue, ignore_hidden)
== WalkState::Quit
{
return;
}
}
Err(error) => {
visitor.visit(Err(walk_directory::Error {
depth: Some(depth),
kind: walk_directory::ErrorKind::Io {
path: None,
err: error,
},
}));
}
}
}
}
}
}
/// A unit of pending work on the [`MemoryWalker`]'s queue.
#[derive(Debug)]
enum WalkerState {
    /// An entry path that was directly provided to the walker. Always has depth 0.
    Start { path: SystemPathBuf },

    /// Traverse into the directory with the given path at the given depth.
    ///
    /// `depth` is the depth that is assigned to the entries inside of `path`.
    Nested { path: SystemPathBuf, depth: usize },
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::io::ErrorKind; use std::io::ErrorKind;
use std::time::Duration; use std::time::Duration;
use crate::system::walk_directory::tests::DirectoryEntryToString;
use crate::system::walk_directory::WalkState;
use crate::system::{ use crate::system::{
DirectoryEntry, FileType, MemoryFileSystem, Result, SystemPath, SystemPathBuf, DirectoryEntry, FileType, MemoryFileSystem, Result, SystemPath, SystemPathBuf,
}; };
@ -659,9 +851,9 @@ mod tests {
.map(Result::unwrap) .map(Result::unwrap)
.collect(); .collect();
let expected_contents = vec![ let expected_contents = vec![
DirectoryEntry::new(SystemPathBuf::from("/a/bar.py"), Ok(FileType::File)), DirectoryEntry::new(SystemPathBuf::from("/a/bar.py"), FileType::File),
DirectoryEntry::new(SystemPathBuf::from("/a/baz.pyi"), Ok(FileType::File)), DirectoryEntry::new(SystemPathBuf::from("/a/baz.pyi"), FileType::File),
DirectoryEntry::new(SystemPathBuf::from("/a/foo"), Ok(FileType::Directory)), DirectoryEntry::new(SystemPathBuf::from("/a/foo"), FileType::Directory),
]; ];
assert_eq!(contents, expected_contents) assert_eq!(contents, expected_contents)
} }
@ -684,4 +876,139 @@ mod tests {
assert_eq!(error.kind(), std::io::ErrorKind::Other); assert_eq!(error.kind(), std::io::ErrorKind::Other);
assert!(error.to_string().contains("Not a directory")); assert!(error.to_string().contains("Not a directory"));
} }
/// Walks a nested directory tree with the default configuration and checks the
/// file type and depth that is reported for every visited path (relative to `root`).
#[test]
fn walk_directory() -> std::io::Result<()> {
    let root = SystemPath::new("/src");
    let system = MemoryFileSystem::with_current_directory(root);
    system.write_files([
        (root.join("foo.py"), "print('foo')"),
        (root.join("a/bar.py"), "print('bar')"),
        (root.join("a/baz.py"), "print('baz')"),
        (root.join("a/b/c.py"), "print('c')"),
    ])?;
    // Records every visited entry so that the result can be compared as a string.
    let writer = DirectoryEntryToString::new(root.to_path_buf());
    system.walk_directory(root).run(|| {
        Box::new(|entry| {
            writer.write_entry(entry);
            WalkState::Continue
        })
    });
    // The root itself is reported at depth 0 with an empty relative path.
    assert_eq!(
        writer.to_string(),
        r#"{
"": (
Directory,
0,
),
"a": (
Directory,
1,
),
"a/b": (
Directory,
2,
),
"a/b/c.py": (
File,
3,
),
"a/bar.py": (
File,
2,
),
"a/baz.py": (
File,
2,
),
"foo.py": (
File,
1,
),
}"#
    );
    Ok(())
}
/// Hidden files (here `a/.baz.py`) are not reported when walking with the default
/// configuration, which ignores hidden files.
#[test]
fn walk_directory_hidden() -> std::io::Result<()> {
    let root = SystemPath::new("/src");
    let system = MemoryFileSystem::with_current_directory(root);
    system.write_files([
        (root.join("foo.py"), "print('foo')"),
        (root.join("a/bar.py"), "print('bar')"),
        (root.join("a/.baz.py"), "print('baz')"),
    ])?;
    let writer = DirectoryEntryToString::new(root.to_path_buf());
    system.walk_directory(root).run(|| {
        Box::new(|entry| {
            writer.write_entry(entry);
            WalkState::Continue
        })
    });
    // `a/.baz.py` must be absent from the recorded entries.
    assert_eq!(
        writer.to_string(),
        r#"{
"": (
Directory,
0,
),
"a": (
Directory,
1,
),
"a/bar.py": (
File,
2,
),
"foo.py": (
File,
1,
),
}"#
    );
    Ok(())
}
/// Walking a path that points to a file yields exactly one entry for that file at depth 0.
#[test]
fn walk_directory_file() -> std::io::Result<()> {
    let root = SystemPath::new("/src");
    let system = MemoryFileSystem::with_current_directory(root);
    system.write_file(root.join("foo.py"), "print('foo')")?;
    let writer = DirectoryEntryToString::new(root.to_path_buf());
    // Start the walk at the file itself rather than at its parent directory.
    system.walk_directory(root.join("foo.py")).run(|| {
        Box::new(|entry| {
            writer.write_entry(entry);
            WalkState::Continue
        })
    });
    assert_eq!(
        writer.to_string(),
        r#"{
"foo.py": (
File,
0,
),
}"#
    );
    Ok(())
}
} }

View file

@ -2,9 +2,15 @@ use crate::system::{
DirectoryEntry, FileType, Metadata, Result, System, SystemPath, SystemPathBuf, DirectoryEntry, FileType, Metadata, Result, System, SystemPath, SystemPathBuf,
}; };
use filetime::FileTime; use filetime::FileTime;
use std::any::Any;
use std::sync::Arc; use std::sync::Arc;
use std::{any::Any, path::PathBuf};
use super::walk_directory::{
self, DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration,
WalkDirectoryVisitorBuilder, WalkState,
};
/// A system implementation that uses the OS file system.
#[derive(Default, Debug)] #[derive(Default, Debug)]
pub struct OsSystem { pub struct OsSystem {
inner: Arc<OsSystemInner>, inner: Arc<OsSystemInner>,
@ -67,6 +73,14 @@ impl System for OsSystem {
&self.inner.cwd &self.inner.cwd
} }
/// Creates a builder to recursively walk `path`.
///
/// The walker ignores files according to [`ignore::WalkBuilder::standard_filters`]
/// when setting [`WalkDirectoryBuilder::standard_filters`] to true.
fn walk_directory(&self, path: &SystemPath) -> WalkDirectoryBuilder {
WalkDirectoryBuilder::new(path, OsDirectoryWalker {})
}
fn as_any(&self) -> &dyn Any { fn as_any(&self) -> &dyn Any {
self self
} }
@ -75,11 +89,156 @@ impl System for OsSystem {
&self, &self,
path: &SystemPath, path: &SystemPath,
) -> Result<Box<dyn Iterator<Item = Result<DirectoryEntry>>>> { ) -> Result<Box<dyn Iterator<Item = Result<DirectoryEntry>>>> {
Ok(Box::new( Ok(Box::new(path.as_utf8_path().read_dir_utf8()?.map(|res| {
path.as_utf8_path() let res = res?;
.read_dir_utf8()?
.map(|res| res.map(DirectoryEntry::from)), let file_type = res.file_type()?;
)) Ok(DirectoryEntry {
path: SystemPathBuf::from_utf8_path_buf(res.into_path()),
file_type: file_type.into(),
})
})))
}
}
#[derive(Debug)]
struct OsDirectoryWalker;
impl DirectoryWalker for OsDirectoryWalker {
fn walk(
&self,
visitor_builder: &mut dyn WalkDirectoryVisitorBuilder,
configuration: WalkDirectoryConfiguration,
) {
let WalkDirectoryConfiguration {
paths,
ignore_hidden: hidden,
standard_filters,
} = configuration;
let Some((first, additional)) = paths.split_first() else {
return;
};
let mut builder = ignore::WalkBuilder::new(first.as_std_path());
builder.standard_filters(standard_filters);
builder.hidden(hidden);
for additional_path in additional {
builder.add(additional_path.as_std_path());
}
builder.threads(
std::thread::available_parallelism()
.map_or(1, std::num::NonZeroUsize::get)
.min(12),
);
builder.build_parallel().run(|| {
let mut visitor = visitor_builder.build();
Box::new(move |entry| {
match entry {
Ok(entry) => {
// SAFETY: The walkdir crate supports `stdin` files and `file_type` can be `None` for these files.
// We don't make use of this feature, which is why unwrapping here is ok.
let file_type = entry.file_type().unwrap();
let depth = entry.depth();
// `walkdir` reports errors related to parsing ignore files as part of the entry.
// These aren't fatal for us. We should keep going even if an ignore file contains a syntax error.
// But we log the error here for better visibility (same as ripgrep, Ruff ignores it)
if let Some(error) = entry.error() {
tracing::warn!("{error}");
}
match SystemPathBuf::from_path_buf(entry.into_path()) {
Ok(path) => {
let directory_entry = walk_directory::DirectoryEntry {
path,
file_type: file_type.into(),
depth,
};
visitor.visit(Ok(directory_entry)).into()
}
Err(path) => {
visitor.visit(Err(walk_directory::Error {
depth: Some(depth),
kind: walk_directory::ErrorKind::NonUtf8Path { path },
}));
// Skip the entire directory because all the paths won't be UTF-8 paths.
ignore::WalkState::Skip
}
}
}
Err(error) => match ignore_to_walk_directory_error(error, None, None) {
Ok(error) => visitor.visit(Err(error)).into(),
Err(error) => {
// This should only be reached when the error is a `.ignore` file related error
// (which, should not be reported here but the `ignore` crate doesn't distinguish between ignore and IO errors).
// Let's log the error to at least make it visible.
tracing::warn!("Failed to traverse directory: {error}.");
ignore::WalkState::Continue
}
},
}
})
});
}
}
#[cold]
fn ignore_to_walk_directory_error(
error: ignore::Error,
path: Option<PathBuf>,
depth: Option<usize>,
) -> std::result::Result<walk_directory::Error, ignore::Error> {
use ignore::Error;
match error {
Error::WithPath { path, err } => ignore_to_walk_directory_error(*err, Some(path), depth),
Error::WithDepth { err, depth } => ignore_to_walk_directory_error(*err, path, Some(depth)),
Error::WithLineNumber { err, .. } => ignore_to_walk_directory_error(*err, path, depth),
Error::Loop { child, ancestor } => {
match (
SystemPathBuf::from_path_buf(child),
SystemPathBuf::from_path_buf(ancestor),
) {
(Ok(child), Ok(ancestor)) => Ok(walk_directory::Error {
depth,
kind: walk_directory::ErrorKind::Loop { child, ancestor },
}),
(Err(child), _) => Ok(walk_directory::Error {
depth,
kind: walk_directory::ErrorKind::NonUtf8Path { path: child },
}),
// We should never reach this because we should never traverse into a non UTF8 path but handle it anyway.
(_, Err(ancestor)) => Ok(walk_directory::Error {
depth,
kind: walk_directory::ErrorKind::NonUtf8Path { path: ancestor },
}),
}
}
Error::Io(err) => match path.map(SystemPathBuf::from_path_buf).transpose() {
Ok(path) => Ok(walk_directory::Error {
depth,
kind: walk_directory::ErrorKind::Io { path, err },
}),
Err(path) => Ok(walk_directory::Error {
depth,
kind: walk_directory::ErrorKind::NonUtf8Path { path },
}),
},
// Ignore related errors, we warn about them but we don't abort iteration because of them.
error @ (Error::Glob { .. }
| Error::UnrecognizedFileType(_)
| Error::InvalidDefinition
| Error::Partial(..)) => Err(error),
} }
} }
@ -95,21 +254,22 @@ impl From<std::fs::FileType> for FileType {
} }
} }
impl From<camino::Utf8DirEntry> for DirectoryEntry { impl From<WalkState> for ignore::WalkState {
fn from(value: camino::Utf8DirEntry) -> Self { fn from(value: WalkState) -> Self {
let file_type = value.file_type().map(FileType::from); match value {
Self { WalkState::Continue => ignore::WalkState::Continue,
path: SystemPathBuf::from_utf8_path_buf(value.into_path()), WalkState::Skip => ignore::WalkState::Skip,
file_type, WalkState::Quit => ignore::WalkState::Quit,
} }
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use tempfile::TempDir;
use super::*; use super::*;
use crate::system::walk_directory::tests::DirectoryEntryToString;
use crate::system::DirectoryEntry;
use tempfile::TempDir;
#[test] #[test]
fn read_directory() { fn read_directory() {
@ -132,9 +292,9 @@ mod tests {
sorted_contents.sort_by(|a, b| a.path.cmp(&b.path)); sorted_contents.sort_by(|a, b| a.path.cmp(&b.path));
let expected_contents = vec![ let expected_contents = vec![
DirectoryEntry::new(tempdir_path.join("a/bar.py"), Ok(FileType::File)), DirectoryEntry::new(tempdir_path.join("a/bar.py"), FileType::File),
DirectoryEntry::new(tempdir_path.join("a/baz.pyi"), Ok(FileType::File)), DirectoryEntry::new(tempdir_path.join("a/baz.pyi"), FileType::File),
DirectoryEntry::new(tempdir_path.join("a/foo"), Ok(FileType::Directory)), DirectoryEntry::new(tempdir_path.join("a/foo"), FileType::Directory),
]; ];
assert_eq!(sorted_contents, expected_contents) assert_eq!(sorted_contents, expected_contents)
} }
@ -169,4 +329,158 @@ mod tests {
assert!(error.to_string().contains("Not a directory")); assert!(error.to_string().contains("Not a directory"));
} }
} }
/// Walks a nested directory tree inside a temporary directory and checks the
/// file type and depth that is reported for every visited path (relative to the root).
#[test]
fn walk_directory() -> std::io::Result<()> {
    let tempdir = TempDir::new()?;
    let root = tempdir.path();
    std::fs::create_dir_all(root.join("a/b"))?;
    std::fs::write(root.join("foo.py"), "print('foo')")?;
    std::fs::write(root.join("a/bar.py"), "print('bar')")?;
    std::fs::write(root.join("a/baz.py"), "print('baz')")?;
    std::fs::write(root.join("a/b/c.py"), "print('c')")?;
    let root_sys = SystemPath::from_std_path(root).unwrap();
    let system = OsSystem::new(root_sys);
    // Records every visited entry so that the result can be compared as a string.
    let writer = DirectoryEntryToString::new(root_sys.to_path_buf());
    system.walk_directory(root_sys).run(|| {
        Box::new(|entry| {
            writer.write_entry(entry);
            WalkState::Continue
        })
    });
    assert_eq!(
        writer.to_string(),
        r#"{
"": (
Directory,
0,
),
"a": (
Directory,
1,
),
"a/b": (
Directory,
2,
),
"a/b/c.py": (
File,
3,
),
"a/bar.py": (
File,
2,
),
"a/baz.py": (
File,
2,
),
"foo.py": (
File,
1,
),
}"#
    );
    Ok(())
}
/// With standard filters enabled, a directory excluded by an `.ignore` file
/// (here `a/b`) and its content are not reported.
#[test]
fn walk_directory_ignore() -> std::io::Result<()> {
    let tempdir = TempDir::new()?;
    let root = tempdir.path();
    std::fs::create_dir_all(root.join("a/b"))?;
    std::fs::write(root.join("foo.py"), "print('foo')\n")?;
    std::fs::write(root.join("a/bar.py"), "print('bar')\n")?;
    std::fs::write(root.join("a/baz.py"), "print('baz')\n")?;
    // Exclude the `b` directory.
    std::fs::write(root.join("a/.ignore"), "b/\n")?;
    std::fs::write(root.join("a/b/c.py"), "print('c')\n")?;
    let root_sys = SystemPath::from_std_path(root).unwrap();
    let system = OsSystem::new(root_sys);
    let writer = DirectoryEntryToString::new(root_sys.to_path_buf());
    system
        .walk_directory(root_sys)
        .standard_filters(true)
        .run(|| {
            Box::new(|entry| {
                writer.write_entry(entry);
                WalkState::Continue
            })
        });
    // Neither `a/b`, `a/b/c.py` nor the `.ignore` file itself are expected.
    assert_eq!(
        writer.to_string(),
        r#"{
"": (
Directory,
0,
),
"a": (
Directory,
1,
),
"a/bar.py": (
File,
2,
),
"a/baz.py": (
File,
2,
),
"foo.py": (
File,
1,
),
}"#
    );
    Ok(())
}
/// Walking a path that points to a file yields exactly one entry for that file at depth 0.
#[test]
fn walk_directory_file() -> std::io::Result<()> {
    let tempdir = TempDir::new()?;
    let root = tempdir.path();
    std::fs::write(root.join("foo.py"), "print('foo')\n")?;
    let root_sys = SystemPath::from_std_path(root).unwrap();
    let system = OsSystem::new(root_sys);
    let writer = DirectoryEntryToString::new(root_sys.to_path_buf());
    // Start the walk at the file itself rather than at its parent directory.
    system
        .walk_directory(&root_sys.join("foo.py"))
        .standard_filters(true)
        .run(|| {
            Box::new(|entry| {
                writer.write_entry(entry);
                WalkState::Continue
            })
        });
    assert_eq!(
        writer.to_string(),
        r#"{
"foo.py": (
File,
0,
),
}"#
    );
    Ok(())
}
} }

View file

@ -1,9 +1,11 @@
use crate::files::File; use crate::files::File;
use crate::system::{ use crate::system::{DirectoryEntry, MemoryFileSystem, Metadata, Result, System, SystemPath};
DirectoryEntry, MemoryFileSystem, Metadata, OsSystem, Result, System, SystemPath,
};
use crate::Db; use crate::Db;
use std::any::Any; use std::any::Any;
use std::panic::RefUnwindSafe;
use std::sync::Arc;
use super::walk_directory::WalkDirectoryBuilder;
/// System implementation intended for testing. /// System implementation intended for testing.
/// ///
@ -14,7 +16,7 @@ use std::any::Any;
/// Don't use this system for production code. It's intended for testing only. /// Don't use this system for production code. It's intended for testing only.
#[derive(Default, Debug)] #[derive(Default, Debug)]
pub struct TestSystem { pub struct TestSystem {
inner: TestFileSystem, inner: TestSystemInner,
} }
impl TestSystem { impl TestSystem {
@ -29,58 +31,68 @@ impl TestSystem {
/// ## Panics /// ## Panics
/// If this test db isn't using a memory file system. /// If this test db isn't using a memory file system.
pub fn memory_file_system(&self) -> &MemoryFileSystem { pub fn memory_file_system(&self) -> &MemoryFileSystem {
if let TestFileSystem::Stub(fs) = &self.inner { if let TestSystemInner::Stub(fs) = &self.inner {
fs fs
} else { } else {
panic!("The test db is not using a memory file system"); panic!("The test db is not using a memory file system");
} }
} }
fn use_os_system(&mut self, os: OsSystem) { fn use_system<S>(&mut self, system: S)
self.inner = TestFileSystem::Os(os); where
S: System + Send + Sync + RefUnwindSafe + 'static,
{
self.inner = TestSystemInner::System(Arc::new(system));
} }
} }
impl System for TestSystem { impl System for TestSystem {
fn path_metadata(&self, path: &SystemPath) -> crate::system::Result<Metadata> { fn path_metadata(&self, path: &SystemPath) -> crate::system::Result<Metadata> {
match &self.inner { match &self.inner {
TestFileSystem::Stub(fs) => fs.metadata(path), TestSystemInner::Stub(fs) => fs.metadata(path),
TestFileSystem::Os(fs) => fs.path_metadata(path), TestSystemInner::System(fs) => fs.path_metadata(path),
} }
} }
fn read_to_string(&self, path: &SystemPath) -> crate::system::Result<String> { fn read_to_string(&self, path: &SystemPath) -> crate::system::Result<String> {
match &self.inner { match &self.inner {
TestFileSystem::Stub(fs) => fs.read_to_string(path), TestSystemInner::Stub(fs) => fs.read_to_string(path),
TestFileSystem::Os(fs) => fs.read_to_string(path), TestSystemInner::System(fs) => fs.read_to_string(path),
} }
} }
fn path_exists(&self, path: &SystemPath) -> bool { fn path_exists(&self, path: &SystemPath) -> bool {
match &self.inner { match &self.inner {
TestFileSystem::Stub(fs) => fs.exists(path), TestSystemInner::Stub(fs) => fs.exists(path),
TestFileSystem::Os(fs) => fs.path_exists(path), TestSystemInner::System(system) => system.path_exists(path),
} }
} }
fn is_directory(&self, path: &SystemPath) -> bool { fn is_directory(&self, path: &SystemPath) -> bool {
match &self.inner { match &self.inner {
TestFileSystem::Stub(fs) => fs.is_directory(path), TestSystemInner::Stub(fs) => fs.is_directory(path),
TestFileSystem::Os(fs) => fs.is_directory(path), TestSystemInner::System(system) => system.is_directory(path),
} }
} }
fn is_file(&self, path: &SystemPath) -> bool { fn is_file(&self, path: &SystemPath) -> bool {
match &self.inner { match &self.inner {
TestFileSystem::Stub(fs) => fs.is_file(path), TestSystemInner::Stub(fs) => fs.is_file(path),
TestFileSystem::Os(fs) => fs.is_file(path), TestSystemInner::System(system) => system.is_file(path),
} }
} }
fn current_directory(&self) -> &SystemPath { fn current_directory(&self) -> &SystemPath {
match &self.inner { match &self.inner {
TestFileSystem::Stub(fs) => fs.current_directory(), TestSystemInner::Stub(fs) => fs.current_directory(),
TestFileSystem::Os(fs) => fs.current_directory(), TestSystemInner::System(system) => system.current_directory(),
}
}
fn walk_directory(&self, path: &SystemPath) -> WalkDirectoryBuilder {
match &self.inner {
TestSystemInner::Stub(fs) => fs.walk_directory(path),
TestSystemInner::System(system) => system.walk_directory(path),
} }
} }
@ -93,8 +105,8 @@ impl System for TestSystem {
path: &SystemPath, path: &SystemPath,
) -> Result<Box<dyn Iterator<Item = Result<DirectoryEntry>> + 'a>> { ) -> Result<Box<dyn Iterator<Item = Result<DirectoryEntry>> + 'a>> {
match &self.inner { match &self.inner {
TestFileSystem::Os(fs) => fs.read_directory(path), TestSystemInner::System(fs) => fs.read_directory(path),
TestFileSystem::Stub(fs) => Ok(Box::new(fs.read_directory(path)?)), TestSystemInner::Stub(fs) => Ok(Box::new(fs.read_directory(path)?)),
} }
} }
} }
@ -146,13 +158,16 @@ pub trait DbWithTestSystem: Db + Sized {
Ok(()) Ok(())
} }
/// Uses the real file system instead of the memory file system. /// Uses the given system instead of the testing system.
/// ///
/// This is useful for testing advanced file system features like permissions, symlinks, etc. /// This is useful for testing advanced file system features like permissions, symlinks, etc.
/// ///
/// Note that any files written to the memory file system won't be copied over. /// Note that any files written to the memory file system won't be copied over.
fn use_os_system(&mut self, os: OsSystem) { fn use_system<S>(&mut self, os: S)
self.test_system_mut().use_os_system(os); where
S: System + Send + Sync + RefUnwindSafe + 'static,
{
self.test_system_mut().use_system(os);
} }
/// Returns the memory file system. /// Returns the memory file system.
@ -165,21 +180,21 @@ pub trait DbWithTestSystem: Db + Sized {
} }
#[derive(Debug)] #[derive(Debug)]
enum TestFileSystem { enum TestSystemInner {
Stub(MemoryFileSystem), Stub(MemoryFileSystem),
Os(OsSystem), System(Arc<dyn System + RefUnwindSafe + Send + Sync>),
} }
impl TestFileSystem { impl TestSystemInner {
fn snapshot(&self) -> Self { fn snapshot(&self) -> Self {
match self { match self {
Self::Stub(fs) => Self::Stub(fs.snapshot()), Self::Stub(system) => Self::Stub(system.snapshot()),
Self::Os(fs) => Self::Os(fs.snapshot()), Self::System(system) => Self::System(Arc::clone(system)),
} }
} }
} }
impl Default for TestFileSystem { impl Default for TestSystemInner {
fn default() -> Self { fn default() -> Self {
Self::Stub(MemoryFileSystem::default()) Self::Stub(MemoryFileSystem::default())
} }

View file

@ -0,0 +1,318 @@
use crate::system::SystemPathBuf;
use std::fmt::{Display, Formatter};
use std::path::PathBuf;
use super::{FileType, SystemPath};
/// A builder for constructing a recursive directory traversal.
pub struct WalkDirectoryBuilder {
    /// The implementation that does the directory walking.
    walker: Box<dyn DirectoryWalker>,

    /// The paths that should be walked.
    paths: Vec<SystemPathBuf>,

    /// Whether hidden files should be ignored. Set by `ignore_hidden` and `standard_filters`.
    ignore_hidden: bool,

    /// Whether the standard ignore filters are enabled. Passed on to the walker as part
    /// of the `WalkDirectoryConfiguration`.
    standard_filters: bool,
}
impl WalkDirectoryBuilder {
    /// Creates a builder that walks `path` with the given `walker`.
    ///
    /// Ignoring hidden files and the standard filters are both enabled initially.
    pub fn new<W>(path: impl AsRef<SystemPath>, walker: W) -> Self
    where
        W: DirectoryWalker + 'static,
    {
        let walker: Box<dyn DirectoryWalker> = Box::new(walker);
        let paths = vec![path.as_ref().to_path_buf()];

        Self {
            walker,
            paths,
            ignore_hidden: true,
            standard_filters: true,
        }
    }

    /// Adds a path that should be traversed recursively.
    ///
    /// Each additional path is traversed recursively.
    /// This should be preferred over building multiple
    /// walkers since it enables reusing resources.
    #[allow(clippy::should_implement_trait)]
    pub fn add(mut self, path: impl AsRef<SystemPath>) -> Self {
        let additional = path.as_ref().to_path_buf();
        self.paths.push(additional);
        self
    }

    /// Whether hidden files should be ignored.
    ///
    /// The definition of what a hidden file is depends on the [`System`](super::System)
    /// and can be platform-dependent.
    ///
    /// This is enabled by default.
    pub fn ignore_hidden(self, hidden: bool) -> Self {
        Self {
            ignore_hidden: hidden,
            ..self
        }
    }

    /// Enables all the standard ignore filters.
    ///
    /// This toggles, as a group, all the filters that are enabled by default:
    /// * [`hidden`](Self::ignore_hidden)
    /// * Any [`System`](super::System) specific filters (e.g., respecting `.ignore` and `.gitignore` files).
    ///
    /// Defaults to `true`.
    pub fn standard_filters(self, standard_filters: bool) -> Self {
        Self {
            standard_filters,
            // The hidden filter is part of the standard filters and is toggled with them.
            ignore_hidden: standard_filters,
            ..self
        }
    }

    /// Runs the directory traversal and calls the passed `builder` to create the visitor
    /// closures that do the visiting. The walker may run multiple threads to visit the directories.
    pub fn run<'s, F>(self, builder: F)
    where
        F: FnMut() -> FnVisitor<'s>,
    {
        self.visit(&mut FnBuilder { builder });
    }

    /// Runs the directory traversal and calls the passed `builder` to create visitors
    /// that do the visiting. The walker may run multiple threads to visit the directories.
    pub fn visit(self, builder: &mut dyn WalkDirectoryVisitorBuilder) {
        let Self {
            walker,
            paths,
            ignore_hidden,
            standard_filters,
        } = self;

        let configuration = WalkDirectoryConfiguration {
            paths,
            ignore_hidden,
            standard_filters,
        };

        walker.walk(builder, configuration);
    }
}
/// Backend that performs the actual directory walking for a [`WalkDirectoryBuilder`].
pub trait DirectoryWalker {
    /// Walks according to `configuration`, using `builder` to create the visitors that
    /// handle the directory entries.
    fn walk(
        &self,
        builder: &mut dyn WalkDirectoryVisitorBuilder,
        configuration: WalkDirectoryConfiguration,
    );
}
/// Factory for visitors: creates a visitor for each thread that does the visiting.
pub trait WalkDirectoryVisitorBuilder<'s> {
    /// Creates a new visitor.
    fn build(&mut self) -> Box<dyn WalkDirectoryVisitor + 's>;
}
/// Visitor that handles the individual directory entries.
pub trait WalkDirectoryVisitor: Send {
    /// Handles a single directory entry, or the error that was produced instead of one,
    /// and returns the [`WalkState`] telling the walker how to proceed.
    fn visit(&mut self, entry: std::result::Result<DirectoryEntry, Error>) -> WalkState;
}
/// Adapter that wraps a closure so that it can be used as a
/// `WalkDirectoryVisitorBuilder`.
struct FnBuilder<F> {
    /// The wrapped closure; it is invoked each time a new visitor is requested.
    builder: F,
}
impl<'s, F> WalkDirectoryVisitorBuilder<'s> for FnBuilder<F>
where
    F: FnMut() -> FnVisitor<'s>,
{
    /// Calls the wrapped closure to obtain a visitor closure and boxes it behind the
    /// [`WalkDirectoryVisitor`] trait.
    fn build(&mut self) -> Box<dyn WalkDirectoryVisitor + 's> {
        Box::new(FnVisitorImpl((self.builder)()))
    }
}
/// Boxed closure acting as a visitor: it receives each directory entry (or error) and
/// returns the [`WalkState`] for the traversal.
type FnVisitor<'s> =
    Box<dyn FnMut(std::result::Result<DirectoryEntry, Error>) -> WalkState + Send + 's>;
/// Newtype that lets a boxed visitor closure implement [`WalkDirectoryVisitor`].
struct FnVisitorImpl<'s>(FnVisitor<'s>);

impl WalkDirectoryVisitor for FnVisitorImpl<'_> {
    /// Forwards the entry to the wrapped closure and returns its verdict.
    fn visit(&mut self, entry: std::result::Result<DirectoryEntry, Error>) -> WalkState {
        let FnVisitorImpl(callback) = self;
        (*callback)(entry)
    }
}
/// The settings collected by a [`WalkDirectoryBuilder`] and handed to the
/// [`DirectoryWalker`] when the traversal starts.
pub struct WalkDirectoryConfiguration {
    /// The paths that should be walked.
    pub paths: Vec<SystemPathBuf>,

    /// Whether hidden files should be ignored.
    pub ignore_hidden: bool,

    /// Whether the standard ignore filters are enabled.
    pub standard_filters: bool,
}
/// A single entry encountered in a directory.
#[derive(Debug, Clone)]
pub struct DirectoryEntry {
    /// The full path that this entry represents.
    pub(super) path: SystemPathBuf,

    /// The file type for the file that this entry points to.
    pub(super) file_type: FileType,

    /// The depth at which this entry was created relative to the root.
    pub(super) depth: usize,
}
impl DirectoryEntry {
    /// Borrows the full path that this entry represents.
    pub fn path(&self) -> &SystemPath {
        &self.path
    }

    /// Consumes the entry and returns the full path that it represents.
    ///
    /// Analogous to [`DirectoryEntry::path`], but moves ownership of the path.
    pub fn into_path(self) -> SystemPathBuf {
        let Self { path, .. } = self;
        path
    }

    /// Returns the file type for the file that this entry points to.
    pub fn file_type(&self) -> FileType {
        self.file_type
    }

    /// Returns the depth at which this entry was created relative to the root.
    pub fn depth(&self) -> usize {
        self.depth
    }
}
/// The instruction a visitor returns to the walker after handling an entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum WalkState {
    /// Keep walking as normal.
    Continue,

    /// Don't descend into the given entry if it is a directory.
    /// Has no effect in all other cases.
    Skip,

    /// Stop the entire iteration as soon as possible.
    ///
    /// Note: This is an inherently asynchronous action. More entries may still be
    /// yielded even after instructing the iterator to quit.
    Quit,
}
/// An error produced while walking directories.
pub struct Error {
    /// The depth associated with this error, if one was recorded.
    pub(super) depth: Option<usize>,

    /// What went wrong.
    pub(super) kind: ErrorKind,
}
impl Error {
    /// Returns the depth associated with this error, if one was recorded.
    pub fn depth(&self) -> Option<usize> {
        self.depth
    }

    /// Returns the kind of this error.
    pub fn kind(&self) -> &ErrorKind {
        &self.kind
    }
}
impl Display for Error {
    /// Writes a human-readable message that depends on the [`ErrorKind`].
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match &self.kind {
            ErrorKind::NonUtf8Path { path } => {
                write!(f, "Non-UTF8 path: {}", path.display())
            }
            ErrorKind::Loop { ancestor, child } => write!(
                f,
                "File system loop found: {child} points to an ancestor {ancestor}",
            ),
            // Without a path there is nothing to add: defer to the I/O error's own message.
            ErrorKind::Io { path: None, err } => Display::fmt(err, f),
            ErrorKind::Io {
                path: Some(path),
                err,
            } => write!(f, "IO error for operation on {}: {}", path, err),
        }
    }
}
impl std::fmt::Debug for Error {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self, f)
}
}
// Empty impl: no methods are overridden, so the trait's default behavior is used as-is.
impl std::error::Error for Error {}
/// The different kinds of [`Error`].
#[derive(Debug)]
pub enum ErrorKind {
    /// A file loop was detected while traversing symbolic links:
    /// `child` points to its ancestor `ancestor`.
    Loop {
        ancestor: SystemPathBuf,
        child: SystemPathBuf,
    },

    /// An I/O operation failed; `path` is the path it was operating on, when available.
    Io {
        path: Option<SystemPathBuf>,
        err: std::io::Error,
    },

    /// The path is not a valid UTF-8 path.
    NonUtf8Path { path: PathBuf },
}
#[cfg(test)]
pub(super) mod tests {
    use crate::system::walk_directory::{DirectoryEntry, Error};
    use crate::system::{FileType, SystemPathBuf};
    use std::collections::BTreeMap;

    /// Test helper that creates a visual representation of the visited directory entries.
    ///
    /// Entries are recorded through a shared reference (`&self`), which is why the state
    /// lives behind a mutex.
    pub(crate) struct DirectoryEntryToString {
        /// Prefix that is stripped from every visited path before it is recorded.
        root_path: SystemPathBuf,
        inner: std::sync::Mutex<DirectoryEntryToStringInner>,
    }

    impl DirectoryEntryToString {
        /// Creates an empty recorder that reports paths relative to `root_path`.
        pub(crate) fn new(root_path: SystemPathBuf) -> Self {
            Self {
                root_path,
                inner: std::sync::Mutex::new(DirectoryEntryToStringInner::default()),
            }
        }

        /// Records a visited entry, or appends the error's message to the error log.
        ///
        /// # Panics
        ///
        /// Panics if the internal mutex is poisoned.
        pub(crate) fn write_entry(&self, entry: Result<DirectoryEntry, Error>) {
            let mut inner = self.inner.lock().unwrap();
            let DirectoryEntryToStringInner { errors, visited } = &mut *inner;

            match entry {
                Ok(entry) => {
                    // Paths outside of the root are recorded unchanged.
                    let relative_path = entry
                        .path()
                        .strip_prefix(&self.root_path)
                        .unwrap_or(entry.path());

                    // Normalize the separator so that the output is platform independent.
                    let unix_path = relative_path
                        .components()
                        .map(|component| component.as_str())
                        .collect::<Vec<_>>()
                        .join("/");

                    visited.insert(unix_path, (entry.file_type, entry.depth));
                }
                Err(error) => {
                    errors.push_str(&error.to_string());
                    errors.push('\n');
                }
            }
        }
    }

    impl std::fmt::Display for DirectoryEntryToString {
        /// Writes the visited entries (pretty debug format), followed by the collected
        /// errors if there are any.
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let inner = self.inner.lock().unwrap();

            write!(f, "{paths:#?}", paths = inner.visited)?;

            if !inner.errors.is_empty() {
                // Propagate formatter errors to the caller instead of panicking.
                writeln!(f, "\n\n{errors}", errors = inner.errors)?;
            }

            Ok(())
        }
    }

    #[derive(Default)]
    struct DirectoryEntryToStringInner {
        /// The messages of all errors seen so far, one per line.
        errors: String,
        /// Stores the visited path. The key is the relative path to the root, using `/` as path separator.
        visited: BTreeMap<String, (FileType, usize)>,
    }
}