red-knot: Add directory support to MemoryFileSystem (#11825)

This commit is contained in:
Micha Reiser 2024-06-13 08:48:28 +01:00 committed by GitHub
parent d4dd96d1f4
commit 22b6488550
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 463 additions and 93 deletions

View file

@ -27,6 +27,18 @@ pub trait FileSystem {
/// Returns `true` if `path` exists.
fn exists(&self, path: &FileSystemPath) -> bool;
/// Returns `true` if `path` exists and is a directory.
///
/// A failed metadata lookup is reported as `false`.
fn is_directory(&self, path: &FileSystemPath) -> bool {
    match self.metadata(path) {
        Ok(metadata) => metadata.file_type.is_directory(),
        Err(_) => false,
    }
}
/// Returns `true` if `path` exists and is a file.
///
/// A failed metadata lookup is reported as `false`.
fn is_file(&self, path: &FileSystemPath) -> bool {
    match self.metadata(path) {
        Ok(metadata) => metadata.file_type.is_file(),
        Err(_) => false,
    }
}
}
// TODO support untitled files for the LSP use case. Wrap a `str` and `String`
@ -37,7 +49,7 @@ pub trait FileSystem {
///
/// The path is guaranteed to be valid UTF-8.
#[repr(transparent)]
#[derive(Eq, PartialEq, Hash)]
#[derive(Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct FileSystemPath(Utf8Path);
impl FileSystemPath {
@ -95,7 +107,7 @@ impl FileSystemPath {
///
/// The path is guaranteed to be valid UTF-8.
#[repr(transparent)]
#[derive(Eq, PartialEq, Clone, Hash)]
#[derive(Eq, PartialEq, Clone, Hash, PartialOrd, Ord)]
pub struct FileSystemPathBuf(Utf8PathBuf);
impl Default for FileSystemPathBuf {
@ -109,6 +121,10 @@ impl FileSystemPathBuf {
Self(Utf8PathBuf::new())
}
/// Wraps the given [`Utf8PathBuf`] in a [`FileSystemPathBuf`] without modifying it.
pub fn from_utf8_path_buf(path: Utf8PathBuf) -> Self {
    Self(path)
}
#[inline]
pub fn as_path(&self) -> &FileSystemPath {
FileSystemPath::new(&self.0)

View file

@ -1,23 +1,57 @@
use crate::file_system::{
FileSystem, FileSystemPath, FileSystemPathBuf, FileType, Metadata, Result,
};
use crate::FxDashMap;
use dashmap::mapref::one::RefMut;
use filetime::FileTime;
use rustc_hash::FxHasher;
use std::hash::BuildHasherDefault;
use std::io::ErrorKind;
use std::sync::Arc;
use std::sync::{Arc, RwLock, RwLockWriteGuard};
/// In memory file system.
use camino::{Utf8Path, Utf8PathBuf};
use filetime::FileTime;
use rustc_hash::FxHashMap;
use crate::file_system::{FileSystem, FileSystemPath, FileType, Metadata, Result};
/// File system that stores all content in memory.
///
/// Only intended for testing purposes. Directories aren't yet supported.
#[derive(Default)]
/// The file system supports files and directories. Paths are case-sensitive.
///
/// The implementation doesn't aim at fully capturing the behavior of a real file system.
/// The implementation intentionally doesn't support:
/// * symlinks
/// * hardlinks
/// * permissions: All files and directories have the permission 0755.
///
/// Use a tempdir with the real file system to test these advanced file system features and complex file system behavior.
///
/// Only intended for testing purposes.
pub struct MemoryFileSystem {
inner: Arc<MemoryFileSystemInner>,
}
impl MemoryFileSystem {
/// Permission bits reported for every file and directory (`0o755`, i.e. `rwxr-xr-x`).
const PERMISSION: u32 = 0o755;
/// Creates a file system whose current working directory is the root (`/`).
pub fn new() -> Self {
    let root = "/";
    Self::with_cwd(root)
}
/// Creates a file system that uses `cwd` as its current working directory.
///
/// The directory `cwd` and all of its ancestors are created up front.
///
/// # Panics
///
/// Panics if `cwd` isn't an absolute path.
pub fn with_cwd(cwd: impl AsRef<FileSystemPath>) -> Self {
    let working_directory = Utf8PathBuf::from(cwd.as_ref().as_str());

    assert!(
        working_directory.is_absolute(),
        "The current working directory must be an absolute path."
    );

    let inner = MemoryFileSystemInner {
        by_path: RwLock::new(FxHashMap::default()),
        cwd: working_directory.clone(),
    };
    let file_system = Self {
        inner: Arc::new(inner),
    };

    // Make sure the working directory itself is present in the path table.
    file_system
        .create_directory_all(FileSystemPath::new(&working_directory))
        .unwrap();

    file_system
}
#[must_use]
pub fn snapshot(&self) -> Self {
Self {
inner: self.inner.clone(),
@ -25,112 +59,415 @@ impl MemoryFileSystem {
}
/// Writes the files to the file system.
pub fn write_files<P, C>(&self, files: impl IntoIterator<Item = (P, C)>)
///
/// The operation overrides existing files with the same normalized path.
///
/// Enclosing directories are automatically created if they don't exist.
pub fn write_files<P, C>(&self, files: impl IntoIterator<Item = (P, C)>) -> Result<()>
where
P: AsRef<FileSystemPath>,
C: ToString,
{
for (path, content) in files {
self.write_file(path.as_ref(), content.to_string());
self.write_file(path.as_ref(), content.to_string())?;
}
Ok(())
}
/// Stores a new file in the file system
pub fn write_file(&self, path: &FileSystemPath, content: String) {
let mut entry = self.entry_or_insert(path);
let value = entry.value_mut();
value.content = content;
value.last_modified = FileTime::now();
}
/// Sets the permissions of the file at `path`.
/// Stores a new file in the file system.
///
/// Creates a new file with an empty content if the file doesn't exist.
pub fn set_permissions(&self, path: &FileSystemPath, permissions: u32) {
let mut entry = self.entry_or_insert(path);
let value = entry.value_mut();
value.permission = permissions;
}
/// Updates the last modified time of the file at `path` to now.
/// The operation overrides the content for an existing file with the same normalized `path`.
///
/// Creates a new file with an empty content if the file doesn't exist.
pub fn touch(&self, path: &FileSystemPath) {
let mut entry = self.entry_or_insert(path);
let value = entry.value_mut();
/// Enclosing directories are automatically created if they don't exist.
///
/// Every successful write sets the file's last modified timestamp to the current time.
///
/// # Errors
///
/// Fails if an enclosing component of `path` is a file or if `path` itself is a directory.
pub fn write_file(&self, path: impl AsRef<FileSystemPath>, content: String) -> Result<()> {
    let mut by_path = self.inner.by_path.write().unwrap();

    let normalized = normalize_path(path.as_ref(), &self.inner.cwd);

    let file = get_or_create_file(&mut by_path, &normalized)?;
    file.content = content;
    // Overwriting an existing file must bump its timestamp. Otherwise the revision
    // derived from `last_modified` stays the same and the change is unobservable.
    file.last_modified = FileTime::now();

    Ok(())
}
fn entry_or_insert(
&self,
path: &FileSystemPath,
) -> RefMut<FileSystemPathBuf, FileData, BuildHasherDefault<FxHasher>> {
self.inner
.files
.entry(path.to_path_buf())
.or_insert_with(|| FileData {
content: String::new(),
last_modified: FileTime::now(),
permission: 0o755,
})
/// Updates the last modified timestamp of the file at `path` to the current time.
///
/// A file that doesn't exist yet is created first.
pub fn touch(&self, path: impl AsRef<FileSystemPath>) -> Result<()> {
    let mut entries = self.inner.by_path.write().unwrap();
    let target = normalize_path(path.as_ref(), &self.inner.cwd);

    let file = get_or_create_file(&mut entries, &target)?;
    file.last_modified = FileTime::now();

    Ok(())
}
/// Creates the directory `path` together with every enclosing directory that is missing.
pub fn create_directory_all(&self, path: impl AsRef<FileSystemPath>) -> Result<()> {
    let target = normalize_path(path.as_ref(), &self.inner.cwd);
    let mut entries = self.inner.by_path.write().unwrap();

    create_dir_all(&mut entries, &target)
}
}
impl FileSystem for MemoryFileSystem {
fn metadata(&self, path: &FileSystemPath) -> Result<Metadata> {
let entry = self
.inner
.files
.get(&path.to_path_buf())
.ok_or_else(|| std::io::Error::new(ErrorKind::NotFound, "File not found"))?;
let by_path = self.inner.by_path.read().unwrap();
let normalized = normalize_path(path, &self.inner.cwd);
let value = entry.value();
let entry = by_path.get(&normalized).ok_or_else(not_found)?;
Ok(Metadata {
revision: value.last_modified.into(),
permissions: Some(value.permission),
file_type: FileType::File,
})
let metadata = match entry {
Entry::File(file) => Metadata {
revision: file.last_modified.into(),
permissions: Some(Self::PERMISSION),
file_type: FileType::File,
},
Entry::Directory(directory) => Metadata {
revision: directory.last_modified.into(),
permissions: Some(Self::PERMISSION),
file_type: FileType::Directory,
},
};
Ok(metadata)
}
fn read(&self, path: &FileSystemPath) -> Result<String> {
let entry = self
.inner
.files
.get(&path.to_path_buf())
.ok_or_else(|| std::io::Error::new(ErrorKind::NotFound, "File not found"))?;
let by_path = self.inner.by_path.read().unwrap();
let normalized = normalize_path(path, &self.inner.cwd);
let value = entry.value();
let entry = by_path.get(&normalized).ok_or_else(not_found)?;
Ok(value.content.clone())
match entry {
Entry::File(file) => Ok(file.content.clone()),
Entry::Directory(_) => Err(is_a_directory()),
}
}
fn exists(&self, path: &FileSystemPath) -> bool {
self.inner.files.contains_key(&path.to_path_buf())
let by_path = self.inner.by_path.read().unwrap();
let normalized = normalize_path(path, &self.inner.cwd);
by_path.contains_key(&normalized)
}
}
impl Default for MemoryFileSystem {
fn default() -> Self {
MemoryFileSystem::new()
}
}
impl std::fmt::Debug for MemoryFileSystem {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut map = f.debug_map();
let paths = self.inner.by_path.read().unwrap();
for entry in self.inner.files.iter() {
map.entry(entry.key(), entry.value());
}
map.finish()
f.debug_map().entries(paths.iter()).finish()
}
}
#[derive(Default)]
struct MemoryFileSystemInner {
files: FxDashMap<FileSystemPathBuf, FileData>,
by_path: RwLock<FxHashMap<Utf8PathBuf, Entry>>,
cwd: Utf8PathBuf,
}
#[derive(Debug)]
struct FileData {
enum Entry {
File(File),
Directory(Directory),
}
impl Entry {
    /// Returns `true` if this entry is a file rather than a directory.
    const fn is_file(&self) -> bool {
        match self {
            Entry::File(_) => true,
            Entry::Directory(_) => false,
        }
    }
}
#[derive(Debug)]
struct File {
content: String,
last_modified: FileTime,
permission: u32,
}
/// A directory entry of the in-memory file system.
#[derive(Debug)]
struct Directory {
    /// Timestamp of the directory; set to the current time when `create_dir_all` inserts the entry.
    last_modified: FileTime,
}
/// Builds the error that is returned when a path has no entry in the file system.
fn not_found() -> std::io::Error {
    use std::io::{Error, ErrorKind};

    Error::new(ErrorKind::NotFound, "No such file or directory")
}
/// Builds the error that is returned when a file operation targets a directory.
fn is_a_directory() -> std::io::Error {
    use std::io::{Error, ErrorKind};

    // Rust itself reports this condition as `ErrorKind::IsADirectory`, but that variant is
    // nightly only. Fall back to `Other` for now.
    let kind = ErrorKind::Other;
    Error::new(kind, "Is a directory")
}
/// Builds the error that is returned when a path component that must be a directory is a file.
fn not_a_directory() -> std::io::Error {
    use std::io::{Error, ErrorKind};

    // Rust itself reports this condition as `ErrorKind::NotADirectory`, but that variant is
    // nightly only. Fall back to `Other` for now.
    let kind = ErrorKind::Other;
    Error::new(kind, "Not a directory")
}
/// Normalizes the path by removing `.` and `..` components and transform the path into an absolute path.
///
/// Adapted from https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61
fn normalize_path(path: &FileSystemPath, cwd: &Utf8Path) -> Utf8PathBuf {
let path = camino::Utf8Path::new(path.as_str());
let mut components = path.components().peekable();
let mut ret =
if let Some(c @ (camino::Utf8Component::Prefix(..) | camino::Utf8Component::RootDir)) =
components.peek().cloned()
{
components.next();
Utf8PathBuf::from(c.as_str())
} else {
cwd.to_path_buf()
};
for component in components {
match component {
camino::Utf8Component::Prefix(..) => unreachable!(),
camino::Utf8Component::RootDir => {
ret.push(component);
}
camino::Utf8Component::CurDir => {}
camino::Utf8Component::ParentDir => {
ret.pop();
}
camino::Utf8Component::Normal(c) => {
ret.push(c);
}
}
}
ret
}
/// Ensures that a directory exists at `normalized` and at each of its ancestors.
///
/// Missing directories are inserted with the current time as their timestamp. Returns a
/// "Not a directory" error as soon as one of the visited paths turns out to be a file.
fn create_dir_all(
    paths: &mut RwLockWriteGuard<FxHashMap<Utf8PathBuf, Entry>>,
    normalized: &Utf8Path,
) -> Result<()> {
    // Grows by one component per iteration: the root first, the full path last.
    let mut current = Utf8PathBuf::new();

    for component in normalized.components() {
        current.push(component);

        let existing_or_new = paths.entry(current.clone()).or_insert_with(|| {
            let directory = Directory {
                last_modified: FileTime::now(),
            };
            Entry::Directory(directory)
        });

        if existing_or_new.is_file() {
            return Err(not_a_directory());
        }
    }

    Ok(())
}
/// Returns the file stored at `normalized`, creating an empty file if there is no entry yet.
///
/// The enclosing directories are created first. Fails with "Not a directory" if an enclosing
/// component is a file and with "Is a directory" if `normalized` itself is a directory.
fn get_or_create_file<'a>(
    paths: &'a mut RwLockWriteGuard<FxHashMap<Utf8PathBuf, Entry>>,
    normalized: &Utf8Path,
) -> Result<&'a mut File> {
    if let Some(parent) = normalized.parent() {
        create_dir_all(paths, parent)?;
    }

    let slot = paths.entry(normalized.to_path_buf()).or_insert_with(|| {
        let empty_file = File {
            content: String::new(),
            last_modified: FileTime::now(),
        };
        Entry::File(empty_file)
    });

    let Entry::File(file) = slot else {
        return Err(is_a_directory());
    };

    Ok(file)
}
#[cfg(test)]
mod tests {
    use crate::file_system::{FileSystem, FileSystemPath, MemoryFileSystem, Result};
    use std::io::ErrorKind;
    use std::time::Duration;

    /// Builds a file system that contains an empty file for each of the given paths.
    fn with_files<P>(files: impl IntoIterator<Item = P>) -> super::MemoryFileSystem
    where
        P: AsRef<FileSystemPath>,
    {
        let file_system = MemoryFileSystem::new();
        let empty_files = files.into_iter().map(|path| (path, ""));

        file_system.write_files(empty_files).unwrap();

        file_system
    }

    #[test]
    fn is_file() {
        let file_path = FileSystemPath::new("a.py");
        let file_system = with_files([file_path]);

        assert!(file_system.is_file(file_path));
        assert!(!file_system.is_directory(file_path));
    }

    #[test]
    fn exists() {
        let file_system = with_files(["a.py"]);

        let present = FileSystemPath::new("a.py");
        let absent = FileSystemPath::new("b.py");

        assert!(file_system.exists(present));
        assert!(!file_system.exists(absent));
    }

    #[test]
    fn exists_directories() {
        let file_system = with_files(["a/b/c.py"]);

        // Writing the file implicitly creates both enclosing directories.
        for expected in ["a", "a/b", "a/b/c.py"] {
            assert!(file_system.exists(FileSystemPath::new(expected)));
        }
    }

    #[test]
    fn path_normalization() {
        let file_system = with_files(["a.py"]);

        // Relative, absolute and non-normalized spellings all refer to the same file.
        for spelling in ["a.py", "/a.py", "/b/./../a.py"] {
            assert!(file_system.exists(FileSystemPath::new(spelling)));
        }
    }

    #[test]
    fn permissions() -> Result<()> {
        let file_system = with_files(["a.py"]);
        let metadata = file_system.metadata(FileSystemPath::new("a.py"))?;

        // The default permissions match the default on Linux: 0755
        assert_eq!(
            metadata.permissions(),
            Some(MemoryFileSystem::PERMISSION)
        );

        Ok(())
    }

    #[test]
    fn touch() -> Result<()> {
        let file_system = MemoryFileSystem::new();
        let file_path = FileSystemPath::new("a.py");

        // Touching a missing file creates it.
        file_system.touch(file_path)?;
        assert!(file_system.exists(file_path));

        let before = file_system.metadata(file_path)?.revision();

        // Wait a moment so that the second touch records a different timestamp.
        std::thread::sleep(Duration::from_millis(1));

        file_system.touch(file_path)?;
        let after = file_system.metadata(file_path)?.revision();

        assert_ne!(before, after);

        Ok(())
    }

    #[test]
    fn create_dir_all() {
        let file_system = MemoryFileSystem::new();
        let deepest = FileSystemPath::new("a/b/c");

        file_system.create_directory_all(deepest).unwrap();

        for directory in ["a", "a/b", "a/b/c"] {
            assert!(file_system.is_directory(FileSystemPath::new(directory)));
        }

        // Creating a directory that already exists must succeed as well.
        file_system.create_directory_all(deepest).unwrap();
    }

    #[test]
    fn create_dir_all_fails_if_a_component_is_a_file() {
        let file_system = with_files(["a/b.py"]);

        let result = file_system.create_directory_all(FileSystemPath::new("a/b.py/c"));

        assert_eq!(result.unwrap_err().kind(), ErrorKind::Other);
    }

    #[test]
    fn write_file_fails_if_a_component_is_a_file() {
        let file_system = with_files(["a/b.py"]);

        let result =
            file_system.write_file(FileSystemPath::new("a/b.py/c"), "content".to_string());

        assert_eq!(result.unwrap_err().kind(), ErrorKind::Other);
    }

    #[test]
    fn write_file_fails_if_path_points_to_a_directory() -> Result<()> {
        let file_system = MemoryFileSystem::new();
        file_system.create_directory_all("a")?;

        let result = file_system.write_file(FileSystemPath::new("a"), "content".to_string());

        assert_eq!(result.unwrap_err().kind(), ErrorKind::Other);

        Ok(())
    }

    #[test]
    fn read() -> Result<()> {
        let file_system = MemoryFileSystem::new();
        let file_path = FileSystemPath::new("a.py");

        file_system.write_file(file_path, "Test content".to_string())?;

        let content = file_system.read(file_path)?;
        assert_eq!(content, "Test content");

        Ok(())
    }

    #[test]
    fn read_fails_if_path_is_a_directory() -> Result<()> {
        let file_system = MemoryFileSystem::new();
        file_system.create_directory_all("a")?;

        let result = file_system.read(FileSystemPath::new("a"));

        assert_eq!(result.unwrap_err().kind(), ErrorKind::Other);

        Ok(())
    }

    #[test]
    fn read_fails_if_path_doesnt_exist() -> Result<()> {
        let file_system = MemoryFileSystem::new();

        let result = file_system.read(FileSystemPath::new("a"));

        assert_eq!(result.unwrap_err().kind(), ErrorKind::NotFound);

        Ok(())
    }
}

View file

@ -73,32 +73,37 @@ mod tests {
use crate::Db;
#[test]
fn python_file() {
fn python_file() -> crate::file_system::Result<()> {
let mut db = TestDb::new();
let path = FileSystemPath::new("test.py");
db.file_system_mut().write_file(path, "x = 10".to_string());
db.file_system_mut()
.write_file(path, "x = 10".to_string())?;
let file = db.file(path);
let parsed = parsed_module(&db, file);
assert!(parsed.is_valid());
Ok(())
}
#[test]
fn python_ipynb_file() {
fn python_ipynb_file() -> crate::file_system::Result<()> {
let mut db = TestDb::new();
let path = FileSystemPath::new("test.ipynb");
db.file_system_mut()
.write_file(path, "%timeit a = b".to_string());
.write_file(path, "%timeit a = b".to_string())?;
let file = db.file(path);
let parsed = parsed_module(&db, file);
assert!(parsed.is_valid());
Ok(())
}
#[test]

View file

@ -66,28 +66,34 @@ mod tests {
use crate::Db;
#[test]
fn re_runs_query_when_file_revision_changes() {
fn re_runs_query_when_file_revision_changes() -> crate::file_system::Result<()> {
let mut db = TestDb::new();
let path = FileSystemPath::new("test.py");
db.file_system_mut().write_file(path, "x = 10".to_string());
db.file_system_mut()
.write_file(path, "x = 10".to_string())?;
let file = db.file(path);
assert_eq!(&*source_text(&db, file), "x = 10");
db.file_system_mut().write_file(path, "x = 20".to_string());
db.file_system_mut()
.write_file(path, "x = 20".to_string())
.unwrap();
file.set_revision(&mut db).to(FileTime::now().into());
assert_eq!(&*source_text(&db, file), "x = 20");
Ok(())
}
#[test]
fn text_is_cached_if_revision_is_unchanged() {
fn text_is_cached_if_revision_is_unchanged() -> crate::file_system::Result<()> {
let mut db = TestDb::new();
let path = FileSystemPath::new("test.py");
db.file_system_mut().write_file(path, "x = 10".to_string());
db.file_system_mut()
.write_file(path, "x = 10".to_string())?;
let file = db.file(path);
@ -104,15 +110,17 @@ mod tests {
assert!(!events
.iter()
.any(|event| matches!(event.kind, EventKind::WillExecute { .. })));
Ok(())
}
#[test]
fn line_index_for_source() {
fn line_index_for_source() -> crate::file_system::Result<()> {
let mut db = TestDb::new();
let path = FileSystemPath::new("test.py");
db.file_system_mut()
.write_file(path, "x = 10\ny = 20".to_string());
.write_file(path, "x = 10\ny = 20".to_string())?;
let file = db.file(path);
let index = line_index(&db, file);
@ -123,5 +131,7 @@ mod tests {
index.line_start(OneIndexed::from_zero_indexed(0), &text),
TextSize::new(0)
);
Ok(())
}
}

View file

@ -266,11 +266,11 @@ mod tests {
use crate::Db;
#[test]
fn file_system_existing_file() {
fn file_system_existing_file() -> crate::file_system::Result<()> {
let mut db = TestDb::new();
db.file_system_mut()
.write_files([("test.py", "print('Hello world')")]);
.write_files([("test.py", "print('Hello world')")])?;
let test = db.file(FileSystemPath::new("test.py"));
@ -278,6 +278,8 @@ mod tests {
assert_eq!(test.permissions(&db), Some(0o755));
assert_ne!(test.revision(&db), FileRevision::zero());
assert_eq!(&test.read(&db), "print('Hello world')");
Ok(())
}
#[test]