diff --git a/Cargo.lock b/Cargo.lock index ea95b29c80..d81a1577b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -133,6 +133,12 @@ version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" + [[package]] name = "argfile" version = "0.2.0" @@ -214,6 +220,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "camino" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0ec6b951b160caa93cc0c7b209e5a3bff7aae9062213451ac99493cd844c239" + [[package]] name = "cast" version = "0.3.0" @@ -361,10 +373,10 @@ version = "4.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -605,7 +617,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.10.0", - "syn", + "syn 2.0.66", ] [[package]] @@ -616,7 +628,7 @@ checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ "darling_core", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -742,6 +754,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "eyre" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd915d99f24784cdc19fd37ef22b97e3ff0ae756c7e492e9fbfe897d61e2aec" +dependencies = [ + "indenter", + "once_cell", +] + [[package]] name = "fastrand" version = "2.0.2" @@ -879,6 +901,21 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown 0.14.5", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -981,6 +1018,12 @@ dependencies = [ "rust-stemmers", ] +[[package]] +name = "indenter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" + [[package]] name = "indexmap" version = "2.2.6" @@ -1086,7 +1129,7 @@ dependencies = [ "Inflector", "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -1212,7 +1255,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2ae40017ac09cd2c6a53504cb3c871c7f2b41466eac5bc66ba63f39073b467b" dependencies = [ "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -1976,6 +2019,19 @@ dependencies = [ "seahash", ] +[[package]] +name = "ruff_db" +version = "0.0.0" +dependencies = [ + "camino", + "countme", + "dashmap", + "filetime", + "rustc-hash", + "salsa-2022", + "tracing", +] + [[package]] name = "ruff_dev" version = "0.0.0" @@ -2120,7 +2176,7 @@ dependencies = [ "proc-macro2", "quote", "ruff_python_trivia", - "syn", + "syn 2.0.66", ] [[package]] @@ -2493,6 +2549,36 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +[[package]] +name = "salsa-2022" +version = "0.1.0" +source = "git+https://github.com/salsa-rs/salsa.git?rev=05b4e3ebdcdc47730cdd359e7e97fb2470527279#05b4e3ebdcdc47730cdd359e7e97fb2470527279" +dependencies = [ + "arc-swap", + "crossbeam", + "crossbeam-utils", + "dashmap", + "hashlink", + "indexmap", + "log", + "parking_lot", + "rustc-hash", + "salsa-2022-macros", + "smallvec", +] + +[[package]] +name = 
"salsa-2022-macros" +version = "0.1.0" +source = "git+https://github.com/salsa-rs/salsa.git?rev=05b4e3ebdcdc47730cdd359e7e97fb2470527279#05b4e3ebdcdc47730cdd359e7e97fb2470527279" +dependencies = [ + "eyre", + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "same-file" version = "1.0.6" @@ -2523,7 +2609,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn", + "syn 2.0.66", ] [[package]] @@ -2572,7 +2658,7 @@ checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -2583,7 +2669,7 @@ checksum = "330f01ce65a3a5fe59a60c82f3c9a024b573b8a6e875bd233fe5f934e71d54e3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -2605,7 +2691,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -2646,7 +2732,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -2742,11 +2828,11 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "rustversion", - "syn", + "syn 2.0.66", ] [[package]] @@ -2755,6 +2841,17 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.66" @@ -2819,7 +2916,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn", + "syn 2.0.66", 
] [[package]] @@ -2830,7 +2927,7 @@ checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", "test-case-core", ] @@ -2851,7 +2948,7 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -2963,7 +3060,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -3193,7 +3290,7 @@ checksum = "9881bea7cbe687e36c9ab3b778c36cd0487402e270304e8b1296d5085303c1a2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -3278,7 +3375,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.66", "wasm-bindgen-shared", ] @@ -3312,7 +3409,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3345,7 +3442,7 @@ checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -3614,7 +3711,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 09282926b3..a46220316a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ bincode = { version = "1.3.3" } bitflags = { version = "2.5.0" } bstr = { version = "1.9.1" } cachedir = { version = "0.3.1" } +camino = { version = "1.1.7" } chrono = { version = "0.4.35", default-features = false, features = ["clock"] } clap = { version = "4.5.3", features = ["derive"] } clap_complete_command = { version = "0.5.1" } @@ -79,7 +80,9 @@ libc = { version = "0.2.153" } libcst = { version = "1.1.0", default-features 
= false } log = { version = "0.4.17" } lsp-server = { version = "0.7.6" } -lsp-types = { git = "https://github.com/astral-sh/lsp-types.git", rev = "3512a9f", features = ["proposed"] } +lsp-types = { git = "https://github.com/astral-sh/lsp-types.git", rev = "3512a9f", features = [ + "proposed", +] } matchit = { version = "0.8.1" } memchr = { version = "2.7.1" } mimalloc = { version = "0.1.39" } @@ -100,13 +103,16 @@ rand = { version = "0.8.5" } rayon = { version = "1.10.0" } regex = { version = "1.10.2" } rustc-hash = { version = "1.1.0" } +salsa = { git = "https://github.com/salsa-rs/salsa.git", package = "salsa-2022", rev = "05b4e3ebdcdc47730cdd359e7e97fb2470527279" } schemars = { version = "0.8.16" } seahash = { version = "4.1.0" } serde = { version = "1.0.197", features = ["derive"] } serde-wasm-bindgen = { version = "0.6.4" } serde_json = { version = "1.0.113" } serde_test = { version = "1.0.152" } -serde_with = { version = "3.6.0", default-features = false, features = ["macros"] } +serde_with = { version = "3.6.0", default-features = false, features = [ + "macros", +] } shellexpand = { version = "3.0.0" } similar = { version = "2.4.0", features = ["inline"] } smallvec = { version = "1.13.2" } @@ -131,7 +137,12 @@ unicode_names2 = { version = "1.2.2" } unicode-normalization = { version = "0.1.23" } ureq = { version = "2.9.6" } url = { version = "2.5.0" } -uuid = { version = "1.6.1", features = ["v4", "fast-rng", "macro-diagnostics", "js"] } +uuid = { version = "1.6.1", features = [ + "v4", + "fast-rng", + "macro-diagnostics", + "js", +] } walkdir = { version = "2.3.2" } wasm-bindgen = { version = "0.2.92" } wasm-bindgen-test = { version = "0.3.42" } diff --git a/crates/ruff_db/Cargo.toml b/crates/ruff_db/Cargo.toml new file mode 100644 index 0000000000..06c4de9625 --- /dev/null +++ b/crates/ruff_db/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "ruff_db" +version = "0.0.0" +publish = false +authors = { workspace = true } +edition = { workspace = true } 
+rust-version = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } +repository = { workspace = true } +license = { workspace = true } + +[dependencies] +camino = { workspace = true } +countme = { workspace = true } +dashmap = { workspace = true } +filetime = { workspace = true } +salsa = { workspace = true } +tracing = { workspace = true } +rustc-hash = { workspace = true } diff --git a/crates/ruff_db/src/file_system.rs b/crates/ruff_db/src/file_system.rs new file mode 100644 index 0000000000..44ab921891 --- /dev/null +++ b/crates/ruff_db/src/file_system.rs @@ -0,0 +1,270 @@ +use std::fmt::Formatter; +use std::ops::Deref; +use std::path::Path; + +use camino::{Utf8Path, Utf8PathBuf}; +use filetime::FileTime; + +pub use memory::MemoryFileSystem; +pub use os::OsFileSystem; + +mod memory; +mod os; + +pub type Result = std::io::Result; + +/// A file system that can be used to read and write files. +/// +/// The file system is agnostic to the actual storage medium, it could be a real file system, a combination +/// of a real file system and an in-memory file system in the case of an LSP where unsaved changes are stored in memory, +/// or an all in-memory file system for testing. +pub trait FileSystem { + /// Reads the metadata of the file or directory at `path`. + fn metadata(&self, path: &FileSystemPath) -> Result; + + /// Reads the content of the file at `path`. + fn read(&self, path: &FileSystemPath) -> Result; + + /// Returns `true` if `path` exists. + fn exists(&self, path: &FileSystemPath) -> bool; +} + +// TODO support untitled files for the LSP use case. Wrap a `str` and `String` +// The main question is how `as_std_path` would work for untitled files, that can only exist in the LSP case +// but there's no compile time guarantee that a [`OsFileSystem`] never gets an untitled file path. + +/// Path to a file or directory stored in [`FileSystem`]. +/// +/// The path is guaranteed to be valid UTF-8. 
+#[repr(transparent)] +#[derive(Eq, PartialEq, Hash)] +pub struct FileSystemPath(Utf8Path); + +impl FileSystemPath { + pub fn new(path: &(impl AsRef + ?Sized)) -> &Self { + let path = path.as_ref(); + // SAFETY: FileSystemPath is marked as #[repr(transparent)] so the conversion from a + // *const Utf8Path to a *const FileSystemPath is valid. + unsafe { &*(path as *const Utf8Path as *const FileSystemPath) } + } + + /// Converts the path to an owned [`FileSystemPathBuf`]. + pub fn to_path_buf(&self) -> FileSystemPathBuf { + FileSystemPathBuf(self.0.to_path_buf()) + } + + /// Returns the path as a string slice. + #[inline] + pub fn as_str(&self) -> &str { + self.0.as_str() + } + + /// Returns the std path for the file. + #[inline] + pub fn as_std_path(&self) -> &Path { + self.0.as_std_path() + } +} + +/// Owned path to a file or directory stored in [`FileSystem`]. +/// +/// The path is guaranteed to be valid UTF-8. +#[repr(transparent)] +#[derive(Eq, PartialEq, Clone, Hash)] +pub struct FileSystemPathBuf(Utf8PathBuf); + +impl Default for FileSystemPathBuf { + fn default() -> Self { + Self::new() + } +} + +impl FileSystemPathBuf { + pub fn new() -> Self { + Self(Utf8PathBuf::new()) + } + + #[inline] + pub fn as_path(&self) -> &FileSystemPath { + FileSystemPath::new(&self.0) + } +} + +impl AsRef for FileSystemPathBuf { + #[inline] + fn as_ref(&self) -> &FileSystemPath { + self.as_path() + } +} + +impl AsRef for FileSystemPath { + #[inline] + fn as_ref(&self) -> &FileSystemPath { + self + } +} + +impl AsRef for str { + #[inline] + fn as_ref(&self) -> &FileSystemPath { + FileSystemPath::new(self) + } +} + +impl AsRef for String { + #[inline] + fn as_ref(&self) -> &FileSystemPath { + FileSystemPath::new(self) + } +} + +impl AsRef for FileSystemPath { + #[inline] + fn as_ref(&self) -> &Path { + self.0.as_std_path() + } +} + +impl Deref for FileSystemPathBuf { + type Target = FileSystemPath; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_path() + } +} + +impl std::fmt::Debug
for FileSystemPath { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl std::fmt::Display for FileSystemPath { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl std::fmt::Debug for FileSystemPathBuf { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl std::fmt::Display for FileSystemPathBuf { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Metadata { + revision: FileRevision, + permissions: Option, + file_type: FileType, +} + +impl Metadata { + pub fn revision(&self) -> FileRevision { + self.revision + } + + pub fn permissions(&self) -> Option { + self.permissions + } + + pub fn file_type(&self) -> FileType { + self.file_type + } +} + +/// A number representing the revision of a file. +/// +/// Two revisions that don't compare equal signify that the file has been modified. +/// Revisions aren't guaranteed to be monotonically increasing or in any specific order. +/// +/// Possible revisions are: +/// * The last modification time of the file. +/// * The hash of the file's content. +/// * The revision as it comes from an external system, for example the LSP. 
+#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct FileRevision(u128); + +impl FileRevision { + pub fn new(value: u128) -> Self { + Self(value) + } + + pub const fn zero() -> Self { + Self(0) + } + + #[must_use] + pub fn as_u128(self) -> u128 { + self.0 + } +} + +impl From for FileRevision { + fn from(value: u128) -> Self { + FileRevision(value) + } +} + +impl From for FileRevision { + fn from(value: u64) -> Self { + FileRevision(u128::from(value)) + } +} + +impl From for FileRevision { + fn from(value: FileTime) -> Self { + let seconds = value.seconds() as u128; + let seconds = seconds << 64; + let nanos = u128::from(value.nanoseconds()); + + FileRevision(seconds | nanos) + } +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)] +pub enum FileType { + File, + Directory, + Symlink, +} + +impl FileType { + pub const fn is_file(self) -> bool { + matches!(self, FileType::File) + } + + pub const fn is_directory(self) -> bool { + matches!(self, FileType::Directory) + } + + pub const fn is_symlink(self) -> bool { + matches!(self, FileType::Symlink) + } +} + +#[cfg(test)] +mod tests { + use crate::file_system::FileRevision; + use filetime::FileTime; + + #[test] + fn revision_from_file_time() { + let file_time = FileTime::now(); + let revision = FileRevision::from(file_time); + + let revision = revision.as_u128(); + + let nano = revision & 0xFFFF_FFFF_FFFF_FFFF; + let seconds = revision >> 64; + + assert_eq!(file_time.nanoseconds(), nano as u32); + assert_eq!(file_time.seconds(), seconds as i64); + } +} diff --git a/crates/ruff_db/src/file_system/memory.rs b/crates/ruff_db/src/file_system/memory.rs new file mode 100644 index 0000000000..83f9c43817 --- /dev/null +++ b/crates/ruff_db/src/file_system/memory.rs @@ -0,0 +1,136 @@ +use crate::file_system::{ + FileSystem, FileSystemPath, FileSystemPathBuf, FileType, Metadata, Result, +}; +use crate::FxDashMap; +use dashmap::mapref::one::RefMut; +use filetime::FileTime; +use rustc_hash::FxHasher; +use 
std::hash::BuildHasherDefault; +use std::io::ErrorKind; +use std::sync::Arc; + +/// In memory file system. +/// +/// Only intended for testing purposes. Directories aren't yet supported. +#[derive(Default)] +pub struct MemoryFileSystem { + inner: Arc, +} + +impl MemoryFileSystem { + pub fn snapshot(&self) -> Self { + Self { + inner: self.inner.clone(), + } + } + + /// Writes the files to the file system. + pub fn write_files(&self, files: impl IntoIterator) + where + P: AsRef, + C: ToString, + { + for (path, content) in files { + self.write_file(path.as_ref(), content.to_string()); + } + } + + /// Stores a new file in the file system + pub fn write_file(&self, path: &FileSystemPath, content: String) { + let mut entry = self.entry_or_insert(path); + let value = entry.value_mut(); + + value.content = content; + value.last_modified = FileTime::now(); + } + + /// Sets the permissions of the file at `path`. + /// + /// Creates a new file with an empty content if the file doesn't exist. + pub fn set_permissions(&self, path: &FileSystemPath, permissions: u32) { + let mut entry = self.entry_or_insert(path); + let value = entry.value_mut(); + value.permission = permissions; + } + + /// Updates the last modified time of the file at `path` to now. + /// + /// Creates a new file with an empty content if the file doesn't exist. 
+ pub fn touch(&self, path: &FileSystemPath) { + let mut entry = self.entry_or_insert(path); + let value = entry.value_mut(); + + value.last_modified = FileTime::now(); + } + + fn entry_or_insert( + &self, + path: &FileSystemPath, + ) -> RefMut> { + self.inner + .files + .entry(path.to_path_buf()) + .or_insert_with(|| FileData { + content: String::new(), + last_modified: FileTime::now(), + permission: 0o755, + }) + } +} + +impl FileSystem for MemoryFileSystem { + fn metadata(&self, path: &FileSystemPath) -> Result { + let entry = self + .inner + .files + .get(&path.to_path_buf()) + .ok_or_else(|| std::io::Error::new(ErrorKind::NotFound, "File not found"))?; + + let value = entry.value(); + + Ok(Metadata { + revision: value.last_modified.into(), + permissions: Some(value.permission), + file_type: FileType::File, + }) + } + + fn read(&self, path: &FileSystemPath) -> Result { + let entry = self + .inner + .files + .get(&path.to_path_buf()) + .ok_or_else(|| std::io::Error::new(ErrorKind::NotFound, "File not found"))?; + + let value = entry.value(); + + Ok(value.content.clone()) + } + + fn exists(&self, path: &FileSystemPath) -> bool { + self.inner.files.contains_key(&path.to_path_buf()) + } +} + +impl std::fmt::Debug for MemoryFileSystem { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut map = f.debug_map(); + + for entry in self.inner.files.iter() { + map.entry(entry.key(), entry.value()); + } + map.finish() + } +} + +#[derive(Default)] +struct MemoryFileSystemInner { + files: FxDashMap, +} + +#[derive(Debug)] +struct FileData { + content: String, + last_modified: FileTime, + permission: u32, +} diff --git a/crates/ruff_db/src/file_system/os.rs b/crates/ruff_db/src/file_system/os.rs new file mode 100644 index 0000000000..417e06b248 --- /dev/null +++ b/crates/ruff_db/src/file_system/os.rs @@ -0,0 +1,51 @@ +use crate::file_system::{FileSystem, FileSystemPath, FileType, Metadata, Result}; +use filetime::FileTime; + +pub struct 
OsFileSystem; + +impl OsFileSystem { + #[cfg(unix)] + fn permissions(metadata: &std::fs::Metadata) -> Option { + use std::os::unix::fs::PermissionsExt; + + Some(metadata.permissions().mode()) + } + + #[cfg(not(unix))] + fn permissions(_metadata: &std::fs::Metadata) -> Option { + None + } +} + +impl FileSystem for OsFileSystem { + fn metadata(&self, path: &FileSystemPath) -> Result { + let metadata = path.as_std_path().metadata()?; + let last_modified = FileTime::from_last_modification_time(&metadata); + + Ok(Metadata { + revision: last_modified.into(), + permissions: Self::permissions(&metadata), + file_type: metadata.file_type().into(), + }) + } + + fn read(&self, path: &FileSystemPath) -> Result { + std::fs::read_to_string(path) + } + + fn exists(&self, path: &FileSystemPath) -> bool { + path.as_std_path().exists() + } +} + +impl From for FileType { + fn from(file_type: std::fs::FileType) -> Self { + if file_type.is_file() { + FileType::File + } else if file_type.is_dir() { + FileType::Directory + } else { + FileType::Symlink + } + } +} diff --git a/crates/ruff_db/src/lib.rs b/crates/ruff_db/src/lib.rs new file mode 100644 index 0000000000..0895951043 --- /dev/null +++ b/crates/ruff_db/src/lib.rs @@ -0,0 +1,126 @@ +use std::hash::BuildHasherDefault; + +use rustc_hash::FxHasher; +use salsa::DbWithJar; + +use crate::file_system::{FileSystem, FileSystemPath}; +use crate::vfs::{VendoredPath, Vfs, VfsFile}; + +pub mod file_system; +pub mod vfs; + +pub(crate) type FxDashMap = dashmap::DashMap>; + +#[salsa::jar(db=Db)] +pub struct Jar(VfsFile); + +/// Database that gives access to the virtual filesystem, source code, and parsed AST. +pub trait Db: DbWithJar { + /// Interns a file system path and returns a salsa [`VfsFile`] ingredient. + /// + /// The operation is guaranteed to always succeed, even if the path doesn't exist, isn't accessible, or if the path points to a directory.
+ /// In these cases, a file with status [`FileStatus::Deleted`](vfs::FileStatus::Deleted) is returned. + fn file(&self, path: &FileSystemPath) -> VfsFile + where + Self: Sized, + { + self.vfs().file(self, path) + } + + /// Interns a vendored file path. Returns `None` if no such vendored file exists and `Some` otherwise. + fn vendored_file(&self, path: &VendoredPath) -> Option + where + Self: Sized, + { + self.vfs().vendored(self, path) + } + + fn file_system(&self) -> &dyn FileSystem; + + fn vfs(&self) -> &Vfs; +} + +/// Trait for upcasting a reference to a base trait object. +pub trait Upcast { + fn upcast(&self) -> &T; +} + +#[cfg(test)] +mod tests { + use crate::file_system::{FileSystem, MemoryFileSystem}; + use crate::vfs::{VendoredPathBuf, Vfs}; + use crate::{Db, Jar}; + + /// Database that can be used for testing. + /// + /// Uses an in memory filesystem and it stubs out the vendored files by default. + #[salsa::db(Jar)] + pub struct TestDb { + storage: salsa::Storage, + vfs: Vfs, + file_system: MemoryFileSystem, + events: std::sync::Arc>>, + } + + impl TestDb { + #[allow(unused)] + pub fn new() -> Self { + let mut vfs = Vfs::default(); + vfs.stub_vendored::([]); + + Self { + storage: salsa::Storage::default(), + file_system: MemoryFileSystem::default(), + events: std::sync::Arc::default(), + vfs, + } + } + + #[allow(unused)] + pub fn file_system(&self) -> &MemoryFileSystem { + &self.file_system + } + + #[allow(unused)] + pub fn events(&self) -> std::sync::Arc>> { + self.events.clone() + } + + pub fn file_system_mut(&mut self) -> &mut MemoryFileSystem { + &mut self.file_system + } + + pub fn vfs_mut(&mut self) -> &mut Vfs { + &mut self.vfs + } + } + + impl Db for TestDb { + fn file_system(&self) -> &dyn FileSystem { + &self.file_system + } + + fn vfs(&self) -> &Vfs { + &self.vfs + } + } + + impl salsa::Database for TestDb { + fn salsa_event(&self, event: salsa::Event) { + tracing::trace!("event: {:?}", event); + let mut events = self.events.lock().unwrap(); 
+ events.push(event); + } + } + + impl salsa::ParallelDatabase for TestDb { + fn snapshot(&self) -> salsa::Snapshot { + salsa::Snapshot::new(Self { + storage: self.storage.snapshot(), + file_system: self.file_system.snapshot(), + vfs: self.vfs.snapshot(), + events: self.events.clone(), + }) + } + } +} diff --git a/crates/ruff_db/src/vfs.rs b/crates/ruff_db/src/vfs.rs new file mode 100644 index 0000000000..b59d7da8eb --- /dev/null +++ b/crates/ruff_db/src/vfs.rs @@ -0,0 +1,318 @@ +use std::sync::Arc; + +use countme::Count; +use dashmap::mapref::entry::Entry; + +pub use path::{VendoredPath, VendoredPathBuf, VfsPath}; + +use crate::file_system::{FileRevision, FileSystemPath}; +use crate::{Db, FxDashMap}; + +mod path; + +/// Virtual file system that supports files from different sources. +/// +/// The [`Vfs`] supports accessing files from: +/// +/// * The file system +/// * Vendored files that are part of the distributed Ruff binary +/// +/// ## Why do both the [`Vfs`] and [`FileSystem`](crate::file_system::FileSystem) trait exist? +/// +/// It would have been an option to define [`FileSystem`](crate::file_system::FileSystem) in a way that all its operations accept +/// a [`VfsPath`]. This would have allowed unifying most of [`Vfs`] and [`FileSystem`](crate::file_system::FileSystem). The reason why they are +/// separate is that not all operations are supported for all [`VfsPath`]s: +/// +/// * The only relevant operations for [`VendoredPath`]s are testing for existence and reading the content. +/// * The vendored file system is immutable and doesn't support writing nor does it require watching for changes. +/// * There's no requirement to walk the vendored file system. +/// +/// The other reason is that most operations know if they are working with vendored or file system paths. +/// Requiring them to convert the path to a `VfsPath` to test if the file exists is cumbersome. +/// +/// The main downside of the approach is that vendored files need their own stubbing mechanism.
+#[derive(Default)] +pub struct Vfs { + inner: Arc, +} + +#[derive(Default)] +struct VfsInner { + /// Lookup table that maps the path to a salsa interned [`VfsFile`] instance. + /// + /// The map also stores entries for files that don't exist on the file system. This is necessary + /// so that queries that depend on the existence of a file are re-executed when the file is created. + /// + files_by_path: FxDashMap, + vendored: VendoredVfs, +} + +impl Vfs { + /// Creates a new [`Vfs`] instance where the vendored files are stubbed out. + pub fn with_stubbed_vendored() -> Self { + Self { + inner: Arc::new(VfsInner { + vendored: VendoredVfs::Stubbed(FxDashMap::default()), + ..VfsInner::default() + }), + } + } + + /// Looks up a file by its path. + /// + /// If the path hasn't been looked up before, creates a new salsa [`VfsFile`] ingredient and stores it for future lookups. + /// + /// The operation always succeeds even if the path doesn't exist on disk, isn't accessible or if the path points to a directory. + /// In these cases, a file with status [`FileStatus::Deleted`] is returned. + pub fn file(&self, db: &dyn Db, path: &FileSystemPath) -> VfsFile { + *self + .inner + .files_by_path + .entry(VfsPath::FileSystem(path.to_path_buf())) + .or_insert_with(|| { + let metadata = db.file_system().metadata(path); + + match metadata { + Ok(metadata) if metadata.file_type().is_file() => VfsFile::new( + db, + VfsPath::FileSystem(path.to_path_buf()), + metadata.permissions(), + metadata.revision(), + FileStatus::Exists, + Count::default(), + ), + _ => VfsFile::new( + db, + VfsPath::FileSystem(path.to_path_buf()), + None, + FileRevision::zero(), + FileStatus::Deleted, + Count::default(), + ), + } + }) + } + + /// Looks up a vendored file by its path. Returns `Some` if a vendored file for the given path + /// exists and `None` otherwise.
+ pub fn vendored(&self, db: &dyn Db, path: &VendoredPath) -> Option { + let file = match self + .inner + .files_by_path + .entry(VfsPath::Vendored(path.to_path_buf())) + { + Entry::Occupied(entry) => *entry.get(), + Entry::Vacant(entry) => { + let revision = self.inner.vendored.revision(path)?; + + let file = VfsFile::new( + db, + VfsPath::Vendored(path.to_path_buf()), + Some(0o444), + revision, + FileStatus::Exists, + Count::default(), + ); + + entry.insert(file); + + file + } + }; + + Some(file) + } + + /// Stubs out the vendored files with the given content. + /// + /// ## Panics + /// If there are pending snapshots referencing this `Vfs` instance. + pub fn stub_vendored(&mut self, vendored: impl IntoIterator) + where + P: AsRef, + S: ToString, + { + let inner = Arc::get_mut(&mut self.inner).unwrap(); + + let stubbed = FxDashMap::default(); + + for (path, content) in vendored { + stubbed.insert(path.as_ref().to_path_buf(), content.to_string()); + } + + inner.vendored = VendoredVfs::Stubbed(stubbed); + } + + /// Creates a salsa like snapshot of the files. The instances share + /// the same path to file mapping. + pub fn snapshot(&self) -> Self { + Self { + inner: self.inner.clone(), + } + } + + fn read(&self, db: &dyn Db, path: &VfsPath) -> String { + match path { + VfsPath::FileSystem(path) => db.file_system().read(path).unwrap_or_default(), + + VfsPath::Vendored(vendored) => db + .vfs() + .inner + .vendored + .read(vendored) + .expect("Vendored file to exist"), + } + } +} + +impl std::fmt::Debug for Vfs { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut map = f.debug_map(); + + for entry in self.inner.files_by_path.iter() { + map.entry(entry.key(), entry.value()); + } + map.finish() + } +} + +#[salsa::input] +pub struct VfsFile { + /// The path of the file. + #[id] + #[return_ref] + pub path: VfsPath, + + /// The unix permissions of the file. Only supported on unix systems. 
Always `None` on Windows + /// or when the file has been deleted. + pub permissions: Option, + + /// The file revision. A file has changed if the revisions don't compare equal. + pub revision: FileRevision, + + /// The status of the file. + /// + /// Salsa doesn't support deleting inputs. The only way to signal to the depending queries that + /// the file has been deleted is to change the status to `Deleted`. + pub status: FileStatus, + + /// Counter that counts the number of created file instances and active file instances. + /// Only enabled in debug builds. + #[allow(unused)] + count: Count, +} + +impl VfsFile { + /// Reads the content of the file into a [`String`]. + /// + /// Reading the same file multiple times isn't guaranteed to return the same content. It's possible + /// that the file has been modified in between the reads. It's even possible that a file that + /// is considered to exist has been deleted in the meantime. If this happens, then the method returns + /// an empty string, which is the closest to the content that the file contains now. Returning + /// an empty string shouldn't be a problem because the query will be re-executed as soon as the + /// changes are applied to the database. + #[allow(unused)] + pub(crate) fn read(&self, db: &dyn Db) -> String { + let path = self.path(db); + + if path.is_file_system_path() { + // Add a dependency on the revision to ensure the operation gets re-executed when the file changes. + let _ = self.revision(db); + } + + db.vfs().read(db, path) + } +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum FileStatus { + /// The file exists. + Exists, + + /// The file was deleted, didn't exist to begin with or the path isn't a file. 
+ Deleted, +} + +#[derive(Default, Debug)] +enum VendoredVfs { + #[default] + Real, + Stubbed(FxDashMap), +} + +impl VendoredVfs { + fn revision(&self, path: &VendoredPath) -> Option { + match self { + VendoredVfs::Real => todo!(), + VendoredVfs::Stubbed(stubbed) => stubbed + .contains_key(&path.to_path_buf()) + .then_some(FileRevision::new(1)), + } + } + + fn read(&self, path: &VendoredPath) -> Option { + match self { + VendoredVfs::Real => todo!(), + VendoredVfs::Stubbed(stubbed) => stubbed.get(&path.to_path_buf()).as_deref().cloned(), + } + } +} + +#[cfg(test)] +mod tests { + use crate::file_system::{FileRevision, FileSystemPath}; + use crate::tests::TestDb; + use crate::vfs::{FileStatus, VendoredPath}; + use crate::Db; + + #[test] + fn file_system_existing_file() { + let mut db = TestDb::new(); + + db.file_system_mut() + .write_files([("test.py", "print('Hello world')")]); + + let test = db.file(FileSystemPath::new("test.py")); + + assert_eq!(test.status(&db), FileStatus::Exists); + assert_eq!(test.permissions(&db), Some(0o755)); + assert_ne!(test.revision(&db), FileRevision::zero()); + assert_eq!(&test.read(&db), "print('Hello world')"); + } + + #[test] + fn file_system_non_existing_file() { + let db = TestDb::new(); + + let test = db.file(FileSystemPath::new("test.py")); + + assert_eq!(test.status(&db), FileStatus::Deleted); + assert_eq!(test.permissions(&db), None); + assert_eq!(test.revision(&db), FileRevision::zero()); + assert_eq!(&test.read(&db), ""); + } + + #[test] + fn stubbed_vendored_file() { + let mut db = TestDb::new(); + + db.vfs_mut() + .stub_vendored([("test.py", "def foo() -> str")]); + + let test = db + .vendored_file(VendoredPath::new("test.py")) + .expect("Vendored file to exist."); + + assert_eq!(test.status(&db), FileStatus::Exists); + assert_eq!(test.permissions(&db), Some(0o444)); + assert_ne!(test.revision(&db), FileRevision::zero()); + assert_eq!(&test.read(&db), "def foo() -> str"); + } + + #[test] + fn 
stubbed_vendored_file_non_existing() { + let db = TestDb::new(); + + assert_eq!(db.vendored_file(VendoredPath::new("test.py")), None); + } +} diff --git a/crates/ruff_db/src/vfs/path.rs b/crates/ruff_db/src/vfs/path.rs new file mode 100644 index 0000000000..9febc542b9 --- /dev/null +++ b/crates/ruff_db/src/vfs/path.rs @@ -0,0 +1,140 @@ +use std::ops::Deref; +use std::path::Path; + +use camino::{Utf8Path, Utf8PathBuf}; + +use crate::file_system::{FileSystemPath, FileSystemPathBuf}; + +#[repr(transparent)] +#[derive(Debug, Eq, PartialEq, Hash)] +pub struct VendoredPath(Utf8Path); + +impl VendoredPath { + pub fn new(path: &(impl AsRef + ?Sized)) -> &Self { + let path = path.as_ref(); + // SAFETY: VendoredPath is marked as #[repr(transparent)] so the conversion from a + // *const Utf8Path to a *const VendoredPath is valid. + unsafe { &*(path as *const Utf8Path as *const VendoredPath) } + } + + pub fn to_path_buf(&self) -> VendoredPathBuf { + VendoredPathBuf(self.0.to_path_buf()) + } + + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +#[repr(transparent)] +#[derive(Debug, Eq, PartialEq, Clone, Hash)] +pub struct VendoredPathBuf(Utf8PathBuf); + +impl Default for VendoredPathBuf { + fn default() -> Self { + Self::new() + } +} + +impl VendoredPathBuf { + pub fn new() -> Self { + Self(Utf8PathBuf::new()) + } + + pub fn as_path(&self) -> &VendoredPath { + VendoredPath::new(&self.0) + } +} + +impl AsRef for VendoredPathBuf { + fn as_ref(&self) -> &VendoredPath { + self.as_path() + } +} + +impl AsRef for VendoredPath { + #[inline] + fn as_ref(&self) -> &VendoredPath { + self + } +} + +impl AsRef for str { + #[inline] + fn as_ref(&self) -> &VendoredPath { + VendoredPath::new(self) + } +} + +impl AsRef for String { + #[inline] + fn as_ref(&self) -> &VendoredPath { + VendoredPath::new(self) + } +} + +impl AsRef for VendoredPath { + #[inline] + fn as_ref(&self) -> &Path { + self.0.as_std_path() + } +} + +impl Deref for VendoredPathBuf { + type Target = VendoredPath; + + 
fn deref(&self) -> &Self::Target { + self.as_path() + } +} + +/// Path to a file. +/// +/// The path abstracts that files in Ruff can come from different sources: +/// +/// * a file stored on disk +/// * a vendored file that ships as part of the ruff binary +/// * Future: A virtual file that references a slice of another file. For example, the CSS code in a python file. +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub enum VfsPath { + /// Path that points to a file on disk. + FileSystem(FileSystemPathBuf), + Vendored(VendoredPathBuf), +} + +impl VfsPath { + /// Create a new path to a file on the file system. + #[must_use] + pub fn file_system(path: impl AsRef) -> Self { + VfsPath::FileSystem(path.as_ref().to_path_buf()) + } + + /// Returns `Some` if the path is a file system path that points to a path on disk. + #[must_use] + pub fn into_file_system_path_buf(self) -> Option { + match self { + VfsPath::FileSystem(path) => Some(path), + VfsPath::Vendored(_) => None, + } + } + + /// Returns `true` if the path is a file system path that points to a path on disk. + #[must_use] + pub const fn is_file_system_path(&self) -> bool { + matches!(self, VfsPath::FileSystem(_)) + } + + /// Yields the underlying [`str`] slice. + pub fn as_str(&self) -> &str { + match self { + VfsPath::FileSystem(path) => path.as_str(), + VfsPath::Vendored(path) => path.as_str(), + } + } +} + +impl AsRef for VfsPath { + fn as_ref(&self) -> &str { + self.as_str() + } +}