red-knot: VfsFile input ingredient and a Vfs (#11802)

This commit is contained in:
Micha Reiser 2024-06-12 08:06:15 +01:00 committed by GitHub
parent db8f2c2d9f
commit 93973b96cb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 1195 additions and 26 deletions

143
Cargo.lock generated
View file

@ -133,6 +133,12 @@ version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
[[package]]
name = "arc-swap"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
[[package]]
name = "argfile"
version = "0.2.0"
@ -214,6 +220,12 @@ dependencies = [
"tempfile",
]
[[package]]
name = "camino"
version = "1.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0ec6b951b160caa93cc0c7b209e5a3bff7aae9062213451ac99493cd844c239"
[[package]]
name = "cast"
version = "0.3.0"
@ -361,10 +373,10 @@ version = "4.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6"
dependencies = [
"heck",
"heck 0.5.0",
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -605,7 +617,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim 0.10.0",
"syn",
"syn 2.0.66",
]
[[package]]
@ -616,7 +628,7 @@ checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f"
dependencies = [
"darling_core",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -742,6 +754,16 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "eyre"
version = "0.6.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7cd915d99f24784cdc19fd37ef22b97e3ff0ae756c7e492e9fbfe897d61e2aec"
dependencies = [
"indenter",
"once_cell",
]
[[package]]
name = "fastrand"
version = "2.0.2"
@ -879,6 +901,21 @@ dependencies = [
"allocator-api2",
]
[[package]]
name = "hashlink"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7"
dependencies = [
"hashbrown 0.14.5",
]
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "heck"
version = "0.5.0"
@ -981,6 +1018,12 @@ dependencies = [
"rust-stemmers",
]
[[package]]
name = "indenter"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683"
[[package]]
name = "indexmap"
version = "2.2.6"
@ -1086,7 +1129,7 @@ dependencies = [
"Inflector",
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -1212,7 +1255,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2ae40017ac09cd2c6a53504cb3c871c7f2b41466eac5bc66ba63f39073b467b"
dependencies = [
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -1976,6 +2019,19 @@ dependencies = [
"seahash",
]
[[package]]
name = "ruff_db"
version = "0.0.0"
dependencies = [
"camino",
"countme",
"dashmap",
"filetime",
"rustc-hash",
"salsa-2022",
"tracing",
]
[[package]]
name = "ruff_dev"
version = "0.0.0"
@ -2120,7 +2176,7 @@ dependencies = [
"proc-macro2",
"quote",
"ruff_python_trivia",
"syn",
"syn 2.0.66",
]
[[package]]
@ -2493,6 +2549,36 @@ version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1"
[[package]]
name = "salsa-2022"
version = "0.1.0"
source = "git+https://github.com/salsa-rs/salsa.git?rev=05b4e3ebdcdc47730cdd359e7e97fb2470527279#05b4e3ebdcdc47730cdd359e7e97fb2470527279"
dependencies = [
"arc-swap",
"crossbeam",
"crossbeam-utils",
"dashmap",
"hashlink",
"indexmap",
"log",
"parking_lot",
"rustc-hash",
"salsa-2022-macros",
"smallvec",
]
[[package]]
name = "salsa-2022-macros"
version = "0.1.0"
source = "git+https://github.com/salsa-rs/salsa.git?rev=05b4e3ebdcdc47730cdd359e7e97fb2470527279#05b4e3ebdcdc47730cdd359e7e97fb2470527279"
dependencies = [
"eyre",
"heck 0.4.1",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "same-file"
version = "1.0.6"
@ -2523,7 +2609,7 @@ dependencies = [
"proc-macro2",
"quote",
"serde_derive_internals",
"syn",
"syn 2.0.66",
]
[[package]]
@ -2572,7 +2658,7 @@ checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -2583,7 +2669,7 @@ checksum = "330f01ce65a3a5fe59a60c82f3c9a024b573b8a6e875bd233fe5f934e71d54e3"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -2605,7 +2691,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -2646,7 +2732,7 @@ dependencies = [
"darling",
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -2742,11 +2828,11 @@ version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
dependencies = [
"heck",
"heck 0.5.0",
"proc-macro2",
"quote",
"rustversion",
"syn",
"syn 2.0.66",
]
[[package]]
@ -2755,6 +2841,17 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.66"
@ -2819,7 +2916,7 @@ dependencies = [
"cfg-if",
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -2830,7 +2927,7 @@ checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
"test-case-core",
]
@ -2851,7 +2948,7 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -2963,7 +3060,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -3193,7 +3290,7 @@ checksum = "9881bea7cbe687e36c9ab3b778c36cd0487402e270304e8b1296d5085303c1a2"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -3278,7 +3375,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
"wasm-bindgen-shared",
]
@ -3312,7 +3409,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@ -3345,7 +3442,7 @@ checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]
@ -3614,7 +3711,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.66",
]
[[package]]

View file

@ -42,6 +42,7 @@ bincode = { version = "1.3.3" }
bitflags = { version = "2.5.0" }
bstr = { version = "1.9.1" }
cachedir = { version = "0.3.1" }
camino = { version = "1.1.7" }
chrono = { version = "0.4.35", default-features = false, features = ["clock"] }
clap = { version = "4.5.3", features = ["derive"] }
clap_complete_command = { version = "0.5.1" }
@ -79,7 +80,9 @@ libc = { version = "0.2.153" }
libcst = { version = "1.1.0", default-features = false }
log = { version = "0.4.17" }
lsp-server = { version = "0.7.6" }
lsp-types = { git = "https://github.com/astral-sh/lsp-types.git", rev = "3512a9f", features = ["proposed"] }
lsp-types = { git = "https://github.com/astral-sh/lsp-types.git", rev = "3512a9f", features = [
"proposed",
] }
matchit = { version = "0.8.1" }
memchr = { version = "2.7.1" }
mimalloc = { version = "0.1.39" }
@ -100,13 +103,16 @@ rand = { version = "0.8.5" }
rayon = { version = "1.10.0" }
regex = { version = "1.10.2" }
rustc-hash = { version = "1.1.0" }
salsa = { git = "https://github.com/salsa-rs/salsa.git", package = "salsa-2022", rev = "05b4e3ebdcdc47730cdd359e7e97fb2470527279" }
schemars = { version = "0.8.16" }
seahash = { version = "4.1.0" }
serde = { version = "1.0.197", features = ["derive"] }
serde-wasm-bindgen = { version = "0.6.4" }
serde_json = { version = "1.0.113" }
serde_test = { version = "1.0.152" }
serde_with = { version = "3.6.0", default-features = false, features = ["macros"] }
serde_with = { version = "3.6.0", default-features = false, features = [
"macros",
] }
shellexpand = { version = "3.0.0" }
similar = { version = "2.4.0", features = ["inline"] }
smallvec = { version = "1.13.2" }
@ -131,7 +137,12 @@ unicode_names2 = { version = "1.2.2" }
unicode-normalization = { version = "0.1.23" }
ureq = { version = "2.9.6" }
url = { version = "2.5.0" }
uuid = { version = "1.6.1", features = ["v4", "fast-rng", "macro-diagnostics", "js"] }
uuid = { version = "1.6.1", features = [
"v4",
"fast-rng",
"macro-diagnostics",
"js",
] }
walkdir = { version = "2.3.2" }
wasm-bindgen = { version = "0.2.92" }
wasm-bindgen-test = { version = "0.3.42" }

20
crates/ruff_db/Cargo.toml Normal file
View file

@ -0,0 +1,20 @@
[package]
name = "ruff_db"
version = "0.0.0"
publish = false
authors = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
homepage = { workspace = true }
documentation = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
[dependencies]
camino = { workspace = true }
countme = { workspace = true }
dashmap = { workspace = true }
filetime = { workspace = true }
salsa = { workspace = true }
tracing = { workspace = true }
rustc-hash = { workspace = true }

View file

@ -0,0 +1,270 @@
use std::fmt::Formatter;
use std::ops::Deref;
use std::path::Path;
use camino::{Utf8Path, Utf8PathBuf};
use filetime::FileTime;
pub use memory::MemoryFileSystem;
pub use os::OsFileSystem;
mod memory;
mod os;
/// Result type returned by the fallible [`FileSystem`] operations; an alias for [`std::io::Result`].
pub type Result<T> = std::io::Result<T>;
/// Abstraction over a file system from which files and their metadata can be read.
///
/// The trait is agnostic to the actual storage medium. An implementation could be a real file system,
/// a combination of a real file system and an in-memory file system (for example an LSP where unsaved
/// changes are stored in memory), or an all in-memory file system for testing.
pub trait FileSystem {
/// Reads the metadata of the file or directory at `path`.
///
/// Returns an error if the metadata can't be retrieved.
fn metadata(&self, path: &FileSystemPath) -> Result<Metadata>;
/// Reads the content of the file at `path` into a [`String`].
///
/// Returns an error if the content can't be read.
fn read(&self, path: &FileSystemPath) -> Result<String>;
/// Returns `true` if `path` exists.
fn exists(&self, path: &FileSystemPath) -> bool;
}
// TODO support untitled files for the LSP use case. Wrap a `str` and `String`
// The main question is how `as_std_path` would work for untitled files, that can only exist in the LSP case
// but there's no compile time guarantee that a [`OsFileSystem`] never gets an untitled file path.
/// Path to a file or directory stored in [`FileSystem`].
///
/// The path is guaranteed to be valid UTF-8.
///
/// This is a thin, unsized wrapper around [`Utf8Path`]; use [`FileSystemPathBuf`] for the owned form.
#[repr(transparent)]
#[derive(Eq, PartialEq, Hash)]
pub struct FileSystemPath(Utf8Path);
impl FileSystemPath {
/// Wraps anything that can be viewed as a [`Utf8Path`] as a `&FileSystemPath` without copying.
pub fn new(path: &(impl AsRef<Utf8Path> + ?Sized)) -> &Self {
let path = path.as_ref();
// SAFETY: `FileSystemPath` is marked as #[repr(transparent)] so the conversion from a
// *const Utf8Path to a *const FileSystemPath is valid.
unsafe { &*(path as *const Utf8Path as *const FileSystemPath) }
}
/// Converts the path to an owned [`FileSystemPathBuf`].
pub fn to_path_buf(&self) -> FileSystemPathBuf {
FileSystemPathBuf(self.0.to_path_buf())
}
/// Returns the path as a string slice.
#[inline]
pub fn as_str(&self) -> &str {
self.0.as_str()
}
/// Returns the std path for the file.
#[inline]
pub fn as_std_path(&self) -> &Path {
self.0.as_std_path()
}
}
/// Owned counterpart of [`FileSystemPath`].
///
/// Like the borrowed form, the path is guaranteed to be valid UTF-8.
#[repr(transparent)]
#[derive(Eq, PartialEq, Clone, Hash)]
pub struct FileSystemPathBuf(Utf8PathBuf);

impl Default for FileSystemPathBuf {
    /// Returns an empty path, exactly like [`FileSystemPathBuf::new`].
    fn default() -> Self {
        FileSystemPathBuf::new()
    }
}

impl FileSystemPathBuf {
    /// Creates an empty path buffer.
    pub fn new() -> Self {
        let empty = Utf8PathBuf::new();
        Self(empty)
    }

    /// Borrows the buffer as a [`FileSystemPath`].
    #[inline]
    pub fn as_path(&self) -> &FileSystemPath {
        let inner: &Utf8PathBuf = &self.0;
        FileSystemPath::new(inner)
    }
}
impl AsRef<FileSystemPath> for FileSystemPathBuf {
#[inline]
fn as_ref(&self) -> &FileSystemPath {
self.as_path()
}
}
impl AsRef<FileSystemPath> for FileSystemPath {
#[inline]
fn as_ref(&self) -> &FileSystemPath {
self
}
}
impl AsRef<FileSystemPath> for str {
#[inline]
fn as_ref(&self) -> &FileSystemPath {
FileSystemPath::new(self)
}
}
impl AsRef<FileSystemPath> for String {
#[inline]
fn as_ref(&self) -> &FileSystemPath {
FileSystemPath::new(self)
}
}
impl AsRef<Path> for FileSystemPath {
#[inline]
fn as_ref(&self) -> &Path {
self.0.as_std_path()
}
}
impl Deref for FileSystemPathBuf {
type Target = FileSystemPath;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_path()
}
}
// The formatting impls below all delegate to the `fmt` method of the wrapped camino path.
// NOTE(review): `self.0.fmt(f)` is resolved through ordinary method lookup, so which trait's
// `fmt` is picked depends on what is in scope — confirm before restructuring these impls.
impl std::fmt::Debug for FileSystemPath {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl std::fmt::Display for FileSystemPath {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl std::fmt::Debug for FileSystemPathBuf {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl std::fmt::Display for FileSystemPathBuf {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
/// Metadata of a file or directory as returned by [`FileSystem::metadata`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Metadata {
    revision: FileRevision,
    permissions: Option<u32>,
    file_type: FileType,
}

impl Metadata {
    /// Returns the revision recorded for the entry.
    pub fn revision(&self) -> FileRevision {
        let Self { revision, .. } = self;
        *revision
    }

    /// Returns the permission bits, or `None` if the file system didn't provide any.
    pub fn permissions(&self) -> Option<u32> {
        let Self { permissions, .. } = self;
        *permissions
    }

    /// Returns whether the entry is a file, a directory, or a symlink.
    pub fn file_type(&self) -> FileType {
        let Self { file_type, .. } = self;
        *file_type
    }
}
/// A number representing the revision of a file.
///
/// Two revisions that don't compare equal signify that the file has been modified.
/// Revisions aren't guaranteed to be monotonically increasing or in any specific order.
///
/// Possible revisions are:
/// * The last modification time of the file.
/// * The hash of the file's content.
/// * The revision as it comes from an external system, for example the LSP.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct FileRevision(u128);

impl FileRevision {
    /// Creates a revision from its raw numeric value.
    ///
    /// This is a `const fn`, like [`FileRevision::zero`], so revisions can be built in
    /// constant contexts.
    pub const fn new(value: u128) -> Self {
        Self(value)
    }

    /// Returns the revision with the raw value `0`.
    pub const fn zero() -> Self {
        Self(0)
    }

    /// Returns the raw numeric value of the revision.
    #[must_use]
    pub const fn as_u128(self) -> u128 {
        self.0
    }
}

impl From<u128> for FileRevision {
    /// Uses `value` directly as the revision.
    fn from(value: u128) -> Self {
        FileRevision(value)
    }
}

impl From<u64> for FileRevision {
    /// Losslessly widens `value` to 128 bits and uses it as the revision.
    fn from(value: u64) -> Self {
        FileRevision(u128::from(value))
    }
}
impl From<FileTime> for FileRevision {
    /// Packs the timestamp into a single number: the seconds occupy the upper 64 bits and
    /// the nanoseconds the lower 64 bits.
    fn from(value: FileTime) -> Self {
        let upper = (value.seconds() as u128) << 64;
        let lower = u128::from(value.nanoseconds());

        FileRevision(upper | lower)
    }
}
/// The kind of entry a path points to.
#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
pub enum FileType {
    File,
    Directory,
    Symlink,
}

impl FileType {
    /// Returns `true` only for [`FileType::File`].
    pub const fn is_file(self) -> bool {
        match self {
            FileType::File => true,
            FileType::Directory | FileType::Symlink => false,
        }
    }

    /// Returns `true` only for [`FileType::Directory`].
    pub const fn is_directory(self) -> bool {
        match self {
            FileType::Directory => true,
            FileType::File | FileType::Symlink => false,
        }
    }

    /// Returns `true` only for [`FileType::Symlink`].
    pub const fn is_symlink(self) -> bool {
        match self {
            FileType::Symlink => true,
            FileType::File | FileType::Directory => false,
        }
    }
}
#[cfg(test)]
mod tests {
    use crate::file_system::FileRevision;
    use filetime::FileTime;

    /// Converting a timestamp keeps the nanoseconds in the lower 64 bits and the seconds in
    /// the upper 64 bits of the revision.
    #[test]
    fn revision_from_file_time() {
        let now = FileTime::now();
        let raw = FileRevision::from(now).as_u128();

        let lower_bits = raw & 0xFFFF_FFFF_FFFF_FFFF;
        let upper_bits = raw >> 64;

        assert_eq!(now.nanoseconds(), lower_bits as u32);
        assert_eq!(now.seconds(), upper_bits as i64);
    }
}

View file

@ -0,0 +1,136 @@
use crate::file_system::{
FileSystem, FileSystemPath, FileSystemPathBuf, FileType, Metadata, Result,
};
use crate::FxDashMap;
use dashmap::mapref::one::RefMut;
use filetime::FileTime;
use rustc_hash::FxHasher;
use std::hash::BuildHasherDefault;
use std::io::ErrorKind;
use std::sync::Arc;
/// File system that keeps all of its files in memory.
///
/// Only intended for testing purposes. Directories aren't yet supported.
#[derive(Default)]
pub struct MemoryFileSystem {
    inner: Arc<MemoryFileSystemInner>,
}

impl MemoryFileSystem {
    /// Returns a new handle that shares the file storage with `self`.
    pub fn snapshot(&self) -> Self {
        let inner = Arc::clone(&self.inner);
        Self { inner }
    }

    /// Writes every `(path, content)` pair to the file system.
    pub fn write_files<P, C>(&self, files: impl IntoIterator<Item = (P, C)>)
    where
        P: AsRef<FileSystemPath>,
        C: ToString,
    {
        files
            .into_iter()
            .for_each(|(path, content)| self.write_file(path.as_ref(), content.to_string()));
    }

    /// Stores `content` at `path` and updates the file's last modified time to now.
    ///
    /// Creates the file if it doesn't exist yet.
    pub fn write_file(&self, path: &FileSystemPath, content: String) {
        let mut slot = self.entry_or_insert(path);
        let file = slot.value_mut();

        file.content = content;
        file.last_modified = FileTime::now();
    }

    /// Sets the permissions of the file at `path`.
    ///
    /// Creates a new file with an empty content if the file doesn't exist.
    pub fn set_permissions(&self, path: &FileSystemPath, permissions: u32) {
        let mut slot = self.entry_or_insert(path);
        slot.value_mut().permission = permissions;
    }

    /// Updates the last modified time of the file at `path` to now.
    ///
    /// Creates a new file with an empty content if the file doesn't exist.
    pub fn touch(&self, path: &FileSystemPath) {
        let mut slot = self.entry_or_insert(path);
        slot.value_mut().last_modified = FileTime::now();
    }

    /// Returns a mutable handle to the entry for `path`, inserting an empty file with
    /// permissions `0o755` first if there is no entry yet.
    fn entry_or_insert(
        &self,
        path: &FileSystemPath,
    ) -> RefMut<FileSystemPathBuf, FileData, BuildHasherDefault<FxHasher>> {
        let key = path.to_path_buf();
        let empty_file = || FileData {
            content: String::new(),
            last_modified: FileTime::now(),
            permission: 0o755,
        };

        self.inner.files.entry(key).or_insert_with(empty_file)
    }
}
impl FileSystem for MemoryFileSystem {
    /// Returns the metadata of the stored file, or a `NotFound` error if there is no entry for `path`.
    ///
    /// Every entry is reported as a regular file.
    fn metadata(&self, path: &FileSystemPath) -> Result<Metadata> {
        let not_found = || std::io::Error::new(ErrorKind::NotFound, "File not found");
        let key = path.to_path_buf();
        let entry = self.inner.files.get(&key).ok_or_else(not_found)?;
        let file = entry.value();

        Ok(Metadata {
            revision: file.last_modified.into(),
            permissions: Some(file.permission),
            file_type: FileType::File,
        })
    }

    /// Returns a copy of the stored content, or a `NotFound` error if there is no entry for `path`.
    fn read(&self, path: &FileSystemPath) -> Result<String> {
        let not_found = || std::io::Error::new(ErrorKind::NotFound, "File not found");
        let key = path.to_path_buf();
        let entry = self.inner.files.get(&key).ok_or_else(not_found)?;

        Ok(entry.value().content.clone())
    }

    /// Returns `true` if an entry for `path` has been stored.
    fn exists(&self, path: &FileSystemPath) -> bool {
        let key = path.to_path_buf();
        self.inner.files.contains_key(&key)
    }
}
/// Formats the file system as a map from path to file data.
impl std::fmt::Debug for MemoryFileSystem {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut debug_map = f.debug_map();

        for file in self.inner.files.iter() {
            let (path, data) = file.pair();
            debug_map.entry(path, data);
        }

        debug_map.finish()
    }
}
/// State shared between a [`MemoryFileSystem`] and its snapshots.
#[derive(Default)]
struct MemoryFileSystemInner {
// All stored files, keyed by their path.
files: FxDashMap<FileSystemPathBuf, FileData>,
}
/// Everything the in-memory file system stores for a single file.
#[derive(Debug)]
struct FileData {
// The text content of the file.
content: String,
// Time of the last write or touch.
last_modified: FileTime,
// Permission bits reported through the file's metadata.
permission: u32,
}

View file

@ -0,0 +1,51 @@
use crate::file_system::{FileSystem, FileSystemPath, FileType, Metadata, Result};
use filetime::FileTime;
/// File system that reads from the operating system's file system through `std::fs`.
pub struct OsFileSystem;

impl OsFileSystem {
    /// Returns the unix mode bits stored in `metadata`.
    #[cfg(unix)]
    fn permissions(metadata: &std::fs::Metadata) -> Option<u32> {
        use std::os::unix::fs::PermissionsExt;

        let mode = metadata.permissions().mode();
        Some(mode)
    }

    /// Permissions are only extracted on unix; every other platform reports `None`.
    #[cfg(not(unix))]
    fn permissions(_metadata: &std::fs::Metadata) -> Option<u32> {
        None
    }
}
impl FileSystem for OsFileSystem {
    /// Queries the metadata from the operating system and derives the revision from the
    /// last modification time.
    fn metadata(&self, path: &FileSystemPath) -> Result<Metadata> {
        let std_metadata = std::fs::metadata(path.as_std_path())?;
        let modified = FileTime::from_last_modification_time(&std_metadata);

        Ok(Metadata {
            revision: modified.into(),
            permissions: Self::permissions(&std_metadata),
            file_type: std_metadata.file_type().into(),
        })
    }

    /// Reads the whole file at `path` into a string.
    fn read(&self, path: &FileSystemPath) -> Result<String> {
        std::fs::read_to_string(path.as_std_path())
    }

    /// Returns `true` if the path exists according to [`std::path::Path::exists`].
    fn exists(&self, path: &FileSystemPath) -> bool {
        let std_path = path.as_std_path();
        std_path.exists()
    }
}
impl From<std::fs::FileType> for FileType {
    /// Maps the std file type onto the three kinds known to this crate.
    ///
    /// NOTE(review): everything that is neither a file nor a directory is reported as
    /// [`FileType::Symlink`], which presumably also covers other special files — confirm
    /// that this is intended.
    fn from(file_type: std::fs::FileType) -> Self {
        match (file_type.is_file(), file_type.is_dir()) {
            (true, _) => FileType::File,
            (false, true) => FileType::Directory,
            (false, false) => FileType::Symlink,
        }
    }
}

126
crates/ruff_db/src/lib.rs Normal file
View file

@ -0,0 +1,126 @@
use std::hash::BuildHasherDefault;
use rustc_hash::FxHasher;
use salsa::DbWithJar;
use crate::file_system::{FileSystem, FileSystemPath};
use crate::vfs::{VendoredPath, Vfs, VfsFile};
pub mod file_system;
pub mod vfs;
/// A [`dashmap::DashMap`] that hashes its keys with [`FxHasher`].
pub(crate) type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>;
/// The salsa jar of this crate. It currently lists [`VfsFile`] as its only ingredient.
#[salsa::jar(db=Db)]
pub struct Jar(VfsFile);
/// Database that gives access to the virtual filesystem, source code, and parsed AST.
pub trait Db: DbWithJar<Jar> {
    /// Interns a file system path and returns a salsa `File` ingredient.
    ///
    /// The operation is guaranteed to always succeed, even if the path doesn't exist, isn't accessible, or if the path points to a directory.
    /// In these cases, a file with status [`FileStatus::Deleted`](vfs::FileStatus::Deleted) is returned.
    fn file(&self, path: &FileSystemPath) -> VfsFile
    where
        Self: Sized,
    {
        let vfs = self.vfs();
        vfs.file(self, path)
    }

    /// Interns a vendored file path. Returns `None` if no such vendored file exists and `Some` otherwise.
    fn vendored_file(&self, path: &VendoredPath) -> Option<VfsFile>
    where
        Self: Sized,
    {
        let vfs = self.vfs();
        vfs.vendored(self, path)
    }

    /// Returns the file system used to access files on disk.
    fn file_system(&self) -> &dyn FileSystem;

    /// Returns the virtual file system that interns the files of this database.
    fn vfs(&self) -> &Vfs;
}
/// Trait for upcasting a reference to a base trait object.
pub trait Upcast<T: ?Sized> {
/// Returns `self` viewed as a `&T`.
fn upcast(&self) -> &T;
}
#[cfg(test)]
mod tests {
use crate::file_system::{FileSystem, MemoryFileSystem};
use crate::vfs::{VendoredPathBuf, Vfs};
use crate::{Db, Jar};
/// Database that can be used for testing.
///
/// Uses an in memory filesystem and it stubs out the vendored files by default.
#[salsa::db(Jar)]
pub struct TestDb {
storage: salsa::Storage<Self>,
vfs: Vfs,
file_system: MemoryFileSystem,
// Every salsa event received through `salsa_event`, in the order it was pushed.
events: std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>>,
}
impl TestDb {
/// Creates a database with an empty in-memory file system and no vendored files.
#[allow(unused)]
pub fn new() -> Self {
let mut vfs = Vfs::default();
// Replace the default vendored file system with an empty stub.
vfs.stub_vendored::<VendoredPathBuf, String>([]);
Self {
storage: salsa::Storage::default(),
file_system: MemoryFileSystem::default(),
events: std::sync::Arc::default(),
vfs,
}
}
/// Returns the in-memory file system backing this database.
#[allow(unused)]
pub fn file_system(&self) -> &MemoryFileSystem {
&self.file_system
}
/// Returns a shared handle to the recorded salsa events.
#[allow(unused)]
pub fn events(&self) -> std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>> {
self.events.clone()
}
/// Returns mutable access to the in-memory file system.
pub fn file_system_mut(&mut self) -> &mut MemoryFileSystem {
&mut self.file_system
}
/// Returns mutable access to the virtual file system, e.g. to stub vendored files.
pub fn vfs_mut(&mut self) -> &mut Vfs {
&mut self.vfs
}
}
impl Db for TestDb {
fn file_system(&self) -> &dyn FileSystem {
&self.file_system
}
fn vfs(&self) -> &Vfs {
&self.vfs
}
}
impl salsa::Database for TestDb {
// Traces every event and records it so that it can be inspected through `events`.
fn salsa_event(&self, event: salsa::Event) {
tracing::trace!("event: {:?}", event);
let mut events = self.events.lock().unwrap();
events.push(event);
}
}
impl salsa::ParallelDatabase for TestDb {
// The snapshot shares the file system, the vfs and the event log with the original database.
fn snapshot(&self) -> salsa::Snapshot<Self> {
salsa::Snapshot::new(Self {
storage: self.storage.snapshot(),
file_system: self.file_system.snapshot(),
vfs: self.vfs.snapshot(),
events: self.events.clone(),
})
}
}
}

318
crates/ruff_db/src/vfs.rs Normal file
View file

@ -0,0 +1,318 @@
use std::sync::Arc;
use countme::Count;
use dashmap::mapref::entry::Entry;
pub use path::{VendoredPath, VendoredPathBuf, VfsPath};
use crate::file_system::{FileRevision, FileSystemPath};
use crate::{Db, FxDashMap};
mod path;
/// Virtual file system that supports files from different sources.
///
/// The [`Vfs`] supports accessing files from:
///
/// * The file system
/// * Vendored files that are part of the distributed Ruff binary
///
/// ## Why do both the [`Vfs`] and [`FileSystem`](crate::FileSystem) trait exist?
///
/// It would have been an option to define [`FileSystem`](crate::FileSystem) in a way that all its operations accept
/// a [`VfsPath`]. This would have made it possible to unify most of [`Vfs`] and [`FileSystem`](crate::FileSystem). The reason why they are
/// separate is that not all operations are supported for all [`VfsPath`]s:
///
/// * The only relevant operations for [`VendoredPath`]s are testing for existence and reading the content.
/// * The vendored file system is immutable and doesn't support writing nor does it require watching for changes.
/// * There's no requirement to walk the vendored files.
///
/// The other reason is that most operations know if they are working with vendored or file system paths.
/// Requiring them to convert the path to a `VfsPath` to test if the file exists is cumbersome.
///
/// The main downside of the approach is that vendored files need their own stubbing mechanism.
#[derive(Default)]
pub struct Vfs {
// Shared with every snapshot created from this instance.
inner: Arc<VfsInner>,
}
/// State shared between a [`Vfs`] and its snapshots.
#[derive(Default)]
struct VfsInner {
/// Lookup table that maps the path to a salsa interned [`VfsFile`] instance.
///
/// The map also stores entries for files that don't exist on the file system. This is necessary
/// so that queries that depend on the existence of a file are re-executed when the file is created.
files_by_path: FxDashMap<VfsPath, VfsFile>,
/// Source of the vendored files: either the real ones or stubbed content.
vendored: VendoredVfs,
}
impl Vfs {
/// Creates a new [`Vfs`] instance where the vendored files are stubbed out.
///
/// The stub starts out empty, so no vendored file exists in the returned instance.
pub fn with_stubbed_vendored() -> Self {
    let inner = VfsInner {
        vendored: VendoredVfs::Stubbed(FxDashMap::default()),
        ..VfsInner::default()
    };

    Self {
        inner: Arc::new(inner),
    }
}
/// Looks up a file by its path.
///
/// If the path hasn't been seen before, a new salsa [`VfsFile`] ingredient is created and stored for future lookups.
///
/// The operation always succeeds even if the path doesn't exist on disk, isn't accessible or if the path points to a directory.
/// In these cases, a file with status [`FileStatus::Deleted`] is returned.
pub fn file(&self, db: &dyn Db, path: &FileSystemPath) -> VfsFile {
    let key = VfsPath::FileSystem(path.to_path_buf());
    let entry = self.inner.files_by_path.entry(key);

    *entry.or_insert_with(|| {
        // Only a path whose metadata can be read and that is a regular file counts as existing.
        let (permissions, revision, status) = match db.file_system().metadata(path) {
            Ok(metadata) if metadata.file_type().is_file() => (
                metadata.permissions(),
                metadata.revision(),
                FileStatus::Exists,
            ),
            _ => (None, FileRevision::zero(), FileStatus::Deleted),
        };

        VfsFile::new(
            db,
            VfsPath::FileSystem(path.to_path_buf()),
            permissions,
            revision,
            status,
            Count::default(),
        )
    })
}
/// Looks up a vendored file by its path. Returns `Some` if a vendored file for the given path
/// exists and `None` otherwise.
///
/// Newly found files are stored with the read-only permissions `0o444`.
pub fn vendored(&self, db: &dyn Db, path: &VendoredPath) -> Option<VfsFile> {
    let key = VfsPath::Vendored(path.to_path_buf());

    let vacant = match self.inner.files_by_path.entry(key) {
        Entry::Occupied(occupied) => return Some(*occupied.get()),
        Entry::Vacant(vacant) => vacant,
    };

    // Bails out with `None` (without inserting anything) if the vendored file doesn't exist.
    let revision = self.inner.vendored.revision(path)?;
    let file = VfsFile::new(
        db,
        VfsPath::Vendored(path.to_path_buf()),
        Some(0o444),
        revision,
        FileStatus::Exists,
        Count::default(),
    );
    vacant.insert(file);

    Some(file)
}
/// Replaces the vendored files with the given `(path, content)` stubs.
///
/// ## Panics
/// If there are pending snapshots referencing this `Vfs` instance.
pub fn stub_vendored<P, S>(&mut self, vendored: impl IntoIterator<Item = (P, S)>)
where
    P: AsRef<VendoredPath>,
    S: ToString,
{
    // Exclusive access is only possible while no snapshot shares `inner`.
    let inner = Arc::get_mut(&mut self.inner).unwrap();

    let stubs = FxDashMap::default();
    vendored.into_iter().for_each(|(path, content)| {
        stubs.insert(path.as_ref().to_path_buf(), content.to_string());
    });

    inner.vendored = VendoredVfs::Stubbed(stubs);
}
/// Creates a salsa like snapshot of the files. The instances share
/// the same path to file mapping.
pub fn snapshot(&self) -> Self {
Self {
inner: self.inner.clone(),
}
}
/// Reads the content of the file at `path`.
///
/// A file system path that can't be read yields an empty string. A vendored path is
/// expected to exist; the method panics if it doesn't.
fn read(&self, db: &dyn Db, path: &VfsPath) -> String {
    match path {
        VfsPath::FileSystem(fs_path) => {
            let content = db.file_system().read(fs_path);
            content.unwrap_or_default()
        }
        VfsPath::Vendored(vendored_path) => {
            let vendored = &db.vfs().inner.vendored;
            vendored
                .read(vendored_path)
                .expect("Vendored file to exist")
        }
    }
}
}
/// Formats the virtual file system as a map from path to interned file.
impl std::fmt::Debug for Vfs {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut debug_map = f.debug_map();

        for interned in self.inner.files_by_path.iter() {
            let (path, file) = interned.pair();
            debug_map.entry(path, file);
        }

        debug_map.finish()
    }
}
/// Salsa input that represents a file known to the [`Vfs`], whether it lives on the file system or is vendored.
#[salsa::input]
pub struct VfsFile {
/// The path of the file.
#[id]
#[return_ref]
pub path: VfsPath,
/// The unix permissions of the file. Only supported on unix systems. Always `None` on Windows
/// or when the file has been deleted.
pub permissions: Option<u32>,
/// The file revision. A file has changed if the revisions don't compare equal.
pub revision: FileRevision,
/// The status of the file.
///
/// Salsa doesn't support deleting inputs. The only way to signal to the depending queries that
/// the file has been deleted is to change the status to `Deleted`.
pub status: FileStatus,
/// Counter that counts the number of created file instances and active file instances.
/// Only enabled in debug builds.
#[allow(unused)]
count: Count<VfsFile>,
}
impl VfsFile {
    /// Reads the content of the file into a [`String`].
    ///
    /// Reading the same file multiple times isn't guaranteed to return the same content. It's possible
    /// that the file has been modified in between the reads. It's even possible that a file that
    /// is considered to exist has been deleted in the meantime. If this happens, then the method returns
    /// an empty string, which is the closest to the content that the file contains now. Returning
    /// an empty string shouldn't be a problem because the query will be re-executed as soon as the
    /// changes are applied to the database.
    #[allow(unused)]
    pub(crate) fn read(&self, db: &dyn Db) -> String {
        let path = self.path(db);
        let is_on_disk = path.is_file_system_path();

        if is_on_disk {
            // Add a dependency on the revision to ensure the operation gets re-executed when the file changes.
            let _ = self.revision(db);
        }

        let vfs = db.vfs();
        vfs.read(db, path)
    }
}
/// Whether a [`VfsFile`] currently refers to an existing file.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum FileStatus {
/// The file exists.
Exists,
/// The file was deleted, didn't exist to begin with or the path isn't a file.
Deleted,
}
/// Source of the vendored files.
#[derive(Default, Debug)]
enum VendoredVfs {
    /// The real vendored files. Not implemented yet: every operation hits a `todo!()`.
    #[default]
    Real,
    /// Stubbed vendored files, mapping each path to its content.
    Stubbed(FxDashMap<VendoredPathBuf, String>),
}

impl VendoredVfs {
    /// Returns the revision of the vendored file at `path`, or `None` if no such file exists.
    ///
    /// Every stubbed file has the revision `1`.
    fn revision(&self, path: &VendoredPath) -> Option<FileRevision> {
        let stubbed = match self {
            VendoredVfs::Real => todo!(),
            VendoredVfs::Stubbed(stubbed) => stubbed,
        };

        if stubbed.contains_key(&path.to_path_buf()) {
            Some(FileRevision::new(1))
        } else {
            None
        }
    }

    /// Returns a copy of the content of the vendored file at `path`, or `None` if no such file exists.
    fn read(&self, path: &VendoredPath) -> Option<String> {
        let stubbed = match self {
            VendoredVfs::Real => todo!(),
            VendoredVfs::Stubbed(stubbed) => stubbed,
        };

        let key = path.to_path_buf();
        stubbed.get(&key).map(|content| content.value().clone())
    }
}
#[cfg(test)]
mod tests {
    use crate::file_system::{FileRevision, FileSystemPath};
    use crate::tests::TestDb;
    use crate::vfs::{FileStatus, VendoredPath};
    use crate::Db;

    /// A file that was written to the file system is interned as an existing file.
    #[test]
    fn file_system_existing_file() {
        let mut db = TestDb::new();
        let file_system = db.file_system_mut();
        file_system.write_files([("test.py", "print('Hello world')")]);

        let path = FileSystemPath::new("test.py");
        let file = db.file(path);

        assert_eq!(file.status(&db), FileStatus::Exists);
        assert_eq!(file.permissions(&db), Some(0o755));
        assert_ne!(file.revision(&db), FileRevision::zero());
        assert_eq!(&file.read(&db), "print('Hello world')");
    }

    /// A path that doesn't exist is still interned, but as a deleted file without content.
    #[test]
    fn file_system_non_existing_file() {
        let db = TestDb::new();

        let path = FileSystemPath::new("test.py");
        let file = db.file(path);

        assert_eq!(file.status(&db), FileStatus::Deleted);
        assert_eq!(file.permissions(&db), None);
        assert_eq!(file.revision(&db), FileRevision::zero());
        assert_eq!(&file.read(&db), "");
    }

    /// A stubbed vendored file exists, is read-only and returns the stubbed content.
    #[test]
    fn stubbed_vendored_file() {
        let mut db = TestDb::new();
        let vfs = db.vfs_mut();
        vfs.stub_vendored([("test.py", "def foo() -> str")]);

        let path = VendoredPath::new("test.py");
        let file = db.vendored_file(path).expect("Vendored file to exist.");

        assert_eq!(file.status(&db), FileStatus::Exists);
        assert_eq!(file.permissions(&db), Some(0o444));
        assert_ne!(file.revision(&db), FileRevision::zero());
        assert_eq!(&file.read(&db), "def foo() -> str");
    }

    /// Looking up a vendored file that wasn't stubbed yields `None`.
    #[test]
    fn stubbed_vendored_file_non_existing() {
        let db = TestDb::new();

        let path = VendoredPath::new("test.py");

        assert_eq!(db.vendored_file(path), None);
    }
}

View file

@ -0,0 +1,140 @@
use std::ops::Deref;
use std::path::Path;
use camino::{Utf8Path, Utf8PathBuf};
use crate::file_system::{FileSystemPath, FileSystemPathBuf};
/// Path to a vendored file.
///
/// This is a thin, unsized wrapper around [`Utf8Path`]; use [`VendoredPathBuf`] for the owned form.
#[repr(transparent)]
#[derive(Debug, Eq, PartialEq, Hash)]
pub struct VendoredPath(Utf8Path);
impl VendoredPath {
/// Wraps anything that can be viewed as a [`Utf8Path`] as a `&VendoredPath` without copying.
pub fn new(path: &(impl AsRef<Utf8Path> + ?Sized)) -> &Self {
let path = path.as_ref();
// SAFETY: VendoredPath is marked as #[repr(transparent)] so the conversion from a
// *const Utf8Path to a *const VendoredPath is valid.
unsafe { &*(path as *const Utf8Path as *const VendoredPath) }
}
/// Converts the path to an owned [`VendoredPathBuf`].
pub fn to_path_buf(&self) -> VendoredPathBuf {
VendoredPathBuf(self.0.to_path_buf())
}
/// Returns the path as a string slice.
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}
/// Owned counterpart of [`VendoredPath`].
#[repr(transparent)]
#[derive(Debug, Eq, PartialEq, Clone, Hash)]
pub struct VendoredPathBuf(Utf8PathBuf);

impl Default for VendoredPathBuf {
    /// Returns an empty path, exactly like [`VendoredPathBuf::new`].
    fn default() -> Self {
        VendoredPathBuf::new()
    }
}

impl VendoredPathBuf {
    /// Creates an empty path buffer.
    pub fn new() -> Self {
        let empty = Utf8PathBuf::new();
        Self(empty)
    }

    /// Borrows the buffer as a [`VendoredPath`].
    pub fn as_path(&self) -> &VendoredPath {
        let inner: &Utf8PathBuf = &self.0;
        VendoredPath::new(inner)
    }
}
impl AsRef<VendoredPath> for VendoredPathBuf {
fn as_ref(&self) -> &VendoredPath {
self.as_path()
}
}
impl AsRef<VendoredPath> for VendoredPath {
#[inline]
fn as_ref(&self) -> &VendoredPath {
self
}
}
impl AsRef<VendoredPath> for str {
#[inline]
fn as_ref(&self) -> &VendoredPath {
VendoredPath::new(self)
}
}
impl AsRef<VendoredPath> for String {
#[inline]
fn as_ref(&self) -> &VendoredPath {
VendoredPath::new(self)
}
}
impl AsRef<Path> for VendoredPath {
#[inline]
fn as_ref(&self) -> &Path {
self.0.as_std_path()
}
}
impl Deref for VendoredPathBuf {
type Target = VendoredPath;
fn deref(&self) -> &Self::Target {
self.as_path()
}
}
/// Path to a file.
///
/// The path abstracts that files in Ruff can come from different sources:
///
/// * a file stored on disk
/// * a vendored file that ships as part of the ruff binary
/// * Future: A virtual file that references a slice of another file. For example, the CSS code in a python file.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum VfsPath {
    /// Path that points to a file on disk.
    FileSystem(FileSystemPathBuf),
    /// Path that points to a vendored file.
    Vendored(VendoredPathBuf),
}

impl VfsPath {
    /// Create a new path to a file on the file system.
    #[must_use]
    pub fn file_system(path: impl AsRef<FileSystemPath>) -> Self {
        let owned = path.as_ref().to_path_buf();
        VfsPath::FileSystem(owned)
    }

    /// Returns `Some` if the path is a file system path that points to a path on disk.
    #[must_use]
    pub fn into_file_system_path_buf(self) -> Option<FileSystemPathBuf> {
        if let VfsPath::FileSystem(path) = self {
            Some(path)
        } else {
            None
        }
    }

    /// Returns `true` if the path is a file system path that points to a path on disk.
    #[must_use]
    pub const fn is_file_system_path(&self) -> bool {
        match self {
            VfsPath::FileSystem(_) => true,
            VfsPath::Vendored(_) => false,
        }
    }

    /// Yields the underlying [`str`] slice.
    pub fn as_str(&self) -> &str {
        match self {
            VfsPath::FileSystem(fs_path) => fs_path.as_str(),
            VfsPath::Vendored(vendored_path) => vendored_path.as_str(),
        }
    }
}

/// Allows using a [`VfsPath`] wherever a string slice is expected.
impl AsRef<str> for VfsPath {
    fn as_ref(&self) -> &str {
        VfsPath::as_str(self)
    }
}