mirror of
https://github.com/astral-sh/ruff.git
synced 2025-11-02 21:03:11 +00:00
[red-knot] Case sensitive module resolver (#16521)
## Summary This PR implements the first part of https://github.com/astral-sh/ruff/discussions/16440. It ensures that Red Knot's module resolver is case sensitive on all systems. This PR combines a few approaches: 1. It uses `canonicalize` on non-case-sensitive systems to get the real casing of a path. This works as long as no symlinks or mapped network drives are involved (on Windows, e.g., `E:\` being mapped to `\\server\share`). This is the same as what Pyright does. 2. If 1. fails, it falls back to recursively listing the parent directory and testing whether the path's file name matches the casing exactly as reported by the directory listing. This is the same approach that CPython takes in its module resolver. The main downside is that it requires more syscalls because, unlike CPython, Red Knot needs to invalidate its caches if a file gets renamed (CPython assumes that the folders are immutable). It's worth noting that the file-watching test that I added, which renames `lib.py` to `Lib.py`, currently doesn't pass on case-insensitive systems. Making it pass requires some more involved changes to `Files`. I plan to work on this next. There's the argument that landing this PR on its own isn't worth it without this issue being addressed. I think it's still a good step in the right direction, even if some of the details of how and where the case-sensitive path comparison is implemented may still change. ## Test plan I added multiple integration tests (including a failing one). I tested that the `case-sensitivity` detection works as expected on Windows, macOS, and Linux and that the fast paths are taken accordingly.
This commit is contained in:
parent
a128ca761f
commit
a467e7c8d3
14 changed files with 543 additions and 27 deletions
|
|
@ -159,7 +159,7 @@ pub enum SourceTextError {
|
|||
/// Computes the [`LineIndex`] for `file`.
|
||||
#[salsa::tracked]
|
||||
pub fn line_index(db: &dyn Db, file: File) -> LineIndex {
|
||||
let _span = tracing::trace_span!("line_index", file = ?file).entered();
|
||||
let _span = tracing::trace_span!("line_index", file = ?file.path(db)).entered();
|
||||
|
||||
let source = source_text(db, file);
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ pub use os::OsSystem;
|
|||
|
||||
use ruff_notebook::{Notebook, NotebookError};
|
||||
use std::error::Error;
|
||||
use std::fmt::Debug;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{fmt, io};
|
||||
pub use test::{DbWithTestSystem, DbWithWritableSystem, InMemorySystem, TestSystem};
|
||||
|
|
@ -89,6 +89,20 @@ pub trait System: Debug {
|
|||
self.path_metadata(path).is_ok()
|
||||
}
|
||||
|
||||
/// Returns `true` if `path` exists on disk using the exact casing as specified in `path` for the parts after `prefix`.
|
||||
///
|
||||
/// This is the same as [`Self::path_exists`] on case-sensitive systems.
|
||||
///
|
||||
/// ## The use of prefix
|
||||
///
|
||||
/// Prefix is only intended as an optimization for systems that can't efficiently check
|
||||
/// if an entire path exists with the exact casing as specified in `path`. However,
|
||||
/// implementations are allowed to check the casing of the entire path if they can do so efficiently.
|
||||
///
/// NOTE(review): `prefix` is presumably expected to be an ancestor of `path`; the OS
/// implementation in this change walks the ancestors of `path` until it reaches `prefix`.
/// Confirm against the callers.
fn path_exists_case_sensitive(&self, path: &SystemPath, prefix: &SystemPath) -> bool;
|
||||
|
||||
/// Returns the [`CaseSensitivity`] of the system's file system.
///
/// The result may be [`CaseSensitivity::Unknown`]; callers should then not assume either case.
|
||||
fn case_sensitivity(&self) -> CaseSensitivity;
|
||||
|
||||
/// Returns `true` if `path` exists and is a directory.
|
||||
fn is_directory(&self, path: &SystemPath) -> bool {
|
||||
self.path_metadata(path)
|
||||
|
|
@ -161,6 +175,39 @@ pub trait System: Debug {
|
|||
/// Returns this system as a mutable [`std::any::Any`] trait object.
fn as_any_mut(&mut self) -> &mut dyn std::any::Any;
|
||||
}
|
||||
|
||||
/// Describes whether a file system treats paths that differ only by casing as distinct.
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
pub enum CaseSensitivity {
    /// The case sensitivity of the file system is unknown.
    ///
    /// The file system is either case-sensitive or case-insensitive. A caller
    /// should not assume either case.
    #[default]
    Unknown,

    /// The file system is case-sensitive.
    CaseSensitive,

    /// The file system is case-insensitive.
    CaseInsensitive,
}

impl CaseSensitivity {
    /// Returns `true` if the file system is known to be case-sensitive.
    ///
    /// Both [`Self::Unknown`] and [`Self::CaseInsensitive`] return `false`.
    pub const fn is_case_sensitive(self) -> bool {
        matches!(self, Self::CaseSensitive)
    }
}

impl fmt::Display for CaseSensitivity {
    /// Writes `unknown`, `case-sensitive` or `case-insensitive`.
    ///
    /// Uses [`Formatter::pad`] so that width, alignment and precision flags
    /// (for example `{:>20}`) are honored; without flags the output is the bare label.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let label = match self {
            CaseSensitivity::Unknown => "unknown",
            CaseSensitivity::CaseSensitive => "case-sensitive",
            CaseSensitivity::CaseInsensitive => "case-insensitive",
        };

        f.pad(label)
    }
}
|
||||
|
||||
/// System trait for non-readonly systems.
|
||||
pub trait WritableSystem: System {
|
||||
/// Writes the given content to the file at the given path.
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
use filetime::FileTime;
|
||||
use ruff_notebook::{Notebook, NotebookError};
|
||||
use rustc_hash::FxHashSet;
|
||||
use std::panic::RefUnwindSafe;
|
||||
use std::sync::Arc;
|
||||
use std::{any::Any, path::PathBuf};
|
||||
|
||||
use filetime::FileTime;
|
||||
|
||||
use ruff_notebook::{Notebook, NotebookError};
|
||||
|
||||
use crate::system::{
|
||||
DirectoryEntry, FileType, GlobError, GlobErrorKind, Metadata, Result, System, SystemPath,
|
||||
SystemPathBuf, SystemVirtualPath, WritableSystem,
|
||||
CaseSensitivity, DirectoryEntry, FileType, GlobError, GlobErrorKind, Metadata, Result, System,
|
||||
SystemPath, SystemPathBuf, SystemVirtualPath, WritableSystem,
|
||||
};
|
||||
|
||||
use super::walk_directory::{
|
||||
|
|
@ -16,7 +16,7 @@ use super::walk_directory::{
|
|||
};
|
||||
|
||||
/// A system implementation that uses the OS file system.
|
||||
#[derive(Default, Debug, Clone)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct OsSystem {
|
||||
inner: Arc<OsSystemInner>,
|
||||
}
|
||||
|
|
@ -25,6 +25,10 @@ pub struct OsSystem {
|
|||
struct OsSystemInner {
|
||||
cwd: SystemPathBuf,
|
||||
|
||||
real_case_cache: CaseSensitivePathsCache,
|
||||
|
||||
case_sensitivity: CaseSensitivity,
|
||||
|
||||
/// Overrides the user's configuration directory for testing.
|
||||
/// This is an `Option<Option<..>>` to allow setting an override of `None`.
|
||||
#[cfg(feature = "testing")]
|
||||
|
|
@ -36,11 +40,20 @@ impl OsSystem {
|
|||
let cwd = cwd.as_ref();
|
||||
assert!(cwd.as_utf8_path().is_absolute());
|
||||
|
||||
let case_sensitivity = detect_case_sensitivity(cwd);
|
||||
|
||||
tracing::debug!(
|
||||
"Architecture: {}, OS: {}, case-sensitive: {case_sensitivity}",
|
||||
std::env::consts::ARCH,
|
||||
std::env::consts::OS,
|
||||
);
|
||||
|
||||
Self {
|
||||
// Spreading `..Default` because it isn't possible to feature gate the initializer of a single field.
|
||||
#[allow(clippy::needless_update)]
|
||||
inner: Arc::new(OsSystemInner {
|
||||
cwd: cwd.to_path_buf(),
|
||||
case_sensitivity,
|
||||
..Default::default()
|
||||
}),
|
||||
}
|
||||
|
|
@ -102,6 +115,19 @@ impl System for OsSystem {
|
|||
path.as_std_path().exists()
|
||||
}
|
||||
|
||||
fn path_exists_case_sensitive(&self, path: &SystemPath, prefix: &SystemPath) -> bool {
    // A plain existence check already respects casing when the file system
    // is known to be case-sensitive.
    if self.case_sensitivity().is_case_sensitive() {
        return self.path_exists(path);
    }

    // Otherwise try the canonicalization-based check first and only fall back to
    // the directory-listing based check when it can't give a definite answer.
    match self.path_exists_case_sensitive_fast(path) {
        Some(exists) => exists,
        None => self.path_exists_case_sensitive_slow(path, prefix),
    }
}
|
||||
|
||||
// Returns the case sensitivity stored on the shared inner state.
fn case_sensitivity(&self) -> CaseSensitivity {
|
||||
self.inner.case_sensitivity
|
||||
}
|
||||
|
||||
// Returns a borrow of the working directory stored on the shared inner state.
fn current_directory(&self) -> &SystemPath {
|
||||
&self.inner.cwd
|
||||
}
|
||||
|
|
@ -191,6 +217,91 @@ impl System for OsSystem {
|
|||
}
|
||||
}
|
||||
|
||||
impl OsSystem {
|
||||
/// Path sensitive testing if a path exists by canonicalization the path and comparing it with `path`.
|
||||
///
|
||||
/// This is faster than the slow path, because it requires a single system call for each path
|
||||
/// instead of at least one system call for each component between `path` and `prefix`.
|
||||
///
|
||||
/// However, using `canonicalize` to resolve the path's casing doesn't work in two cases:
|
||||
/// * if `path` is a symlink because `canonicalize` then returns the symlink's target and not the symlink's source path.
|
||||
/// * on Windows: If `path` is a mapped network drive because `canonicalize` then returns the UNC path
|
||||
/// (e.g. `Z:\` is mapped to `\\server\share` and `canonicalize` then returns `\\?\UNC\server\share`).
|
||||
///
|
||||
/// Symlinks and mapped network drives should be rare enough that this fast path is worth trying first,
|
||||
/// even if it comes at a cost for those rare use cases.
|
||||
fn path_exists_case_sensitive_fast(&self, path: &SystemPath) -> Option<bool> {
|
||||
// This is a more forgiving version of `dunce::simplified` that removes all `\\?\` prefixes on Windows.
|
||||
// We use this more forgiving version because we don't intend on using either path for anything other than comparison
|
||||
// and the prefix is only relevant when passing the path to other programs and its longer than 200 something
|
||||
// characters.
|
||||
fn simplify_ignore_verbatim(path: &SystemPath) -> &SystemPath {
|
||||
if cfg!(windows) {
|
||||
if path.as_utf8_path().as_str().starts_with(r"\\?\") {
|
||||
SystemPath::new(&path.as_utf8_path().as_str()[r"\\?\".len()..])
|
||||
} else {
|
||||
path
|
||||
}
|
||||
} else {
|
||||
path
|
||||
}
|
||||
}
|
||||
|
||||
let simplified = simplify_ignore_verbatim(path);
|
||||
|
||||
let Ok(canonicalized) = simplified.as_std_path().canonicalize() else {
|
||||
// The path doesn't exist or can't be accessed. The path doesn't exist.
|
||||
return Some(false);
|
||||
};
|
||||
|
||||
let Ok(canonicalized) = SystemPathBuf::from_path_buf(canonicalized) else {
|
||||
// The original path is valid UTF8 but the canonicalized path isn't. This definitely suggests
|
||||
// that a symlink is involved. Fall back to the slow path.
|
||||
tracing::debug!("Falling back to the slow case-sensitive path existence check because the canonicalized path of `{simplified}` is not valid UTF-8");
|
||||
return None;
|
||||
};
|
||||
|
||||
let simplified_canonicalized = simplify_ignore_verbatim(&canonicalized);
|
||||
|
||||
// Test if the paths differ by anything other than casing. If so, that suggests that
|
||||
// `path` pointed to a symlink (or some other none reversible path normalization happened).
|
||||
// In this case, fall back to the slow path.
|
||||
if simplified_canonicalized.as_str().to_lowercase() != simplified.as_str().to_lowercase() {
|
||||
tracing::debug!("Falling back to the slow case-sensitive path existence check for `{simplified}` because the canonicalized path `{simplified_canonicalized}` differs not only by casing");
|
||||
return None;
|
||||
}
|
||||
|
||||
// If there are no symlinks involved, then `path` exists only if it is the same as the canonicalized path.
|
||||
Some(simplified_canonicalized == simplified)
|
||||
}
|
||||
|
||||
fn path_exists_case_sensitive_slow(&self, path: &SystemPath, prefix: &SystemPath) -> bool {
|
||||
// Iterate over the sub-paths up to prefix and check if they match the casing as on disk.
|
||||
for ancestor in path.ancestors() {
|
||||
if ancestor == prefix {
|
||||
break;
|
||||
}
|
||||
|
||||
match self.inner.real_case_cache.has_name_case(ancestor) {
|
||||
Ok(true) => {
|
||||
// Component has correct casing, continue with next component
|
||||
}
|
||||
Ok(false) => {
|
||||
// Component has incorrect casing
|
||||
return false;
|
||||
}
|
||||
Err(_) => {
|
||||
// Directory doesn't exist or can't be accessed. We can assume that the file with
|
||||
// the given casing doesn't exist.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl WritableSystem for OsSystem {
|
||||
fn write_file(&self, path: &SystemPath, content: &str) -> Result<()> {
|
||||
std::fs::write(path.as_std_path(), content)
|
||||
|
|
@ -201,6 +312,93 @@ impl WritableSystem for OsSystem {
|
|||
}
|
||||
}
|
||||
|
||||
impl Default for OsSystem {
|
||||
fn default() -> Self {
|
||||
Self::new(
|
||||
SystemPathBuf::from_path_buf(std::env::current_dir().unwrap_or_default())
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Cache of directory listings used to test whether a file name uses the same casing as on disk.
#[derive(Debug, Default)]
|
||||
struct CaseSensitivePathsCache {
|
||||
/// The listed directories, keyed by the lower-cased path of the directory.
by_lower_case: dashmap::DashMap<SystemPathBuf, ListedDirectory>,
|
||||
}
|
||||
|
||||
impl CaseSensitivePathsCache {
|
||||
/// Test if `path`'s file name uses the exact same casing as the file on disk.
|
||||
///
|
||||
/// Returns `false` if the file doesn't exist.
|
||||
///
|
||||
/// Components other than the file portion are ignored.
|
||||
fn has_name_case(&self, path: &SystemPath) -> Result<bool> {
|
||||
let Some(parent) = path.parent() else {
|
||||
// The root path is always considered to exist.
|
||||
return Ok(true);
|
||||
};
|
||||
|
||||
let Some(file_name) = path.file_name() else {
|
||||
// We can only get here for paths ending in `..` or the root path. Root paths are handled above.
|
||||
// Return `true` for paths ending in `..` because `..` is the same regardless of casing.
|
||||
return Ok(true);
|
||||
};
|
||||
|
||||
let lower_case_path = SystemPathBuf::from(parent.as_str().to_lowercase());
|
||||
let last_modification_time =
|
||||
FileTime::from_last_modification_time(&parent.as_std_path().metadata()?);
|
||||
|
||||
let entry = self.by_lower_case.entry(lower_case_path);
|
||||
|
||||
if let dashmap::Entry::Occupied(entry) = &entry {
|
||||
// Only do a cached lookup if the directory hasn't changed.
|
||||
if entry.get().last_modification_time == last_modification_time {
|
||||
tracing::trace!("Use cached case-sensitive entry for directory `{}`", parent);
|
||||
return Ok(entry.get().names.contains(file_name));
|
||||
}
|
||||
}
|
||||
|
||||
tracing::trace!(
|
||||
"Reading directory `{}` for its case-sensitive filenames",
|
||||
parent
|
||||
);
|
||||
let start = std::time::Instant::now();
|
||||
let mut names = FxHashSet::default();
|
||||
|
||||
for entry in parent.as_std_path().read_dir()? {
|
||||
let Ok(entry) = entry else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let Ok(name) = entry.file_name().into_string() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
names.insert(name.into_boxed_str());
|
||||
}
|
||||
|
||||
let directory = entry.insert(ListedDirectory {
|
||||
last_modification_time,
|
||||
names,
|
||||
});
|
||||
|
||||
tracing::debug!(
|
||||
"Caching the case-sensitive paths for directory `{parent}` took {:?}",
|
||||
start.elapsed()
|
||||
);
|
||||
|
||||
Ok(directory.names.contains(file_name))
|
||||
}
|
||||
}
|
||||
|
||||
// Marker impl declaring the cache `RefUnwindSafe`.
// NOTE(review): presumably needed because the contained `DashMap` doesn't provide this
// auto trait and `OsSystem` must be usable across unwind boundaries; confirm.
impl RefUnwindSafe for CaseSensitivePathsCache {}
|
||||
|
||||
/// A cached directory listing.
#[derive(Debug, Eq, PartialEq)]
|
||||
struct ListedDirectory {
|
||||
/// The directory's last modification time when it was listed; used to detect stale entries.
last_modification_time: FileTime,
|
||||
/// The names of the directory's entries, with their casing as returned by the listing.
names: FxHashSet<Box<str>>,
|
||||
}
|
||||
|
||||
// NOTE(review): presumably the directory walker backed by the OS file system; its trait
// implementations are outside this view. Confirm.
#[derive(Debug)]
|
||||
struct OsDirectoryWalker;
|
||||
|
||||
|
|
@ -426,6 +624,45 @@ pub(super) mod testing {
|
|||
}
|
||||
}
|
||||
|
||||
/// Non-Unix variant: performs no detection and always reports [`CaseSensitivity::Unknown`].
#[cfg(not(unix))]
|
||||
fn detect_case_sensitivity(_path: &SystemPath) -> CaseSensitivity {
|
||||
// 99% of Windows systems aren't case-sensitive. Don't bother checking.
// `Unknown` is not treated as case-sensitive by `CaseSensitivity::is_case_sensitive`.
|
||||
CaseSensitivity::Unknown
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn detect_case_sensitivity(path: &SystemPath) -> CaseSensitivity {
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
|
||||
let Ok(original_case_metadata) = path.as_std_path().metadata() else {
|
||||
return CaseSensitivity::Unknown;
|
||||
};
|
||||
|
||||
let upper_case = SystemPathBuf::from(path.as_str().to_uppercase());
|
||||
if &*upper_case == path {
|
||||
return CaseSensitivity::Unknown;
|
||||
}
|
||||
|
||||
match upper_case.as_std_path().metadata() {
|
||||
Ok(uppercase_meta) => {
|
||||
// The file system is case insensitive if the upper case and mixed case paths have the same inode.
|
||||
if uppercase_meta.ino() == original_case_metadata.ino() {
|
||||
CaseSensitivity::CaseInsensitive
|
||||
} else {
|
||||
CaseSensitivity::CaseSensitive
|
||||
}
|
||||
}
|
||||
// In the error case, the file system is case sensitive if the file in all upper case doesn't exist.
|
||||
Err(error) => {
|
||||
if error.kind() == std::io::ErrorKind::NotFound {
|
||||
CaseSensitivity::CaseSensitive
|
||||
} else {
|
||||
CaseSensitivity::Unknown
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use tempfile::TempDir;
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@ use std::sync::{Arc, Mutex};
|
|||
|
||||
use crate::files::File;
|
||||
use crate::system::{
|
||||
DirectoryEntry, GlobError, MemoryFileSystem, Metadata, Result, System, SystemPath,
|
||||
SystemPathBuf, SystemVirtualPath,
|
||||
CaseSensitivity, DirectoryEntry, GlobError, MemoryFileSystem, Metadata, Result, System,
|
||||
SystemPath, SystemPathBuf, SystemVirtualPath,
|
||||
};
|
||||
use crate::Db;
|
||||
|
||||
|
|
@ -130,6 +130,14 @@ impl System for TestSystem {
|
|||
// Exposes this system as a mutable `Any` trait object.
fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
|
||||
// Forwards the check unchanged to the system returned by `self.system()`.
fn path_exists_case_sensitive(&self, path: &SystemPath, prefix: &SystemPath) -> bool {
|
||||
self.system().path_exists_case_sensitive(path, prefix)
|
||||
}
|
||||
|
||||
// Reports the case sensitivity of the system returned by `self.system()`.
fn case_sensitivity(&self) -> CaseSensitivity {
|
||||
self.system().case_sensitivity()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for TestSystem {
|
||||
|
|
@ -349,6 +357,16 @@ impl System for InMemorySystem {
|
|||
// Exposes this system as a mutable `Any` trait object.
fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
|
||||
// `_prefix` is unused: a plain existence check is sufficient here.
#[inline]
|
||||
fn path_exists_case_sensitive(&self, path: &SystemPath, _prefix: &SystemPath) -> bool {
|
||||
// The memory file system is case-sensitive.
|
||||
self.path_exists(path)
|
||||
}
|
||||
|
||||
// The in-memory system always reports itself as case-sensitive.
fn case_sensitivity(&self) -> CaseSensitivity {
|
||||
CaseSensitivity::CaseSensitive
|
||||
}
|
||||
}
|
||||
|
||||
impl WritableSystem for InMemorySystem {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue