diff --git a/crates/red_knot/src/main.rs b/crates/red_knot/src/main.rs index 0bb8b26efa..f6139358ad 100644 --- a/crates/red_knot/src/main.rs +++ b/crates/red_knot/src/main.rs @@ -80,6 +80,8 @@ fn run_check(args: CheckCommand) -> anyhow::Result { countme::enable(verbosity.is_trace()); let _guard = setup_tracing(verbosity)?; + tracing::debug!("Version: {}", version::version()); + // The base path to which all CLI arguments are relative to. let cwd = { let cwd = std::env::current_dir().context("Failed to get the current working directory")?; diff --git a/crates/red_knot/tests/file_watching.rs b/crates/red_knot/tests/file_watching.rs index 5c6a78f21c..bbe8e8f8cb 100644 --- a/crates/red_knot/tests/file_watching.rs +++ b/crates/red_knot/tests/file_watching.rs @@ -1,5 +1,3 @@ -#![allow(clippy::disallowed_names)] - use std::collections::HashSet; use std::io::Write; use std::time::{Duration, Instant}; @@ -16,7 +14,7 @@ use ruff_db::source::source_text; use ruff_db::system::{ OsSystem, System, SystemPath, SystemPathBuf, UserConfigDirectoryOverrideGuard, }; -use ruff_db::Upcast; +use ruff_db::{Db as _, Upcast}; use ruff_python_ast::PythonVersion; struct TestCase { @@ -1790,3 +1788,82 @@ fn changes_to_user_configuration() -> anyhow::Result<()> { Ok(()) } + +/// Tests that renaming a file from `lib.py` to `Lib.py` is correctly reflected. +/// +/// This test currently fails on case-insensitive systems because `Files` is case-sensitive +/// but the `System::metadata` call isn't. This means that +/// Red Knot considers both `Lib.py` and `lib.py` to exist when only `lib.py` does +/// +/// The incoming change events then are no-ops because they don't change either file's +/// status nor does it update their last modified time (renaming a file doesn't bump it's +/// last modified timestamp). +/// +/// Fixing this requires to either make `Files` case-insensitive and store the +/// real-case path (if it differs) on `File` or make `Files` use a +/// case-sensitive `System::metadata` call. This does open the question if all +/// `System` calls should be case sensitive. This would be the most consistent +/// but might be hard to pull off. +/// +/// What the right solution is also depends on if Red Knot itself should be case +/// sensitive or not. E.g. should `include="src"` be case sensitive on all systems +/// or only on case-sensitive systems? +/// +/// Lastly, whatever solution we pick must also work well with VS Code which, +/// unfortunately ,doesn't propagate casing-only renames. +/// +#[ignore] +#[test] +fn rename_files_casing_only() -> anyhow::Result<()> { + let mut case = setup([("lib.py", "class Foo: ...")])?; + + assert!( + resolve_module(case.db(), &ModuleName::new("lib").unwrap()).is_some(), + "Expected `lib` module to exist." + ); + assert_eq!( + resolve_module(case.db(), &ModuleName::new("Lib").unwrap()), + None, + "Expected `Lib` module not to exist" + ); + + // Now rename `lib.py` to `Lib.py` + if case.db().system().case_sensitivity().is_case_sensitive() { + std::fs::rename( + case.project_path("lib.py").as_std_path(), + case.project_path("Lib.py").as_std_path(), + ) + .context("Failed to rename `lib.py` to `Lib.py`")?; + } else { + // On case-insensitive file systems, renaming a file to a different casing is a no-op. + // Rename to a different name first + std::fs::rename( + case.project_path("lib.py").as_std_path(), + case.project_path("temp.py").as_std_path(), + ) + .context("Failed to rename `lib.py` to `temp.py`")?; + + std::fs::rename( + case.project_path("temp.py").as_std_path(), + case.project_path("Lib.py").as_std_path(), + ) + .context("Failed to rename `temp.py` to `Lib.py`")?; + } + + let changes = case.stop_watch(event_for_file("Lib.py")); + case.apply_changes(changes); + + // Resolving `lib` should now fail but `Lib` should now succeed + assert_eq!( + resolve_module(case.db(), &ModuleName::new("lib").unwrap()), + None, + "Expected `lib` module to no longer exist." + ); + + assert!( + resolve_module(case.db(), &ModuleName::new("Lib").unwrap()).is_some(), + "Expected `Lib` module to exist" + ); + + Ok(()) +} diff --git a/crates/red_knot_python_semantic/resources/mdtest/import/basic.md b/crates/red_knot_python_semantic/resources/mdtest/import/basic.md index 69061b71bc..df890bffda 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/import/basic.md +++ b/crates/red_knot_python_semantic/resources/mdtest/import/basic.md @@ -140,3 +140,27 @@ import b.foo # error: [unresolved-import] "Cannot resolve import `b.foo`" ```py ``` + +## Long paths + +It's unlikely that a single module component is as long as in this example, but Windows treats paths +that are longer than 200 and something specially. This test ensures that Red Knot can handle those +paths gracefully. + +```toml +system = "os" +``` + +`AveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPath/__init__.py`: + +```py +class Foo: ... +``` + +```py +from AveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPathAveryLongPath import ( + Foo, +) + +reveal_type(Foo()) # revealed: Foo +``` diff --git a/crates/red_knot_python_semantic/resources/mdtest/import/case_sensitive.md b/crates/red_knot_python_semantic/resources/mdtest/import/case_sensitive.md index 568aa7c57c..f822ba47e2 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/import/case_sensitive.md +++ b/crates/red_knot_python_semantic/resources/mdtest/import/case_sensitive.md @@ -1,10 +1,7 @@ # Case Sensitive Imports ```toml -# TODO: This test should use the real file system instead of the memory file system. -# but we can't change the file system yet because the tests would then start failing for -# case-insensitive file systems. -#system = "os" +system = "os" ``` Python's import system is case-sensitive even on case-insensitive file system. This means, importing diff --git a/crates/red_knot_python_semantic/src/module_resolver/path.rs b/crates/red_knot_python_semantic/src/module_resolver/path.rs index f4ec2f8613..d1ee3869a0 100644 --- a/crates/red_knot_python_semantic/src/module_resolver/path.rs +++ b/crates/red_knot_python_semantic/src/module_resolver/path.rs @@ -63,6 +63,10 @@ impl ModulePath { self.relative_path.pop() } + pub(super) fn search_path(&self) -> &SearchPath { + &self.search_path + } + #[must_use] pub(super) fn is_directory(&self, resolver: &ResolverContext) -> bool { let ModulePath { diff --git a/crates/red_knot_python_semantic/src/module_resolver/resolver.rs b/crates/red_knot_python_semantic/src/module_resolver/resolver.rs index 51fd4fb4be..26efbccf10 100644 --- a/crates/red_knot_python_semantic/src/module_resolver/resolver.rs +++ b/crates/red_knot_python_semantic/src/module_resolver/resolver.rs @@ -604,14 +604,29 @@ fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(SearchPath, File, Mod /// resolving modules. fn resolve_file_module(module: &ModulePath, resolver_state: &ResolverContext) -> Option { // Stubs have precedence over source files - module + let file = module .with_pyi_extension() .to_file(resolver_state) .or_else(|| { module .with_py_extension() .and_then(|path| path.to_file(resolver_state)) - }) + })?; + + // For system files, test if the path has the correct casing. + // We can skip this step for vendored files or virtual files because + // those file systems are case sensitive (we wouldn't get to this point). + if let Some(path) = file.path(resolver_state.db).as_system_path() { + let system = resolver_state.db.system(); + if !system.case_sensitivity().is_case_sensitive() + && !system + .path_exists_case_sensitive(path, module.search_path().as_system_path().unwrap()) + { + return None; + } + } + + Some(file) } fn resolve_package<'a, 'db, I>( @@ -1842,4 +1857,72 @@ not_a_directory let a_module = resolve_module(&db, &a_module_name).unwrap(); assert_eq!(a_module.file().path(&db), &system_site_packages_location); } + + #[test] + #[cfg(unix)] + fn case_sensitive_resolution_with_symlinked_directory() -> anyhow::Result<()> { + use anyhow::Context; + use ruff_db::system::OsSystem; + + let temp_dir = tempfile::TempDir::new()?; + let root = SystemPathBuf::from_path_buf( + temp_dir + .path() + .canonicalize() + .context("Failed to canonicalized path")?, + ) + .expect("UTF8 path for temp dir"); + + let mut db = TestDb::new(); + + let src = root.join("src"); + let a_package_target = root.join("a-package"); + let a_src = src.join("a"); + + db.use_system(OsSystem::new(&root)); + + db.write_file( + a_package_target.join("__init__.py"), + "class Foo: x: int = 4", + ) + .context("Failed to write `a-package/__init__.py`")?; + + db.write_file(src.join("main.py"), "print('Hy')") + .context("Failed to write `main.py`")?; + + // The symlink triggers the slow-path in the `OsSystem`'s `exists_path_case_sensitive` + // code because canonicalizing the path for `a/__init__.py` results in `a-package/__init__.py` + std::os::unix::fs::symlink(a_package_target.as_std_path(), a_src.as_std_path()) + .context("Failed to symlink `src/a` to `a-package`")?; + + Program::from_settings( + &db, + ProgramSettings { + python_version: PythonVersion::default(), + python_platform: PythonPlatform::default(), + search_paths: SearchPathSettings { + extra_paths: vec![], + src_roots: vec![src], + custom_typeshed: None, + python_path: PythonPath::KnownSitePackages(vec![]), + }, + }, + ) + .expect("Valid program settings"); + + // Now try to resolve the module `A` (note the capital `A` instead of `a`). + let a_module_name = ModuleName::new_static("A").unwrap(); + assert_eq!(resolve_module(&db, &a_module_name), None); + + // Now lookup the same module using the lowercase `a` and it should resolve to the file in the system site-packages + let a_module_name = ModuleName::new_static("a").unwrap(); + let a_module = resolve_module(&db, &a_module_name).expect("a.py to resolve"); + assert!(a_module + .file() + .path(&db) + .as_str() + .ends_with("src/a/__init__.py"),); + + Ok(()) + } } diff --git a/crates/red_knot_server/src/system.rs b/crates/red_knot_server/src/system.rs index faf13e6e1e..44c780c3f5 100644 --- a/crates/red_knot_server/src/system.rs +++ b/crates/red_knot_server/src/system.rs @@ -7,8 +7,8 @@ use lsp_types::Url; use ruff_db::file_revision::FileRevision; use ruff_db::system::walk_directory::WalkDirectoryBuilder; use ruff_db::system::{ - DirectoryEntry, FileType, GlobError, Metadata, OsSystem, PatternError, Result, System, - SystemPath, SystemPathBuf, SystemVirtualPath, SystemVirtualPathBuf, + CaseSensitivity, DirectoryEntry, FileType, GlobError, Metadata, OsSystem, PatternError, Result, + System, SystemPath, SystemPathBuf, SystemVirtualPath, SystemVirtualPathBuf, }; use ruff_notebook::{Notebook, NotebookError}; @@ -136,6 +136,10 @@ impl System for LSPSystem { self.os_system.canonicalize_path(path) } + fn path_exists_case_sensitive(&self, path: &SystemPath, prefix: &SystemPath) -> bool { + self.os_system.path_exists_case_sensitive(path, prefix) + } + fn read_to_string(&self, path: &SystemPath) -> Result { let document = self.system_path_to_document_ref(path)?; @@ -219,6 +223,10 @@ impl System for LSPSystem { fn as_any_mut(&mut self) -> &mut dyn Any { self } + + fn case_sensitivity(&self) -> CaseSensitivity { + self.os_system.case_sensitivity() + } } fn not_a_text_document(path: impl Display) -> std::io::Error { diff --git a/crates/red_knot_test/src/db.rs b/crates/red_knot_test/src/db.rs index aaa770fc54..770be68128 100644 --- a/crates/red_knot_test/src/db.rs +++ b/crates/red_knot_test/src/db.rs @@ -3,8 +3,8 @@ use red_knot_python_semantic::lint::{LintRegistry, RuleSelection}; use red_knot_python_semantic::{default_lint_registry, Db as SemanticDb}; use ruff_db::files::{File, Files}; use ruff_db::system::{ - DbWithWritableSystem, InMemorySystem, OsSystem, System, SystemPath, SystemPathBuf, - WritableSystem, + CaseSensitivity, DbWithWritableSystem, InMemorySystem, OsSystem, System, SystemPath, + SystemPathBuf, WritableSystem, }; use ruff_db::vendored::VendoredFileSystem; use ruff_db::{Db as SourceDb, Upcast}; @@ -211,6 +211,15 @@ impl System for MdtestSystem { self.as_system().read_virtual_path_to_notebook(path) } + fn path_exists_case_sensitive(&self, path: &SystemPath, prefix: &SystemPath) -> bool { + self.as_system() + .path_exists_case_sensitive(&self.normalize_path(path), &self.normalize_path(prefix)) + } + + fn case_sensitivity(&self) -> CaseSensitivity { + self.as_system().case_sensitivity() + } + fn current_directory(&self) -> &SystemPath { self.as_system().current_directory() } diff --git a/crates/red_knot_test/src/lib.rs b/crates/red_knot_test/src/lib.rs index 528506157a..ec95d14f0e 100644 --- a/crates/red_knot_test/src/lib.rs +++ b/crates/red_knot_test/src/lib.rs @@ -113,7 +113,9 @@ fn run_test( .canonicalize() .expect("Canonicalizing to succeed"); let root_path = SystemPathBuf::from_path_buf(root_path) - .expect("Temp directory to be a valid UTF8 path"); + .expect("Temp directory to be a valid UTF8 path") + .simplified() + .to_path_buf(); db.use_os_system_with_temp_dir(root_path, dir); } diff --git a/crates/red_knot_wasm/src/lib.rs b/crates/red_knot_wasm/src/lib.rs index 3edac448e4..e7e4203e27 100644 --- a/crates/red_knot_wasm/src/lib.rs +++ b/crates/red_knot_wasm/src/lib.rs @@ -11,8 +11,8 @@ use ruff_db::diagnostic::{DisplayDiagnosticConfig, OldDiagnosticTrait}; use ruff_db::files::{system_path_to_file, File}; use ruff_db::system::walk_directory::WalkDirectoryBuilder; use ruff_db::system::{ - DirectoryEntry, GlobError, MemoryFileSystem, Metadata, PatternError, System, SystemPath, - SystemPathBuf, SystemVirtualPath, + CaseSensitivity, DirectoryEntry, GlobError, MemoryFileSystem, Metadata, PatternError, System, + SystemPath, SystemPathBuf, SystemVirtualPath, }; use ruff_notebook::Notebook; @@ -260,6 +260,14 @@ impl System for WasmSystem { Err(ruff_notebook::NotebookError::Io(not_found())) } + fn path_exists_case_sensitive(&self, path: &SystemPath, _prefix: &SystemPath) -> bool { + self.path_exists(path) + } + + fn case_sensitivity(&self) -> CaseSensitivity { + CaseSensitivity::CaseSensitive + } + fn current_directory(&self) -> &SystemPath { self.fs.current_directory() } diff --git a/crates/ruff_db/src/source.rs b/crates/ruff_db/src/source.rs index a50cf5ed62..6eb1f5e708 100644 --- a/crates/ruff_db/src/source.rs +++ b/crates/ruff_db/src/source.rs @@ -159,7 +159,7 @@ pub enum SourceTextError { /// Computes the [`LineIndex`] for `file`. #[salsa::tracked] pub fn line_index(db: &dyn Db, file: File) -> LineIndex { - let _span = tracing::trace_span!("line_index", file = ?file).entered(); + let _span = tracing::trace_span!("line_index", file = ?file.path(db)).entered(); let source = source_text(db, file); diff --git a/crates/ruff_db/src/system.rs b/crates/ruff_db/src/system.rs index 6d9ed9c42b..2139975733 100644 --- a/crates/ruff_db/src/system.rs +++ b/crates/ruff_db/src/system.rs @@ -9,7 +9,7 @@ pub use os::OsSystem; use ruff_notebook::{Notebook, NotebookError}; use std::error::Error; -use std::fmt::Debug; +use std::fmt::{Debug, Formatter}; use std::path::{Path, PathBuf}; use std::{fmt, io}; pub use test::{DbWithTestSystem, DbWithWritableSystem, InMemorySystem, TestSystem}; @@ -89,6 +89,20 @@ pub trait System: Debug { self.path_metadata(path).is_ok() } + /// Returns `true` if `path` exists on disk using the exact casing as specified in `path` for the parts after `prefix`. + /// + /// This is the same as [`Self::path_exists`] on case-sensitive systems. + /// + /// ## The use of prefix + /// + /// Prefix is only intended as an optimization for systems that can't efficiently check + /// if an entire path exists with the exact casing as specified in `path`. However, + /// implementations are allowed to check the casing of the entire path if they can do so efficiently. + fn path_exists_case_sensitive(&self, path: &SystemPath, prefix: &SystemPath) -> bool; + + /// Returns the [`CaseSensitivity`] of the system's file system. + fn case_sensitivity(&self) -> CaseSensitivity; + /// Returns `true` if `path` exists and is a directory. fn is_directory(&self, path: &SystemPath) -> bool { self.path_metadata(path) @@ -161,6 +175,39 @@ pub trait System: Debug { fn as_any_mut(&mut self) -> &mut dyn std::any::Any; } +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] +pub enum CaseSensitivity { + /// The case sensitivity of the file system is unknown. + /// + /// The file system is either case-sensitive or case-insensitive. A caller + /// should not assume either case. + #[default] + Unknown, + + /// The file system is case-sensitive. + CaseSensitive, + + /// The file system is case-insensitive. + CaseInsensitive, +} + +impl CaseSensitivity { + /// Returns `true` if the file system is known to be case-sensitive. + pub const fn is_case_sensitive(self) -> bool { + matches!(self, Self::CaseSensitive) + } +} + +impl fmt::Display for CaseSensitivity { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + CaseSensitivity::Unknown => f.write_str("unknown"), + CaseSensitivity::CaseSensitive => f.write_str("case-sensitive"), + CaseSensitivity::CaseInsensitive => f.write_str("case-insensitive"), + } + } +} + /// System trait for non-readonly systems. pub trait WritableSystem: System { /// Writes the given content to the file at the given path. diff --git a/crates/ruff_db/src/system/os.rs b/crates/ruff_db/src/system/os.rs index 95a5fe9bbc..91f97a63aa 100644 --- a/crates/ruff_db/src/system/os.rs +++ b/crates/ruff_db/src/system/os.rs @@ -1,13 +1,13 @@ +use filetime::FileTime; +use ruff_notebook::{Notebook, NotebookError}; +use rustc_hash::FxHashSet; +use std::panic::RefUnwindSafe; use std::sync::Arc; use std::{any::Any, path::PathBuf}; -use filetime::FileTime; - -use ruff_notebook::{Notebook, NotebookError}; - use crate::system::{ - DirectoryEntry, FileType, GlobError, GlobErrorKind, Metadata, Result, System, SystemPath, - SystemPathBuf, SystemVirtualPath, WritableSystem, + CaseSensitivity, DirectoryEntry, FileType, GlobError, GlobErrorKind, Metadata, Result, System, + SystemPath, SystemPathBuf, SystemVirtualPath, WritableSystem, }; use super::walk_directory::{ @@ -16,7 +16,7 @@ use super::walk_directory::{ }; /// A system implementation that uses the OS file system. -#[derive(Default, Debug, Clone)] +#[derive(Debug, Clone)] pub struct OsSystem { inner: Arc, } @@ -25,6 +25,10 @@ pub struct OsSystem { struct OsSystemInner { cwd: SystemPathBuf, + real_case_cache: CaseSensitivePathsCache, + + case_sensitivity: CaseSensitivity, + /// Overrides the user's configuration directory for testing. /// This is an `Option>` to allow setting an override of `None`. #[cfg(feature = "testing")] @@ -36,11 +40,20 @@ impl OsSystem { let cwd = cwd.as_ref(); assert!(cwd.as_utf8_path().is_absolute()); + let case_sensitivity = detect_case_sensitivity(cwd); + + tracing::debug!( + "Architecture: {}, OS: {}, case-sensitive: {case_sensitivity}", + std::env::consts::ARCH, + std::env::consts::OS, + ); + Self { // Spreading `..Default` because it isn't possible to feature gate the initializer of a single field. #[allow(clippy::needless_update)] inner: Arc::new(OsSystemInner { cwd: cwd.to_path_buf(), + case_sensitivity, ..Default::default() }), } @@ -102,6 +115,19 @@ impl System for OsSystem { path.as_std_path().exists() } + fn path_exists_case_sensitive(&self, path: &SystemPath, prefix: &SystemPath) -> bool { + if self.case_sensitivity().is_case_sensitive() { + self.path_exists(path) + } else { + self.path_exists_case_sensitive_fast(path) + .unwrap_or_else(|| self.path_exists_case_sensitive_slow(path, prefix)) + } + } + + fn case_sensitivity(&self) -> CaseSensitivity { + self.inner.case_sensitivity + } + fn current_directory(&self) -> &SystemPath { &self.inner.cwd } @@ -191,6 +217,91 @@ impl System for OsSystem { } } +impl OsSystem { + /// Path sensitive testing if a path exists by canonicalization the path and comparing it with `path`. + /// + /// This is faster than the slow path, because it requires a single system call for each path + /// instead of at least one system call for each component between `path` and `prefix`. + /// + /// However, using `canonicalize` to resolve the path's casing doesn't work in two cases: + /// * if `path` is a symlink because `canonicalize` then returns the symlink's target and not the symlink's source path. + /// * on Windows: If `path` is a mapped network drive because `canonicalize` then returns the UNC path + /// (e.g. `Z:\` is mapped to `\\server\share` and `canonicalize` then returns `\\?\UNC\server\share`). + /// + /// Symlinks and mapped network drives should be rare enough that this fast path is worth trying first, + /// even if it comes at a cost for those rare use cases. + fn path_exists_case_sensitive_fast(&self, path: &SystemPath) -> Option { + // This is a more forgiving version of `dunce::simplified` that removes all `\\?\` prefixes on Windows. + // We use this more forgiving version because we don't intend on using either path for anything other than comparison + // and the prefix is only relevant when passing the path to other programs and its longer than 200 something + // characters. + fn simplify_ignore_verbatim(path: &SystemPath) -> &SystemPath { + if cfg!(windows) { + if path.as_utf8_path().as_str().starts_with(r"\\?\") { + SystemPath::new(&path.as_utf8_path().as_str()[r"\\?\".len()..]) + } else { + path + } + } else { + path + } + } + + let simplified = simplify_ignore_verbatim(path); + + let Ok(canonicalized) = simplified.as_std_path().canonicalize() else { + // The path doesn't exist or can't be accessed. The path doesn't exist. + return Some(false); + }; + + let Ok(canonicalized) = SystemPathBuf::from_path_buf(canonicalized) else { + // The original path is valid UTF8 but the canonicalized path isn't. This definitely suggests + // that a symlink is involved. Fall back to the slow path. + tracing::debug!("Falling back to the slow case-sensitive path existence check because the canonicalized path of `{simplified}` is not valid UTF-8"); + return None; + }; + + let simplified_canonicalized = simplify_ignore_verbatim(&canonicalized); + + // Test if the paths differ by anything other than casing. If so, that suggests that + // `path` pointed to a symlink (or some other none reversible path normalization happened). + // In this case, fall back to the slow path. + if simplified_canonicalized.as_str().to_lowercase() != simplified.as_str().to_lowercase() { + tracing::debug!("Falling back to the slow case-sensitive path existence check for `{simplified}` because the canonicalized path `{simplified_canonicalized}` differs not only by casing"); + return None; + } + + // If there are no symlinks involved, then `path` exists only if it is the same as the canonicalized path. + Some(simplified_canonicalized == simplified) + } + + fn path_exists_case_sensitive_slow(&self, path: &SystemPath, prefix: &SystemPath) -> bool { + // Iterate over the sub-paths up to prefix and check if they match the casing as on disk. + for ancestor in path.ancestors() { + if ancestor == prefix { + break; + } + + match self.inner.real_case_cache.has_name_case(ancestor) { + Ok(true) => { + // Component has correct casing, continue with next component + } + Ok(false) => { + // Component has incorrect casing + return false; + } + Err(_) => { + // Directory doesn't exist or can't be accessed. We can assume that the file with + // the given casing doesn't exist. + return false; + } + } + } + + true + } +} + impl WritableSystem for OsSystem { fn write_file(&self, path: &SystemPath, content: &str) -> Result<()> { std::fs::write(path.as_std_path(), content) @@ -201,6 +312,93 @@ impl WritableSystem for OsSystem { } } +impl Default for OsSystem { + fn default() -> Self { + Self::new( + SystemPathBuf::from_path_buf(std::env::current_dir().unwrap_or_default()) + .unwrap_or_default(), + ) + } +} + +#[derive(Debug, Default)] +struct CaseSensitivePathsCache { + by_lower_case: dashmap::DashMap, +} + +impl CaseSensitivePathsCache { + /// Test if `path`'s file name uses the exact same casing as the file on disk. + /// + /// Returns `false` if the file doesn't exist. + /// + /// Components other than the file portion are ignored. + fn has_name_case(&self, path: &SystemPath) -> Result { + let Some(parent) = path.parent() else { + // The root path is always considered to exist. + return Ok(true); + }; + + let Some(file_name) = path.file_name() else { + // We can only get here for paths ending in `..` or the root path. Root paths are handled above. + // Return `true` for paths ending in `..` because `..` is the same regardless of casing. + return Ok(true); + }; + + let lower_case_path = SystemPathBuf::from(parent.as_str().to_lowercase()); + let last_modification_time = + FileTime::from_last_modification_time(&parent.as_std_path().metadata()?); + + let entry = self.by_lower_case.entry(lower_case_path); + + if let dashmap::Entry::Occupied(entry) = &entry { + // Only do a cached lookup if the directory hasn't changed. + if entry.get().last_modification_time == last_modification_time { + tracing::trace!("Use cached case-sensitive entry for directory `{}`", parent); + return Ok(entry.get().names.contains(file_name)); + } + } + + tracing::trace!( + "Reading directory `{}` for its case-sensitive filenames", + parent + ); + let start = std::time::Instant::now(); + let mut names = FxHashSet::default(); + + for entry in parent.as_std_path().read_dir()? { + let Ok(entry) = entry else { + continue; + }; + + let Ok(name) = entry.file_name().into_string() else { + continue; + }; + + names.insert(name.into_boxed_str()); + } + + let directory = entry.insert(ListedDirectory { + last_modification_time, + names, + }); + + tracing::debug!( + "Caching the case-sensitive paths for directory `{parent}` took {:?}", + start.elapsed() + ); + + Ok(directory.names.contains(file_name)) + } +} + +impl RefUnwindSafe for CaseSensitivePathsCache {} + +#[derive(Debug, Eq, PartialEq)] +struct ListedDirectory { + last_modification_time: FileTime, + names: FxHashSet>, +} + #[derive(Debug)] struct OsDirectoryWalker; @@ -426,6 +624,45 @@ pub(super) mod testing { } } +#[cfg(not(unix))] +fn detect_case_sensitivity(_path: &SystemPath) -> CaseSensitivity { + // 99% of windows systems aren't case sensitive Don't bother checking. + CaseSensitivity::Unknown +} + +#[cfg(unix)] +fn detect_case_sensitivity(path: &SystemPath) -> CaseSensitivity { + use std::os::unix::fs::MetadataExt; + + let Ok(original_case_metadata) = path.as_std_path().metadata() else { + return CaseSensitivity::Unknown; + }; + + let upper_case = SystemPathBuf::from(path.as_str().to_uppercase()); + if &*upper_case == path { + return CaseSensitivity::Unknown; + } + + match upper_case.as_std_path().metadata() { + Ok(uppercase_meta) => { + // The file system is case insensitive if the upper case and mixed case paths have the same inode. + if uppercase_meta.ino() == original_case_metadata.ino() { + CaseSensitivity::CaseInsensitive + } else { + CaseSensitivity::CaseSensitive + } + } + // In the error case, the file system is case sensitive if the file in all upper case doesn't exist. + Err(error) => { + if error.kind() == std::io::ErrorKind::NotFound { + CaseSensitivity::CaseSensitive + } else { + CaseSensitivity::Unknown + } + } + } +} + #[cfg(test)] mod tests { use tempfile::TempDir; diff --git a/crates/ruff_db/src/system/test.rs b/crates/ruff_db/src/system/test.rs index c171420f1d..bb57354664 100644 --- a/crates/ruff_db/src/system/test.rs +++ b/crates/ruff_db/src/system/test.rs @@ -5,8 +5,8 @@ use std::sync::{Arc, Mutex}; use crate::files::File; use crate::system::{ - DirectoryEntry, GlobError, MemoryFileSystem, Metadata, Result, System, SystemPath, - SystemPathBuf, SystemVirtualPath, + CaseSensitivity, DirectoryEntry, GlobError, MemoryFileSystem, Metadata, Result, System, + SystemPath, SystemPathBuf, SystemVirtualPath, }; use crate::Db; @@ -130,6 +130,14 @@ impl System for TestSystem { fn as_any_mut(&mut self) -> &mut dyn std::any::Any { self } + + fn path_exists_case_sensitive(&self, path: &SystemPath, prefix: &SystemPath) -> bool { + self.system().path_exists_case_sensitive(path, prefix) + } + + fn case_sensitivity(&self) -> CaseSensitivity { + self.system().case_sensitivity() + } } impl Default for TestSystem { @@ -349,6 +357,16 @@ impl System for InMemorySystem { fn as_any_mut(&mut self) -> &mut dyn std::any::Any { self } + + #[inline] + fn path_exists_case_sensitive(&self, path: &SystemPath, _prefix: &SystemPath) -> bool { + // The memory file system is case-sensitive. + self.path_exists(path) + } + + fn case_sensitivity(&self) -> CaseSensitivity { + CaseSensitivity::CaseSensitive + } } impl WritableSystem for InMemorySystem {