[red-knot] Case sensitive module resolver (#16521)

## Summary

This PR implements the first part of
https://github.com/astral-sh/ruff/discussions/16440. It ensures that Red
Knot's module resolver is case sensitive on all systems.

This PR combines a few approaches:

1. It uses `canonicalize` on non-case-sensitive systems to get the real
casing of a path. This works as long as the path involves no symlinks or
mapped network drives (e.g. on Windows, where `E:\` is mapped to
`\\server\share`). This is the same as what Pyright does.
2. If 1. fails, fall back to recursively listing the parent directory and
testing whether the path's file name matches the casing exactly as
returned by the directory listing. This is the same approach CPython
takes in its module resolver. The main downside is that it requires more
syscalls because, unlike CPython, Red Knot needs to invalidate its caches
if a file gets renamed (CPython assumes that the folders are immutable).

It's worth noting that the file watching test that I added that renames
`lib.py` to `Lib.py` currently doesn't pass on case-insensitive systems.
Making it pass requires some more involved changes to `Files`. I plan to
work on this next. There's the argument that landing this PR on its own
isn't worth it without this issue being addressed. I think it's still a
good step in the right direction, even if some of the details of how and
where the case-sensitive path comparison is implemented may still change.

## Test plan

I added multiple integration tests (including a failing one). I tested
that the case-sensitivity detection works as expected on Windows,
macOS and Linux and that the fast paths are taken accordingly.
This commit is contained in:
Micha Reiser 2025-03-14 20:16:44 +01:00 committed by GitHub
parent a128ca761f
commit a467e7c8d3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 543 additions and 27 deletions

View file

@ -604,14 +604,29 @@ fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(SearchPath, File, Mod
/// resolving modules.
fn resolve_file_module(module: &ModulePath, resolver_state: &ResolverContext) -> Option<File> {
// Looks up the file backing `module`: the `.pyi` candidate is tried first and the
// `.py` candidate second. Returns `None` if neither candidate yields a file, or if
// the file was found on a non-case-sensitive system but its on-disk casing does not
// match the requested path.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The bare
// `module` line and the first `})` line below appear to be the removed pre-change
// lines; `let file = module` and `})?;` are their replacements.
// Stubs have precedence over source files
module
let file = module
.with_pyi_extension()
.to_file(resolver_state)
.or_else(|| {
module
.with_py_extension()
.and_then(|path| path.to_file(resolver_state))
})
// `?` bails out with `None` when neither the stub nor the source file exists.
})?;
// For system files, test if the path has the correct casing.
// We can skip this step for vendored files or virtual files because
// those file systems are case sensitive (we wouldn't get to this point).
if let Some(path) = file.path(resolver_state.db).as_system_path() {
let system = resolver_state.db.system();
// The case-sensitive existence check only runs when the system reports that it is
// not case sensitive; on case-sensitive systems the lookup above is sufficient.
// NOTE(review): the `unwrap` presumably relies on a system file always originating
// from a system search path; it panics otherwise — confirm.
if !system.case_sensitivity().is_case_sensitive()
&& !system
.path_exists_case_sensitive(path, module.search_path().as_system_path().unwrap())
{
// The file exists only under a different casing: treat the module as not found.
return None;
}
}
Some(file)
}
fn resolve_package<'a, 'db, I>(
@ -1842,4 +1857,72 @@ not_a_directory
let a_module = resolve_module(&db, &a_module_name).unwrap();
assert_eq!(a_module.file().path(&db), &system_site_packages_location);
}
/// Checks that module resolution stays case sensitive when a package directory is
/// reached through a symlink.
///
/// `src/a` is a symlink to `a-package`, so resolving `A` must fail while resolving
/// `a` must succeed and point at `src/a/__init__.py`.
#[test]
#[cfg(unix)]
fn case_sensitive_resolution_with_symlinked_directory() -> anyhow::Result<()> {
    use anyhow::Context;
    use ruff_db::system::OsSystem;

    let temp_dir = tempfile::TempDir::new()?;
    // Use the canonicalized temp dir as the root for all paths in this test.
    let root = SystemPathBuf::from_path_buf(
        temp_dir
            .path()
            .canonicalize()
            .context("Failed to canonicalize path")?,
    )
    .expect("UTF8 path for temp dir");

    let mut db = TestDb::new();

    let src = root.join("src");
    let a_package_target = root.join("a-package");
    let a_src = src.join("a");

    db.use_system(OsSystem::new(&root));

    db.write_file(
        a_package_target.join("__init__.py"),
        "class Foo: x: int = 4",
    )
    .context("Failed to write `a-package/__init__.py`")?;

    db.write_file(src.join("main.py"), "print('Hy')")
        .context("Failed to write `main.py`")?;

    // The symlink triggers the slow-path in the `OsSystem`'s `path_exists_case_sensitive`
    // code because canonicalizing the path for `a/__init__.py` results in `a-package/__init__.py`
    std::os::unix::fs::symlink(a_package_target.as_std_path(), a_src.as_std_path())
        .context("Failed to symlink `src/a` to `a-package`")?;

    Program::from_settings(
        &db,
        ProgramSettings {
            python_version: PythonVersion::default(),
            python_platform: PythonPlatform::default(),
            search_paths: SearchPathSettings {
                extra_paths: vec![],
                src_roots: vec![src],
                custom_typeshed: None,
                python_path: PythonPath::KnownSitePackages(vec![]),
            },
        },
    )
    .expect("Valid program settings");

    // Now try to resolve the module `A` (note the capital `A` instead of `a`).
    let a_module_name = ModuleName::new_static("A").unwrap();
    assert_eq!(resolve_module(&db, &a_module_name), None);

    // Now look up the same module using the lowercase `a`: it should resolve to the
    // package's `__init__.py`, reached through the `src/a` symlink.
    let a_module_name = ModuleName::new_static("a").unwrap();
    let a_module = resolve_module(&db, &a_module_name).expect("module `a` to resolve");

    assert!(a_module
        .file()
        .path(&db)
        .as_str()
        .ends_with("src/a/__init__.py"),);

    Ok(())
}
}