[red-knot] Move module-resolution logic to its own crate (#11964)

This commit is contained in:
Alex Waygood 2024-06-21 14:25:44 +01:00 committed by GitHub
parent 27ebff36ec
commit 736a4ead14
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 257 additions and 56 deletions

View file

@ -11,10 +11,10 @@ repository = { workspace = true }
license = { workspace = true }
[dependencies]
red_knot_module_resolver = { workspace = true }
ruff_db = { workspace = true }
ruff_index = { workspace = true }
ruff_python_ast = { workspace = true }
ruff_python_stdlib = { workspace = true }
ruff_text_size = { workspace = true }
bitflags = { workspace = true }
@ -29,7 +29,6 @@ hashbrown = { workspace = true }
[dev-dependencies]
anyhow = { workspace = true }
ruff_python_parser = { workspace = true }
tempfile = { workspace = true }
[lints]
workspace = true

View file

@ -2,10 +2,7 @@ use salsa::DbWithJar;
use ruff_db::{Db as SourceDb, Upcast};
use crate::module::resolver::{
file_to_module, internal::ModuleNameIngredient, internal::ModuleResolverSearchPaths,
resolve_module_query,
};
use red_knot_module_resolver::Db as ResolverDb;
use crate::semantic_index::symbol::{public_symbols_map, scopes_map, PublicSymbolId, ScopeId};
use crate::semantic_index::{root_scope, semantic_index, symbol_table};
@ -13,13 +10,9 @@ use crate::types::{infer_types, public_symbol_ty};
#[salsa::jar(db=Db)]
pub struct Jar(
ModuleNameIngredient<'_>,
ModuleResolverSearchPaths,
ScopeId<'_>,
PublicSymbolId<'_>,
symbol_table,
resolve_module_query,
file_to_module,
scopes_map,
root_scope,
semantic_index,
@ -29,7 +22,10 @@ pub struct Jar(
);
/// Database giving access to semantic information about a Python program.
pub trait Db: SourceDb + DbWithJar<Jar> + Upcast<dyn SourceDb> {}
pub trait Db:
SourceDb + ResolverDb + DbWithJar<Jar> + Upcast<dyn SourceDb> + Upcast<dyn ResolverDb>
{
}
#[cfg(test)]
pub(crate) mod tests {
@ -42,13 +38,14 @@ pub(crate) mod tests {
use salsa::storage::HasIngredientsFor;
use salsa::DebugWithDb;
use red_knot_module_resolver::{Db as ResolverDb, Jar as ResolverJar};
use ruff_db::file_system::{FileSystem, MemoryFileSystem, OsFileSystem};
use ruff_db::vfs::Vfs;
use ruff_db::{Db as SourceDb, Jar as SourceJar, Upcast};
use super::{Db, Jar};
#[salsa::db(Jar, SourceJar)]
#[salsa::db(Jar, ResolverJar, SourceJar)]
pub(crate) struct TestDb {
storage: salsa::Storage<Self>,
vfs: Vfs,
@ -78,15 +75,6 @@ pub(crate) mod tests {
}
}
/// Uses the real file system instead of the memory file system.
///
/// This useful for testing advanced file system features like permissions, symlinks, etc.
///
/// Note that any files written to the memory file system won't be copied over.
pub(crate) fn with_os_file_system(&mut self) {
self.file_system = TestFileSystem::Os(OsFileSystem);
}
#[allow(unused)]
pub(crate) fn vfs_mut(&mut self) -> &mut Vfs {
&mut self.vfs
@ -131,6 +119,13 @@ pub(crate) mod tests {
}
}
impl Upcast<dyn ResolverDb> for TestDb {
fn upcast(&self) -> &(dyn ResolverDb + 'static) {
self
}
}
impl red_knot_module_resolver::Db for TestDb {}
impl Db for TestDb {}
impl salsa::Database for TestDb {
@ -157,6 +152,7 @@ pub(crate) mod tests {
enum TestFileSystem {
Memory(MemoryFileSystem),
#[allow(dead_code)]
Os(OsFileSystem),
}

View file

@ -1,6 +1,5 @@
pub mod ast_node_ref;
mod db;
pub mod module;
pub mod name;
mod node_key;
pub mod semantic_index;

View file

@ -1,332 +0,0 @@
use std::fmt::Formatter;
use std::ops::Deref;
use std::sync::Arc;
use ruff_db::file_system::FileSystemPath;
use ruff_db::vfs::{VfsFile, VfsPath};
use ruff_python_stdlib::identifiers::is_identifier;
use crate::Db;
pub mod resolver;
/// A module name, e.g. `foo.bar`.
///
/// Always normalized to the absolute form (never a relative module name, i.e., never `.foo`).
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct ModuleName(smol_str::SmolStr);
impl ModuleName {
/// Creates a new module name for `name`. Returns `Some` if `name` is a valid, absolute
/// module name and `None` otherwise.
///
/// The module name is invalid if:
///
/// * The name is empty
/// * The name is relative
/// * The name ends with a `.`
/// * The name contains a sequence of multiple dots
/// * A component of a name (the part between two dots) isn't a valid python identifier.
#[inline]
pub fn new(name: &str) -> Option<Self> {
Self::new_from_smol(smol_str::SmolStr::new(name))
}
/// Creates a new module name for `name` where `name` is a static string.
/// Returns `Some` if `name` is a valid, absolute module name and `None` otherwise.
///
/// The module name is invalid if:
///
/// * The name is empty
/// * The name is relative
/// * The name ends with a `.`
/// * The name contains a sequence of multiple dots
/// * A component of a name (the part between two dots) isn't a valid python identifier.
///
/// ## Examples
///
/// ```
/// use red_knot_python_semantic::module::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar").as_deref(), Some("foo.bar"));
/// assert_eq!(ModuleName::new_static(""), None);
/// assert_eq!(ModuleName::new_static("..foo"), None);
/// assert_eq!(ModuleName::new_static(".foo"), None);
/// assert_eq!(ModuleName::new_static("foo."), None);
/// assert_eq!(ModuleName::new_static("foo..bar"), None);
/// assert_eq!(ModuleName::new_static("2000"), None);
/// ```
#[inline]
pub fn new_static(name: &'static str) -> Option<Self> {
Self::new_from_smol(smol_str::SmolStr::new_static(name))
}
fn new_from_smol(name: smol_str::SmolStr) -> Option<Self> {
if name.is_empty() {
return None;
}
if name.split('.').all(is_identifier) {
Some(Self(name))
} else {
None
}
}
/// An iterator over the components of the module name:
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::module::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().components().collect::<Vec<_>>(), vec!["foo", "bar", "baz"]);
/// ```
pub fn components(&self) -> impl DoubleEndedIterator<Item = &str> {
self.0.split('.')
}
/// The name of this module's immediate parent, if it has a parent.
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::module::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar").unwrap().parent(), Some(ModuleName::new_static("foo").unwrap()));
/// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().parent(), Some(ModuleName::new_static("foo.bar").unwrap()));
/// assert_eq!(ModuleName::new_static("root").unwrap().parent(), None);
/// ```
pub fn parent(&self) -> Option<ModuleName> {
let (parent, _) = self.0.rsplit_once('.')?;
Some(Self(smol_str::SmolStr::new(parent)))
}
/// Returns `true` if the name starts with `other`.
///
/// This is equivalent to checking if `self` is a sub-module of `other`.
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::module::ModuleName;
///
/// assert!(ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap()));
///
/// assert!(!ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("bar").unwrap()));
/// assert!(!ModuleName::new_static("foo_bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap()));
/// ```
pub fn starts_with(&self, other: &ModuleName) -> bool {
let mut self_components = self.components();
let other_components = other.components();
for other_component in other_components {
if self_components.next() != Some(other_component) {
return false;
}
}
true
}
#[inline]
pub fn as_str(&self) -> &str {
&self.0
}
fn from_relative_path(path: &FileSystemPath) -> Option<Self> {
let path = if path.ends_with("__init__.py") || path.ends_with("__init__.pyi") {
path.parent()?
} else {
path
};
let name = if let Some(parent) = path.parent() {
let mut name = String::with_capacity(path.as_str().len());
for component in parent.components() {
name.push_str(component.as_os_str().to_str()?);
name.push('.');
}
// SAFETY: Unwrap is safe here or `parent` would have returned `None`.
name.push_str(path.file_stem().unwrap());
smol_str::SmolStr::from(name)
} else {
smol_str::SmolStr::new(path.file_stem()?)
};
Some(Self(name))
}
}
impl Deref for ModuleName {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl PartialEq<str> for ModuleName {
fn eq(&self, other: &str) -> bool {
self.as_str() == other
}
}
impl PartialEq<ModuleName> for str {
fn eq(&self, other: &ModuleName) -> bool {
self == other.as_str()
}
}
impl std::fmt::Display for ModuleName {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.0)
}
}
/// Representation of a Python module.
#[derive(Clone, PartialEq, Eq)]
pub struct Module {
inner: Arc<ModuleInner>,
}
impl Module {
/// The absolute name of the module (e.g. `foo.bar`)
pub fn name(&self) -> &ModuleName {
&self.inner.name
}
/// The file to the source code that defines this module
pub fn file(&self) -> VfsFile {
self.inner.file
}
/// The search path from which the module was resolved.
pub fn search_path(&self) -> &ModuleSearchPath {
&self.inner.search_path
}
/// Determine whether this module is a single-file module or a package
pub fn kind(&self) -> ModuleKind {
self.inner.kind
}
}
impl std::fmt::Debug for Module {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Module")
.field("name", &self.name())
.field("kind", &self.kind())
.field("file", &self.file())
.field("search_path", &self.search_path())
.finish()
}
}
impl salsa::DebugWithDb<dyn Db> for Module {
fn fmt(&self, f: &mut Formatter<'_>, db: &dyn Db) -> std::fmt::Result {
f.debug_struct("Module")
.field("name", &self.name())
.field("kind", &self.kind())
.field("file", &self.file().debug(db.upcast()))
.field("search_path", &self.search_path())
.finish()
}
}
#[derive(PartialEq, Eq)]
struct ModuleInner {
name: ModuleName,
kind: ModuleKind,
search_path: ModuleSearchPath,
file: VfsFile,
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ModuleKind {
/// A single-file module (e.g. `foo.py` or `foo.pyi`)
Module,
/// A python package (`foo/__init__.py` or `foo/__init__.pyi`)
Package,
}
/// A search path in which to search modules.
/// Corresponds to a path in [`sys.path`](https://docs.python.org/3/library/sys_path_init.html) at runtime.
///
/// Cloning a search path is cheap because it's an `Arc`.
#[derive(Clone, PartialEq, Eq)]
pub struct ModuleSearchPath {
inner: Arc<ModuleSearchPathInner>,
}
impl ModuleSearchPath {
pub fn new<P>(path: P, kind: ModuleSearchPathKind) -> Self
where
P: Into<VfsPath>,
{
Self {
inner: Arc::new(ModuleSearchPathInner {
path: path.into(),
kind,
}),
}
}
/// Determine whether this is a first-party, third-party or standard-library search path
pub fn kind(&self) -> ModuleSearchPathKind {
self.inner.kind
}
/// Return the location of the search path on the file system
pub fn path(&self) -> &VfsPath {
&self.inner.path
}
}
impl std::fmt::Debug for ModuleSearchPath {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ModuleSearchPath")
.field("path", &self.inner.path)
.field("kind", &self.kind())
.finish()
}
}
#[derive(Eq, PartialEq)]
struct ModuleSearchPathInner {
path: VfsPath,
kind: ModuleSearchPathKind,
}
/// Enumeration of the different kinds of search paths type checkers are expected to support.
///
/// N.B. Although we don't implement `Ord` for this enum, they are ordered in terms of the
/// priority that we want to give these modules when resolving them.
/// This is roughly [the order given in the typing spec], but typeshed's stubs
/// for the standard library are moved higher up to match Python's semantics at runtime.
///
/// [the order given in the typing spec]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ModuleSearchPathKind {
/// "Extra" paths provided by the user in a config file, env var or CLI flag.
/// E.g. mypy's `MYPYPATH` env var, or pyright's `stubPath` configuration setting
Extra,
/// Files in the project we're directly being invoked on
FirstParty,
/// The `stdlib` directory of typeshed (either vendored or custom)
StandardLibrary,
/// Stubs or runtime modules installed in site-packages
SitePackagesThirdParty,
/// Vendored third-party stubs from typeshed
VendoredThirdParty,
}

View file

@ -1,947 +0,0 @@
use salsa::DebugWithDb;
use std::ops::Deref;
use std::sync::Arc;
use ruff_db::file_system::{FileSystem, FileSystemPath, FileSystemPathBuf};
use ruff_db::vfs::{system_path_to_file, vfs_path_to_file, VfsFile, VfsPath};
use crate::module::resolver::internal::ModuleResolverSearchPaths;
use crate::module::{
Module, ModuleInner, ModuleKind, ModuleName, ModuleSearchPath, ModuleSearchPathKind,
};
use crate::Db;
const TYPESHED_STDLIB_DIRECTORY: &str = "stdlib";
/// Configures the module search paths for the module resolver.
///
/// Must be called before calling any other module resolution functions.
pub fn set_module_resolution_settings(db: &mut dyn Db, config: ModuleResolutionSettings) {
// There's no concurrency issue here because we hold a `&mut dyn Db` reference. No other
// thread can mutate the `Db` while we're in this call, so using `try_get` to test if
// the settings have already been set is safe.
if let Some(existing) = ModuleResolverSearchPaths::try_get(db) {
existing
.set_search_paths(db)
.to(config.into_ordered_search_paths());
} else {
ModuleResolverSearchPaths::new(db, config.into_ordered_search_paths());
}
}
/// Resolves a module name to a module.
pub fn resolve_module(db: &dyn Db, module_name: ModuleName) -> Option<Module> {
let interned_name = internal::ModuleNameIngredient::new(db, module_name);
resolve_module_query(db, interned_name)
}
/// Salsa query that resolves an interned [`ModuleNameIngredient`] to a module.
///
/// This query should not be called directly. Instead, use [`resolve_module`]. It only exists
/// because Salsa requires the module name to be an ingredient.
#[salsa::tracked]
pub(crate) fn resolve_module_query<'db>(
db: &'db dyn Db,
module_name: internal::ModuleNameIngredient<'db>,
) -> Option<Module> {
let _ = tracing::trace_span!("resolve_module", module_name = ?module_name.debug(db)).enter();
let name = module_name.name(db);
let (search_path, module_file, kind) = resolve_name(db, name)?;
let module = Module {
inner: Arc::new(ModuleInner {
name: name.clone(),
kind,
search_path,
file: module_file,
}),
};
Some(module)
}
/// Resolves the module for the given path.
///
/// Returns `None` if the path is not a module locatable via `sys.path`.
#[tracing::instrument(level = "debug", skip(db))]
pub fn path_to_module(db: &dyn Db, path: &VfsPath) -> Option<Module> {
// It's not entirely clear on first sight why this method calls `file_to_module` instead of
// it being the other way round, considering that the first thing that `file_to_module` does
// is to retrieve the file's path.
//
// The reason is that `file_to_module` is a tracked Salsa query and salsa queries require that
// all arguments are Salsa ingredients (something stored in Salsa). `Path`s aren't salsa ingredients but
// `VfsFile` is. So what we do here is to retrieve the `path`'s `VfsFile` so that we can make
// use of Salsa's caching and invalidation.
let file = vfs_path_to_file(db.upcast(), path)?;
file_to_module(db, file)
}
/// Resolves the module for the file with the given id.
///
/// Returns `None` if the file is not a module locatable via `sys.path`.
#[salsa::tracked]
pub(crate) fn file_to_module(db: &dyn Db, file: VfsFile) -> Option<Module> {
let _ = tracing::trace_span!("file_to_module", file = ?file.debug(db.upcast())).enter();
let path = file.path(db.upcast());
let search_paths = module_search_paths(db);
let relative_path = search_paths
.iter()
.find_map(|root| match (root.path(), path) {
(VfsPath::FileSystem(root_path), VfsPath::FileSystem(path)) => {
let relative_path = path.strip_prefix(root_path).ok()?;
Some(relative_path)
}
(VfsPath::Vendored(_), VfsPath::Vendored(_)) => {
todo!("Add support for vendored modules")
}
(VfsPath::Vendored(_), VfsPath::FileSystem(_))
| (VfsPath::FileSystem(_), VfsPath::Vendored(_)) => None,
})?;
let module_name = ModuleName::from_relative_path(relative_path)?;
// Resolve the module name to see if Python would resolve the name to the same path.
// If it doesn't, then that means that multiple modules have the same name in different
// root paths, but that the module corresponding to `path` is in a lower priority search path,
// in which case we ignore it.
let module = resolve_module(db, module_name)?;
if file == module.file() {
Some(module)
} else {
// This path is for a module with the same name but with a different precedence. For example:
// ```
// src/foo.py
// src/foo/__init__.py
// ```
// The module name of `src/foo.py` is `foo`, but the module loaded by Python is `src/foo/__init__.py`.
// That means we need to ignore `src/foo.py` even though it resolves to the same module name.
None
}
}
/// Configures the [`ModuleSearchPath`]s that are used to resolve modules.
#[derive(Eq, PartialEq, Debug)]
pub struct ModuleResolutionSettings {
/// List of user-provided paths that should take first priority in the module resolution.
/// Examples in other type checkers are mypy's MYPYPATH environment variable,
/// or pyright's stubPath configuration setting.
pub extra_paths: Vec<FileSystemPathBuf>,
/// The root of the workspace, used for finding first-party modules.
pub workspace_root: FileSystemPathBuf,
/// The path to the user's `site-packages` directory, where third-party packages from ``PyPI`` are installed.
pub site_packages: Option<FileSystemPathBuf>,
/// Optional path to standard-library typeshed stubs.
/// Currently this has to be a directory that exists on disk.
///
/// (TODO: fall back to vendored stubs if no custom directory is provided.)
pub custom_typeshed: Option<FileSystemPathBuf>,
}
impl ModuleResolutionSettings {
/// Implementation of PEP 561's module resolution order
/// (with some small, deliberate, differences)
fn into_ordered_search_paths(self) -> OrderedSearchPaths {
let ModuleResolutionSettings {
extra_paths,
workspace_root,
site_packages,
custom_typeshed,
} = self;
let mut paths: Vec<_> = extra_paths
.into_iter()
.map(|path| ModuleSearchPath::new(path, ModuleSearchPathKind::Extra))
.collect();
paths.push(ModuleSearchPath::new(
workspace_root,
ModuleSearchPathKind::FirstParty,
));
// TODO fallback to vendored typeshed stubs if no custom typeshed directory is provided by the user
if let Some(custom_typeshed) = custom_typeshed {
paths.push(ModuleSearchPath::new(
custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY),
ModuleSearchPathKind::StandardLibrary,
));
}
// TODO vendor typeshed's third-party stubs as well as the stdlib and fallback to them as a final step
if let Some(site_packages) = site_packages {
paths.push(ModuleSearchPath::new(
site_packages,
ModuleSearchPathKind::SitePackagesThirdParty,
));
}
OrderedSearchPaths(paths)
}
}
/// A resolved module resolution order, implementing PEP 561
/// (with some small, deliberate differences)
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub(crate) struct OrderedSearchPaths(Vec<ModuleSearchPath>);
impl Deref for OrderedSearchPaths {
type Target = [ModuleSearchPath];
fn deref(&self) -> &Self::Target {
&self.0
}
}
// The singleton methods generated by salsa are all `pub` instead of `pub(crate)` which triggers
// `unreachable_pub`. Work around this by creating a module and allow `unreachable_pub` for it.
// Salsa also generates uses to `_db` variables for `interned` which triggers `clippy::used_underscore_binding`. Suppress that too
// TODO(micha): Contribute a fix for this upstream where the singleton methods have the same visibility as the struct.
#[allow(unreachable_pub, clippy::used_underscore_binding)]
pub(crate) mod internal {
use crate::module::resolver::OrderedSearchPaths;
use crate::module::ModuleName;
#[salsa::input(singleton)]
pub(crate) struct ModuleResolverSearchPaths {
#[return_ref]
pub(super) search_paths: OrderedSearchPaths,
}
/// A thin wrapper around `ModuleName` to make it a Salsa ingredient.
///
/// This is needed because Salsa requires that all query arguments are salsa ingredients.
#[salsa::interned]
pub(crate) struct ModuleNameIngredient<'db> {
#[return_ref]
pub(super) name: ModuleName,
}
}
fn module_search_paths(db: &dyn Db) -> &[ModuleSearchPath] {
ModuleResolverSearchPaths::get(db).search_paths(db)
}
/// Given a module name and a list of search paths in which to lookup modules,
/// attempt to resolve the module name
fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(ModuleSearchPath, VfsFile, ModuleKind)> {
let search_paths = module_search_paths(db);
for search_path in search_paths {
let mut components = name.components();
let module_name = components.next_back()?;
let VfsPath::FileSystem(fs_search_path) = search_path.path() else {
todo!("Vendored search paths are not yet supported");
};
match resolve_package(db.file_system(), fs_search_path, components) {
Ok(resolved_package) => {
let mut package_path = resolved_package.path;
package_path.push(module_name);
// Must be a `__init__.pyi` or `__init__.py` or it isn't a package.
let kind = if db.file_system().is_directory(&package_path) {
package_path.push("__init__");
ModuleKind::Package
} else {
ModuleKind::Module
};
// TODO Implement full https://peps.python.org/pep-0561/#type-checker-module-resolution-order resolution
let stub = package_path.with_extension("pyi");
if let Some(stub) = system_path_to_file(db.upcast(), &stub) {
return Some((search_path.clone(), stub, kind));
}
let module = package_path.with_extension("py");
if let Some(module) = system_path_to_file(db.upcast(), &module) {
return Some((search_path.clone(), module, kind));
}
// For regular packages, don't search the next search path. All files of that
// package must be in the same location
if resolved_package.kind.is_regular_package() {
return None;
}
}
Err(parent_kind) => {
if parent_kind.is_regular_package() {
// For regular packages, don't search the next search path.
return None;
}
}
}
}
None
}
fn resolve_package<'a, I>(
fs: &dyn FileSystem,
module_search_path: &FileSystemPath,
components: I,
) -> Result<ResolvedPackage, PackageKind>
where
I: Iterator<Item = &'a str>,
{
let mut package_path = module_search_path.to_path_buf();
// `true` if inside a folder that is a namespace package (has no `__init__.py`).
// Namespace packages are special because they can be spread across multiple search paths.
// https://peps.python.org/pep-0420/
let mut in_namespace_package = false;
// `true` if resolving a sub-package. For example, `true` when resolving `bar` of `foo.bar`.
let mut in_sub_package = false;
// For `foo.bar.baz`, test that `foo` and `baz` both contain a `__init__.py`.
for folder in components {
package_path.push(folder);
let has_init_py = fs.is_file(&package_path.join("__init__.py"))
|| fs.is_file(&package_path.join("__init__.pyi"));
if has_init_py {
in_namespace_package = false;
} else if fs.is_directory(&package_path) {
// A directory without an `__init__.py` is a namespace package, continue with the next folder.
in_namespace_package = true;
} else if in_namespace_package {
// Package not found but it is part of a namespace package.
return Err(PackageKind::Namespace);
} else if in_sub_package {
// A regular sub package wasn't found.
return Err(PackageKind::Regular);
} else {
// We couldn't find `foo` for `foo.bar.baz`, search the next search path.
return Err(PackageKind::Root);
}
in_sub_package = true;
}
let kind = if in_namespace_package {
PackageKind::Namespace
} else if in_sub_package {
PackageKind::Regular
} else {
PackageKind::Root
};
Ok(ResolvedPackage {
kind,
path: package_path,
})
}
#[derive(Debug)]
struct ResolvedPackage {
path: FileSystemPathBuf,
kind: PackageKind,
}
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum PackageKind {
/// A root package or module. E.g. `foo` in `foo.bar.baz` or just `foo`.
Root,
/// A regular sub-package where the parent contains an `__init__.py`.
///
/// For example, `bar` in `foo.bar` when the `foo` directory contains an `__init__.py`.
Regular,
/// A sub-package in a namespace package. A namespace package is a package without an `__init__.py`.
///
/// For example, `bar` in `foo.bar` if the `foo` directory contains no `__init__.py`.
Namespace,
}
impl PackageKind {
const fn is_regular_package(self) -> bool {
matches!(self, PackageKind::Regular)
}
}
#[cfg(test)]
mod tests {
use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf};
use ruff_db::vfs::{system_path_to_file, VfsFile, VfsPath};
use crate::db::tests::TestDb;
use crate::module::{ModuleKind, ModuleName};
use super::{
path_to_module, resolve_module, set_module_resolution_settings, ModuleResolutionSettings,
TYPESHED_STDLIB_DIRECTORY,
};
struct TestCase {
db: TestDb,
src: FileSystemPathBuf,
custom_typeshed: FileSystemPathBuf,
site_packages: FileSystemPathBuf,
}
fn create_resolver() -> std::io::Result<TestCase> {
let mut db = TestDb::new();
let src = FileSystemPath::new("src").to_path_buf();
let site_packages = FileSystemPath::new("site_packages").to_path_buf();
let custom_typeshed = FileSystemPath::new("typeshed").to_path_buf();
let fs = db.memory_file_system();
fs.create_directory_all(&src)?;
fs.create_directory_all(&site_packages)?;
fs.create_directory_all(&custom_typeshed)?;
let settings = ModuleResolutionSettings {
extra_paths: vec![],
workspace_root: src.clone(),
site_packages: Some(site_packages.clone()),
custom_typeshed: Some(custom_typeshed.clone()),
};
set_module_resolution_settings(&mut db, settings);
Ok(TestCase {
db,
src,
custom_typeshed,
site_packages,
})
}
#[test]
fn first_party_module() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo_module_name = ModuleName::new_static("foo").unwrap();
let foo_path = src.join("foo.py");
db.memory_file_system()
.write_file(&foo_path, "print('Hello, world!')")?;
let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap();
assert_eq!(
Some(&foo_module),
resolve_module(&db, foo_module_name.clone()).as_ref()
);
assert_eq!("foo", foo_module.name());
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(ModuleKind::Module, foo_module.kind());
assert_eq!(&foo_path, foo_module.file().path(&db));
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_path))
);
Ok(())
}
#[test]
fn stdlib() -> anyhow::Result<()> {
let TestCase {
db,
custom_typeshed,
..
} = create_resolver()?;
let stdlib_dir = custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY);
let functools_path = stdlib_dir.join("functools.py");
db.memory_file_system()
.write_file(&functools_path, "def update_wrapper(): ...")?;
let functools_module_name = ModuleName::new_static("functools").unwrap();
let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap();
assert_eq!(
Some(&functools_module),
resolve_module(&db, functools_module_name).as_ref()
);
assert_eq!(&stdlib_dir, functools_module.search_path().path());
assert_eq!(ModuleKind::Module, functools_module.kind());
assert_eq!(&functools_path.clone(), functools_module.file().path(&db));
assert_eq!(
Some(functools_module),
path_to_module(&db, &VfsPath::FileSystem(functools_path))
);
Ok(())
}
#[test]
fn first_party_precedence_over_stdlib() -> anyhow::Result<()> {
let TestCase {
db,
src,
custom_typeshed,
..
} = create_resolver()?;
let stdlib_dir = custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY);
let stdlib_functools_path = stdlib_dir.join("functools.py");
let first_party_functools_path = src.join("functools.py");
db.memory_file_system().write_files([
(&stdlib_functools_path, "def update_wrapper(): ..."),
(&first_party_functools_path, "def update_wrapper(): ..."),
])?;
let functools_module_name = ModuleName::new_static("functools").unwrap();
let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap();
assert_eq!(
Some(&functools_module),
resolve_module(&db, functools_module_name).as_ref()
);
assert_eq!(&src, functools_module.search_path().path());
assert_eq!(ModuleKind::Module, functools_module.kind());
assert_eq!(
&first_party_functools_path.clone(),
functools_module.file().path(&db)
);
assert_eq!(
Some(functools_module),
path_to_module(&db, &VfsPath::FileSystem(first_party_functools_path))
);
Ok(())
}
// TODO: Port typeshed test case. Porting isn't possible at the moment because the vendored zip
// is part of the red knot crate
// #[test]
// fn typeshed_zip_created_at_build_time() -> anyhow::Result<()> {
// // The file path here is hardcoded in this crate's `build.rs` script.
// // Luckily this crate will fail to build if this file isn't available at build time.
// const TYPESHED_ZIP_BYTES: &[u8] =
// include_bytes!(concat!(env!("OUT_DIR"), "/zipped_typeshed.zip"));
// assert!(!TYPESHED_ZIP_BYTES.is_empty());
// let mut typeshed_zip_archive = ZipArchive::new(Cursor::new(TYPESHED_ZIP_BYTES))?;
//
// let path_to_functools = Path::new("stdlib").join("functools.pyi");
// let mut functools_module_stub = typeshed_zip_archive
// .by_name(path_to_functools.to_str().unwrap())
// .unwrap();
// assert!(functools_module_stub.is_file());
//
// let mut functools_module_stub_source = String::new();
// functools_module_stub.read_to_string(&mut functools_module_stub_source)?;
//
// assert!(functools_module_stub_source.contains("def update_wrapper("));
// Ok(())
// }
#[test]
fn resolve_package() -> anyhow::Result<()> {
let TestCase { src, db, .. } = create_resolver()?;
let foo_dir = src.join("foo");
let foo_path = foo_dir.join("__init__.py");
db.memory_file_system()
.write_file(&foo_path, "print('Hello, world!')")?;
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!("foo", foo_module.name());
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo_path, foo_module.file().path(&db));
assert_eq!(
Some(&foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_path)).as_ref()
);
// Resolving by directory doesn't resolve to the init file.
assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_dir)));
Ok(())
}
#[test]
fn package_priority_over_module() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo_dir = src.join("foo");
let foo_init = foo_dir.join("__init__.py");
db.memory_file_system()
.write_file(&foo_init, "print('Hello, world!')")?;
let foo_py = src.join("foo.py");
db.memory_file_system()
.write_file(&foo_py, "print('Hello, world!')")?;
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo_init, foo_module.file().path(&db));
assert_eq!(ModuleKind::Package, foo_module.kind());
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_init))
);
assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_py)));
Ok(())
}
#[test]
fn typing_stub_over_module() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo_stub = src.join("foo.pyi");
let foo_py = src.join("foo.py");
db.memory_file_system()
.write_files([(&foo_stub, "x: int"), (&foo_py, "print('Hello, world!')")])?;
let foo = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!(&src, foo.search_path().path());
assert_eq!(&foo_stub, foo.file().path(&db));
assert_eq!(
Some(foo),
path_to_module(&db, &VfsPath::FileSystem(foo_stub))
);
assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_py)));
Ok(())
}
#[test]
fn sub_packages() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo = src.join("foo");
let bar = foo.join("bar");
let baz = bar.join("baz.py");
db.memory_file_system().write_files([
(&foo.join("__init__.py"), ""),
(&bar.join("__init__.py"), ""),
(&baz, "print('Hello, world!')"),
])?;
let baz_module =
resolve_module(&db, ModuleName::new_static("foo.bar.baz").unwrap()).unwrap();
assert_eq!(&src, baz_module.search_path().path());
assert_eq!(&baz, baz_module.file().path(&db));
assert_eq!(
Some(baz_module),
path_to_module(&db, &VfsPath::FileSystem(baz))
);
Ok(())
}
#[test]
fn namespace_package() -> anyhow::Result<()> {
let TestCase {
db,
src,
site_packages,
..
} = create_resolver()?;
// From [PEP420](https://peps.python.org/pep-0420/#nested-namespace-packages).
// But uses `src` for `project1` and `site_packages2` for `project2`.
// ```
// src
// parent
// child
// one.py
// site_packages
// parent
// child
// two.py
// ```
let parent1 = src.join("parent");
let child1 = parent1.join("child");
let one = child1.join("one.py");
let parent2 = site_packages.join("parent");
let child2 = parent2.join("child");
let two = child2.join("two.py");
db.memory_file_system().write_files([
(&one, "print('Hello, world!')"),
(&two, "print('Hello, world!')"),
])?;
let one_module =
resolve_module(&db, ModuleName::new_static("parent.child.one").unwrap()).unwrap();
assert_eq!(
Some(one_module),
path_to_module(&db, &VfsPath::FileSystem(one))
);
let two_module =
resolve_module(&db, ModuleName::new_static("parent.child.two").unwrap()).unwrap();
assert_eq!(
Some(two_module),
path_to_module(&db, &VfsPath::FileSystem(two))
);
Ok(())
}
#[test]
fn regular_package_in_namespace_package() -> anyhow::Result<()> {
let TestCase {
db,
src,
site_packages,
..
} = create_resolver()?;
// Adopted test case from the [PEP420 examples](https://peps.python.org/pep-0420/#nested-namespace-packages).
// The `src/parent/child` package is a regular package. Therefore, `site_packages/parent/child/two.py` should not be resolved.
// ```
// src
// parent
// child
// one.py
// site_packages
// parent
// child
// two.py
// ```
let parent1 = src.join("parent");
let child1 = parent1.join("child");
let one = child1.join("one.py");
let parent2 = site_packages.join("parent");
let child2 = parent2.join("child");
let two = child2.join("two.py");
db.memory_file_system().write_files([
(&child1.join("__init__.py"), "print('Hello, world!')"),
(&one, "print('Hello, world!')"),
(&two, "print('Hello, world!')"),
])?;
let one_module =
resolve_module(&db, ModuleName::new_static("parent.child.one").unwrap()).unwrap();
assert_eq!(
Some(one_module),
path_to_module(&db, &VfsPath::FileSystem(one))
);
assert_eq!(
None,
resolve_module(&db, ModuleName::new_static("parent.child.two").unwrap())
);
Ok(())
}
#[test]
fn module_search_path_priority() -> anyhow::Result<()> {
let TestCase {
db,
src,
site_packages,
..
} = create_resolver()?;
let foo_src = src.join("foo.py");
let foo_site_packages = site_packages.join("foo.py");
db.memory_file_system()
.write_files([(&foo_src, ""), (&foo_site_packages, "")])?;
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo_src, foo_module.file().path(&db));
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_src))
);
assert_eq!(
None,
path_to_module(&db, &VfsPath::FileSystem(foo_site_packages))
);
Ok(())
}
#[test]
#[cfg(target_family = "unix")]
fn symlink() -> anyhow::Result<()> {
let TestCase {
mut db,
src,
site_packages,
custom_typeshed,
} = create_resolver()?;
db.with_os_file_system();
let temp_dir = tempfile::tempdir()?;
let root = FileSystemPath::from_std_path(temp_dir.path()).unwrap();
let src = root.join(src);
let site_packages = root.join(site_packages);
let custom_typeshed = root.join(custom_typeshed);
let foo = src.join("foo.py");
let bar = src.join("bar.py");
std::fs::create_dir_all(src.as_std_path())?;
std::fs::create_dir_all(site_packages.as_std_path())?;
std::fs::create_dir_all(custom_typeshed.as_std_path())?;
std::fs::write(foo.as_std_path(), "")?;
std::os::unix::fs::symlink(foo.as_std_path(), bar.as_std_path())?;
let settings = ModuleResolutionSettings {
extra_paths: vec![],
workspace_root: src.clone(),
site_packages: Some(site_packages),
custom_typeshed: Some(custom_typeshed),
};
set_module_resolution_settings(&mut db, settings);
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
let bar_module = resolve_module(&db, ModuleName::new_static("bar").unwrap()).unwrap();
assert_ne!(foo_module, bar_module);
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo, foo_module.file().path(&db));
// `foo` and `bar` shouldn't resolve to the same file
assert_eq!(&src, bar_module.search_path().path());
assert_eq!(&bar, bar_module.file().path(&db));
assert_eq!(&foo, foo_module.file().path(&db));
assert_ne!(&foo_module, &bar_module);
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo))
);
assert_eq!(
Some(bar_module),
path_to_module(&db, &VfsPath::FileSystem(bar))
);
Ok(())
}
#[test]
fn deleting_an_unrealted_file_doesnt_change_module_resolution() -> anyhow::Result<()> {
let TestCase { mut db, src, .. } = create_resolver()?;
let foo_path = src.join("foo.py");
let bar_path = src.join("bar.py");
db.memory_file_system()
.write_files([(&foo_path, "x = 1"), (&bar_path, "y = 2")])?;
let foo_module_name = ModuleName::new_static("foo").unwrap();
let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap();
let bar = system_path_to_file(&db, &bar_path).expect("bar.py to exist");
db.clear_salsa_events();
// Delete `bar.py`
db.memory_file_system().remove_file(&bar_path)?;
bar.touch(&mut db);
// Re-query the foo module. The foo module should still be cached because `bar.py` isn't relevant
// for resolving `foo`.
let foo_module2 = resolve_module(&db, foo_module_name);
assert!(!db
.take_salsa_events()
.iter()
.any(|event| { matches!(event.kind, salsa::EventKind::WillExecute { .. }) }));
assert_eq!(Some(foo_module), foo_module2);
Ok(())
}
#[test]
fn adding_a_file_on_which_the_module_resolution_depends_on_invalidates_the_query(
) -> anyhow::Result<()> {
let TestCase { mut db, src, .. } = create_resolver()?;
let foo_path = src.join("foo.py");
let foo_module_name = ModuleName::new_static("foo").unwrap();
assert_eq!(resolve_module(&db, foo_module_name.clone()), None);
// Now write the foo file
db.memory_file_system().write_file(&foo_path, "x = 1")?;
VfsFile::touch_path(&mut db, &VfsPath::FileSystem(foo_path.clone()));
let foo_file = system_path_to_file(&db, &foo_path).expect("foo.py to exist");
let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve");
assert_eq!(foo_file, foo_module.file());
Ok(())
}
#[test]
fn removing_a_file_that_the_module_resolution_depends_on_invalidates_the_query(
) -> anyhow::Result<()> {
let TestCase { mut db, src, .. } = create_resolver()?;
let foo_path = src.join("foo.py");
let foo_init_path = src.join("foo/__init__.py");
db.memory_file_system()
.write_files([(&foo_path, "x = 1"), (&foo_init_path, "x = 2")])?;
let foo_module_name = ModuleName::new_static("foo").unwrap();
let foo_module = resolve_module(&db, foo_module_name.clone()).expect("foo module to exist");
assert_eq!(&foo_init_path, foo_module.file().path(&db));
// Delete `foo/__init__.py` and the `foo` folder. `foo` should now resolve to `foo.py`
db.memory_file_system().remove_file(&foo_init_path)?;
db.memory_file_system()
.remove_directory(foo_init_path.parent().unwrap())?;
VfsFile::touch_path(&mut db, &VfsPath::FileSystem(foo_init_path.clone()));
let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve");
assert_eq!(&foo_path, foo_module.file().path(&db));
Ok(())
}
}

View file

@ -513,9 +513,9 @@ mod tests {
use crate::db::tests::{
assert_will_not_run_function_query, assert_will_run_function_query, TestDb,
};
use crate::module::resolver::{set_module_resolution_settings, ModuleResolutionSettings};
use crate::semantic_index::root_scope;
use crate::types::{expression_ty, infer_types, public_symbol_ty_by_name, TypingContext};
use red_knot_module_resolver::{set_module_resolution_settings, ModuleResolutionSettings};
fn setup_db() -> TestDb {
let mut db = TestDb::new();

View file

@ -2,13 +2,13 @@ use std::sync::Arc;
use rustc_hash::FxHashMap;
use red_knot_module_resolver::resolve_module;
use red_knot_module_resolver::ModuleName;
use ruff_db::vfs::VfsFile;
use ruff_index::IndexVec;
use ruff_python_ast as ast;
use ruff_python_ast::{ExprContext, TypeParams};
use crate::module::resolver::resolve_module;
use crate::module::ModuleName;
use crate::name::Name;
use crate::semantic_index::ast_ids::{ScopeAstIdNode, ScopeExpressionId};
use crate::semantic_index::definition::{Definition, ImportDefinition, ImportFromDefinition};
@ -358,7 +358,7 @@ impl<'db> TypeInferenceBuilder<'db> {
} = alias;
let module_name = ModuleName::new(&name.id);
let module = module_name.and_then(|name| resolve_module(self.db, name));
let module = module_name.and_then(|name| resolve_module(self.db.upcast(), name));
let module_ty = module
.map(|module| self.typing_context().module_ty(module.file()))
.unwrap_or(Type::Unknown);
@ -384,7 +384,8 @@ impl<'db> TypeInferenceBuilder<'db> {
let import_id = import.scope_ast_id(self.db, self.file_id, self.file_scope_id);
let module_name = ModuleName::new(module.as_deref().expect("Support relative imports"));
let module = module_name.and_then(|module_name| resolve_module(self.db, module_name));
let module =
module_name.and_then(|module_name| resolve_module(self.db.upcast(), module_name));
let module_ty = module
.map(|module| self.typing_context().module_ty(module.file()))
.unwrap_or(Type::Unknown);
@ -694,9 +695,9 @@ mod tests {
use ruff_db::vfs::system_path_to_file;
use crate::db::tests::TestDb;
use crate::module::resolver::{set_module_resolution_settings, ModuleResolutionSettings};
use crate::name::Name;
use crate::types::{public_symbol_ty_by_name, Type, TypingContext};
use red_knot_module_resolver::{set_module_resolution_settings, ModuleResolutionSettings};
fn setup_db() -> TestDb {
let mut db = TestDb::new();