diff --git a/Cargo.lock b/Cargo.lock index ca7a28371e..1da9a1e543 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1891,6 +1891,7 @@ name = "red_knot_module_resolver" version = "0.0.0" dependencies = [ "anyhow", + "camino", "compact_str", "insta", "path-slash", diff --git a/crates/red_knot/src/main.rs b/crates/red_knot/src/main.rs index 0a34e38dd2..85d26458c3 100644 --- a/crates/red_knot/src/main.rs +++ b/crates/red_knot/src/main.rs @@ -12,7 +12,9 @@ use tracing_tree::time::Uptime; use red_knot::program::{FileWatcherChange, Program}; use red_knot::watch::FileWatcher; use red_knot::Workspace; -use red_knot_module_resolver::{set_module_resolution_settings, ModuleResolutionSettings}; +use red_knot_module_resolver::{ + set_module_resolution_settings, RawModuleResolutionSettings, TargetVersion, +}; use ruff_db::file_system::{FileSystem, FileSystemPath, OsFileSystem}; use ruff_db::vfs::system_path_to_file; @@ -57,11 +59,12 @@ pub fn main() -> anyhow::Result<()> { set_module_resolution_settings( &mut program, - ModuleResolutionSettings { + RawModuleResolutionSettings { extra_paths: vec![], workspace_root: workspace_search_path, site_packages: None, custom_typeshed: None, + target_version: TargetVersion::Py38, }, ); diff --git a/crates/red_knot_module_resolver/Cargo.toml b/crates/red_knot_module_resolver/Cargo.toml index ec05ec525b..99e69f35cc 100644 --- a/crates/red_knot_module_resolver/Cargo.toml +++ b/crates/red_knot_module_resolver/Cargo.toml @@ -15,6 +15,7 @@ ruff_db = { workspace = true } ruff_python_stdlib = { workspace = true } compact_str = { workspace = true } +camino = { workspace = true } rustc-hash = { workspace = true } salsa = { workspace = true } tracing = { workspace = true } diff --git a/crates/red_knot_module_resolver/src/db.rs b/crates/red_knot_module_resolver/src/db.rs index c1d4e274ec..3d64ee76f4 100644 --- a/crates/red_knot_module_resolver/src/db.rs +++ b/crates/red_knot_module_resolver/src/db.rs @@ -2,28 +2,34 @@ use ruff_db::Upcast; use 
crate::resolver::{ file_to_module, - internal::{ModuleNameIngredient, ModuleResolverSearchPaths}, + internal::{ModuleNameIngredient, ModuleResolverSettings}, resolve_module_query, }; +use crate::typeshed::parse_typeshed_versions; #[salsa::jar(db=Db)] pub struct Jar( ModuleNameIngredient<'_>, - ModuleResolverSearchPaths, + ModuleResolverSettings, resolve_module_query, file_to_module, + parse_typeshed_versions, ); pub trait Db: salsa::DbWithJar + ruff_db::Db + Upcast {} +#[cfg(test)] pub(crate) mod tests { use std::sync; use salsa::DebugWithDb; - use ruff_db::file_system::{FileSystem, MemoryFileSystem, OsFileSystem}; + use ruff_db::file_system::{FileSystem, FileSystemPathBuf, MemoryFileSystem, OsFileSystem}; use ruff_db::vfs::Vfs; + use crate::resolver::{set_module_resolution_settings, RawModuleResolutionSettings}; + use crate::supported_py_version::TargetVersion; + use super::*; #[salsa::db(Jar, ruff_db::Jar)] @@ -35,7 +41,6 @@ pub(crate) mod tests { } impl TestDb { - #[allow(unused)] pub(crate) fn new() -> Self { Self { storage: salsa::Storage::default(), @@ -49,7 +54,6 @@ pub(crate) mod tests { /// /// ## Panics /// If this test db isn't using a memory file system. - #[allow(unused)] pub(crate) fn memory_file_system(&self) -> &MemoryFileSystem { if let TestFileSystem::Memory(fs) = &self.file_system { fs @@ -63,7 +67,6 @@ pub(crate) mod tests { /// This useful for testing advanced file system features like permissions, symlinks, etc. /// /// Note that any files written to the memory file system won't be copied over. - #[allow(unused)] pub(crate) fn with_os_file_system(&mut self) { self.file_system = TestFileSystem::Os(OsFileSystem); } @@ -77,7 +80,6 @@ pub(crate) mod tests { /// /// ## Panics /// If there are any pending salsa snapshots. 
- #[allow(unused)] pub(crate) fn take_salsa_events(&mut self) -> Vec { let inner = sync::Arc::get_mut(&mut self.events).expect("no pending salsa snapshots"); @@ -89,7 +91,6 @@ pub(crate) mod tests { /// /// ## Panics /// If there are any pending salsa snapshots. - #[allow(unused)] pub(crate) fn clear_salsa_events(&mut self) { self.take_salsa_events(); } @@ -153,4 +154,111 @@ pub(crate) mod tests { } } } + + pub(crate) struct TestCaseBuilder { + db: TestDb, + src: FileSystemPathBuf, + custom_typeshed: FileSystemPathBuf, + site_packages: FileSystemPathBuf, + target_version: Option, + } + + impl TestCaseBuilder { + #[must_use] + pub(crate) fn with_target_version(mut self, target_version: TargetVersion) -> Self { + self.target_version = Some(target_version); + self + } + + pub(crate) fn build(self) -> TestCase { + let TestCaseBuilder { + mut db, + src, + custom_typeshed, + site_packages, + target_version, + } = self; + + let settings = RawModuleResolutionSettings { + target_version: target_version.unwrap_or_default(), + extra_paths: vec![], + workspace_root: src.clone(), + custom_typeshed: Some(custom_typeshed.clone()), + site_packages: Some(site_packages.clone()), + }; + + set_module_resolution_settings(&mut db, settings); + + TestCase { + db, + src, + custom_typeshed, + site_packages, + } + } + } + + pub(crate) struct TestCase { + pub(crate) db: TestDb, + pub(crate) src: FileSystemPathBuf, + pub(crate) custom_typeshed: FileSystemPathBuf, + pub(crate) site_packages: FileSystemPathBuf, + } + + pub(crate) fn create_resolver_builder() -> std::io::Result { + static VERSIONS_DATA: &str = "\ + asyncio: 3.8- # 'Regular' package on py38+ + asyncio.tasks: 3.9-3.11 + collections: 3.9- # 'Regular' package on py39+ + functools: 3.8- + importlib: 3.9- # Namespace package on py39+ + xml: 3.8-3.8 # Namespace package on py38 only + "; + + let db = TestDb::new(); + + let src = FileSystemPathBuf::from("src"); + let site_packages = FileSystemPathBuf::from("site_packages"); + let 
custom_typeshed = FileSystemPathBuf::from("typeshed"); + + let fs = db.memory_file_system(); + + fs.create_directory_all(&src)?; + fs.create_directory_all(&site_packages)?; + fs.create_directory_all(&custom_typeshed)?; + fs.write_file(custom_typeshed.join("stdlib/VERSIONS"), VERSIONS_DATA)?; + + // Regular package on py38+ + fs.create_directory_all(custom_typeshed.join("stdlib/asyncio"))?; + fs.touch(custom_typeshed.join("stdlib/asyncio/__init__.pyi"))?; + fs.write_file( + custom_typeshed.join("stdlib/asyncio/tasks.pyi"), + "class Task: ...", + )?; + + // Regular package on py39+ + fs.create_directory_all(custom_typeshed.join("stdlib/collections"))?; + fs.touch(custom_typeshed.join("stdlib/collections/__init__.pyi"))?; + + // Namespace package on py38 only + fs.create_directory_all(custom_typeshed.join("stdlib/xml"))?; + fs.touch(custom_typeshed.join("stdlib/xml/etree.pyi"))?; + + // Namespace package on py39+ + fs.create_directory_all(custom_typeshed.join("stdlib/importlib"))?; + fs.touch(custom_typeshed.join("stdlib/importlib/abc.pyi"))?; + + fs.write_file( + custom_typeshed.join("stdlib/functools.pyi"), + "def update_wrapper(): ...", + )?; + + Ok(TestCaseBuilder { + db, + src, + custom_typeshed, + site_packages, + target_version: None, + }) + } } diff --git a/crates/red_knot_module_resolver/src/lib.rs b/crates/red_knot_module_resolver/src/lib.rs index 72be73c55d..d6ec501ccb 100644 --- a/crates/red_knot_module_resolver/src/lib.rs +++ b/crates/red_knot_module_resolver/src/lib.rs @@ -1,9 +1,15 @@ mod db; mod module; +mod module_name; +mod path; mod resolver; +mod state; +mod supported_py_version; mod typeshed; pub use db::{Db, Jar}; -pub use module::{Module, ModuleKind, ModuleName}; -pub use resolver::{resolve_module, set_module_resolution_settings, ModuleResolutionSettings}; -pub use typeshed::versions::TypeshedVersions; +pub use module::{Module, ModuleKind}; +pub use module_name::ModuleName; +pub use resolver::{resolve_module, set_module_resolution_settings, 
RawModuleResolutionSettings}; +pub use supported_py_version::TargetVersion; +pub use typeshed::{TypeshedVersionsParseError, TypeshedVersionsParseErrorKind}; diff --git a/crates/red_knot_module_resolver/src/module.rs b/crates/red_knot_module_resolver/src/module.rs index 8657c4a196..bc2eb4358f 100644 --- a/crates/red_knot_module_resolver/src/module.rs +++ b/crates/red_knot_module_resolver/src/module.rs @@ -1,188 +1,11 @@ -use compact_str::ToCompactString; use std::fmt::Formatter; -use std::ops::Deref; use std::sync::Arc; -use ruff_db::file_system::FileSystemPath; -use ruff_db::vfs::{VfsFile, VfsPath}; -use ruff_python_stdlib::identifiers::is_identifier; +use ruff_db::vfs::VfsFile; -use crate::Db; - -/// A module name, e.g. `foo.bar`. -/// -/// Always normalized to the absolute form (never a relative module name, i.e., never `.foo`). -#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)] -pub struct ModuleName(compact_str::CompactString); - -impl ModuleName { - /// Creates a new module name for `name`. Returns `Some` if `name` is a valid, absolute - /// module name and `None` otherwise. - /// - /// The module name is invalid if: - /// - /// * The name is empty - /// * The name is relative - /// * The name ends with a `.` - /// * The name contains a sequence of multiple dots - /// * A component of a name (the part between two dots) isn't a valid python identifier. - #[inline] - pub fn new(name: &str) -> Option { - Self::is_valid_name(name).then(|| Self(compact_str::CompactString::from(name))) - } - - /// Creates a new module name for `name` where `name` is a static string. - /// Returns `Some` if `name` is a valid, absolute module name and `None` otherwise. - /// - /// The module name is invalid if: - /// - /// * The name is empty - /// * The name is relative - /// * The name ends with a `.` - /// * The name contains a sequence of multiple dots - /// * A component of a name (the part between two dots) isn't a valid python identifier. 
- /// - /// ## Examples - /// - /// ``` - /// use red_knot_module_resolver::ModuleName; - /// - /// assert_eq!(ModuleName::new_static("foo.bar").as_deref(), Some("foo.bar")); - /// assert_eq!(ModuleName::new_static(""), None); - /// assert_eq!(ModuleName::new_static("..foo"), None); - /// assert_eq!(ModuleName::new_static(".foo"), None); - /// assert_eq!(ModuleName::new_static("foo."), None); - /// assert_eq!(ModuleName::new_static("foo..bar"), None); - /// assert_eq!(ModuleName::new_static("2000"), None); - /// ``` - #[inline] - pub fn new_static(name: &'static str) -> Option { - // TODO(Micha): Use CompactString::const_new once we upgrade to 0.8 https://github.com/ParkMyCar/compact_str/pull/336 - Self::is_valid_name(name).then(|| Self(compact_str::CompactString::from(name))) - } - - fn is_valid_name(name: &str) -> bool { - if name.is_empty() { - return false; - } - - name.split('.').all(is_identifier) - } - - /// An iterator over the components of the module name: - /// - /// # Examples - /// - /// ``` - /// use red_knot_module_resolver::ModuleName; - /// - /// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().components().collect::>(), vec!["foo", "bar", "baz"]); - /// ``` - pub fn components(&self) -> impl DoubleEndedIterator { - self.0.split('.') - } - - /// The name of this module's immediate parent, if it has a parent. - /// - /// # Examples - /// - /// ``` - /// use red_knot_module_resolver::ModuleName; - /// - /// assert_eq!(ModuleName::new_static("foo.bar").unwrap().parent(), Some(ModuleName::new_static("foo").unwrap())); - /// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().parent(), Some(ModuleName::new_static("foo.bar").unwrap())); - /// assert_eq!(ModuleName::new_static("root").unwrap().parent(), None); - /// ``` - pub fn parent(&self) -> Option { - let (parent, _) = self.0.rsplit_once('.')?; - Some(Self(parent.to_compact_string())) - } - - /// Returns `true` if the name starts with `other`. 
- /// - /// This is equivalent to checking if `self` is a sub-module of `other`. - /// - /// # Examples - /// - /// ``` - /// use red_knot_module_resolver::ModuleName; - /// - /// assert!(ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap())); - /// - /// assert!(!ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("bar").unwrap())); - /// assert!(!ModuleName::new_static("foo_bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap())); - /// ``` - pub fn starts_with(&self, other: &ModuleName) -> bool { - let mut self_components = self.components(); - let other_components = other.components(); - - for other_component in other_components { - if self_components.next() != Some(other_component) { - return false; - } - } - - true - } - - #[inline] - pub fn as_str(&self) -> &str { - &self.0 - } - - pub(crate) fn from_relative_path(path: &FileSystemPath) -> Option { - let path = if path.ends_with("__init__.py") || path.ends_with("__init__.pyi") { - path.parent()? - } else { - path - }; - - let name = if let Some(parent) = path.parent() { - let mut name = compact_str::CompactString::with_capacity(path.as_str().len()); - - for component in parent.components() { - name.push_str(component.as_os_str().to_str()?); - name.push('.'); - } - - // SAFETY: Unwrap is safe here or `parent` would have returned `None`. 
- name.push_str(path.file_stem().unwrap()); - - name - } else { - path.file_stem()?.to_compact_string() - }; - - Some(Self(name)) - } -} - -impl Deref for ModuleName { - type Target = str; - - #[inline] - fn deref(&self) -> &Self::Target { - self.as_str() - } -} - -impl PartialEq for ModuleName { - fn eq(&self, other: &str) -> bool { - self.as_str() == other - } -} - -impl PartialEq for str { - fn eq(&self, other: &ModuleName) -> bool { - self == other.as_str() - } -} - -impl std::fmt::Display for ModuleName { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.write_str(&self.0) - } -} +use crate::db::Db; +use crate::module_name::ModuleName; +use crate::path::{ModuleResolutionPathBuf, ModuleResolutionPathRef}; /// Representation of a Python module. #[derive(Clone, PartialEq, Eq)] @@ -194,7 +17,7 @@ impl Module { pub(crate) fn new( name: ModuleName, kind: ModuleKind, - search_path: ModuleSearchPath, + search_path: Arc, file: VfsFile, ) -> Self { Self { @@ -218,8 +41,8 @@ impl Module { } /// The search path from which the module was resolved. - pub fn search_path(&self) -> &ModuleSearchPath { - &self.inner.search_path + pub(crate) fn search_path(&self) -> ModuleResolutionPathRef { + ModuleResolutionPathRef::from(&*self.inner.search_path) } /// Determine whether this module is a single-file module or a package @@ -254,7 +77,7 @@ impl salsa::DebugWithDb for Module { struct ModuleInner { name: ModuleName, kind: ModuleKind, - search_path: ModuleSearchPath, + search_path: Arc, file: VfsFile, } @@ -266,78 +89,3 @@ pub enum ModuleKind { /// A python package (`foo/__init__.py` or `foo/__init__.pyi`) Package, } - -/// A search path in which to search modules. -/// Corresponds to a path in [`sys.path`](https://docs.python.org/3/library/sys_path_init.html) at runtime. -/// -/// Cloning a search path is cheap because it's an `Arc`. -#[derive(Clone, PartialEq, Eq)] -pub struct ModuleSearchPath { - inner: Arc, -} - -impl ModuleSearchPath { - pub fn new

(path: P, kind: ModuleSearchPathKind) -> Self - where - P: Into, - { - Self { - inner: Arc::new(ModuleSearchPathInner { - path: path.into(), - kind, - }), - } - } - - /// Determine whether this is a first-party, third-party or standard-library search path - pub fn kind(&self) -> ModuleSearchPathKind { - self.inner.kind - } - - /// Return the location of the search path on the file system - pub fn path(&self) -> &VfsPath { - &self.inner.path - } -} - -impl std::fmt::Debug for ModuleSearchPath { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ModuleSearchPath") - .field("path", &self.inner.path) - .field("kind", &self.kind()) - .finish() - } -} - -#[derive(Eq, PartialEq)] -struct ModuleSearchPathInner { - path: VfsPath, - kind: ModuleSearchPathKind, -} - -/// Enumeration of the different kinds of search paths type checkers are expected to support. -/// -/// N.B. Although we don't implement `Ord` for this enum, they are ordered in terms of the -/// priority that we want to give these modules when resolving them. -/// This is roughly [the order given in the typing spec], but typeshed's stubs -/// for the standard library are moved higher up to match Python's semantics at runtime. -/// -/// [the order given in the typing spec]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering -#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] -pub enum ModuleSearchPathKind { - /// "Extra" paths provided by the user in a config file, env var or CLI flag. - /// E.g. 
mypy's `MYPYPATH` env var, or pyright's `stubPath` configuration setting - Extra, - - /// Files in the project we're directly being invoked on - FirstParty, - - /// The `stdlib` directory of typeshed (either vendored or custom) - StandardLibrary, - - /// Stubs or runtime modules installed in site-packages - SitePackagesThirdParty, - - /// Vendored third-party stubs from typeshed - VendoredThirdParty, -} diff --git a/crates/red_knot_module_resolver/src/module_name.rs b/crates/red_knot_module_resolver/src/module_name.rs new file mode 100644 index 0000000000..8752f5577f --- /dev/null +++ b/crates/red_knot_module_resolver/src/module_name.rs @@ -0,0 +1,199 @@ +use std::fmt; +use std::ops::Deref; + +use compact_str::{CompactString, ToCompactString}; + +use ruff_python_stdlib::identifiers::is_identifier; + +/// A module name, e.g. `foo.bar`. +/// +/// Always normalized to the absolute form (never a relative module name, i.e., never `.foo`). +#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)] +pub struct ModuleName(compact_str::CompactString); + +impl ModuleName { + /// Creates a new module name for `name`. Returns `Some` if `name` is a valid, absolute + /// module name and `None` otherwise. + /// + /// The module name is invalid if: + /// + /// * The name is empty + /// * The name is relative + /// * The name ends with a `.` + /// * The name contains a sequence of multiple dots + /// * A component of a name (the part between two dots) isn't a valid python identifier. + #[inline] + #[must_use] + pub fn new(name: &str) -> Option { + Self::is_valid_name(name).then(|| Self(CompactString::from(name))) + } + + /// Creates a new module name for `name` where `name` is a static string. + /// Returns `Some` if `name` is a valid, absolute module name and `None` otherwise. 
+ /// + /// The module name is invalid if: + /// + /// * The name is empty + /// * The name is relative + /// * The name ends with a `.` + /// * The name contains a sequence of multiple dots + /// * A component of a name (the part between two dots) isn't a valid python identifier. + /// + /// ## Examples + /// + /// ``` + /// use red_knot_module_resolver::ModuleName; + /// + /// assert_eq!(ModuleName::new_static("foo.bar").as_deref(), Some("foo.bar")); + /// assert_eq!(ModuleName::new_static(""), None); + /// assert_eq!(ModuleName::new_static("..foo"), None); + /// assert_eq!(ModuleName::new_static(".foo"), None); + /// assert_eq!(ModuleName::new_static("foo."), None); + /// assert_eq!(ModuleName::new_static("foo..bar"), None); + /// assert_eq!(ModuleName::new_static("2000"), None); + /// ``` + #[inline] + #[must_use] + pub fn new_static(name: &'static str) -> Option { + // TODO(Micha): Use CompactString::const_new once we upgrade to 0.8 https://github.com/ParkMyCar/compact_str/pull/336 + Self::is_valid_name(name).then(|| Self(CompactString::from(name))) + } + + #[must_use] + fn is_valid_name(name: &str) -> bool { + !name.is_empty() && name.split('.').all(is_identifier) + } + + /// An iterator over the components of the module name: + /// + /// # Examples + /// + /// ``` + /// use red_knot_module_resolver::ModuleName; + /// + /// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().components().collect::>(), vec!["foo", "bar", "baz"]); + /// ``` + #[must_use] + pub fn components(&self) -> impl DoubleEndedIterator { + self.0.split('.') + } + + /// The name of this module's immediate parent, if it has a parent. 
+ /// + /// # Examples + /// + /// ``` + /// use red_knot_module_resolver::ModuleName; + /// + /// assert_eq!(ModuleName::new_static("foo.bar").unwrap().parent(), Some(ModuleName::new_static("foo").unwrap())); + /// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().parent(), Some(ModuleName::new_static("foo.bar").unwrap())); + /// assert_eq!(ModuleName::new_static("root").unwrap().parent(), None); + /// ``` + #[must_use] + pub fn parent(&self) -> Option { + let (parent, _) = self.0.rsplit_once('.')?; + Some(Self(parent.to_compact_string())) + } + + /// Returns `true` if the name starts with `other`. + /// + /// This is equivalent to checking if `self` is a sub-module of `other`. + /// + /// # Examples + /// + /// ``` + /// use red_knot_module_resolver::ModuleName; + /// + /// assert!(ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap())); + /// + /// assert!(!ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("bar").unwrap())); + /// assert!(!ModuleName::new_static("foo_bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap())); + /// ``` + #[must_use] + pub fn starts_with(&self, other: &ModuleName) -> bool { + let mut self_components = self.components(); + let other_components = other.components(); + + for other_component in other_components { + if self_components.next() != Some(other_component) { + return false; + } + } + + true + } + + #[must_use] + #[inline] + pub fn as_str(&self) -> &str { + &self.0 + } + + /// Construct a [`ModuleName`] from a sequence of parts. 
+ /// + /// # Examples + /// + /// ``` + /// use red_knot_module_resolver::ModuleName; + /// + /// assert_eq!(&*ModuleName::from_components(["a"]).unwrap(), "a"); + /// assert_eq!(&*ModuleName::from_components(["a", "b"]).unwrap(), "a.b"); + /// assert_eq!(&*ModuleName::from_components(["a", "b", "c"]).unwrap(), "a.b.c"); + /// + /// assert_eq!(ModuleName::from_components(["a-b"]), None); + /// assert_eq!(ModuleName::from_components(["a", "a-b"]), None); + /// assert_eq!(ModuleName::from_components(["a", "b", "a-b-c"]), None); + /// ``` + #[must_use] + pub fn from_components<'a>(components: impl IntoIterator) -> Option { + let mut components = components.into_iter(); + let first_part = components.next()?; + if !is_identifier(first_part) { + return None; + } + let name = if let Some(second_part) = components.next() { + if !is_identifier(second_part) { + return None; + } + let mut name = format!("{first_part}.{second_part}"); + for part in components { + if !is_identifier(part) { + return None; + } + name.push('.'); + name.push_str(part); + } + CompactString::from(&name) + } else { + CompactString::from(first_part) + }; + Some(Self(name)) + } +} + +impl Deref for ModuleName { + type Target = str; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +impl PartialEq for ModuleName { + fn eq(&self, other: &str) -> bool { + self.as_str() == other + } +} + +impl PartialEq for str { + fn eq(&self, other: &ModuleName) -> bool { + self == other.as_str() + } +} + +impl std::fmt::Display for ModuleName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0) + } +} diff --git a/crates/red_knot_module_resolver/src/path.rs b/crates/red_knot_module_resolver/src/path.rs new file mode 100644 index 0000000000..70a8ea4832 --- /dev/null +++ b/crates/red_knot_module_resolver/src/path.rs @@ -0,0 +1,997 @@ +/// Internal abstractions for differentiating between different kinds of search paths. 
+/// +/// TODO(Alex): Should we use different types for absolute vs relative paths? +/// +use std::fmt; + +use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf}; +use ruff_db::vfs::{system_path_to_file, VfsFile}; + +use crate::module_name::ModuleName; +use crate::state::ResolverState; +use crate::typeshed::TypeshedVersionsQueryResult; + +/// Enumeration of the different kinds of search paths type checkers are expected to support. +/// +/// N.B. Although we don't implement `Ord` for this enum, they are ordered in terms of the +/// priority that we want to give these modules when resolving them, +/// as per [the order given in the typing spec] +/// +/// [the order given in the typing spec]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum ModuleResolutionPathBufInner { + Extra(FileSystemPathBuf), + FirstParty(FileSystemPathBuf), + StandardLibrary(FileSystemPathBuf), + SitePackages(FileSystemPathBuf), +} + +impl ModuleResolutionPathBufInner { + fn push(&mut self, component: &str) { + let extension = camino::Utf8Path::new(component).extension(); + let inner = match self { + Self::Extra(ref mut path) => { + if let Some(extension) = extension { + assert!( + matches!(extension, "pyi" | "py"), + "Extension must be `py` or `pyi`; got `{extension}`" + ); + } + path + } + Self::FirstParty(ref mut path) => { + if let Some(extension) = extension { + assert!( + matches!(extension, "pyi" | "py"), + "Extension must be `py` or `pyi`; got `{extension}`" + ); + } + path + } + Self::StandardLibrary(ref mut path) => { + if let Some(extension) = extension { + assert_eq!( + extension, "pyi", + "Extension must be `pyi`; got `{extension}`" + ); + } + path + } + Self::SitePackages(ref mut path) => { + if let Some(extension) = extension { + assert!( + matches!(extension, "pyi" | "py"), + "Extension must be `py` or `pyi`; got `{extension}`" + ); + } + path + } + }; + assert!( + 
inner.extension().is_none(), + "Cannot push part {component} to {inner}, which already has an extension" + ); + inner.push(component); + } +} + +#[derive(Clone, PartialEq, Eq, Hash)] +pub(crate) struct ModuleResolutionPathBuf(ModuleResolutionPathBufInner); + +impl ModuleResolutionPathBuf { + /// Push a new part to the path, + /// while maintaining the invariant that the path can only have `.py` or `.pyi` extensions. + /// For the stdlib variant specifically, it may only have a `.pyi` extension. + /// + /// ## Panics: + /// If a component with an invalid extension is passed + pub(crate) fn push(&mut self, component: &str) { + self.0.push(component); + } + + #[must_use] + pub(crate) fn extra(path: impl Into) -> Option { + let path = path.into(); + path.extension() + .map_or(true, |ext| matches!(ext, "py" | "pyi")) + .then_some(Self(ModuleResolutionPathBufInner::Extra(path))) + } + + #[must_use] + pub(crate) fn first_party(path: impl Into) -> Option { + let path = path.into(); + path.extension() + .map_or(true, |ext| matches!(ext, "pyi" | "py")) + .then_some(Self(ModuleResolutionPathBufInner::FirstParty(path))) + } + + #[must_use] + pub(crate) fn standard_library(path: impl Into) -> Option { + let path = path.into(); + path.extension() + .map_or(true, |ext| ext == "pyi") + .then_some(Self(ModuleResolutionPathBufInner::StandardLibrary(path))) + } + + #[must_use] + pub(crate) fn stdlib_from_typeshed_root(typeshed_root: &FileSystemPath) -> Option { + Self::standard_library(typeshed_root.join(FileSystemPath::new("stdlib"))) + } + + #[must_use] + pub(crate) fn site_packages(path: impl Into) -> Option { + let path = path.into(); + path.extension() + .map_or(true, |ext| matches!(ext, "pyi" | "py")) + .then_some(Self(ModuleResolutionPathBufInner::SitePackages(path))) + } + + #[must_use] + pub(crate) fn is_regular_package(&self, search_path: &Self, resolver: &ResolverState) -> bool { + ModuleResolutionPathRef::from(self).is_regular_package(search_path, resolver) + } + + 
#[must_use] + pub(crate) fn is_directory(&self, search_path: &Self, resolver: &ResolverState) -> bool { + ModuleResolutionPathRef::from(self).is_directory(search_path, resolver) + } + + #[must_use] + pub(crate) fn with_pyi_extension(&self) -> Self { + ModuleResolutionPathRef::from(self).with_pyi_extension() + } + + #[must_use] + pub(crate) fn with_py_extension(&self) -> Option { + ModuleResolutionPathRef::from(self).with_py_extension() + } + + #[must_use] + pub(crate) fn relativize_path<'a>( + &'a self, + absolute_path: &'a (impl AsRef + ?Sized), + ) -> Option> { + ModuleResolutionPathRef::from(self).relativize_path(absolute_path.as_ref()) + } + + /// Returns `None` if the path doesn't exist, isn't accessible, or if the path points to a directory. + pub(crate) fn to_vfs_file( + &self, + search_path: &Self, + resolver: &ResolverState, + ) -> Option { + ModuleResolutionPathRef::from(self).to_vfs_file(search_path, resolver) + } +} + +impl fmt::Debug for ModuleResolutionPathBuf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let (name, path) = match &self.0 { + ModuleResolutionPathBufInner::Extra(path) => ("Extra", path), + ModuleResolutionPathBufInner::FirstParty(path) => ("FirstParty", path), + ModuleResolutionPathBufInner::SitePackages(path) => ("SitePackages", path), + ModuleResolutionPathBufInner::StandardLibrary(path) => ("StandardLibrary", path), + }; + f.debug_tuple(&format!("ModuleResolutionPathBuf::{name}")) + .field(path) + .finish() + } +} + +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] +enum ModuleResolutionPathRefInner<'a> { + Extra(&'a FileSystemPath), + FirstParty(&'a FileSystemPath), + StandardLibrary(&'a FileSystemPath), + SitePackages(&'a FileSystemPath), +} + +impl<'a> ModuleResolutionPathRefInner<'a> { + #[must_use] + fn query_stdlib_version<'db>( + module_path: &'a FileSystemPath, + stdlib_search_path: Self, + stdlib_root: &FileSystemPath, + resolver_state: &ResolverState<'db>, + ) -> TypeshedVersionsQueryResult { + let 
Some(module_name) = stdlib_search_path + .relativize_path(module_path) + .and_then(Self::to_module_name) + else { + return TypeshedVersionsQueryResult::DoesNotExist; + }; + let ResolverState { + db, + typeshed_versions, + target_version, + } = resolver_state; + typeshed_versions.query_module(&module_name, *db, stdlib_root, *target_version) + } + + #[must_use] + fn is_directory(&self, search_path: Self, resolver: &ResolverState) -> bool { + match (self, search_path) { + (Self::Extra(path), Self::Extra(_)) => resolver.file_system().is_directory(path), + (Self::FirstParty(path), Self::FirstParty(_)) => resolver.file_system().is_directory(path), + (Self::SitePackages(path), Self::SitePackages(_)) => resolver.file_system().is_directory(path), + (Self::StandardLibrary(path), Self::StandardLibrary(stdlib_root)) => { + match Self::query_stdlib_version( path, search_path, stdlib_root, resolver) { + TypeshedVersionsQueryResult::DoesNotExist => false, + TypeshedVersionsQueryResult::Exists => resolver.file_system().is_directory(path), + TypeshedVersionsQueryResult::MaybeExists => resolver.file_system().is_directory(path), + } + } + (path, root) => unreachable!( + "The search path should always be the same variant as `self` (got: {path:?}, {root:?})" + ) + } + } + + #[must_use] + fn is_regular_package(&self, search_path: Self, resolver: &ResolverState) -> bool { + fn is_non_stdlib_pkg(state: &ResolverState, path: &FileSystemPath) -> bool { + let file_system = state.file_system(); + file_system.exists(&path.join("__init__.py")) + || file_system.exists(&path.join("__init__.pyi")) + } + + match (self, search_path) { + (Self::Extra(path), Self::Extra(_)) => is_non_stdlib_pkg(resolver, path), + (Self::FirstParty(path), Self::FirstParty(_)) => is_non_stdlib_pkg(resolver, path), + (Self::SitePackages(path), Self::SitePackages(_)) => is_non_stdlib_pkg(resolver, path), + // Unlike the other variants: + // (1) Account for VERSIONS + // (2) Only test for `__init__.pyi`, not `__init__.py` 
+ (Self::StandardLibrary(path), Self::StandardLibrary(stdlib_root)) => { + match Self::query_stdlib_version( path, search_path, stdlib_root, resolver) { + TypeshedVersionsQueryResult::DoesNotExist => false, + TypeshedVersionsQueryResult::Exists => resolver.db.file_system().exists(&path.join("__init__.pyi")), + TypeshedVersionsQueryResult::MaybeExists => resolver.db.file_system().exists(&path.join("__init__.pyi")), + } + } + (path, root) => unreachable!( + "The search path should always be the same variant as `self` (got: {path:?}, {root:?})" + ) + } + } + + fn to_vfs_file(self, search_path: Self, resolver: &ResolverState) -> Option { + match (self, search_path) { + (Self::Extra(path), Self::Extra(_)) => system_path_to_file(resolver.db.upcast(), path), + (Self::FirstParty(path), Self::FirstParty(_)) => system_path_to_file(resolver.db.upcast(), path), + (Self::SitePackages(path), Self::SitePackages(_)) => { + system_path_to_file(resolver.db.upcast(), path) + } + (Self::StandardLibrary(path), Self::StandardLibrary(stdlib_root)) => { + match Self::query_stdlib_version(path, search_path, stdlib_root, resolver) { + TypeshedVersionsQueryResult::DoesNotExist => None, + TypeshedVersionsQueryResult::Exists => system_path_to_file(resolver.db.upcast(), path), + TypeshedVersionsQueryResult::MaybeExists => system_path_to_file(resolver.db.upcast(), path) + } + } + (path, root) => unreachable!( + "The search path should always be the same variant as `self` (got: {path:?}, {root:?})" + ) + } + } + + #[must_use] + fn to_module_name(self) -> Option { + let (fs_path, skip_final_part) = match self { + Self::Extra(path) | Self::FirstParty(path) | Self::SitePackages(path) => ( + path, + path.ends_with("__init__.py") || path.ends_with("__init__.pyi"), + ), + Self::StandardLibrary(path) => (path, path.ends_with("__init__.pyi")), + }; + + let parent_components = fs_path + .parent()? 
+ .components() + .map(|component| component.as_str()); + + if skip_final_part { + ModuleName::from_components(parent_components) + } else { + ModuleName::from_components(parent_components.chain(fs_path.file_stem())) + } + } + + #[must_use] + fn with_pyi_extension(&self) -> ModuleResolutionPathBufInner { + match self { + Self::Extra(path) => ModuleResolutionPathBufInner::Extra(path.with_extension("pyi")), + Self::FirstParty(path) => { + ModuleResolutionPathBufInner::FirstParty(path.with_extension("pyi")) + } + Self::StandardLibrary(path) => { + ModuleResolutionPathBufInner::StandardLibrary(path.with_extension("pyi")) + } + Self::SitePackages(path) => { + ModuleResolutionPathBufInner::SitePackages(path.with_extension("pyi")) + } + } + } + + #[must_use] + fn with_py_extension(&self) -> Option { + match self { + Self::Extra(path) => Some(ModuleResolutionPathBufInner::Extra( + path.with_extension("py"), + )), + Self::FirstParty(path) => Some(ModuleResolutionPathBufInner::FirstParty( + path.with_extension("py"), + )), + Self::StandardLibrary(_) => None, + Self::SitePackages(path) => Some(ModuleResolutionPathBufInner::SitePackages( + path.with_extension("py"), + )), + } + } + + #[must_use] + fn relativize_path(&self, absolute_path: &'a FileSystemPath) -> Option { + match self { + Self::Extra(root) => absolute_path.strip_prefix(root).ok().and_then(|path| { + path.extension() + .map_or(true, |ext| matches!(ext, "py" | "pyi")) + .then_some(Self::Extra(path)) + }), + Self::FirstParty(root) => absolute_path.strip_prefix(root).ok().and_then(|path| { + path.extension() + .map_or(true, |ext| matches!(ext, "pyi" | "py")) + .then_some(Self::FirstParty(path)) + }), + Self::StandardLibrary(root) => absolute_path.strip_prefix(root).ok().and_then(|path| { + path.extension() + .map_or(true, |ext| ext == "pyi") + .then_some(Self::StandardLibrary(path)) + }), + Self::SitePackages(root) => absolute_path.strip_prefix(root).ok().and_then(|path| { + path.extension() + .map_or(true, |ext| 
matches!(ext, "pyi" | "py")) + .then_some(Self::SitePackages(path)) + }), + } + } +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub(crate) struct ModuleResolutionPathRef<'a>(ModuleResolutionPathRefInner<'a>); + +impl<'a> ModuleResolutionPathRef<'a> { + #[must_use] + pub(crate) fn is_directory( + &self, + search_path: impl Into, + resolver: &ResolverState, + ) -> bool { + self.0.is_directory(search_path.into().0, resolver) + } + + #[must_use] + pub(crate) fn is_regular_package( + &self, + search_path: impl Into, + resolver: &ResolverState, + ) -> bool { + self.0.is_regular_package(search_path.into().0, resolver) + } + + #[must_use] + pub(crate) fn to_vfs_file( + self, + search_path: impl Into, + resolver: &ResolverState, + ) -> Option { + self.0.to_vfs_file(search_path.into().0, resolver) + } + + #[must_use] + pub(crate) fn to_module_name(self) -> Option { + self.0.to_module_name() + } + + #[must_use] + pub(crate) fn with_pyi_extension(&self) -> ModuleResolutionPathBuf { + ModuleResolutionPathBuf(self.0.with_pyi_extension()) + } + + #[must_use] + pub(crate) fn with_py_extension(self) -> Option { + self.0.with_py_extension().map(ModuleResolutionPathBuf) + } + + #[must_use] + pub(crate) fn relativize_path(&self, absolute_path: &'a FileSystemPath) -> Option { + self.0.relativize_path(absolute_path).map(Self) + } +} + +impl fmt::Debug for ModuleResolutionPathRef<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let (name, path) = match &self.0 { + ModuleResolutionPathRefInner::Extra(path) => ("Extra", path), + ModuleResolutionPathRefInner::FirstParty(path) => ("FirstParty", path), + ModuleResolutionPathRefInner::SitePackages(path) => ("SitePackages", path), + ModuleResolutionPathRefInner::StandardLibrary(path) => ("StandardLibrary", path), + }; + f.debug_tuple(&format!("ModuleResolutionPathRef::{name}")) + .field(path) + .finish() + } +} + +impl<'a> From<&'a ModuleResolutionPathBuf> for ModuleResolutionPathRef<'a> { + fn from(value: &'a 
ModuleResolutionPathBuf) -> Self { + let inner = match &value.0 { + ModuleResolutionPathBufInner::Extra(path) => ModuleResolutionPathRefInner::Extra(path), + ModuleResolutionPathBufInner::FirstParty(path) => { + ModuleResolutionPathRefInner::FirstParty(path) + } + ModuleResolutionPathBufInner::StandardLibrary(path) => { + ModuleResolutionPathRefInner::StandardLibrary(path) + } + ModuleResolutionPathBufInner::SitePackages(path) => { + ModuleResolutionPathRefInner::SitePackages(path) + } + }; + ModuleResolutionPathRef(inner) + } +} + +impl PartialEq for ModuleResolutionPathRef<'_> { + fn eq(&self, other: &FileSystemPath) -> bool { + let fs_path = match self.0 { + ModuleResolutionPathRefInner::Extra(path) => path, + ModuleResolutionPathRefInner::FirstParty(path) => path, + ModuleResolutionPathRefInner::SitePackages(path) => path, + ModuleResolutionPathRefInner::StandardLibrary(path) => path, + }; + fs_path == other + } +} + +impl PartialEq> for FileSystemPath { + fn eq(&self, other: &ModuleResolutionPathRef) -> bool { + other == self + } +} + +impl PartialEq for ModuleResolutionPathRef<'_> { + fn eq(&self, other: &FileSystemPathBuf) -> bool { + self == &**other + } +} + +impl PartialEq> for FileSystemPathBuf { + fn eq(&self, other: &ModuleResolutionPathRef<'_>) -> bool { + &**self == other + } +} + +#[cfg(test)] +mod tests { + use insta::assert_debug_snapshot; + + use crate::db::tests::{create_resolver_builder, TestCase, TestDb}; + use crate::supported_py_version::TargetVersion; + use crate::typeshed::LazyTypeshedVersions; + + use super::*; + + impl ModuleResolutionPathBuf { + #[must_use] + pub(crate) fn join(&self, component: &str) -> Self { + ModuleResolutionPathRef::from(self).join(component) + } + } + + impl<'a> ModuleResolutionPathRef<'a> { + #[must_use] + fn join( + &self, + component: &'a (impl AsRef + ?Sized), + ) -> ModuleResolutionPathBuf { + let mut result = self.to_path_buf(); + result.push(component.as_ref().as_str()); + result + } + + #[must_use] + 
pub(crate) fn to_path_buf(self) -> ModuleResolutionPathBuf { + let inner = match self.0 { + ModuleResolutionPathRefInner::Extra(path) => { + ModuleResolutionPathBufInner::Extra(path.to_path_buf()) + } + ModuleResolutionPathRefInner::FirstParty(path) => { + ModuleResolutionPathBufInner::FirstParty(path.to_path_buf()) + } + ModuleResolutionPathRefInner::StandardLibrary(path) => { + ModuleResolutionPathBufInner::StandardLibrary(path.to_path_buf()) + } + ModuleResolutionPathRefInner::SitePackages(path) => { + ModuleResolutionPathBufInner::SitePackages(path.to_path_buf()) + } + }; + ModuleResolutionPathBuf(inner) + } + + #[must_use] + pub(crate) const fn is_stdlib_search_path(&self) -> bool { + matches!(&self.0, ModuleResolutionPathRefInner::StandardLibrary(_)) + } + } + + #[test] + fn constructor_rejects_non_pyi_stdlib_paths() { + assert_eq!(ModuleResolutionPathBuf::standard_library("foo.py"), None); + assert_eq!( + ModuleResolutionPathBuf::standard_library("foo/__init__.py"), + None + ); + } + + #[test] + fn path_buf_debug_impl() { + assert_debug_snapshot!( + ModuleResolutionPathBuf::standard_library("foo/bar.pyi").unwrap(), + @r###" + ModuleResolutionPathBuf::StandardLibrary( + "foo/bar.pyi", + ) + "### + ); + } + + #[test] + fn path_ref_debug_impl() { + assert_debug_snapshot!( + ModuleResolutionPathRef(ModuleResolutionPathRefInner::Extra(FileSystemPath::new("foo/bar.py"))), + @r###" + ModuleResolutionPathRef::Extra( + "foo/bar.py", + ) + "### + ); + } + + #[test] + fn with_extension_methods() { + assert_eq!( + ModuleResolutionPathBuf::standard_library("foo") + .unwrap() + .with_py_extension(), + None + ); + + assert_eq!( + ModuleResolutionPathBuf::standard_library("foo") + .unwrap() + .with_pyi_extension(), + ModuleResolutionPathBuf(ModuleResolutionPathBufInner::StandardLibrary( + FileSystemPathBuf::from("foo.pyi") + )) + ); + + assert_eq!( + ModuleResolutionPathBuf::first_party("foo/bar") + .unwrap() + .with_py_extension() + .unwrap(), + 
ModuleResolutionPathBuf(ModuleResolutionPathBufInner::FirstParty( + FileSystemPathBuf::from("foo/bar.py") + )) + ); + } + + #[test] + fn module_name_1_part() { + assert_eq!( + ModuleResolutionPathRef(ModuleResolutionPathRefInner::Extra(FileSystemPath::new( + "foo" + ))) + .to_module_name(), + ModuleName::new_static("foo") + ); + + assert_eq!( + ModuleResolutionPathRef(ModuleResolutionPathRefInner::StandardLibrary( + FileSystemPath::new("foo.pyi") + )) + .to_module_name(), + ModuleName::new_static("foo") + ); + + assert_eq!( + ModuleResolutionPathRef(ModuleResolutionPathRefInner::FirstParty( + FileSystemPath::new("foo/__init__.py") + )) + .to_module_name(), + ModuleName::new_static("foo") + ); + } + + #[test] + fn module_name_2_parts() { + assert_eq!( + ModuleResolutionPathRef(ModuleResolutionPathRefInner::StandardLibrary( + FileSystemPath::new("foo/bar") + )) + .to_module_name(), + ModuleName::new_static("foo.bar") + ); + + assert_eq!( + ModuleResolutionPathRef(ModuleResolutionPathRefInner::Extra(FileSystemPath::new( + "foo/bar.pyi" + ))) + .to_module_name(), + ModuleName::new_static("foo.bar") + ); + + assert_eq!( + ModuleResolutionPathRef(ModuleResolutionPathRefInner::SitePackages( + FileSystemPath::new("foo/bar/__init__.pyi") + )) + .to_module_name(), + ModuleName::new_static("foo.bar") + ); + } + + #[test] + fn module_name_3_parts() { + assert_eq!( + ModuleResolutionPathRef(ModuleResolutionPathRefInner::SitePackages( + FileSystemPath::new("foo/bar/__init__.pyi") + )) + .to_module_name(), + ModuleName::new_static("foo.bar") + ); + + assert_eq!( + ModuleResolutionPathRef(ModuleResolutionPathRefInner::SitePackages( + FileSystemPath::new("foo/bar/baz") + )) + .to_module_name(), + ModuleName::new_static("foo.bar.baz") + ); + } + + #[test] + fn join() { + assert_eq!( + ModuleResolutionPathBuf::standard_library("foo") + .unwrap() + .join("bar"), + ModuleResolutionPathBuf(ModuleResolutionPathBufInner::StandardLibrary( + FileSystemPathBuf::from("foo/bar") + )) + ); + 
assert_eq!( + ModuleResolutionPathBuf::standard_library("foo") + .unwrap() + .join("bar.pyi"), + ModuleResolutionPathBuf(ModuleResolutionPathBufInner::StandardLibrary( + FileSystemPathBuf::from("foo/bar.pyi") + )) + ); + assert_eq!( + ModuleResolutionPathBuf::extra("foo") + .unwrap() + .join("bar.py"), + ModuleResolutionPathBuf(ModuleResolutionPathBufInner::Extra( + FileSystemPathBuf::from("foo/bar.py") + )) + ); + } + + #[test] + #[should_panic(expected = "Extension must be `pyi`; got `py`")] + fn stdlib_path_invalid_join_py() { + ModuleResolutionPathBuf::standard_library("foo") + .unwrap() + .push("bar.py"); + } + + #[test] + #[should_panic(expected = "Extension must be `pyi`; got `rs`")] + fn stdlib_path_invalid_join_rs() { + ModuleResolutionPathBuf::standard_library("foo") + .unwrap() + .push("bar.rs"); + } + + #[test] + #[should_panic(expected = "Extension must be `py` or `pyi`; got `rs`")] + fn non_stdlib_path_invalid_join_rs() { + ModuleResolutionPathBuf::site_packages("foo") + .unwrap() + .push("bar.rs"); + } + + #[test] + #[should_panic(expected = "already has an extension")] + fn invalid_stdlib_join_too_many_extensions() { + ModuleResolutionPathBuf::standard_library("foo.pyi") + .unwrap() + .push("bar.pyi"); + } + + #[test] + fn relativize_stdlib_path_errors() { + let root = ModuleResolutionPathBuf::standard_library("foo/stdlib").unwrap(); + + // Must have a `.pyi` extension or no extension: + let bad_absolute_path = FileSystemPath::new("foo/stdlib/x.py"); + assert_eq!(root.relativize_path(bad_absolute_path), None); + let second_bad_absolute_path = FileSystemPath::new("foo/stdlib/x.rs"); + assert_eq!(root.relativize_path(second_bad_absolute_path), None); + + // Must be a path that is a child of `root`: + let third_bad_absolute_path = FileSystemPath::new("bar/stdlib/x.pyi"); + assert_eq!(root.relativize_path(third_bad_absolute_path), None); + } + + #[test] + fn relativize_non_stdlib_path_errors() { + let root = 
ModuleResolutionPathBuf::extra("foo/stdlib").unwrap(); + // Must have a `.py` extension, a `.pyi` extension, or no extension: + let bad_absolute_path = FileSystemPath::new("foo/stdlib/x.rs"); + assert_eq!(root.relativize_path(bad_absolute_path), None); + // Must be a path that is a child of `root`: + let second_bad_absolute_path = FileSystemPath::new("bar/stdlib/x.pyi"); + assert_eq!(root.relativize_path(second_bad_absolute_path), None); + } + + #[test] + fn relativize_path() { + assert_eq!( + ModuleResolutionPathBuf::standard_library("foo/baz") + .unwrap() + .relativize_path("foo/baz/eggs/__init__.pyi") + .unwrap(), + ModuleResolutionPathRef(ModuleResolutionPathRefInner::StandardLibrary( + FileSystemPath::new("eggs/__init__.pyi") + )) + ); + } + + fn py38_stdlib_test_case() -> (TestDb, ModuleResolutionPathBuf) { + let TestCase { + db, + custom_typeshed, + .. + } = create_resolver_builder().unwrap().build(); + let stdlib_module_path = + ModuleResolutionPathBuf::stdlib_from_typeshed_root(&custom_typeshed).unwrap(); + (db, stdlib_module_path) + } + + #[test] + fn mocked_typeshed_existing_regular_stdlib_pkg_py38() { + let (db, stdlib_path) = py38_stdlib_test_case(); + let resolver = ResolverState { + db: &db, + typeshed_versions: LazyTypeshedVersions::new(), + target_version: TargetVersion::Py38, + }; + + let asyncio_regular_package = stdlib_path.join("asyncio"); + assert!(asyncio_regular_package.is_directory(&stdlib_path, &resolver)); + assert!(asyncio_regular_package.is_regular_package(&stdlib_path, &resolver)); + // Paths to directories don't resolve to VfsFiles + assert_eq!( + asyncio_regular_package.to_vfs_file(&stdlib_path, &resolver), + None + ); + assert!(asyncio_regular_package + .join("__init__.pyi") + .to_vfs_file(&stdlib_path, &resolver) + .is_some()); + + // The `asyncio` package exists on Python 3.8, but the `asyncio.tasks` submodule does not, + // according to the `VERSIONS` file in our typeshed mock: + let asyncio_tasks_module = 
stdlib_path.join("asyncio/tasks.pyi"); + assert_eq!( + asyncio_tasks_module.to_vfs_file(&stdlib_path, &resolver), + None + ); + assert!(!asyncio_tasks_module.is_directory(&stdlib_path, &resolver)); + assert!(!asyncio_tasks_module.is_regular_package(&stdlib_path, &resolver)); + } + + #[test] + fn mocked_typeshed_existing_namespace_stdlib_pkg_py38() { + let (db, stdlib_path) = py38_stdlib_test_case(); + let resolver = ResolverState { + db: &db, + typeshed_versions: LazyTypeshedVersions::new(), + target_version: TargetVersion::Py38, + }; + + let xml_namespace_package = stdlib_path.join("xml"); + assert!(xml_namespace_package.is_directory(&stdlib_path, &resolver)); + // Paths to directories don't resolve to VfsFiles + assert_eq!( + xml_namespace_package.to_vfs_file(&stdlib_path, &resolver), + None + ); + assert!(!xml_namespace_package.is_regular_package(&stdlib_path, &resolver)); + + let xml_etree = stdlib_path.join("xml/etree.pyi"); + assert!(!xml_etree.is_directory(&stdlib_path, &resolver)); + assert!(xml_etree.to_vfs_file(&stdlib_path, &resolver).is_some()); + assert!(!xml_etree.is_regular_package(&stdlib_path, &resolver)); + } + + #[test] + fn mocked_typeshed_single_file_stdlib_module_py38() { + let (db, stdlib_path) = py38_stdlib_test_case(); + let resolver = ResolverState { + db: &db, + typeshed_versions: LazyTypeshedVersions::new(), + target_version: TargetVersion::Py38, + }; + + let functools_module = stdlib_path.join("functools.pyi"); + assert!(functools_module + .to_vfs_file(&stdlib_path, &resolver) + .is_some()); + assert!(!functools_module.is_directory(&stdlib_path, &resolver)); + assert!(!functools_module.is_regular_package(&stdlib_path, &resolver)); + } + + #[test] + fn mocked_typeshed_nonexistent_regular_stdlib_pkg_py38() { + let (db, stdlib_path) = py38_stdlib_test_case(); + let resolver = ResolverState { + db: &db, + typeshed_versions: LazyTypeshedVersions::new(), + target_version: TargetVersion::Py38, + }; + + let collections_regular_package = 
stdlib_path.join("collections"); + assert_eq!( + collections_regular_package.to_vfs_file(&stdlib_path, &resolver), + None + ); + assert!(!collections_regular_package.is_directory(&stdlib_path, &resolver)); + assert!(!collections_regular_package.is_regular_package(&stdlib_path, &resolver)); + } + + #[test] + fn mocked_typeshed_nonexistent_namespace_stdlib_pkg_py38() { + let (db, stdlib_path) = py38_stdlib_test_case(); + let resolver = ResolverState { + db: &db, + typeshed_versions: LazyTypeshedVersions::new(), + target_version: TargetVersion::Py38, + }; + + let importlib_namespace_package = stdlib_path.join("importlib"); + assert_eq!( + importlib_namespace_package.to_vfs_file(&stdlib_path, &resolver), + None + ); + assert!(!importlib_namespace_package.is_directory(&stdlib_path, &resolver)); + assert!(!importlib_namespace_package.is_regular_package(&stdlib_path, &resolver)); + + let importlib_abc = stdlib_path.join("importlib/abc.pyi"); + assert_eq!(importlib_abc.to_vfs_file(&stdlib_path, &resolver), None); + assert!(!importlib_abc.is_directory(&stdlib_path, &resolver)); + assert!(!importlib_abc.is_regular_package(&stdlib_path, &resolver)); + } + + #[test] + fn mocked_typeshed_nonexistent_single_file_module_py38() { + let (db, stdlib_path) = py38_stdlib_test_case(); + let resolver = ResolverState { + db: &db, + typeshed_versions: LazyTypeshedVersions::new(), + target_version: TargetVersion::Py38, + }; + + let non_existent = stdlib_path.join("doesnt_even_exist"); + assert_eq!(non_existent.to_vfs_file(&stdlib_path, &resolver), None); + assert!(!non_existent.is_directory(&stdlib_path, &resolver)); + assert!(!non_existent.is_regular_package(&stdlib_path, &resolver)); + } + + fn py39_stdlib_test_case() -> (TestDb, ModuleResolutionPathBuf) { + let TestCase { + db, + custom_typeshed, + .. 
+ } = create_resolver_builder() + .unwrap() + .with_target_version(TargetVersion::Py39) + .build(); + let stdlib_module_path = + ModuleResolutionPathBuf::stdlib_from_typeshed_root(&custom_typeshed).unwrap(); + (db, stdlib_module_path) + } + + #[test] + fn mocked_typeshed_existing_regular_stdlib_pkgs_py39() { + let (db, stdlib_path) = py39_stdlib_test_case(); + let resolver = ResolverState { + db: &db, + typeshed_versions: LazyTypeshedVersions::new(), + target_version: TargetVersion::Py39, + }; + + // Since we've set the target version to Py39, + // `collections` should now exist as a directory, according to VERSIONS... + let collections_regular_package = stdlib_path.join("collections"); + assert!(collections_regular_package.is_directory(&stdlib_path, &resolver)); + assert!(collections_regular_package.is_regular_package(&stdlib_path, &resolver)); + // (This is still `None`, as directories don't resolve to `Vfs` files) + assert_eq!( + collections_regular_package.to_vfs_file(&stdlib_path, &resolver), + None + ); + assert!(collections_regular_package + .join("__init__.pyi") + .to_vfs_file(&stdlib_path, &resolver) + .is_some()); + + // ...and so should the `asyncio.tasks` submodule (though it's still not a directory): + let asyncio_tasks_module = stdlib_path.join("asyncio/tasks.pyi"); + assert!(asyncio_tasks_module + .to_vfs_file(&stdlib_path, &resolver) + .is_some()); + assert!(!asyncio_tasks_module.is_directory(&stdlib_path, &resolver)); + assert!(!asyncio_tasks_module.is_regular_package(&stdlib_path, &resolver)); + } + + #[test] + fn mocked_typeshed_existing_namespace_stdlib_pkg_py39() { + let (db, stdlib_path) = py39_stdlib_test_case(); + let resolver = ResolverState { + db: &db, + typeshed_versions: LazyTypeshedVersions::new(), + target_version: TargetVersion::Py39, + }; + + // The `importlib` directory now also exists... 
+ let importlib_namespace_package = stdlib_path.join("importlib"); + assert!(importlib_namespace_package.is_directory(&stdlib_path, &resolver)); + assert!(!importlib_namespace_package.is_regular_package(&stdlib_path, &resolver)); + // (This is still `None`, as directories don't resolve to `Vfs` files) + assert_eq!( + importlib_namespace_package.to_vfs_file(&stdlib_path, &resolver), + None + ); + + // ...As do submodules in the `importlib` namespace package: + let importlib_abc = importlib_namespace_package.join("abc.pyi"); + assert!(!importlib_abc.is_directory(&stdlib_path, &resolver)); + assert!(!importlib_abc.is_regular_package(&stdlib_path, &resolver)); + assert!(importlib_abc.to_vfs_file(&stdlib_path, &resolver).is_some()); + } + + #[test] + fn mocked_typeshed_nonexistent_namespace_stdlib_pkg_py39() { + let (db, stdlib_path) = py39_stdlib_test_case(); + let resolver = ResolverState { + db: &db, + typeshed_versions: LazyTypeshedVersions::new(), + target_version: TargetVersion::Py39, + }; + + // The `xml` package no longer exists on py39: + let xml_namespace_package = stdlib_path.join("xml"); + assert_eq!( + xml_namespace_package.to_vfs_file(&stdlib_path, &resolver), + None + ); + assert!(!xml_namespace_package.is_directory(&stdlib_path, &resolver)); + assert!(!xml_namespace_package.is_regular_package(&stdlib_path, &resolver)); + + let xml_etree = xml_namespace_package.join("etree.pyi"); + assert_eq!(xml_etree.to_vfs_file(&stdlib_path, &resolver), None); + assert!(!xml_etree.is_directory(&stdlib_path, &resolver)); + assert!(!xml_etree.is_regular_package(&stdlib_path, &resolver)); + } +} diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index d01f4148c7..08438472cf 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -1,27 +1,29 @@ use std::ops::Deref; +use std::sync::Arc; -use ruff_db::file_system::{FileSystem, FileSystemPath, 
FileSystemPathBuf}; -use ruff_db::vfs::{system_path_to_file, vfs_path_to_file, VfsFile, VfsPath}; +use ruff_db::file_system::FileSystemPathBuf; +use ruff_db::vfs::{vfs_path_to_file, VfsFile, VfsPath}; -use crate::module::{Module, ModuleKind, ModuleName, ModuleSearchPath, ModuleSearchPathKind}; -use crate::resolver::internal::ModuleResolverSearchPaths; -use crate::Db; +use crate::db::Db; +use crate::module::{Module, ModuleKind}; +use crate::module_name::ModuleName; +use crate::path::ModuleResolutionPathBuf; +use crate::resolver::internal::ModuleResolverSettings; +use crate::state::ResolverState; +use crate::supported_py_version::TargetVersion; -const TYPESHED_STDLIB_DIRECTORY: &str = "stdlib"; - -/// Configures the module search paths for the module resolver. +/// Configures the module resolver settings. /// /// Must be called before calling any other module resolution functions. -pub fn set_module_resolution_settings(db: &mut dyn Db, config: ModuleResolutionSettings) { +pub fn set_module_resolution_settings(db: &mut dyn Db, config: RawModuleResolutionSettings) { // There's no concurrency issue here because we hold a `&mut dyn Db` reference. No other // thread can mutate the `Db` while we're in this call, so using `try_get` to test if // the settings have already been set is safe. - if let Some(existing) = ModuleResolverSearchPaths::try_get(db) { - existing - .set_search_paths(db) - .to(config.into_ordered_search_paths()); + let resolved_settings = config.into_configuration_settings(); + if let Some(existing) = ModuleResolverSettings::try_get(db) { + existing.set_settings(db).to(resolved_settings); } else { - ModuleResolverSearchPaths::new(db, config.into_ordered_search_paths()); + ModuleResolverSettings::new(db, resolved_settings); } } @@ -54,7 +56,7 @@ pub(crate) fn resolve_module_query<'db>( /// Resolves the module for the given path. /// -/// Returns `None` if the path is not a module locatable via `sys.path`. 
+/// Returns `None` if the path is not a module locatable via any of the known search paths. #[allow(unused)] pub(crate) fn path_to_module(db: &dyn Db, path: &VfsPath) -> Option { // It's not entirely clear on first sight why this method calls `file_to_module` instead of @@ -71,30 +73,23 @@ pub(crate) fn path_to_module(db: &dyn Db, path: &VfsPath) -> Option { /// Resolves the module for the file with the given id. /// -/// Returns `None` if the file is not a module locatable via `sys.path`. +/// Returns `None` if the file is not a module locatable via any of the known search paths. #[salsa::tracked] pub(crate) fn file_to_module(db: &dyn Db, file: VfsFile) -> Option { let _span = tracing::trace_span!("file_to_module", ?file).entered(); - let path = file.path(db.upcast()); + let VfsPath::FileSystem(path) = file.path(db.upcast()) else { + todo!("VendoredPaths are not yet supported") + }; - let search_paths = module_search_paths(db); + let resolver_settings = module_resolver_settings(db); - let relative_path = search_paths + let relative_path = resolver_settings + .search_paths() .iter() - .find_map(|root| match (root.path(), path) { - (VfsPath::FileSystem(root_path), VfsPath::FileSystem(path)) => { - let relative_path = path.strip_prefix(root_path).ok()?; - Some(relative_path) - } - (VfsPath::Vendored(_), VfsPath::Vendored(_)) => { - todo!("Add support for vendored modules") - } - (VfsPath::Vendored(_), VfsPath::FileSystem(_)) - | (VfsPath::FileSystem(_), VfsPath::Vendored(_)) => None, - })?; + .find_map(|root| root.relativize_path(path))?; - let module_name = ModuleName::from_relative_path(relative_path)?; + let module_name = relative_path.to_module_name()?; // Resolve the module name to see if Python would resolve the name to the same path. 
// If it doesn't, then that means that multiple modules have the same name in different @@ -116,9 +111,12 @@ pub(crate) fn file_to_module(db: &dyn Db, file: VfsFile) -> Option { } } -/// Configures the search paths that are used to resolve modules. +/// "Raw" configuration settings for module resolution: unvalidated, unnormalized #[derive(Eq, PartialEq, Debug)] -pub struct ModuleResolutionSettings { +pub struct RawModuleResolutionSettings { + /// The target Python version the user has specified + pub target_version: TargetVersion, + /// List of user-provided paths that should take first priority in the module resolution. /// Examples in other type checkers are mypy's MYPYPATH environment variable, /// or pyright's stubPath configuration setting. @@ -127,83 +125,103 @@ pub struct ModuleResolutionSettings { /// The root of the workspace, used for finding first-party modules. pub workspace_root: FileSystemPathBuf, + /// Optional (already validated) path to standard-library typeshed stubs. + /// If this is not provided, we will fall back to our vendored typeshed stubs + /// bundled as a zip file in the binary + pub custom_typeshed: Option, + /// The path to the user's `site-packages` directory, where third-party packages from ``PyPI`` are installed. pub site_packages: Option, - - /// Optional path to standard-library typeshed stubs. - /// Currently this has to be a directory that exists on disk. - /// - /// (TODO: fall back to vendored stubs if no custom directory is provided.) - pub custom_typeshed: Option, } -impl ModuleResolutionSettings { - /// Implementation of PEP 561's module resolution order - /// (with some small, deliberate, differences) - fn into_ordered_search_paths(self) -> OrderedSearchPaths { - let ModuleResolutionSettings { +impl RawModuleResolutionSettings { + /// Implementation of the typing spec's [module resolution order] + /// + /// TODO(Alex): this method does multiple `.unwrap()` calls when it should really return an error.
+ /// Each `.unwrap()` call is a point where we're validating a setting that the user would pass + /// and transforming it into an internal representation for a validated path. + /// Rather than panicking if a path fails to validate, we should display an error message to the user + /// and exit the process with a nonzero exit code. + /// This validation should probably be done outside of Salsa? + /// + /// [module resolution order]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering + fn into_configuration_settings(self) -> ModuleResolutionSettings { + let RawModuleResolutionSettings { + target_version, extra_paths, workspace_root, site_packages, custom_typeshed, } = self; - let mut paths: Vec<_> = extra_paths + let mut paths: Vec = extra_paths .into_iter() - .map(|path| ModuleSearchPath::new(path, ModuleSearchPathKind::Extra)) + .map(|fs_path| ModuleResolutionPathBuf::extra(fs_path).unwrap()) .collect(); - paths.push(ModuleSearchPath::new( - workspace_root, - ModuleSearchPathKind::FirstParty, - )); + paths.push(ModuleResolutionPathBuf::first_party(workspace_root).unwrap()); - // TODO fallback to vendored typeshed stubs if no custom typeshed directory is provided by the user if let Some(custom_typeshed) = custom_typeshed { - paths.push(ModuleSearchPath::new( - custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY), - ModuleSearchPathKind::StandardLibrary, - )); + paths.push( + ModuleResolutionPathBuf::stdlib_from_typeshed_root(&custom_typeshed).unwrap(), + ); } // TODO vendor typeshed's third-party stubs as well as the stdlib and fallback to them as a final step if let Some(site_packages) = site_packages { - paths.push(ModuleSearchPath::new( - site_packages, - ModuleSearchPathKind::SitePackagesThirdParty, - )); + paths.push(ModuleResolutionPathBuf::site_packages(site_packages).unwrap()); } - OrderedSearchPaths(paths) + ModuleResolutionSettings { + target_version, + search_paths: 
OrderedSearchPaths(paths.into_iter().map(Arc::new).collect()), + } } } -/// A resolved module resolution order, implementing PEP 561 -/// (with some small, deliberate differences) +/// A resolved module resolution order as per the [typing spec] +/// +/// [typing spec]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering #[derive(Clone, Debug, Default, Eq, PartialEq)] -pub(crate) struct OrderedSearchPaths(Vec); +pub(crate) struct OrderedSearchPaths(Vec>); impl Deref for OrderedSearchPaths { - type Target = [ModuleSearchPath]; + type Target = [Arc]; fn deref(&self) -> &Self::Target { &self.0 } } +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct ModuleResolutionSettings { + search_paths: OrderedSearchPaths, + target_version: TargetVersion, +} + +impl ModuleResolutionSettings { + pub(crate) fn search_paths(&self) -> &[Arc] { + &self.search_paths + } + + pub(crate) fn target_version(&self) -> TargetVersion { + self.target_version + } +} + // The singleton methods generated by salsa are all `pub` instead of `pub(crate)` which triggers // `unreachable_pub`. Work around this by creating a module and allow `unreachable_pub` for it. // Salsa also generates uses to `_db` variables for `interned` which triggers `clippy::used_underscore_binding`. Suppress that too // TODO(micha): Contribute a fix for this upstream where the singleton methods have the same visibility as the struct. #[allow(unreachable_pub, clippy::used_underscore_binding)] pub(crate) mod internal { - use crate::module::ModuleName; - use crate::resolver::OrderedSearchPaths; + use crate::module_name::ModuleName; + use crate::resolver::ModuleResolutionSettings; #[salsa::input(singleton)] - pub(crate) struct ModuleResolverSearchPaths { + pub(crate) struct ModuleResolverSettings { #[return_ref] - pub(super) search_paths: OrderedSearchPaths, + pub(super) settings: ModuleResolutionSettings, } /// A thin wrapper around `ModuleName` to make it a Salsa ingredient. 
@@ -216,31 +234,31 @@ pub(crate) mod internal { } } -fn module_search_paths(db: &dyn Db) -> &[ModuleSearchPath] { - ModuleResolverSearchPaths::get(db).search_paths(db) +fn module_resolver_settings(db: &dyn Db) -> &ModuleResolutionSettings { + ModuleResolverSettings::get(db).settings(db) } /// Given a module name and a list of search paths in which to lookup modules, /// attempt to resolve the module name -fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(ModuleSearchPath, VfsFile, ModuleKind)> { - let search_paths = module_search_paths(db); +fn resolve_name( + db: &dyn Db, + name: &ModuleName, +) -> Option<(Arc, VfsFile, ModuleKind)> { + let resolver_settings = module_resolver_settings(db); + let resolver_state = ResolverState::new(db, resolver_settings.target_version()); - for search_path in search_paths { + for search_path in resolver_settings.search_paths() { let mut components = name.components(); let module_name = components.next_back()?; - let VfsPath::FileSystem(fs_search_path) = search_path.path() else { - todo!("Vendored search paths are not yet supported"); - }; - - match resolve_package(db.file_system(), fs_search_path, components) { + match resolve_package(search_path, components, &resolver_state) { Ok(resolved_package) => { let mut package_path = resolved_package.path; package_path.push(module_name); // Must be a `__init__.pyi` or `__init__.py` or it isn't a package. 
- let kind = if db.file_system().is_directory(&package_path) { + let kind = if package_path.is_directory(search_path, &resolver_state) { package_path.push("__init__"); ModuleKind::Package } else { @@ -248,15 +266,17 @@ fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(ModuleSearchPath, Vfs }; // TODO Implement full https://peps.python.org/pep-0561/#type-checker-module-resolution-order resolution - let stub = package_path.with_extension("pyi"); - - if let Some(stub) = system_path_to_file(db.upcast(), &stub) { + if let Some(stub) = package_path + .with_pyi_extension() + .to_vfs_file(search_path, &resolver_state) + { return Some((search_path.clone(), stub, kind)); } - let module = package_path.with_extension("py"); - - if let Some(module) = system_path_to_file(db.upcast(), &module) { + if let Some(module) = package_path + .with_py_extension() + .and_then(|path| path.to_vfs_file(search_path, &resolver_state)) + { return Some((search_path.clone(), module, kind)); } @@ -278,15 +298,15 @@ fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(ModuleSearchPath, Vfs None } -fn resolve_package<'a, I>( - fs: &dyn FileSystem, - module_search_path: &FileSystemPath, +fn resolve_package<'a, 'db, I>( + module_search_path: &ModuleResolutionPathBuf, components: I, + resolver_state: &ResolverState<'db>, ) -> Result where I: Iterator, { - let mut package_path = module_search_path.to_path_buf(); + let mut package_path = module_search_path.clone(); // `true` if inside a folder that is a namespace package (has no `__init__.py`). // Namespace packages are special because they can be spread across multiple search paths. 
@@ -300,12 +320,12 @@ where for folder in components { package_path.push(folder); - let has_init_py = fs.is_file(&package_path.join("__init__.py")) - || fs.is_file(&package_path.join("__init__.pyi")); + let is_regular_package = + package_path.is_regular_package(module_search_path, resolver_state); - if has_init_py { + if is_regular_package { in_namespace_package = false; - } else if fs.is_directory(&package_path) { + } else if package_path.is_directory(module_search_path, resolver_state) { // A directory without an `__init__.py` is a namespace package, continue with the next folder. in_namespace_package = true; } else if in_namespace_package { @@ -338,7 +358,7 @@ where #[derive(Debug)] struct ResolvedPackage { - path: FileSystemPathBuf, + path: ModuleResolutionPathBuf, kind: PackageKind, } @@ -366,58 +386,22 @@ impl PackageKind { #[cfg(test)] mod tests { - use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf}; + use ruff_db::file_system::FileSystemPath; use ruff_db::vfs::{system_path_to_file, VfsFile, VfsPath}; - use crate::db::tests::TestDb; - use crate::module::{ModuleKind, ModuleName}; + use crate::db::tests::{create_resolver_builder, TestCase}; + use crate::module::ModuleKind; + use crate::module_name::ModuleName; - use super::{ - path_to_module, resolve_module, set_module_resolution_settings, ModuleResolutionSettings, - TYPESHED_STDLIB_DIRECTORY, - }; + use super::*; - struct TestCase { - db: TestDb, - - src: FileSystemPathBuf, - custom_typeshed: FileSystemPathBuf, - site_packages: FileSystemPathBuf, - } - - fn create_resolver() -> std::io::Result { - let mut db = TestDb::new(); - - let src = FileSystemPath::new("src").to_path_buf(); - let site_packages = FileSystemPath::new("site_packages").to_path_buf(); - let custom_typeshed = FileSystemPath::new("typeshed").to_path_buf(); - - let fs = db.memory_file_system(); - - fs.create_directory_all(&src)?; - fs.create_directory_all(&site_packages)?; - fs.create_directory_all(&custom_typeshed)?; - - let 
settings = ModuleResolutionSettings { - extra_paths: vec![], - workspace_root: src.clone(), - site_packages: Some(site_packages.clone()), - custom_typeshed: Some(custom_typeshed.clone()), - }; - - set_module_resolution_settings(&mut db, settings); - - Ok(TestCase { - db, - src, - custom_typeshed, - site_packages, - }) + fn setup_resolver_test() -> TestCase { + create_resolver_builder().unwrap().build() } #[test] fn first_party_module() -> anyhow::Result<()> { - let TestCase { db, src, .. } = create_resolver()?; + let TestCase { db, src, .. } = setup_resolver_test(); let foo_module_name = ModuleName::new_static("foo").unwrap(); let foo_path = src.join("foo.py"); @@ -432,10 +416,10 @@ mod tests { ); assert_eq!("foo", foo_module.name()); - assert_eq!(&src, foo_module.search_path().path()); + assert_eq!(&src, &foo_module.search_path()); assert_eq!(ModuleKind::Module, foo_module.kind()); - assert_eq!(&foo_path, foo_module.file().path(&db)); + assert_eq!(&foo_path, foo_module.file().path(&db)); assert_eq!( Some(foo_module), path_to_module(&db, &VfsPath::FileSystem(foo_path)) @@ -445,18 +429,15 @@ mod tests { } #[test] - fn stdlib() -> anyhow::Result<()> { + fn stdlib() { let TestCase { db, custom_typeshed, .. 
- } = create_resolver()?; - - let stdlib_dir = custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY); - let functools_path = stdlib_dir.join("functools.py"); - db.memory_file_system() - .write_file(&functools_path, "def update_wrapper(): ...")?; + } = setup_resolver_test(); + let stdlib_dir = + ModuleResolutionPathBuf::stdlib_from_typeshed_root(&custom_typeshed).unwrap(); let functools_module_name = ModuleName::new_static("functools").unwrap(); let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap(); @@ -465,35 +446,127 @@ mod tests { resolve_module(&db, functools_module_name).as_ref() ); - assert_eq!(&stdlib_dir, functools_module.search_path().path()); + assert_eq!(stdlib_dir, functools_module.search_path().to_path_buf()); assert_eq!(ModuleKind::Module, functools_module.kind()); - assert_eq!(&functools_path.clone(), functools_module.file().path(&db)); + + let expected_functools_path = + VfsPath::FileSystem(custom_typeshed.join("stdlib/functools.pyi")); + assert_eq!(&expected_functools_path, functools_module.file().path(&db)); assert_eq!( Some(functools_module), - path_to_module(&db, &VfsPath::FileSystem(functools_path)) + path_to_module(&db, &expected_functools_path) ); + } - Ok(()) + fn create_module_names(raw_names: &[&str]) -> Vec { + raw_names + .iter() + .map(|raw| ModuleName::new(raw).unwrap()) + .collect() + } + + #[test] + fn stdlib_resolution_respects_versions_file_py38_existing_modules() { + let TestCase { + db, + custom_typeshed, + .. 
+ } = setup_resolver_test(); + + let existing_modules = create_module_names(&["asyncio", "functools", "xml.etree"]); + for module_name in existing_modules { + let resolved_module = resolve_module(&db, module_name.clone()).unwrap_or_else(|| { + panic!("Expected module {module_name} to exist in the mock stdlib") + }); + let search_path = resolved_module.search_path(); + assert_eq!( + &custom_typeshed.join("stdlib"), + &search_path, + "Search path for {module_name} was unexpectedly {search_path:?}" + ); + assert!( + search_path.is_stdlib_search_path(), + "Expected a stdlib search path, but got {search_path:?}" + ); + } + } + + #[test] + fn stdlib_resolution_respects_versions_file_py38_nonexisting_modules() { + let TestCase { db, .. } = setup_resolver_test(); + let nonexisting_modules = create_module_names(&[ + "collections", + "importlib", + "importlib.abc", + "xml", + "asyncio.tasks", + ]); + for module_name in nonexisting_modules { + assert!( + resolve_module(&db, module_name.clone()).is_none(), + "Unexpectedly resolved a module for {module_name}" + ); + } + } + + #[test] + fn stdlib_resolution_respects_versions_file_py39_existing_modules() { + let TestCase { + db, + custom_typeshed, + .. 
+ } = create_resolver_builder() + .unwrap() + .with_target_version(TargetVersion::Py39) + .build(); + + let existing_modules = create_module_names(&[ + "asyncio", + "functools", + "importlib.abc", + "collections", + "asyncio.tasks", + ]); + for module_name in existing_modules { + let resolved_module = resolve_module(&db, module_name.clone()).unwrap_or_else(|| { + panic!("Expected module {module_name} to exist in the mock stdlib") + }); + let search_path = resolved_module.search_path(); + assert_eq!( + &custom_typeshed.join("stdlib"), + &search_path, + "Search path for {module_name} was unexpectedly {search_path:?}" + ); + assert!( + search_path.is_stdlib_search_path(), + "Expected a stdlib search path, but got {search_path:?}" + ); + } + } + #[test] + fn stdlib_resolution_respects_versions_file_py39_nonexisting_modules() { + let TestCase { db, .. } = create_resolver_builder() + .unwrap() + .with_target_version(TargetVersion::Py39) + .build(); + + let nonexisting_modules = create_module_names(&["importlib", "xml", "xml.etree"]); + for module_name in nonexisting_modules { + assert!( + resolve_module(&db, module_name.clone()).is_none(), + "Unexpectedly resolved a module for {module_name}" + ); + } } #[test] fn first_party_precedence_over_stdlib() -> anyhow::Result<()> { - let TestCase { - db, - src, - custom_typeshed, - .. - } = create_resolver()?; + let TestCase { db, src, .. 
} = setup_resolver_test(); - let stdlib_dir = custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY); - let stdlib_functools_path = stdlib_dir.join("functools.py"); let first_party_functools_path = src.join("functools.py"); - - db.memory_file_system().write_files([ - (&stdlib_functools_path, "def update_wrapper(): ..."), - (&first_party_functools_path, "def update_wrapper(): ..."), - ])?; + db.memory_file_system() + .write_file(&first_party_functools_path, "def update_wrapper(): ...")?; let functools_module_name = ModuleName::new_static("functools").unwrap(); let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap(); @@ -502,10 +575,10 @@ mod tests { Some(&functools_module), resolve_module(&db, functools_module_name).as_ref() ); - assert_eq!(&src, functools_module.search_path().path()); + assert_eq!(&src, &functools_module.search_path()); assert_eq!(ModuleKind::Module, functools_module.kind()); assert_eq!( - &first_party_functools_path.clone(), + &first_party_functools_path, functools_module.file().path(&db) ); @@ -517,33 +590,9 @@ mod tests { Ok(()) } - // TODO: Port typeshed test case. Porting isn't possible at the moment because the vendored zip - // is part of the red knot crate - // #[test] - // fn typeshed_zip_created_at_build_time() -> anyhow::Result<()> { - // // The file path here is hardcoded in this crate's `build.rs` script. - // // Luckily this crate will fail to build if this file isn't available at build time. 
- // const TYPESHED_ZIP_BYTES: &[u8] = - // include_bytes!(concat!(env!("OUT_DIR"), "/zipped_typeshed.zip")); - // assert!(!TYPESHED_ZIP_BYTES.is_empty()); - // let mut typeshed_zip_archive = ZipArchive::new(Cursor::new(TYPESHED_ZIP_BYTES))?; - // - // let path_to_functools = Path::new("stdlib").join("functools.pyi"); - // let mut functools_module_stub = typeshed_zip_archive - // .by_name(path_to_functools.to_str().unwrap()) - // .unwrap(); - // assert!(functools_module_stub.is_file()); - // - // let mut functools_module_stub_source = String::new(); - // functools_module_stub.read_to_string(&mut functools_module_stub_source)?; - // - // assert!(functools_module_stub_source.contains("def update_wrapper(")); - // Ok(()) - // } - #[test] fn resolve_package() -> anyhow::Result<()> { - let TestCase { src, db, .. } = create_resolver()?; + let TestCase { src, db, .. } = setup_resolver_test(); let foo_dir = src.join("foo"); let foo_path = foo_dir.join("__init__.py"); @@ -554,7 +603,7 @@ mod tests { let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap(); assert_eq!("foo", foo_module.name()); - assert_eq!(&src, foo_module.search_path().path()); + assert_eq!(&src, &foo_module.search_path()); assert_eq!(&foo_path, foo_module.file().path(&db)); assert_eq!( @@ -570,7 +619,7 @@ mod tests { #[test] fn package_priority_over_module() -> anyhow::Result<()> { - let TestCase { db, src, .. } = create_resolver()?; + let TestCase { db, src, .. 
} = setup_resolver_test(); let foo_dir = src.join("foo"); let foo_init = foo_dir.join("__init__.py"); @@ -584,7 +633,7 @@ mod tests { let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap(); - assert_eq!(&src, foo_module.search_path().path()); + assert_eq!(&src, &foo_module.search_path()); assert_eq!(&foo_init, foo_module.file().path(&db)); assert_eq!(ModuleKind::Package, foo_module.kind()); @@ -599,7 +648,7 @@ mod tests { #[test] fn typing_stub_over_module() -> anyhow::Result<()> { - let TestCase { db, src, .. } = create_resolver()?; + let TestCase { db, src, .. } = setup_resolver_test(); let foo_stub = src.join("foo.pyi"); let foo_py = src.join("foo.py"); @@ -608,7 +657,7 @@ mod tests { let foo = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap(); - assert_eq!(&src, foo.search_path().path()); + assert_eq!(&src, &foo.search_path()); assert_eq!(&foo_stub, foo.file().path(&db)); assert_eq!( @@ -622,7 +671,7 @@ mod tests { #[test] fn sub_packages() -> anyhow::Result<()> { - let TestCase { db, src, .. } = create_resolver()?; + let TestCase { db, src, .. } = setup_resolver_test(); let foo = src.join("foo"); let bar = foo.join("bar"); @@ -637,7 +686,7 @@ mod tests { let baz_module = resolve_module(&db, ModuleName::new_static("foo.bar.baz").unwrap()).unwrap(); - assert_eq!(&src, baz_module.search_path().path()); + assert_eq!(&src, &baz_module.search_path()); assert_eq!(&baz, baz_module.file().path(&db)); assert_eq!( @@ -655,7 +704,7 @@ mod tests { src, site_packages, .. - } = create_resolver()?; + } = setup_resolver_test(); // From [PEP420](https://peps.python.org/pep-0420/#nested-namespace-packages). // But uses `src` for `project1` and `site_packages2` for `project2`. @@ -708,7 +757,7 @@ mod tests { src, site_packages, .. - } = create_resolver()?; + } = setup_resolver_test(); // Adopted test case from the [PEP420 examples](https://peps.python.org/pep-0420/#nested-namespace-packages). 
// The `src/parent/child` package is a regular package. Therefore, `site_packages/parent/child/two.py` should not be resolved. @@ -759,7 +808,7 @@ mod tests { src, site_packages, .. - } = create_resolver()?; + } = setup_resolver_test(); let foo_src = src.join("foo.py"); let foo_site_packages = site_packages.join("foo.py"); @@ -769,7 +818,7 @@ mod tests { let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap(); - assert_eq!(&src, foo_module.search_path().path()); + assert_eq!(&src, &foo_module.search_path()); assert_eq!(&foo_src, foo_module.file().path(&db)); assert_eq!( @@ -792,7 +841,7 @@ mod tests { src, site_packages, custom_typeshed, - } = create_resolver()?; + } = setup_resolver_test(); db.with_os_file_system(); @@ -813,11 +862,12 @@ mod tests { std::fs::write(foo.as_std_path(), "")?; std::os::unix::fs::symlink(foo.as_std_path(), bar.as_std_path())?; - let settings = ModuleResolutionSettings { + let settings = RawModuleResolutionSettings { + target_version: TargetVersion::Py38, extra_paths: vec![], workspace_root: src.clone(), - site_packages: Some(site_packages), - custom_typeshed: Some(custom_typeshed), + site_packages: Some(site_packages.clone()), + custom_typeshed: Some(custom_typeshed.clone()), }; set_module_resolution_settings(&mut db, settings); @@ -827,12 +877,12 @@ mod tests { assert_ne!(foo_module, bar_module); - assert_eq!(&src, foo_module.search_path().path()); + assert_eq!(&src, &foo_module.search_path()); assert_eq!(&foo, foo_module.file().path(&db)); // `foo` and `bar` shouldn't resolve to the same file - assert_eq!(&src, bar_module.search_path().path()); + assert_eq!(&src, &bar_module.search_path()); assert_eq!(&bar, bar_module.file().path(&db)); assert_eq!(&foo, foo_module.file().path(&db)); @@ -851,8 +901,8 @@ mod tests { } #[test] - fn deleting_an_unrealted_file_doesnt_change_module_resolution() -> anyhow::Result<()> { - let TestCase { mut db, src, .. 
} = create_resolver()?; + fn deleting_an_unrelated_file_doesnt_change_module_resolution() -> anyhow::Result<()> { + let TestCase { mut db, src, .. } = setup_resolver_test(); let foo_path = src.join("foo.py"); let bar_path = src.join("bar.py"); @@ -889,7 +939,7 @@ mod tests { #[test] fn adding_a_file_on_which_the_module_resolution_depends_on_invalidates_the_query( ) -> anyhow::Result<()> { - let TestCase { mut db, src, .. } = create_resolver()?; + let TestCase { mut db, src, .. } = setup_resolver_test(); let foo_path = src.join("foo.py"); let foo_module_name = ModuleName::new_static("foo").unwrap(); @@ -909,7 +959,7 @@ mod tests { #[test] fn removing_a_file_that_the_module_resolution_depends_on_invalidates_the_query( ) -> anyhow::Result<()> { - let TestCase { mut db, src, .. } = create_resolver()?; + let TestCase { mut db, src, .. } = setup_resolver_test(); let foo_path = src.join("foo.py"); let foo_init_path = src.join("foo/__init__.py"); @@ -925,7 +975,7 @@ mod tests { db.memory_file_system().remove_file(&foo_init_path)?; db.memory_file_system() .remove_directory(foo_init_path.parent().unwrap())?; - VfsFile::touch_path(&mut db, &VfsPath::FileSystem(foo_init_path.clone())); + VfsFile::touch_path(&mut db, &VfsPath::FileSystem(foo_init_path)); let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve"); assert_eq!(&foo_path, foo_module.file().path(&db)); diff --git a/crates/red_knot_module_resolver/src/state.rs b/crates/red_knot_module_resolver/src/state.rs new file mode 100644 index 0000000000..ad9a7329a8 --- /dev/null +++ b/crates/red_knot_module_resolver/src/state.rs @@ -0,0 +1,25 @@ +use ruff_db::file_system::FileSystem; + +use crate::db::Db; +use crate::supported_py_version::TargetVersion; +use crate::typeshed::LazyTypeshedVersions; + +pub(crate) struct ResolverState<'db> { + pub(crate) db: &'db dyn Db, + pub(crate) typeshed_versions: LazyTypeshedVersions<'db>, + pub(crate) target_version: TargetVersion, +} + +impl<'db> 
ResolverState<'db> { + pub(crate) fn new(db: &'db dyn Db, target_version: TargetVersion) -> Self { + Self { + db, + typeshed_versions: LazyTypeshedVersions::new(), + target_version, + } + } + + pub(crate) fn file_system(&self) -> &dyn FileSystem { + self.db.file_system() + } +} diff --git a/crates/red_knot_module_resolver/src/supported_py_version.rs b/crates/red_knot_module_resolver/src/supported_py_version.rs new file mode 100644 index 0000000000..466aae6b03 --- /dev/null +++ b/crates/red_knot_module_resolver/src/supported_py_version.rs @@ -0,0 +1,14 @@ +/// Enumeration of all supported Python versions +/// +/// TODO: unify with the `PythonVersion` enum in the linter/formatter crates? +#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq, PartialOrd, Ord, Default)] +pub enum TargetVersion { + Py37, + #[default] + Py38, + Py39, + Py310, + Py311, + Py312, + Py313, +} diff --git a/crates/red_knot_module_resolver/src/typeshed.rs b/crates/red_knot_module_resolver/src/typeshed.rs index fa49261d5f..c8a36b4626 100644 --- a/crates/red_knot_module_resolver/src/typeshed.rs +++ b/crates/red_knot_module_resolver/src/typeshed.rs @@ -1,4 +1,9 @@ -pub(crate) mod versions; +mod versions; + +pub(crate) use versions::{ + parse_typeshed_versions, LazyTypeshedVersions, TypeshedVersionsQueryResult, +}; +pub use versions::{TypeshedVersionsParseError, TypeshedVersionsParseErrorKind}; #[cfg(test)] mod tests { diff --git a/crates/red_knot_module_resolver/src/typeshed/versions.rs b/crates/red_knot_module_resolver/src/typeshed/versions.rs index aea7b2cab4..61ef0249cf 100644 --- a/crates/red_knot_module_resolver/src/typeshed/versions.rs +++ b/crates/red_knot_module_resolver/src/typeshed/versions.rs @@ -1,16 +1,78 @@ +use std::cell::OnceCell; use std::collections::BTreeMap; use std::fmt; use std::num::{NonZeroU16, NonZeroUsize}; use std::ops::{RangeFrom, RangeInclusive}; use std::str::FromStr; +use ruff_db::file_system::FileSystemPath; +use ruff_db::source::source_text; +use 
ruff_db::vfs::{system_path_to_file, VfsFile}; use rustc_hash::FxHashMap; -use crate::module::ModuleName; +use crate::db::Db; +use crate::module_name::ModuleName; +use crate::supported_py_version::TargetVersion; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug)] +pub(crate) struct LazyTypeshedVersions<'db>(OnceCell<&'db TypeshedVersions>); + +impl<'db> LazyTypeshedVersions<'db> { + #[must_use] + pub(crate) fn new() -> Self { + Self(OnceCell::new()) + } + + /// Query whether a module exists at runtime in the stdlib on a certain Python version. + /// + /// Simply probing whether a file exists in typeshed is insufficient for this question, + /// as a module in the stdlib may have been added in Python 3.10, but the typeshed stub + /// will still be available (either in a custom typeshed dir or in our vendored copy) + /// even if the user specified Python 3.8 as the target version. + /// + /// For top-level modules and packages, the VERSIONS file can always provide an unambiguous answer + /// as to whether the module exists on the specified target version. However, VERSIONS does not + /// provide comprehensive information on all submodules, meaning that this method sometimes + /// returns [`TypeshedVersionsQueryResult::MaybeExists`]. + /// See [`TypeshedVersionsQueryResult`] for more details. + #[must_use] + pub(crate) fn query_module( + &self, + module: &ModuleName, + db: &'db dyn Db, + stdlib_root: &FileSystemPath, + target_version: TargetVersion, + ) -> TypeshedVersionsQueryResult { + let versions = self.0.get_or_init(|| { + let versions_path = stdlib_root.join("VERSIONS"); + let Some(versions_file) = system_path_to_file(db.upcast(), &versions_path) else { + todo!( + "Still need to figure out how to handle VERSIONS files being deleted \ + from custom typeshed directories! 
Expected a file to exist at {versions_path}" + ) + }; + // TODO(Alex/Micha): If VERSIONS is invalid, + // this should invalidate not just the specific module resolution we're currently attempting, + // but all type inference that depends on any standard-library types. + // Unwrapping here is not correct... + parse_typeshed_versions(db, versions_file).as_ref().unwrap() + }); + versions.query_module(module, PyVersion::from(target_version)) + } +} + +#[salsa::tracked(return_ref)] +pub(crate) fn parse_typeshed_versions( + db: &dyn Db, + versions_file: VfsFile, +) -> Result { + let file_content = source_text(db.upcast(), versions_file); + file_content.parse() +} + +#[derive(Debug, PartialEq, Eq, Clone)] pub struct TypeshedVersionsParseError { - line_number: NonZeroU16, + line_number: Option, reason: TypeshedVersionsParseErrorKind, } @@ -20,10 +82,14 @@ impl fmt::Display for TypeshedVersionsParseError { line_number, reason, } = self; - write!( - f, - "Error while parsing line {line_number} of typeshed's VERSIONS file: {reason}" - ) + if let Some(line_number) = line_number { + write!( + f, + "Error while parsing line {line_number} of typeshed's VERSIONS file: {reason}" + ) + } else { + write!(f, "Error while parsing typeshed's VERSIONS file: {reason}") + } } } @@ -37,7 +103,7 @@ impl std::error::Error for TypeshedVersionsParseError { } } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone)] pub enum TypeshedVersionsParseErrorKind { TooManyLines(NonZeroUsize), UnexpectedNumberOfColons, @@ -48,6 +114,7 @@ pub enum TypeshedVersionsParseErrorKind { version: String, err: std::num::ParseIntError, }, + EmptyVersionsFile, } impl fmt::Display for TypeshedVersionsParseErrorKind { @@ -76,43 +143,100 @@ impl fmt::Display for TypeshedVersionsParseErrorKind { f, "Failed to convert '{version}' to a pair of integers due to {err}", ), + Self::EmptyVersionsFile => f.write_str("Versions file was empty!"), } } } #[derive(Debug, PartialEq, Eq)] -pub struct 
TypeshedVersions(FxHashMap); +pub(crate) struct TypeshedVersions(FxHashMap); impl TypeshedVersions { - pub fn len(&self) -> usize { - self.0.len() + #[must_use] + fn exact(&self, module_name: &ModuleName) -> Option<&PyVersionRange> { + self.0.get(module_name) } - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } - - pub fn contains_module(&self, module_name: &ModuleName) -> bool { - self.0.contains_key(module_name) - } - - pub fn module_exists_on_version( + #[must_use] + fn query_module( &self, - module: ModuleName, - version: impl Into, - ) -> bool { - let version = version.into(); - let mut module: Option = Some(module); - while let Some(module_to_try) = module { - if let Some(range) = self.0.get(&module_to_try) { - return range.contains(version); + module: &ModuleName, + target_version: PyVersion, + ) -> TypeshedVersionsQueryResult { + if let Some(range) = self.exact(module) { + if range.contains(target_version) { + TypeshedVersionsQueryResult::Exists + } else { + TypeshedVersionsQueryResult::DoesNotExist } - module = module_to_try.parent(); + } else { + let mut module = module.parent(); + while let Some(module_to_try) = module { + if let Some(range) = self.exact(&module_to_try) { + return { + if range.contains(target_version) { + TypeshedVersionsQueryResult::MaybeExists + } else { + TypeshedVersionsQueryResult::DoesNotExist + } + }; + } + module = module_to_try.parent(); + } + TypeshedVersionsQueryResult::DoesNotExist } - false } } +/// Possible answers [`LazyTypeshedVersions::query_module()`] could give to the question: +/// "Does this module exist in the stdlib at runtime on a certain target version?" +#[derive(Debug, Copy, PartialEq, Eq, Clone, Hash)] +pub(crate) enum TypeshedVersionsQueryResult { + /// The module definitely exists in the stdlib at runtime on the user-specified target version. 
+ /// + /// For example: + /// - The target version is Python 3.8 + /// - We're querying whether the `asyncio.tasks` module exists in the stdlib + /// - The VERSIONS file contains the line `asyncio.tasks: 3.8-` + Exists, + + /// The module definitely does not exist in the stdlib on the user-specified target version. + /// + /// For example: + /// - We're querying whether the `foo` module exists in the stdlib + /// - There is no top-level `foo` module in VERSIONS + /// + /// OR: + /// - The target version is Python 3.8 + /// - We're querying whether the module `importlib.abc` exists in the stdlib + /// - The VERSIONS file contains the line `importlib.abc: 3.10-`, + /// indicating that the module was added in 3.10 + /// + /// OR: + /// - The target version is Python 3.8 + /// - We're querying whether the module `collections.abc` exists in the stdlib + /// - The VERSIONS file does not contain any information about the `collections.abc` submodule, + /// but *does* contain the line `collections: 3.10-`, + /// indicating that the entire `collections` package was added in Python 3.10. + DoesNotExist, + + /// The module potentially exists in the stdlib and, if it does, + /// it definitely exists on the user-specified target version. + /// + /// This variant is only relevant for submodules, + /// for which the typeshed VERSIONS file does not provide comprehensive information. + /// (The VERSIONS file is guaranteed to provide information about all top-level stdlib modules and packages, + /// but not necessarily about all submodules within each top-level package.) + /// + /// For example: + /// - The target version is Python 3.8 + /// - We're querying whether the `asyncio.staggered` module exists in the stdlib + /// - The typeshed VERSIONS file contains the line `asyncio: 3.8-`, + /// indicating that the `asyncio` package was added in Python 3.8, + /// but does not contain any explicit information about the `asyncio.staggered` submodule.
+ MaybeExists, +} + impl FromStr for TypeshedVersions { type Err = TypeshedVersionsParseError; @@ -125,7 +249,7 @@ impl FromStr for TypeshedVersions { let Ok(line_number) = NonZeroU16::try_from(line_number) else { return Err(TypeshedVersionsParseError { - line_number: NonZeroU16::MAX, + line_number: None, reason: TypeshedVersionsParseErrorKind::TooManyLines(line_number), }); }; @@ -141,14 +265,14 @@ impl FromStr for TypeshedVersions { let (Some(module_name), Some(rest), None) = (parts.next(), parts.next(), parts.next()) else { return Err(TypeshedVersionsParseError { - line_number, + line_number: Some(line_number), reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfColons, }); }; let Some(module_name) = ModuleName::new(module_name) else { return Err(TypeshedVersionsParseError { - line_number, + line_number: Some(line_number), reason: TypeshedVersionsParseErrorKind::InvalidModuleName( module_name.to_string(), ), @@ -159,14 +283,21 @@ impl FromStr for TypeshedVersions { Ok(version) => map.insert(module_name, version), Err(reason) => { return Err(TypeshedVersionsParseError { - line_number, + line_number: Some(line_number), reason, }) } }; } - Ok(Self(map)) + if map.is_empty() { + Err(TypeshedVersionsParseError { + line_number: None, + reason: TypeshedVersionsParseErrorKind::EmptyVersionsFile, + }) + } else { + Ok(Self(map)) + } } } @@ -180,13 +311,14 @@ impl fmt::Display for TypeshedVersions { } } -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq, Hash)] enum PyVersionRange { AvailableFrom(RangeFrom), AvailableWithin(RangeInclusive), } impl PyVersionRange { + #[must_use] fn contains(&self, version: PyVersion) -> bool { match self { Self::AvailableFrom(inner) => inner.contains(&version), @@ -222,7 +354,7 @@ impl fmt::Display for PyVersionRange { } #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct PyVersion { +struct PyVersion { major: u8, minor: u8, } @@ -266,38 +398,25 @@ impl fmt::Display for PyVersion { 
} } -// TODO: unify with the PythonVersion enum in the linter/formatter crates? -#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq, PartialOrd, Ord, Default)] -pub enum SupportedPyVersion { - Py37, - #[default] - Py38, - Py39, - Py310, - Py311, - Py312, - Py313, -} - -impl From for PyVersion { - fn from(value: SupportedPyVersion) -> Self { +impl From for PyVersion { + fn from(value: TargetVersion) -> Self { match value { - SupportedPyVersion::Py37 => PyVersion { major: 3, minor: 7 }, - SupportedPyVersion::Py38 => PyVersion { major: 3, minor: 8 }, - SupportedPyVersion::Py39 => PyVersion { major: 3, minor: 9 }, - SupportedPyVersion::Py310 => PyVersion { + TargetVersion::Py37 => PyVersion { major: 3, minor: 7 }, + TargetVersion::Py38 => PyVersion { major: 3, minor: 8 }, + TargetVersion::Py39 => PyVersion { major: 3, minor: 9 }, + TargetVersion::Py310 => PyVersion { major: 3, minor: 10, }, - SupportedPyVersion::Py311 => PyVersion { + TargetVersion::Py311 => PyVersion { major: 3, minor: 11, }, - SupportedPyVersion::Py312 => PyVersion { + TargetVersion::Py312 => PyVersion { major: 3, minor: 12, }, - SupportedPyVersion::Py313 => PyVersion { + TargetVersion::Py313 => PyVersion { major: 3, minor: 13, }, @@ -317,7 +436,19 @@ mod tests { const TYPESHED_STDLIB_DIR: &str = "stdlib"; #[allow(unsafe_code)] - const ONE: NonZeroU16 = unsafe { NonZeroU16::new_unchecked(1) }; + const ONE: Option = Some(unsafe { NonZeroU16::new_unchecked(1) }); + + impl TypeshedVersions { + #[must_use] + fn contains_exact(&self, module: &ModuleName) -> bool { + self.exact(module).is_some() + } + + #[must_use] + fn len(&self) -> usize { + self.0.len() + } + } #[test] fn can_parse_vendored_versions_file() { @@ -334,18 +465,31 @@ mod tests { let asyncio_staggered = ModuleName::new_static("asyncio.staggered").unwrap(); let audioop = ModuleName::new_static("audioop").unwrap(); - assert!(versions.contains_module(&asyncio)); - assert!(versions.module_exists_on_version(asyncio, SupportedPyVersion::Py310)); - - 
assert!(versions.contains_module(&asyncio_staggered)); - assert!( - versions.module_exists_on_version(asyncio_staggered.clone(), SupportedPyVersion::Py38) + assert!(versions.contains_exact(&asyncio)); + assert_eq!( + versions.query_module(&asyncio, TargetVersion::Py310.into()), + TypeshedVersionsQueryResult::Exists ); - assert!(!versions.module_exists_on_version(asyncio_staggered, SupportedPyVersion::Py37)); - assert!(versions.contains_module(&audioop)); - assert!(versions.module_exists_on_version(audioop.clone(), SupportedPyVersion::Py312)); - assert!(!versions.module_exists_on_version(audioop, SupportedPyVersion::Py313)); + assert!(versions.contains_exact(&asyncio_staggered)); + assert_eq!( + versions.query_module(&asyncio_staggered, TargetVersion::Py38.into()), + TypeshedVersionsQueryResult::Exists + ); + assert_eq!( + versions.query_module(&asyncio_staggered, TargetVersion::Py37.into()), + TypeshedVersionsQueryResult::DoesNotExist + ); + + assert!(versions.contains_exact(&audioop)); + assert_eq!( + versions.query_module(&audioop, TargetVersion::Py312.into()), + TypeshedVersionsQueryResult::Exists + ); + assert_eq!( + versions.query_module(&audioop, TargetVersion::Py313.into()), + TypeshedVersionsQueryResult::DoesNotExist + ); } #[test] @@ -393,7 +537,7 @@ mod tests { let top_level_module = ModuleName::new(top_level_module) .unwrap_or_else(|| panic!("{top_level_module:?} was not a valid module name!")); - assert!(vendored_typeshed_versions.contains_module(&top_level_module)); + assert!(vendored_typeshed_versions.contains_exact(&top_level_module)); } assert!( @@ -426,30 +570,127 @@ foo: 3.8- # trailing comment foo: 3.8- "### ); + } - let foo = ModuleName::new_static("foo").unwrap(); + #[test] + fn version_within_range_parsed_correctly() { + let parsed_versions = TypeshedVersions::from_str("bar: 2.7-3.10").unwrap(); let bar = ModuleName::new_static("bar").unwrap(); + + assert!(parsed_versions.contains_exact(&bar)); + assert_eq!( + 
parsed_versions.query_module(&bar, TargetVersion::Py37.into()), + TypeshedVersionsQueryResult::Exists + ); + assert_eq!( + parsed_versions.query_module(&bar, TargetVersion::Py310.into()), + TypeshedVersionsQueryResult::Exists + ); + assert_eq!( + parsed_versions.query_module(&bar, TargetVersion::Py311.into()), + TypeshedVersionsQueryResult::DoesNotExist + ); + } + + #[test] + fn version_from_range_parsed_correctly() { + let parsed_versions = TypeshedVersions::from_str("foo: 3.8-").unwrap(); + let foo = ModuleName::new_static("foo").unwrap(); + + assert!(parsed_versions.contains_exact(&foo)); + assert_eq!( + parsed_versions.query_module(&foo, TargetVersion::Py37.into()), + TypeshedVersionsQueryResult::DoesNotExist + ); + assert_eq!( + parsed_versions.query_module(&foo, TargetVersion::Py38.into()), + TypeshedVersionsQueryResult::Exists + ); + assert_eq!( + parsed_versions.query_module(&foo, TargetVersion::Py311.into()), + TypeshedVersionsQueryResult::Exists + ); + } + + #[test] + fn explicit_submodule_parsed_correctly() { + let parsed_versions = TypeshedVersions::from_str("bar.baz: 3.1-3.9").unwrap(); let bar_baz = ModuleName::new_static("bar.baz").unwrap(); + + assert!(parsed_versions.contains_exact(&bar_baz)); + assert_eq!( + parsed_versions.query_module(&bar_baz, TargetVersion::Py37.into()), + TypeshedVersionsQueryResult::Exists + ); + assert_eq!( + parsed_versions.query_module(&bar_baz, TargetVersion::Py39.into()), + TypeshedVersionsQueryResult::Exists + ); + assert_eq!( + parsed_versions.query_module(&bar_baz, TargetVersion::Py310.into()), + TypeshedVersionsQueryResult::DoesNotExist + ); + } + + #[test] + fn implicit_submodule_queried_correctly() { + let parsed_versions = TypeshedVersions::from_str("bar: 2.7-3.10").unwrap(); + let bar_eggs = ModuleName::new_static("bar.eggs").unwrap(); + + assert!(!parsed_versions.contains_exact(&bar_eggs)); + assert_eq!( + parsed_versions.query_module(&bar_eggs, TargetVersion::Py37.into()), + 
TypeshedVersionsQueryResult::MaybeExists + ); + assert_eq!( + parsed_versions.query_module(&bar_eggs, TargetVersion::Py310.into()), + TypeshedVersionsQueryResult::MaybeExists + ); + assert_eq!( + parsed_versions.query_module(&bar_eggs, TargetVersion::Py311.into()), + TypeshedVersionsQueryResult::DoesNotExist + ); + } + + #[test] + fn nonexistent_module_queried_correctly() { + let parsed_versions = TypeshedVersions::from_str("eggs: 3.8-").unwrap(); let spam = ModuleName::new_static("spam").unwrap(); - assert!(parsed_versions.contains_module(&foo)); - assert!(!parsed_versions.module_exists_on_version(foo.clone(), SupportedPyVersion::Py37)); - assert!(parsed_versions.module_exists_on_version(foo.clone(), SupportedPyVersion::Py38)); - assert!(parsed_versions.module_exists_on_version(foo, SupportedPyVersion::Py311)); + assert!(!parsed_versions.contains_exact(&spam)); + assert_eq!( + parsed_versions.query_module(&spam, TargetVersion::Py37.into()), + TypeshedVersionsQueryResult::DoesNotExist + ); + assert_eq!( + parsed_versions.query_module(&spam, TargetVersion::Py313.into()), + TypeshedVersionsQueryResult::DoesNotExist + ); + } - assert!(parsed_versions.contains_module(&bar)); - assert!(parsed_versions.module_exists_on_version(bar.clone(), SupportedPyVersion::Py37)); - assert!(parsed_versions.module_exists_on_version(bar.clone(), SupportedPyVersion::Py310)); - assert!(!parsed_versions.module_exists_on_version(bar, SupportedPyVersion::Py311)); - - assert!(parsed_versions.contains_module(&bar_baz)); - assert!(parsed_versions.module_exists_on_version(bar_baz.clone(), SupportedPyVersion::Py37)); - assert!(parsed_versions.module_exists_on_version(bar_baz.clone(), SupportedPyVersion::Py39)); - assert!(!parsed_versions.module_exists_on_version(bar_baz, SupportedPyVersion::Py310)); - - assert!(!parsed_versions.contains_module(&spam)); - assert!(!parsed_versions.module_exists_on_version(spam.clone(), SupportedPyVersion::Py37)); - 
assert!(!parsed_versions.module_exists_on_version(spam, SupportedPyVersion::Py313)); + #[test] + fn invalid_empty_versions_file() { + assert_eq!( + TypeshedVersions::from_str(""), + Err(TypeshedVersionsParseError { + line_number: None, + reason: TypeshedVersionsParseErrorKind::EmptyVersionsFile + }) + ); + assert_eq!( + TypeshedVersions::from_str(" "), + Err(TypeshedVersionsParseError { + line_number: None, + reason: TypeshedVersionsParseErrorKind::EmptyVersionsFile + }) + ); + assert_eq!( + TypeshedVersions::from_str(" \n \n \n "), + Err(TypeshedVersionsParseError { + line_number: None, + reason: TypeshedVersionsParseErrorKind::EmptyVersionsFile + }) + ); } #[test] @@ -465,7 +706,7 @@ foo: 3.8- # trailing comment assert_eq!( TypeshedVersions::from_str(&massive_versions_file), Err(TypeshedVersionsParseError { - line_number: NonZeroU16::MAX, + line_number: None, reason: TypeshedVersionsParseErrorKind::TooManyLines( NonZeroUsize::new(too_many + 1 - offset).unwrap() ) diff --git a/crates/red_knot_python_semantic/src/semantic_model.rs b/crates/red_knot_python_semantic/src/semantic_model.rs index 290285cde8..9e2afb8728 100644 --- a/crates/red_knot_python_semantic/src/semantic_model.rs +++ b/crates/red_knot_python_semantic/src/semantic_model.rs @@ -179,7 +179,9 @@ impl HasTy for ast::Alias { #[cfg(test)] mod tests { - use red_knot_module_resolver::{set_module_resolution_settings, ModuleResolutionSettings}; + use red_knot_module_resolver::{ + set_module_resolution_settings, RawModuleResolutionSettings, TargetVersion, + }; use ruff_db::file_system::FileSystemPathBuf; use ruff_db::parsed::parsed_module; use ruff_db::vfs::system_path_to_file; @@ -192,11 +194,12 @@ mod tests { let mut db = TestDb::new(); set_module_resolution_settings( &mut db, - ModuleResolutionSettings { + RawModuleResolutionSettings { extra_paths: vec![], workspace_root: FileSystemPathBuf::from("/src"), site_packages: None, custom_typeshed: None, + target_version: TargetVersion::Py38, }, ); diff --git 
a/crates/red_knot_python_semantic/src/types.rs b/crates/red_knot_python_semantic/src/types.rs index 5e82c0c712..30deaf15df 100644 --- a/crates/red_knot_python_semantic/src/types.rs +++ b/crates/red_knot_python_semantic/src/types.rs @@ -271,7 +271,9 @@ pub struct IntersectionType<'db> { #[cfg(test)] mod tests { - use red_knot_module_resolver::{set_module_resolution_settings, ModuleResolutionSettings}; + use red_knot_module_resolver::{ + set_module_resolution_settings, RawModuleResolutionSettings, TargetVersion, + }; use ruff_db::file_system::FileSystemPathBuf; use ruff_db::parsed::parsed_module; use ruff_db::vfs::system_path_to_file; @@ -287,7 +289,8 @@ mod tests { let mut db = TestDb::new(); set_module_resolution_settings( &mut db, - ModuleResolutionSettings { + RawModuleResolutionSettings { + target_version: TargetVersion::Py38, extra_paths: vec![], workspace_root: FileSystemPathBuf::from("/src"), site_packages: None, diff --git a/crates/red_knot_python_semantic/src/types/infer.rs b/crates/red_knot_python_semantic/src/types/infer.rs index 59811fc9ae..173ac48431 100644 --- a/crates/red_knot_python_semantic/src/types/infer.rs +++ b/crates/red_knot_python_semantic/src/types/infer.rs @@ -598,7 +598,9 @@ impl<'db> TypeInferenceBuilder<'db> { #[cfg(test)] mod tests { - use red_knot_module_resolver::{set_module_resolution_settings, ModuleResolutionSettings}; + use red_knot_module_resolver::{ + set_module_resolution_settings, RawModuleResolutionSettings, TargetVersion, + }; use ruff_db::file_system::FileSystemPathBuf; use ruff_db::vfs::system_path_to_file; use ruff_python_ast::name::Name; @@ -611,7 +613,8 @@ mod tests { set_module_resolution_settings( &mut db, - ModuleResolutionSettings { + RawModuleResolutionSettings { + target_version: TargetVersion::Py38, extra_paths: Vec::new(), workspace_root: FileSystemPathBuf::from("/src"), site_packages: None, diff --git a/crates/ruff_benchmark/benches/red_knot.rs b/crates/ruff_benchmark/benches/red_knot.rs index 
d482580885..800d2f05e5 100644 --- a/crates/ruff_benchmark/benches/red_knot.rs +++ b/crates/ruff_benchmark/benches/red_knot.rs @@ -2,7 +2,9 @@ use red_knot::program::Program; use red_knot::Workspace; -use red_knot_module_resolver::{set_module_resolution_settings, ModuleResolutionSettings}; +use red_knot_module_resolver::{ + set_module_resolution_settings, RawModuleResolutionSettings, TargetVersion, +}; use ruff_benchmark::criterion::{ criterion_group, criterion_main, BatchSize, Criterion, Throughput, }; @@ -70,11 +72,12 @@ fn setup_case() -> Case { set_module_resolution_settings( &mut program, - ModuleResolutionSettings { + RawModuleResolutionSettings { extra_paths: vec![], workspace_root: workspace_root.to_path_buf(), site_packages: None, custom_typeshed: None, + target_version: TargetVersion::Py38, }, );