red-knot: Port module resolver to salsa (#11835)

This commit is contained in:
Micha Reiser 2024-06-18 13:11:58 +01:00 committed by GitHub
parent 98b13b9844
commit 26ac805e6d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 1953 additions and 36 deletions

View file

@ -1,6 +1,6 @@
use std::fmt::Formatter;
use std::ops::Deref;
use std::path::Path;
use std::path::{Path, StripPrefixError};
use camino::{Utf8Path, Utf8PathBuf};
use filetime::FileTime;
@ -88,6 +88,245 @@ impl FileSystemPath {
self.0.extension()
}
/// Determines whether `base` is a prefix of `self`.
///
/// Only considers whole path components to match.
///
/// # Examples
///
/// ```
/// use ruff_db::file_system::FileSystemPath;
///
/// let path = FileSystemPath::new("/etc/passwd");
///
/// assert!(path.starts_with("/etc"));
/// assert!(path.starts_with("/etc/"));
/// assert!(path.starts_with("/etc/passwd"));
/// assert!(path.starts_with("/etc/passwd/")); // extra slash is okay
/// assert!(path.starts_with("/etc/passwd///")); // multiple extra slashes are okay
///
/// assert!(!path.starts_with("/e"));
/// assert!(!path.starts_with("/etc/passwd.txt"));
///
/// assert!(!FileSystemPath::new("/etc/foo.rs").starts_with("/etc/foo"));
/// ```
#[inline]
#[must_use]
pub fn starts_with(&self, base: impl AsRef<FileSystemPath>) -> bool {
self.0.starts_with(base.as_ref())
}
/// Determines whether `child` is a suffix of `self`.
///
/// Only considers whole path components to match.
///
/// # Examples
///
/// ```
/// use ruff_db::file_system::FileSystemPath;
///
/// let path = FileSystemPath::new("/etc/resolv.conf");
///
/// assert!(path.ends_with("resolv.conf"));
/// assert!(path.ends_with("etc/resolv.conf"));
/// assert!(path.ends_with("/etc/resolv.conf"));
///
/// assert!(!path.ends_with("/resolv.conf"));
/// assert!(!path.ends_with("conf")); // use .extension() instead
/// ```
#[inline]
#[must_use]
pub fn ends_with(&self, child: impl AsRef<FileSystemPath>) -> bool {
self.0.ends_with(child.as_ref())
}
/// Returns the `FileSystemPath` without its final component, if there is one.
///
/// Returns [`None`] if the path terminates in a root or prefix.
///
/// # Examples
///
/// ```
/// use ruff_db::file_system::FileSystemPath;
///
/// let path = FileSystemPath::new("/foo/bar");
/// let parent = path.parent().unwrap();
/// assert_eq!(parent, FileSystemPath::new("/foo"));
///
/// let grand_parent = parent.parent().unwrap();
/// assert_eq!(grand_parent, FileSystemPath::new("/"));
/// assert_eq!(grand_parent.parent(), None);
/// ```
#[inline]
#[must_use]
pub fn parent(&self) -> Option<&FileSystemPath> {
self.0.parent().map(FileSystemPath::new)
}
/// Produces an iterator over the [`camino::Utf8Component`]s of the path.
///
/// When parsing the path, there is a small amount of normalization:
///
/// * Repeated separators are ignored, so `a/b` and `a//b` both have
/// `a` and `b` as components.
///
/// * Occurrences of `.` are normalized away, except if they are at the
/// beginning of the path. For example, `a/./b`, `a/b/`, `a/b/.` and
/// `a/b` all have `a` and `b` as components, but `./a/b` starts with
/// an additional [`CurDir`] component.
///
/// * A trailing slash is normalized away, `/a/b` and `/a/b/` are equivalent.
///
/// Note that no other normalization takes place; in particular, `a/c`
/// and `a/b/../c` are distinct, to account for the possibility that `b`
/// is a symbolic link (so its parent isn't `a`).
///
/// # Examples
///
/// ```
/// use camino::{Utf8Component};
/// use ruff_db::file_system::FileSystemPath;
///
/// let mut components = FileSystemPath::new("/tmp/foo.txt").components();
///
/// assert_eq!(components.next(), Some(Utf8Component::RootDir));
/// assert_eq!(components.next(), Some(Utf8Component::Normal("tmp")));
/// assert_eq!(components.next(), Some(Utf8Component::Normal("foo.txt")));
/// assert_eq!(components.next(), None)
/// ```
///
/// [`CurDir`]: camino::Utf8Component::CurDir
#[inline]
pub fn components(&self) -> camino::Utf8Components {
self.0.components()
}
/// Returns the final component of the `FileSystemPath`, if there is one.
///
/// If the path is a normal file, this is the file name. If it's the path of a directory, this
/// is the directory name.
///
/// Returns [`None`] if the path terminates in `..`.
///
/// # Examples
///
/// ```
/// use camino::Utf8Path;
/// use ruff_db::file_system::FileSystemPath;
///
/// assert_eq!(Some("bin"), FileSystemPath::new("/usr/bin/").file_name());
/// assert_eq!(Some("foo.txt"), FileSystemPath::new("tmp/foo.txt").file_name());
/// assert_eq!(Some("foo.txt"), FileSystemPath::new("foo.txt/.").file_name());
/// assert_eq!(Some("foo.txt"), FileSystemPath::new("foo.txt/.//").file_name());
/// assert_eq!(None, FileSystemPath::new("foo.txt/..").file_name());
/// assert_eq!(None, FileSystemPath::new("/").file_name());
/// ```
#[inline]
#[must_use]
pub fn file_name(&self) -> Option<&str> {
self.0.file_name()
}
/// Extracts the stem (non-extension) portion of [`self.file_name`].
///
/// [`self.file_name`]: FileSystemPath::file_name
///
/// The stem is:
///
/// * [`None`], if there is no file name;
/// * The entire file name if there is no embedded `.`;
/// * The entire file name if the file name begins with `.` and has no other `.`s within;
/// * Otherwise, the portion of the file name before the final `.`
///
/// # Examples
///
/// ```
/// use ruff_db::file_system::FileSystemPath;
///
/// assert_eq!("foo", FileSystemPath::new("foo.rs").file_stem().unwrap());
/// assert_eq!("foo.tar", FileSystemPath::new("foo.tar.gz").file_stem().unwrap());
/// ```
#[inline]
#[must_use]
pub fn file_stem(&self) -> Option<&str> {
self.0.file_stem()
}
/// Returns a path that, when joined onto `base`, yields `self`.
///
/// # Errors
///
/// If `base` is not a prefix of `self` (i.e., [`starts_with`]
/// returns `false`), returns [`Err`].
///
/// [`starts_with`]: FileSystemPath::starts_with
///
/// # Examples
///
/// ```
/// use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf};
///
/// let path = FileSystemPath::new("/test/haha/foo.txt");
///
/// assert_eq!(path.strip_prefix("/"), Ok(FileSystemPath::new("test/haha/foo.txt")));
/// assert_eq!(path.strip_prefix("/test"), Ok(FileSystemPath::new("haha/foo.txt")));
/// assert_eq!(path.strip_prefix("/test/"), Ok(FileSystemPath::new("haha/foo.txt")));
/// assert_eq!(path.strip_prefix("/test/haha/foo.txt"), Ok(FileSystemPath::new("")));
/// assert_eq!(path.strip_prefix("/test/haha/foo.txt/"), Ok(FileSystemPath::new("")));
///
/// assert!(path.strip_prefix("test").is_err());
/// assert!(path.strip_prefix("/haha").is_err());
///
/// let prefix = FileSystemPathBuf::from("/test/");
/// assert_eq!(path.strip_prefix(prefix), Ok(FileSystemPath::new("haha/foo.txt")));
/// ```
#[inline]
pub fn strip_prefix(
&self,
base: impl AsRef<FileSystemPath>,
) -> std::result::Result<&FileSystemPath, StripPrefixError> {
self.0.strip_prefix(base.as_ref()).map(FileSystemPath::new)
}
/// Creates an owned [`FileSystemPathBuf`] with `path` adjoined to `self`.
///
/// See [`std::path::PathBuf::push`] for more details on what it means to adjoin a path.
///
/// # Examples
///
/// ```
/// use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf};
///
/// assert_eq!(FileSystemPath::new("/etc").join("passwd"), FileSystemPathBuf::from("/etc/passwd"));
/// ```
#[inline]
#[must_use]
pub fn join(&self, path: impl AsRef<FileSystemPath>) -> FileSystemPathBuf {
FileSystemPathBuf::from_utf8_path_buf(self.0.join(&path.as_ref().0))
}
/// Creates an owned [`FileSystemPathBuf`] like `self` but with the given extension.
///
/// See [`std::path::PathBuf::set_extension`] for more details.
///
/// # Examples
///
/// ```
/// use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf};
///
/// let path = FileSystemPath::new("foo.rs");
/// assert_eq!(path.with_extension("txt"), FileSystemPathBuf::from("foo.txt"));
///
/// let path = FileSystemPath::new("foo.tar.gz");
/// assert_eq!(path.with_extension(""), FileSystemPathBuf::from("foo.tar"));
/// assert_eq!(path.with_extension("xz"), FileSystemPathBuf::from("foo.tar.xz"));
/// assert_eq!(path.with_extension("").with_extension("txt"), FileSystemPathBuf::from("foo.txt"));
/// ```
#[inline]
pub fn with_extension(&self, extension: &str) -> FileSystemPathBuf {
FileSystemPathBuf::from_utf8_path_buf(self.0.with_extension(extension))
}
/// Converts the path to an owned [`FileSystemPathBuf`].
pub fn to_path_buf(&self) -> FileSystemPathBuf {
FileSystemPathBuf(self.0.to_path_buf())
@ -104,6 +343,10 @@ impl FileSystemPath {
pub fn as_std_path(&self) -> &Path {
self.0.as_std_path()
}
pub fn from_std_path(path: &Path) -> Option<&FileSystemPath> {
Some(FileSystemPath::new(Utf8Path::from_path(path)?))
}
}
/// Owned path to a file or directory stored in [`FileSystem`].
@ -113,12 +356,6 @@ impl FileSystemPath {
#[derive(Eq, PartialEq, Clone, Hash, PartialOrd, Ord)]
pub struct FileSystemPathBuf(Utf8PathBuf);
impl Default for FileSystemPathBuf {
fn default() -> Self {
Self::new()
}
}
impl FileSystemPathBuf {
pub fn new() -> Self {
Self(Utf8PathBuf::new())
@ -128,12 +365,66 @@ impl FileSystemPathBuf {
Self(path)
}
pub fn from_path_buf(
path: std::path::PathBuf,
) -> std::result::Result<Self, std::path::PathBuf> {
Utf8PathBuf::from_path_buf(path).map(Self)
}
/// Extends `self` with `path`.
///
/// If `path` is absolute, it replaces the current path.
///
/// On Windows:
///
/// * if `path` has a root but no prefix (e.g., `\windows`), it
/// replaces everything except for the prefix (if any) of `self`.
/// * if `path` has a prefix but no root, it replaces `self`.
///
/// # Examples
///
/// Pushing a relative path extends the existing path:
///
/// ```
/// use ruff_db::file_system::FileSystemPathBuf;
///
/// let mut path = FileSystemPathBuf::from("/tmp");
/// path.push("file.bk");
/// assert_eq!(path, FileSystemPathBuf::from("/tmp/file.bk"));
/// ```
///
/// Pushing an absolute path replaces the existing path:
///
/// ```
///
/// use ruff_db::file_system::FileSystemPathBuf;
///
/// let mut path = FileSystemPathBuf::from("/tmp");
/// path.push("/etc");
/// assert_eq!(path, FileSystemPathBuf::from("/etc"));
/// ```
pub fn push(&mut self, path: impl AsRef<FileSystemPath>) {
self.0.push(&path.as_ref().0);
}
#[inline]
pub fn as_path(&self) -> &FileSystemPath {
FileSystemPath::new(&self.0)
}
}
impl From<&str> for FileSystemPathBuf {
fn from(value: &str) -> Self {
FileSystemPathBuf::from_utf8_path_buf(Utf8PathBuf::from(value))
}
}
impl Default for FileSystemPathBuf {
fn default() -> Self {
Self::new()
}
}
impl AsRef<FileSystemPath> for FileSystemPathBuf {
#[inline]
fn as_ref(&self) -> &FileSystemPath {

View file

@ -1,8 +1,8 @@
use std::collections::BTreeMap;
use std::sync::{Arc, RwLock, RwLockWriteGuard};
use camino::{Utf8Path, Utf8PathBuf};
use filetime::FileTime;
use rustc_hash::FxHashMap;
use crate::file_system::{FileSystem, FileSystemPath, FileType, Metadata, Result};
@ -41,7 +41,7 @@ impl MemoryFileSystem {
let fs = Self {
inner: Arc::new(MemoryFileSystemInner {
by_path: RwLock::new(FxHashMap::default()),
by_path: RwLock::new(BTreeMap::default()),
cwd: cwd.clone(),
}),
};
@ -80,16 +80,36 @@ impl MemoryFileSystem {
/// The operation overrides the content for an existing file with the same normalized `path`.
///
/// Enclosing directories are automatically created if they don't exist.
pub fn write_file(&self, path: impl AsRef<FileSystemPath>, content: String) -> Result<()> {
pub fn write_file(
&self,
path: impl AsRef<FileSystemPath>,
content: impl ToString,
) -> Result<()> {
let mut by_path = self.inner.by_path.write().unwrap();
let normalized = normalize_path(path.as_ref(), &self.inner.cwd);
get_or_create_file(&mut by_path, &normalized)?.content = content;
get_or_create_file(&mut by_path, &normalized)?.content = content.to_string();
Ok(())
}
pub fn remove_file(&self, path: impl AsRef<FileSystemPath>) -> Result<()> {
let mut by_path = self.inner.by_path.write().unwrap();
let normalized = normalize_path(path.as_ref(), &self.inner.cwd);
match by_path.entry(normalized) {
std::collections::btree_map::Entry::Occupied(entry) => match entry.get() {
Entry::File(_) => {
entry.remove();
Ok(())
}
Entry::Directory(_) => Err(is_a_directory()),
},
std::collections::btree_map::Entry::Vacant(_) => Err(not_found()),
}
}
/// Sets the last modified timestamp of the file stored at `path` to now.
///
/// Creates a new file if the file at `path` doesn't exist.
@ -109,6 +129,38 @@ impl MemoryFileSystem {
create_dir_all(&mut by_path, &normalized)
}
/// Deletes the directory at `path`.
///
/// ## Errors
/// * If the directory is not empty
/// * The `path` is not a directory
/// * The `path` does not exist
pub fn remove_directory(&self, path: impl AsRef<FileSystemPath>) -> Result<()> {
let mut by_path = self.inner.by_path.write().unwrap();
let normalized = normalize_path(path.as_ref(), &self.inner.cwd);
// Test if the directory is empty
// Skip the directory path itself
for (maybe_child, _) in by_path.range(normalized.clone()..).skip(1) {
if maybe_child.starts_with(&normalized) {
return Err(directory_not_empty());
} else if !maybe_child.as_str().starts_with(normalized.as_str()) {
break;
}
}
match by_path.entry(normalized.clone()) {
std::collections::btree_map::Entry::Occupied(entry) => match entry.get() {
Entry::Directory(_) => {
entry.remove();
Ok(())
}
Entry::File(_) => Err(not_a_directory()),
},
std::collections::btree_map::Entry::Vacant(_) => Err(not_found()),
}
}
}
impl FileSystem for MemoryFileSystem {
@ -169,7 +221,7 @@ impl std::fmt::Debug for MemoryFileSystem {
}
struct MemoryFileSystemInner {
by_path: RwLock<FxHashMap<Utf8PathBuf, Entry>>,
by_path: RwLock<BTreeMap<Utf8PathBuf, Entry>>,
cwd: Utf8PathBuf,
}
@ -212,6 +264,10 @@ fn not_a_directory() -> std::io::Error {
std::io::Error::new(std::io::ErrorKind::Other, "Not a directory")
}
fn directory_not_empty() -> std::io::Error {
std::io::Error::new(std::io::ErrorKind::Other, "directory not empty")
}
/// Normalizes the path by removing `.` and `..` components and transform the path into an absolute path.
///
/// Adapted from https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61
@ -249,7 +305,7 @@ fn normalize_path(path: &FileSystemPath, cwd: &Utf8Path) -> Utf8PathBuf {
}
fn create_dir_all(
paths: &mut RwLockWriteGuard<FxHashMap<Utf8PathBuf, Entry>>,
paths: &mut RwLockWriteGuard<BTreeMap<Utf8PathBuf, Entry>>,
normalized: &Utf8Path,
) -> Result<()> {
let mut path = Utf8PathBuf::new();
@ -271,7 +327,7 @@ fn create_dir_all(
}
fn get_or_create_file<'a>(
paths: &'a mut RwLockWriteGuard<FxHashMap<Utf8PathBuf, Entry>>,
paths: &'a mut RwLockWriteGuard<BTreeMap<Utf8PathBuf, Entry>>,
normalized: &Utf8Path,
) -> Result<&'a mut File> {
if let Some(parent) = normalized.parent() {
@ -293,10 +349,11 @@ fn get_or_create_file<'a>(
#[cfg(test)]
mod tests {
use crate::file_system::{FileSystem, FileSystemPath, MemoryFileSystem, Result};
use std::io::ErrorKind;
use std::time::Duration;
use crate::file_system::{FileSystem, FileSystemPath, MemoryFileSystem, Result};
/// Creates a file system with the given files.
///
/// The content of all files will be empty.
@ -470,4 +527,94 @@ mod tests {
Ok(())
}
#[test]
fn remove_file() -> Result<()> {
let fs = with_files(["a/a.py", "b.py"]);
fs.remove_file("a/a.py")?;
assert!(!fs.exists(FileSystemPath::new("a/a.py")));
// It doesn't delete the enclosing directories
assert!(fs.exists(FileSystemPath::new("a")));
// It doesn't delete unrelated files.
assert!(fs.exists(FileSystemPath::new("b.py")));
Ok(())
}
#[test]
fn remove_non_existing_file() {
let fs = with_files(["b.py"]);
let error = fs.remove_file("a.py").unwrap_err();
assert_eq!(error.kind(), ErrorKind::NotFound);
}
#[test]
fn remove_file_that_is_a_directory() -> Result<()> {
let fs = MemoryFileSystem::new();
fs.create_directory_all("a")?;
let error = fs.remove_file("a").unwrap_err();
assert_eq!(error.kind(), ErrorKind::Other);
Ok(())
}
#[test]
fn remove_directory() -> Result<()> {
let fs = with_files(["b.py"]);
fs.create_directory_all("a")?;
fs.remove_directory("a")?;
assert!(!fs.exists(FileSystemPath::new("a")));
// It doesn't delete unrelated files.
assert!(fs.exists(FileSystemPath::new("b.py")));
Ok(())
}
#[test]
fn remove_non_empty_directory() {
let fs = with_files(["a/a.py"]);
let error = fs.remove_directory("a").unwrap_err();
assert_eq!(error.kind(), ErrorKind::Other);
}
#[test]
fn remove_directory_with_files_that_start_with_the_same_string() -> Result<()> {
let fs = with_files(["foo_bar.py", "foob.py"]);
fs.create_directory_all("foo")?;
fs.remove_directory("foo").unwrap();
assert!(!fs.exists(FileSystemPath::new("foo")));
assert!(fs.exists(FileSystemPath::new("foo_bar.py")));
assert!(fs.exists(FileSystemPath::new("foob.py")));
Ok(())
}
#[test]
fn remove_non_existing_directory() {
let fs = MemoryFileSystem::new();
let error = fs.remove_directory("a").unwrap_err();
assert_eq!(error.kind(), ErrorKind::NotFound);
}
#[test]
fn remove_directory_that_is_a_file() {
let fs = with_files(["a"]);
let error = fs.remove_directory("a").unwrap_err();
assert_eq!(error.kind(), ErrorKind::Other);
}
}

View file

@ -1,6 +1,7 @@
use crate::file_system::{FileSystem, FileSystemPath, FileType, Metadata, Result};
use filetime::FileTime;
use crate::file_system::{FileSystem, FileSystemPath, FileType, Metadata, Result};
pub struct OsFileSystem;
impl OsFileSystem {
@ -15,6 +16,10 @@ impl OsFileSystem {
fn permissions(_metadata: &std::fs::Metadata) -> Option<u32> {
None
}
pub fn snapshot(&self) -> Self {
Self
}
}
impl FileSystem for OsFileSystem {

View file

@ -54,7 +54,6 @@ impl std::fmt::Debug for SourceText {
#[cfg(test)]
mod tests {
use filetime::FileTime;
use salsa::EventKind;
use ruff_source_file::OneIndexed;
@ -80,7 +79,7 @@ mod tests {
db.file_system_mut()
.write_file(path, "x = 20".to_string())
.unwrap();
file.set_revision(&mut db).to(FileTime::now().into());
file.touch(&mut db);
assert_eq!(&*source_text(&db, file), "x = 20");

View file

@ -259,6 +259,41 @@ impl VfsFile {
db.vfs().read(db, path)
}
/// Refreshes the file metadata by querying the file system if needed.
/// TODO: The API should instead take all observed changes from the file system directly
/// and then apply the VfsFile status accordingly. But for now, this is sufficient.
pub fn touch_path(db: &mut dyn Db, path: &VfsPath) {
Self::touch_impl(db, path, None);
}
pub fn touch(self, db: &mut dyn Db) {
let path = self.path(db).clone();
Self::touch_impl(db, &path, Some(self));
}
/// Private method providing the implementation for [`Self::touch_path`] and [`Self::touch`].
fn touch_impl(db: &mut dyn Db, path: &VfsPath, file: Option<VfsFile>) {
match path {
VfsPath::FileSystem(path) => {
let metadata = db.file_system().metadata(path);
let (status, revision) = match metadata {
Ok(metadata) if metadata.file_type().is_file() => {
(FileStatus::Exists, metadata.revision())
}
_ => (FileStatus::Deleted, FileRevision::zero()),
};
let file = file.unwrap_or_else(|| db.vfs().file_system(db, path));
file.set_status(db).to(status);
file.set_revision(db).to(revision);
}
VfsPath::Vendored(_) => {
// Readonly, can never be out of date.
}
}
}
}
#[derive(Default, Debug)]

View file

@ -111,6 +111,7 @@ impl VfsPath {
/// Returns `Some` if the path is a file system path that points to a path on disk.
#[must_use]
#[inline]
pub fn into_file_system_path_buf(self) -> Option<FileSystemPathBuf> {
match self {
VfsPath::FileSystem(path) => Some(path),
@ -118,12 +119,38 @@ impl VfsPath {
}
}
#[must_use]
#[inline]
pub fn as_file_system_path(&self) -> Option<&FileSystemPath> {
match self {
VfsPath::FileSystem(path) => Some(path.as_path()),
VfsPath::Vendored(_) => None,
}
}
/// Returns `true` if the path is a file system path that points to a path on disk.
#[must_use]
#[inline]
pub const fn is_file_system_path(&self) -> bool {
matches!(self, VfsPath::FileSystem(_))
}
/// Returns `true` if the path is a vendored path.
#[must_use]
#[inline]
pub const fn is_vendored_path(&self) -> bool {
matches!(self, VfsPath::Vendored(_))
}
#[must_use]
#[inline]
pub fn as_vendored_path(&self) -> Option<&VendoredPath> {
match self {
VfsPath::Vendored(path) => Some(path.as_path()),
VfsPath::FileSystem(_) => None,
}
}
/// Yields the underlying [`str`] slice.
pub fn as_str(&self) -> &str {
match self {
@ -138,3 +165,84 @@ impl AsRef<str> for VfsPath {
self.as_str()
}
}
impl From<FileSystemPathBuf> for VfsPath {
fn from(value: FileSystemPathBuf) -> Self {
Self::FileSystem(value)
}
}
impl From<&FileSystemPath> for VfsPath {
fn from(value: &FileSystemPath) -> Self {
VfsPath::FileSystem(value.to_path_buf())
}
}
impl From<VendoredPathBuf> for VfsPath {
fn from(value: VendoredPathBuf) -> Self {
Self::Vendored(value)
}
}
impl From<&VendoredPath> for VfsPath {
fn from(value: &VendoredPath) -> Self {
Self::Vendored(value.to_path_buf())
}
}
impl PartialEq<FileSystemPath> for VfsPath {
#[inline]
fn eq(&self, other: &FileSystemPath) -> bool {
self.as_file_system_path()
.is_some_and(|self_path| self_path == other)
}
}
impl PartialEq<VfsPath> for FileSystemPath {
#[inline]
fn eq(&self, other: &VfsPath) -> bool {
other == self
}
}
impl PartialEq<FileSystemPathBuf> for VfsPath {
#[inline]
fn eq(&self, other: &FileSystemPathBuf) -> bool {
self == other.as_path()
}
}
impl PartialEq<VfsPath> for FileSystemPathBuf {
fn eq(&self, other: &VfsPath) -> bool {
other == self
}
}
impl PartialEq<VendoredPath> for VfsPath {
#[inline]
fn eq(&self, other: &VendoredPath) -> bool {
self.as_vendored_path()
.is_some_and(|self_path| self_path == other)
}
}
impl PartialEq<VfsPath> for VendoredPath {
#[inline]
fn eq(&self, other: &VfsPath) -> bool {
other == self
}
}
impl PartialEq<VendoredPathBuf> for VfsPath {
#[inline]
fn eq(&self, other: &VendoredPathBuf) -> bool {
other.as_path() == self
}
}
impl PartialEq<VfsPath> for VendoredPathBuf {
#[inline]
fn eq(&self, other: &VfsPath) -> bool {
other == self
}
}

View file

@ -10,9 +10,6 @@ documentation = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
[lib]
doctest = false
[dependencies]
ruff_db = { workspace = true }
ruff_index = { workspace = true }
@ -24,14 +21,17 @@ ruff_text_size = { workspace = true }
bitflags = { workspace = true }
is-macro = { workspace = true }
salsa = { workspace = true, optional = true }
smol_str = { workspace = true, optional = true }
tracing = { workspace = true, optional = true }
rustc-hash = { workspace = true }
[dev-dependencies]
anyhow = { workspace = true }
ruff_python_parser = { workspace = true }
tempfile = { workspace = true }
[lints]
workspace = true
[features]
red_knot = ["dep:salsa", "dep:tracing"]
red_knot = ["dep:salsa", "dep:smol_str", "dep:tracing"]

View file

@ -1,60 +1,101 @@
use crate::module::resolver::{
file_to_module, internal::ModuleNameIngredient, internal::ModuleResolverSearchPaths,
resolve_module_query,
};
use ruff_db::{Db as SourceDb, Upcast};
use salsa::DbWithJar;
// Salsa doesn't support a struct without fields, so allow the clippy lint for now.
#[allow(clippy::empty_structs_with_brackets)]
#[salsa::jar(db=Db)]
pub struct Jar();
pub struct Jar(
ModuleNameIngredient,
ModuleResolverSearchPaths,
resolve_module_query,
file_to_module,
);
/// Database giving access to semantic information about a Python program.
pub trait Db: SourceDb + DbWithJar<Jar> + Upcast<dyn SourceDb> {}
#[cfg(test)]
mod tests {
pub(crate) mod tests {
use super::{Db, Jar};
use ruff_db::file_system::{FileSystem, MemoryFileSystem};
use ruff_db::file_system::{FileSystem, MemoryFileSystem, OsFileSystem};
use ruff_db::vfs::Vfs;
use ruff_db::{Db as SourceDb, Jar as SourceJar, Upcast};
use salsa::DebugWithDb;
use std::sync::Arc;
#[salsa::db(Jar, SourceJar)]
pub(crate) struct TestDb {
storage: salsa::Storage<Self>,
vfs: Vfs,
file_system: MemoryFileSystem,
file_system: TestFileSystem,
events: std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>>,
}
impl TestDb {
#[allow(unused)]
pub(crate) fn new() -> Self {
Self {
storage: salsa::Storage::default(),
file_system: MemoryFileSystem::default(),
file_system: TestFileSystem::Memory(MemoryFileSystem::default()),
events: std::sync::Arc::default(),
vfs: Vfs::with_stubbed_vendored(),
}
}
#[allow(unused)]
/// Returns the memory file system.
///
/// ## Panics
/// If this test db isn't using a memory file system.
pub(crate) fn memory_file_system(&self) -> &MemoryFileSystem {
&self.file_system
if let TestFileSystem::Memory(fs) = &self.file_system {
fs
} else {
panic!("The test db is not using a memory file system");
}
}
/// Uses the real file system instead of the memory file system.
///
/// This useful for testing advanced file system features like permissions, symlinks, etc.
///
/// Note that any files written to the memory file system won't be copied over.
#[allow(unused)]
pub(crate) fn memory_file_system_mut(&mut self) -> &mut MemoryFileSystem {
&mut self.file_system
pub(crate) fn with_os_file_system(&mut self) {
self.file_system = TestFileSystem::Os(OsFileSystem);
}
#[allow(unused)]
pub(crate) fn vfs_mut(&mut self) -> &mut Vfs {
&mut self.vfs
}
/// Takes the salsa events.
///
/// ## Panics
/// If there are any pending salsa snapshots.
pub(crate) fn take_sale_events(&mut self) -> Vec<salsa::Event> {
let inner = Arc::get_mut(&mut self.events).expect("no pending salsa snapshots");
let events = inner.get_mut().unwrap();
std::mem::take(&mut *events)
}
/// Clears the salsa events.
///
/// ## Panics
/// If there are any pending salsa snapshots.
pub(crate) fn clear_salsa_events(&mut self) {
self.take_sale_events();
}
}
impl SourceDb for TestDb {
fn file_system(&self) -> &dyn FileSystem {
&self.file_system
match &self.file_system {
TestFileSystem::Memory(fs) => fs,
TestFileSystem::Os(fs) => fs,
}
}
fn vfs(&self) -> &Vfs {
@ -83,9 +124,18 @@ mod tests {
salsa::Snapshot::new(Self {
storage: self.storage.snapshot(),
vfs: self.vfs.snapshot(),
file_system: self.file_system.snapshot(),
file_system: match &self.file_system {
TestFileSystem::Memory(memory) => TestFileSystem::Memory(memory.snapshot()),
TestFileSystem::Os(fs) => TestFileSystem::Os(fs.snapshot()),
},
events: self.events.clone(),
})
}
}
enum TestFileSystem {
Memory(MemoryFileSystem),
#[allow(unused)]
Os(OsFileSystem),
}
}

View file

@ -7,6 +7,8 @@ mod db;
mod definition;
mod globals;
mod model;
#[cfg(feature = "red_knot")]
pub mod module;
mod nodes;
mod reference;
mod scope;

View file

@ -0,0 +1,332 @@
use std::fmt::Formatter;
use std::ops::Deref;
use std::sync::Arc;
use ruff_db::file_system::FileSystemPath;
use ruff_db::vfs::{VfsFile, VfsPath};
use ruff_python_stdlib::identifiers::is_identifier;
use crate::Db;
pub mod resolver;
/// A module name, e.g. `foo.bar`.
///
/// Always normalized to the absolute form (never a relative module name, i.e., never `.foo`).
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct ModuleName(smol_str::SmolStr);
impl ModuleName {
/// Creates a new module name for `name`. Returns `Some` if `name` is a valid, absolute
/// module name and `None` otherwise.
///
/// The module name is invalid if:
///
/// * The name is empty
/// * The name is relative
/// * The name ends with a `.`
/// * The name contains a sequence of multiple dots
/// * A component of a name (the part between two dots) isn't a valid python identifier.
#[inline]
pub fn new(name: &str) -> Option<Self> {
Self::new_from_smol(smol_str::SmolStr::new(name))
}
/// Creates a new module name for `name` where `name` is a static string.
/// Returns `Some` if `name` is a valid, absolute module name and `None` otherwise.
///
/// The module name is invalid if:
///
/// * The name is empty
/// * The name is relative
/// * The name ends with a `.`
/// * The name contains a sequence of multiple dots
/// * A component of a name (the part between two dots) isn't a valid python identifier.
///
/// ## Examples
///
/// ```
/// use ruff_python_semantic::module::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar").as_deref(), Some("foo.bar"));
/// assert_eq!(ModuleName::new_static(""), None);
/// assert_eq!(ModuleName::new_static("..foo"), None);
/// assert_eq!(ModuleName::new_static(".foo"), None);
/// assert_eq!(ModuleName::new_static("foo."), None);
/// assert_eq!(ModuleName::new_static("foo..bar"), None);
/// assert_eq!(ModuleName::new_static("2000"), None);
/// ```
#[inline]
pub fn new_static(name: &'static str) -> Option<Self> {
Self::new_from_smol(smol_str::SmolStr::new_static(name))
}
fn new_from_smol(name: smol_str::SmolStr) -> Option<Self> {
if name.is_empty() {
return None;
}
if name.split('.').all(is_identifier) {
Some(Self(name))
} else {
None
}
}
/// An iterator over the components of the module name:
///
/// # Examples
///
/// ```
/// use ruff_python_semantic::module::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().components().collect::<Vec<_>>(), vec!["foo", "bar", "baz"]);
/// ```
pub fn components(&self) -> impl DoubleEndedIterator<Item = &str> {
self.0.split('.')
}
/// The name of this module's immediate parent, if it has a parent.
///
/// # Examples
///
/// ```
/// use ruff_python_semantic::module::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar").unwrap().parent(), Some(ModuleName::new_static("foo").unwrap()));
/// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().parent(), Some(ModuleName::new_static("foo.bar").unwrap()));
/// assert_eq!(ModuleName::new_static("root").unwrap().parent(), None);
/// ```
pub fn parent(&self) -> Option<ModuleName> {
let (parent, _) = self.0.rsplit_once('.')?;
Some(Self(smol_str::SmolStr::new(parent)))
}
/// Returns `true` if the name starts with `other`.
///
/// This is equivalent to checking if `self` is a sub-module of `other`.
///
/// # Examples
///
/// ```
/// use ruff_python_semantic::module::ModuleName;
///
/// assert!(ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap()));
///
/// assert!(!ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("bar").unwrap()));
/// assert!(!ModuleName::new_static("foo_bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap()));
/// ```
pub fn starts_with(&self, other: &ModuleName) -> bool {
let mut self_components = self.components();
let other_components = other.components();
for other_component in other_components {
if self_components.next() != Some(other_component) {
return false;
}
}
true
}
#[inline]
pub fn as_str(&self) -> &str {
&self.0
}
fn from_relative_path(path: &FileSystemPath) -> Option<Self> {
let path = if path.ends_with("__init__.py") || path.ends_with("__init__.pyi") {
path.parent()?
} else {
path
};
let name = if let Some(parent) = path.parent() {
let mut name = String::with_capacity(path.as_str().len());
for component in parent.components() {
name.push_str(component.as_os_str().to_str()?);
name.push('.');
}
// SAFETY: Unwrap is safe here or `parent` would have returned `None`.
name.push_str(path.file_stem().unwrap());
smol_str::SmolStr::from(name)
} else {
smol_str::SmolStr::new(path.file_stem()?)
};
Some(Self(name))
}
}
impl Deref for ModuleName {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl PartialEq<str> for ModuleName {
fn eq(&self, other: &str) -> bool {
self.as_str() == other
}
}
impl PartialEq<ModuleName> for str {
fn eq(&self, other: &ModuleName) -> bool {
self == other.as_str()
}
}
impl std::fmt::Display for ModuleName {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.0)
}
}
/// Representation of a Python module.
#[derive(Clone, PartialEq, Eq)]
pub struct Module {
inner: Arc<ModuleInner>,
}
impl Module {
/// The absolute name of the module (e.g. `foo.bar`)
pub fn name(&self) -> &ModuleName {
&self.inner.name
}
/// The file to the source code that defines this module
pub fn file(&self) -> VfsFile {
self.inner.file
}
/// The search path from which the module was resolved.
pub fn search_path(&self) -> &ModuleSearchPath {
&self.inner.search_path
}
/// Determine whether this module is a single-file module or a package
pub fn kind(&self) -> ModuleKind {
self.inner.kind
}
}
impl std::fmt::Debug for Module {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Module")
.field("name", &self.name())
.field("kind", &self.kind())
.field("file", &self.file())
.field("search_path", &self.search_path())
.finish()
}
}
impl salsa::DebugWithDb<dyn Db> for Module {
fn fmt(&self, f: &mut Formatter<'_>, db: &dyn Db) -> std::fmt::Result {
f.debug_struct("Module")
.field("name", &self.name())
.field("kind", &self.kind())
.field("file", &self.file().debug(db.upcast()))
.field("search_path", &self.search_path())
.finish()
}
}
#[derive(PartialEq, Eq)]
struct ModuleInner {
name: ModuleName,
kind: ModuleKind,
search_path: ModuleSearchPath,
file: VfsFile,
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ModuleKind {
/// A single-file module (e.g. `foo.py` or `foo.pyi`)
Module,
/// A python package (`foo/__init__.py` or `foo/__init__.pyi`)
Package,
}
/// A search path in which to search modules.
/// Corresponds to a path in [`sys.path`](https://docs.python.org/3/library/sys_path_init.html) at runtime.
///
/// Cloning a search path is cheap because it's an `Arc`.
#[derive(Clone, PartialEq, Eq)]
pub struct ModuleSearchPath {
inner: Arc<ModuleSearchPathInner>,
}
impl ModuleSearchPath {
pub fn new<P>(path: P, kind: ModuleSearchPathKind) -> Self
where
P: Into<VfsPath>,
{
Self {
inner: Arc::new(ModuleSearchPathInner {
path: path.into(),
kind,
}),
}
}
/// Determine whether this is a first-party, third-party or standard-library search path
pub fn kind(&self) -> ModuleSearchPathKind {
self.inner.kind
}
/// Return the location of the search path on the file system
pub fn path(&self) -> &VfsPath {
&self.inner.path
}
}
impl std::fmt::Debug for ModuleSearchPath {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ModuleSearchPath")
.field("path", &self.inner.path)
.field("kind", &self.kind())
.finish()
}
}
#[derive(Eq, PartialEq)]
struct ModuleSearchPathInner {
path: VfsPath,
kind: ModuleSearchPathKind,
}
/// Enumeration of the different kinds of search paths type checkers are expected to support.
///
/// N.B. Although we don't implement `Ord` for this enum, they are ordered in terms of the
/// priority that we want to give these modules when resolving them.
/// This is roughly [the order given in the typing spec], but typeshed's stubs
/// for the standard library are moved higher up to match Python's semantics at runtime.
///
/// [the order given in the typing spec]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, is_macro::Is)]
pub enum ModuleSearchPathKind {
/// "Extra" paths provided by the user in a config file, env var or CLI flag.
/// E.g. mypy's `MYPYPATH` env var, or pyright's `stubPath` configuration setting
Extra,
/// Files in the project we're directly being invoked on
FirstParty,
/// The `stdlib` directory of typeshed (either vendored or custom)
StandardLibrary,
/// Stubs or runtime modules installed in site-packages
SitePackagesThirdParty,
/// Vendored third-party stubs from typeshed
VendoredThirdParty,
}

View file

@ -0,0 +1,944 @@
use std::ops::Deref;
use std::sync::Arc;
use ruff_db::file_system::{FileSystem, FileSystemPath, FileSystemPathBuf};
use ruff_db::vfs::{system_path_to_file, vfs_path_to_file, VfsFile, VfsPath};
use crate::module::resolver::internal::ModuleResolverSearchPaths;
use crate::module::{
Module, ModuleInner, ModuleKind, ModuleName, ModuleSearchPath, ModuleSearchPathKind,
};
use crate::Db;
const TYPESHED_STDLIB_DIRECTORY: &str = "stdlib";
/// Configures the module search paths for the module resolver.
///
/// Must be called before calling any other module resolution functions.
pub fn set_module_resolution_settings(db: &mut dyn Db, config: ModuleResolutionSettings) {
// There's no concurrency issue here because we hold a `&mut dyn Db` reference. No other
// thread can mutate the `Db` while we're in this call, so using `try_get` to test if
// the settings have already been set is safe.
if let Some(existing) = ModuleResolverSearchPaths::try_get(db) {
existing
.set_search_paths(db)
.to(config.into_ordered_search_paths());
} else {
ModuleResolverSearchPaths::new(db, config.into_ordered_search_paths());
}
}
/// Resolves a module name to a module.
#[tracing::instrument(level = "debug", skip(db))]
pub fn resolve_module(db: &dyn Db, module_name: ModuleName) -> Option<Module> {
let interned_name = internal::ModuleNameIngredient::new(db, module_name);
resolve_module_query(db, interned_name)
}
/// Salsa query that resolves an interned [`ModuleNameIngredient`] to a module.
///
/// This query should not be called directly. Instead, use [`resolve_module`]. It only exists
/// because Salsa requires the module name to be an ingredient.
#[salsa::tracked]
pub(crate) fn resolve_module_query(
db: &dyn Db,
module_name: internal::ModuleNameIngredient,
) -> Option<Module> {
let name = module_name.name(db);
let (search_path, module_file, kind) = resolve_name(db, name)?;
let module = Module {
inner: Arc::new(ModuleInner {
name: name.clone(),
kind,
search_path,
file: module_file,
}),
};
Some(module)
}
/// Resolves the module for the given path.
///
/// Returns `None` if the path is not a module locatable via `sys.path`.
#[tracing::instrument(level = "debug", skip(db))]
pub fn path_to_module(db: &dyn Db, path: &VfsPath) -> Option<Module> {
// It's not entirely clear on first sight why this method calls `file_to_module` instead of
// it being the other way round, considering that the first thing that `file_to_module` does
// is to retrieve the file's path.
//
// The reason is that `file_to_module` is a tracked Salsa query and salsa queries require that
// all arguments are Salsa ingredients (something stored in Salsa). `Path`s aren't salsa ingredients but
// `VfsFile` is. So what we do here is to retrieve the `path`'s `VfsFile` so that we can make
// use of Salsa's caching and invalidation.
let file = vfs_path_to_file(db.upcast(), path)?;
file_to_module(db, file)
}
/// Resolves the module for the file with the given id.
///
/// Returns `None` if the file is not a module locatable via `sys.path`.
#[salsa::tracked]
#[tracing::instrument(level = "debug", skip(db))]
pub fn file_to_module(db: &dyn Db, file: VfsFile) -> Option<Module> {
let path = file.path(db.upcast());
let search_paths = module_search_paths(db);
let relative_path = search_paths
.iter()
.find_map(|root| match (root.path(), path) {
(VfsPath::FileSystem(root_path), VfsPath::FileSystem(path)) => {
let relative_path = path.strip_prefix(root_path).ok()?;
Some(relative_path)
}
(VfsPath::Vendored(_), VfsPath::Vendored(_)) => {
todo!("Add support for vendored modules")
}
(VfsPath::Vendored(_), VfsPath::FileSystem(_))
| (VfsPath::FileSystem(_), VfsPath::Vendored(_)) => None,
})?;
let module_name = ModuleName::from_relative_path(relative_path)?;
// Resolve the module name to see if Python would resolve the name to the same path.
// If it doesn't, then that means that multiple modules have the same name in different
// root paths, but that the module corresponding to `path` is in a lower priority search path,
// in which case we ignore it.
let module = resolve_module(db, module_name)?;
if file == module.file() {
Some(module)
} else {
// This path is for a module with the same name but with a different precedence. For example:
// ```
// src/foo.py
// src/foo/__init__.py
// ```
// The module name of `src/foo.py` is `foo`, but the module loaded by Python is `src/foo/__init__.py`.
// That means we need to ignore `src/foo.py` even though it resolves to the same module name.
None
}
}
/// Configures the [`ModuleSearchPath`]s that are used to resolve modules.
#[derive(Eq, PartialEq, Debug)]
pub struct ModuleResolutionSettings {
/// List of user-provided paths that should take first priority in the module resolution.
/// Examples in other type checkers are mypy's MYPYPATH environment variable,
/// or pyright's stubPath configuration setting.
pub extra_paths: Vec<FileSystemPathBuf>,
/// The root of the workspace, used for finding first-party modules.
pub workspace_root: FileSystemPathBuf,
/// The path to the user's `site-packages` directory, where third-party packages from ``PyPI`` are installed.
pub site_packages: Option<FileSystemPathBuf>,
/// Optional path to standard-library typeshed stubs.
/// Currently this has to be a directory that exists on disk.
///
/// (TODO: fall back to vendored stubs if no custom directory is provided.)
pub custom_typeshed: Option<FileSystemPathBuf>,
}
impl ModuleResolutionSettings {
/// Implementation of PEP 561's module resolution order
/// (with some small, deliberate, differences)
fn into_ordered_search_paths(self) -> OrderedSearchPaths {
let ModuleResolutionSettings {
extra_paths,
workspace_root,
site_packages,
custom_typeshed,
} = self;
let mut paths: Vec<_> = extra_paths
.into_iter()
.map(|path| ModuleSearchPath::new(path, ModuleSearchPathKind::Extra))
.collect();
paths.push(ModuleSearchPath::new(
workspace_root,
ModuleSearchPathKind::FirstParty,
));
// TODO fallback to vendored typeshed stubs if no custom typeshed directory is provided by the user
if let Some(custom_typeshed) = custom_typeshed {
paths.push(ModuleSearchPath::new(
custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY),
ModuleSearchPathKind::StandardLibrary,
));
}
// TODO vendor typeshed's third-party stubs as well as the stdlib and fallback to them as a final step
if let Some(site_packages) = site_packages {
paths.push(ModuleSearchPath::new(
site_packages,
ModuleSearchPathKind::SitePackagesThirdParty,
));
}
OrderedSearchPaths(paths)
}
}
/// A resolved module resolution order, implementing PEP 561
/// (with some small, deliberate differences)
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub(crate) struct OrderedSearchPaths(Vec<ModuleSearchPath>);
impl Deref for OrderedSearchPaths {
type Target = [ModuleSearchPath];
fn deref(&self) -> &Self::Target {
&self.0
}
}
// The singleton methods generated by salsa are all `pub` instead of `pub(crate)` which triggers
// `unreachable_pub`. Work around this by creating a module and allow `unreachable_pub` for it.
// Salsa also generates uses to `_db` variables for `interned` which triggers `clippy::used_underscore_binding`. Suppress that too
// TODO(micha): Contribute a fix for this upstream where the singleton methods have the same visibility as the struct.
#[allow(unreachable_pub, clippy::used_underscore_binding)]
pub(crate) mod internal {
use crate::module::resolver::OrderedSearchPaths;
use crate::module::ModuleName;
#[salsa::input(singleton)]
pub(crate) struct ModuleResolverSearchPaths {
#[return_ref]
pub(super) search_paths: OrderedSearchPaths,
}
/// A thin wrapper around `ModuleName` to make it a Salsa ingredient.
///
/// This is needed because Salsa requires that all query arguments are salsa ingredients.
#[salsa::interned]
pub(crate) struct ModuleNameIngredient {
#[return_ref]
pub(super) name: ModuleName,
}
}
fn module_search_paths(db: &dyn Db) -> &[ModuleSearchPath] {
ModuleResolverSearchPaths::get(db).search_paths(db)
}
/// Given a module name and a list of search paths in which to lookup modules,
/// attempt to resolve the module name
fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(ModuleSearchPath, VfsFile, ModuleKind)> {
let search_paths = module_search_paths(db);
for search_path in search_paths {
let mut components = name.components();
let module_name = components.next_back()?;
let VfsPath::FileSystem(fs_search_path) = search_path.path() else {
todo!("Vendored search paths are not yet supported");
};
match resolve_package(db.file_system(), fs_search_path, components) {
Ok(resolved_package) => {
let mut package_path = resolved_package.path;
package_path.push(module_name);
// Must be a `__init__.pyi` or `__init__.py` or it isn't a package.
let kind = if db.file_system().is_directory(&package_path) {
package_path.push("__init__");
ModuleKind::Package
} else {
ModuleKind::Module
};
// TODO Implement full https://peps.python.org/pep-0561/#type-checker-module-resolution-order resolution
let stub = package_path.with_extension("pyi");
if let Some(stub) = system_path_to_file(db.upcast(), &stub) {
return Some((search_path.clone(), stub, kind));
}
let module = package_path.with_extension("py");
if let Some(module) = system_path_to_file(db.upcast(), &module) {
return Some((search_path.clone(), module, kind));
}
// For regular packages, don't search the next search path. All files of that
// package must be in the same location
if resolved_package.kind.is_regular_package() {
return None;
}
}
Err(parent_kind) => {
if parent_kind.is_regular_package() {
// For regular packages, don't search the next search path.
return None;
}
}
}
}
None
}
fn resolve_package<'a, I>(
fs: &dyn FileSystem,
module_search_path: &FileSystemPath,
components: I,
) -> Result<ResolvedPackage, PackageKind>
where
I: Iterator<Item = &'a str>,
{
let mut package_path = module_search_path.to_path_buf();
// `true` if inside a folder that is a namespace package (has no `__init__.py`).
// Namespace packages are special because they can be spread across multiple search paths.
// https://peps.python.org/pep-0420/
let mut in_namespace_package = false;
// `true` if resolving a sub-package. For example, `true` when resolving `bar` of `foo.bar`.
let mut in_sub_package = false;
// For `foo.bar.baz`, test that `foo` and `baz` both contain a `__init__.py`.
for folder in components {
package_path.push(folder);
let has_init_py = fs.is_file(&package_path.join("__init__.py"))
|| fs.is_file(&package_path.join("__init__.pyi"));
if has_init_py {
in_namespace_package = false;
} else if fs.is_directory(&package_path) {
// A directory without an `__init__.py` is a namespace package, continue with the next folder.
in_namespace_package = true;
} else if in_namespace_package {
// Package not found but it is part of a namespace package.
return Err(PackageKind::Namespace);
} else if in_sub_package {
// A regular sub package wasn't found.
return Err(PackageKind::Regular);
} else {
// We couldn't find `foo` for `foo.bar.baz`, search the next search path.
return Err(PackageKind::Root);
}
in_sub_package = true;
}
let kind = if in_namespace_package {
PackageKind::Namespace
} else if in_sub_package {
PackageKind::Regular
} else {
PackageKind::Root
};
Ok(ResolvedPackage {
kind,
path: package_path,
})
}
#[derive(Debug)]
struct ResolvedPackage {
path: FileSystemPathBuf,
kind: PackageKind,
}
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum PackageKind {
/// A root package or module. E.g. `foo` in `foo.bar.baz` or just `foo`.
Root,
/// A regular sub-package where the parent contains an `__init__.py`.
///
/// For example, `bar` in `foo.bar` when the `foo` directory contains an `__init__.py`.
Regular,
/// A sub-package in a namespace package. A namespace package is a package without an `__init__.py`.
///
/// For example, `bar` in `foo.bar` if the `foo` directory contains no `__init__.py`.
Namespace,
}
impl PackageKind {
const fn is_regular_package(self) -> bool {
matches!(self, PackageKind::Regular)
}
}
#[cfg(test)]
mod tests {
use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf};
use ruff_db::vfs::{system_path_to_file, VfsFile, VfsPath};
use crate::db::tests::TestDb;
use crate::module::{ModuleKind, ModuleName};
use super::{
path_to_module, resolve_module, set_module_resolution_settings, ModuleResolutionSettings,
TYPESHED_STDLIB_DIRECTORY,
};
struct TestCase {
db: TestDb,
src: FileSystemPathBuf,
custom_typeshed: FileSystemPathBuf,
site_packages: FileSystemPathBuf,
}
fn create_resolver() -> std::io::Result<TestCase> {
let mut db = TestDb::new();
let src = FileSystemPath::new("src").to_path_buf();
let site_packages = FileSystemPath::new("site_packages").to_path_buf();
let custom_typeshed = FileSystemPath::new("typeshed").to_path_buf();
let fs = db.memory_file_system();
fs.create_directory_all(&src)?;
fs.create_directory_all(&site_packages)?;
fs.create_directory_all(&custom_typeshed)?;
let settings = ModuleResolutionSettings {
extra_paths: vec![],
workspace_root: src.clone(),
site_packages: Some(site_packages.clone()),
custom_typeshed: Some(custom_typeshed.clone()),
};
set_module_resolution_settings(&mut db, settings);
Ok(TestCase {
db,
src,
custom_typeshed,
site_packages,
})
}
#[test]
fn first_party_module() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo_module_name = ModuleName::new_static("foo").unwrap();
let foo_path = src.join("foo.py");
db.memory_file_system()
.write_file(&foo_path, "print('Hello, world!')")?;
let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap();
assert_eq!(
Some(&foo_module),
resolve_module(&db, foo_module_name.clone()).as_ref()
);
assert_eq!("foo", foo_module.name());
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(ModuleKind::Module, foo_module.kind());
assert_eq!(&foo_path, foo_module.file().path(&db));
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_path))
);
Ok(())
}
#[test]
fn stdlib() -> anyhow::Result<()> {
let TestCase {
db,
custom_typeshed,
..
} = create_resolver()?;
let stdlib_dir = custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY);
let functools_path = stdlib_dir.join("functools.py");
db.memory_file_system()
.write_file(&functools_path, "def update_wrapper(): ...")?;
let functools_module_name = ModuleName::new_static("functools").unwrap();
let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap();
assert_eq!(
Some(&functools_module),
resolve_module(&db, functools_module_name).as_ref()
);
assert_eq!(&stdlib_dir, functools_module.search_path().path());
assert_eq!(ModuleKind::Module, functools_module.kind());
assert_eq!(&functools_path.clone(), functools_module.file().path(&db));
assert_eq!(
Some(functools_module),
path_to_module(&db, &VfsPath::FileSystem(functools_path))
);
Ok(())
}
#[test]
fn first_party_precedence_over_stdlib() -> anyhow::Result<()> {
let TestCase {
db,
src,
custom_typeshed,
..
} = create_resolver()?;
let stdlib_dir = custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY);
let stdlib_functools_path = stdlib_dir.join("functools.py");
let first_party_functools_path = src.join("functools.py");
db.memory_file_system().write_files([
(&stdlib_functools_path, "def update_wrapper(): ..."),
(&first_party_functools_path, "def update_wrapper(): ..."),
])?;
let functools_module_name = ModuleName::new_static("functools").unwrap();
let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap();
assert_eq!(
Some(&functools_module),
resolve_module(&db, functools_module_name).as_ref()
);
assert_eq!(&src, functools_module.search_path().path());
assert_eq!(ModuleKind::Module, functools_module.kind());
assert_eq!(
&first_party_functools_path.clone(),
functools_module.file().path(&db)
);
assert_eq!(
Some(functools_module),
path_to_module(&db, &VfsPath::FileSystem(first_party_functools_path))
);
Ok(())
}
// TODO: Port typeshed test case. Porting isn't possible at the moment because the vendored zip
// is part of the red knot crate
// #[test]
// fn typeshed_zip_created_at_build_time() -> anyhow::Result<()> {
// // The file path here is hardcoded in this crate's `build.rs` script.
// // Luckily this crate will fail to build if this file isn't available at build time.
// const TYPESHED_ZIP_BYTES: &[u8] =
// include_bytes!(concat!(env!("OUT_DIR"), "/zipped_typeshed.zip"));
// assert!(!TYPESHED_ZIP_BYTES.is_empty());
// let mut typeshed_zip_archive = ZipArchive::new(Cursor::new(TYPESHED_ZIP_BYTES))?;
//
// let path_to_functools = Path::new("stdlib").join("functools.pyi");
// let mut functools_module_stub = typeshed_zip_archive
// .by_name(path_to_functools.to_str().unwrap())
// .unwrap();
// assert!(functools_module_stub.is_file());
//
// let mut functools_module_stub_source = String::new();
// functools_module_stub.read_to_string(&mut functools_module_stub_source)?;
//
// assert!(functools_module_stub_source.contains("def update_wrapper("));
// Ok(())
// }
#[test]
fn resolve_package() -> anyhow::Result<()> {
let TestCase { src, db, .. } = create_resolver()?;
let foo_dir = src.join("foo");
let foo_path = foo_dir.join("__init__.py");
db.memory_file_system()
.write_file(&foo_path, "print('Hello, world!')")?;
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!("foo", foo_module.name());
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo_path, foo_module.file().path(&db));
assert_eq!(
Some(&foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_path)).as_ref()
);
// Resolving by directory doesn't resolve to the init file.
assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_dir)));
Ok(())
}
#[test]
fn package_priority_over_module() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo_dir = src.join("foo");
let foo_init = foo_dir.join("__init__.py");
db.memory_file_system()
.write_file(&foo_init, "print('Hello, world!')")?;
let foo_py = src.join("foo.py");
db.memory_file_system()
.write_file(&foo_py, "print('Hello, world!')")?;
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo_init, foo_module.file().path(&db));
assert_eq!(ModuleKind::Package, foo_module.kind());
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_init))
);
assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_py)));
Ok(())
}
#[test]
fn typing_stub_over_module() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo_stub = src.join("foo.pyi");
let foo_py = src.join("foo.py");
db.memory_file_system()
.write_files([(&foo_stub, "x: int"), (&foo_py, "print('Hello, world!')")])?;
let foo = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!(&src, foo.search_path().path());
assert_eq!(&foo_stub, foo.file().path(&db));
assert_eq!(
Some(foo),
path_to_module(&db, &VfsPath::FileSystem(foo_stub))
);
assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_py)));
Ok(())
}
#[test]
fn sub_packages() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo = src.join("foo");
let bar = foo.join("bar");
let baz = bar.join("baz.py");
db.memory_file_system().write_files([
(&foo.join("__init__.py"), ""),
(&bar.join("__init__.py"), ""),
(&baz, "print('Hello, world!')"),
])?;
let baz_module =
resolve_module(&db, ModuleName::new_static("foo.bar.baz").unwrap()).unwrap();
assert_eq!(&src, baz_module.search_path().path());
assert_eq!(&baz, baz_module.file().path(&db));
assert_eq!(
Some(baz_module),
path_to_module(&db, &VfsPath::FileSystem(baz))
);
Ok(())
}
#[test]
fn namespace_package() -> anyhow::Result<()> {
let TestCase {
db,
src,
site_packages,
..
} = create_resolver()?;
// From [PEP420](https://peps.python.org/pep-0420/#nested-namespace-packages).
// But uses `src` for `project1` and `site_packages2` for `project2`.
// ```
// src
// parent
// child
// one.py
// site_packages
// parent
// child
// two.py
// ```
let parent1 = src.join("parent");
let child1 = parent1.join("child");
let one = child1.join("one.py");
let parent2 = site_packages.join("parent");
let child2 = parent2.join("child");
let two = child2.join("two.py");
db.memory_file_system().write_files([
(&one, "print('Hello, world!')"),
(&two, "print('Hello, world!')"),
])?;
let one_module =
resolve_module(&db, ModuleName::new_static("parent.child.one").unwrap()).unwrap();
assert_eq!(
Some(one_module),
path_to_module(&db, &VfsPath::FileSystem(one))
);
let two_module =
resolve_module(&db, ModuleName::new_static("parent.child.two").unwrap()).unwrap();
assert_eq!(
Some(two_module),
path_to_module(&db, &VfsPath::FileSystem(two))
);
Ok(())
}
#[test]
fn regular_package_in_namespace_package() -> anyhow::Result<()> {
let TestCase {
db,
src,
site_packages,
..
} = create_resolver()?;
// Adopted test case from the [PEP420 examples](https://peps.python.org/pep-0420/#nested-namespace-packages).
// The `src/parent/child` package is a regular package. Therefore, `site_packages/parent/child/two.py` should not be resolved.
// ```
// src
// parent
// child
// one.py
// site_packages
// parent
// child
// two.py
// ```
let parent1 = src.join("parent");
let child1 = parent1.join("child");
let one = child1.join("one.py");
let parent2 = site_packages.join("parent");
let child2 = parent2.join("child");
let two = child2.join("two.py");
db.memory_file_system().write_files([
(&child1.join("__init__.py"), "print('Hello, world!')"),
(&one, "print('Hello, world!')"),
(&two, "print('Hello, world!')"),
])?;
let one_module =
resolve_module(&db, ModuleName::new_static("parent.child.one").unwrap()).unwrap();
assert_eq!(
Some(one_module),
path_to_module(&db, &VfsPath::FileSystem(one))
);
assert_eq!(
None,
resolve_module(&db, ModuleName::new_static("parent.child.two").unwrap())
);
Ok(())
}
#[test]
fn module_search_path_priority() -> anyhow::Result<()> {
let TestCase {
db,
src,
site_packages,
..
} = create_resolver()?;
let foo_src = src.join("foo.py");
let foo_site_packages = site_packages.join("foo.py");
db.memory_file_system()
.write_files([(&foo_src, ""), (&foo_site_packages, "")])?;
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo_src, foo_module.file().path(&db));
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_src))
);
assert_eq!(
None,
path_to_module(&db, &VfsPath::FileSystem(foo_site_packages))
);
Ok(())
}
#[test]
#[cfg(target_family = "unix")]
fn symlink() -> anyhow::Result<()> {
let TestCase {
mut db,
src,
site_packages,
custom_typeshed,
} = create_resolver()?;
db.with_os_file_system();
let temp_dir = tempfile::tempdir()?;
let root = FileSystemPath::from_std_path(temp_dir.path()).unwrap();
let src = root.join(src);
let site_packages = root.join(site_packages);
let custom_typeshed = root.join(custom_typeshed);
let foo = src.join("foo.py");
let bar = src.join("bar.py");
std::fs::create_dir_all(src.as_std_path())?;
std::fs::create_dir_all(site_packages.as_std_path())?;
std::fs::create_dir_all(custom_typeshed.as_std_path())?;
std::fs::write(foo.as_std_path(), "")?;
std::os::unix::fs::symlink(foo.as_std_path(), bar.as_std_path())?;
let settings = ModuleResolutionSettings {
extra_paths: vec![],
workspace_root: src.clone(),
site_packages: Some(site_packages),
custom_typeshed: Some(custom_typeshed),
};
set_module_resolution_settings(&mut db, settings);
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
let bar_module = resolve_module(&db, ModuleName::new_static("bar").unwrap()).unwrap();
assert_ne!(foo_module, bar_module);
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo, foo_module.file().path(&db));
// `foo` and `bar` shouldn't resolve to the same file
assert_eq!(&src, bar_module.search_path().path());
assert_eq!(&bar, bar_module.file().path(&db));
assert_eq!(&foo, foo_module.file().path(&db));
assert_ne!(&foo_module, &bar_module);
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo))
);
assert_eq!(
Some(bar_module),
path_to_module(&db, &VfsPath::FileSystem(bar))
);
Ok(())
}
#[test]
fn deleting_an_unrealted_file_doesnt_change_module_resolution() -> anyhow::Result<()> {
let TestCase { mut db, src, .. } = create_resolver()?;
let foo_path = src.join("foo.py");
let bar_path = src.join("bar.py");
db.memory_file_system()
.write_files([(&foo_path, "x = 1"), (&bar_path, "y = 2")])?;
let foo_module_name = ModuleName::new_static("foo").unwrap();
let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap();
let bar = system_path_to_file(&db, &bar_path).expect("bar.py to exist");
db.clear_salsa_events();
// Delete `bar.py`
db.memory_file_system().remove_file(&bar_path)?;
bar.touch(&mut db);
// Re-query the foo module. The foo module should still be cached because `bar.py` isn't relevant
// for resolving `foo`.
let foo_module2 = resolve_module(&db, foo_module_name);
assert!(!db
.take_sale_events()
.iter()
.any(|event| { matches!(event.kind, salsa::EventKind::WillExecute { .. }) }));
assert_eq!(Some(foo_module), foo_module2);
Ok(())
}
#[test]
fn adding_a_file_on_which_the_module_resolution_depends_on_invalidates_the_query(
) -> anyhow::Result<()> {
let TestCase { mut db, src, .. } = create_resolver()?;
let foo_path = src.join("foo.py");
let foo_module_name = ModuleName::new_static("foo").unwrap();
assert_eq!(resolve_module(&db, foo_module_name.clone()), None);
// Now write the foo file
db.memory_file_system().write_file(&foo_path, "x = 1")?;
VfsFile::touch_path(&mut db, &VfsPath::FileSystem(foo_path.clone()));
let foo_file = system_path_to_file(&db, &foo_path).expect("foo.py to exist");
let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve");
assert_eq!(foo_file, foo_module.file());
Ok(())
}
#[test]
fn removing_a_file_that_the_module_resolution_depends_on_invalidates_the_query(
) -> anyhow::Result<()> {
let TestCase { mut db, src, .. } = create_resolver()?;
let foo_path = src.join("foo.py");
let foo_init_path = src.join("foo/__init__.py");
db.memory_file_system()
.write_files([(&foo_path, "x = 1"), (&foo_init_path, "x = 2")])?;
let foo_module_name = ModuleName::new_static("foo").unwrap();
let foo_module = resolve_module(&db, foo_module_name.clone()).expect("foo module to exist");
assert_eq!(&foo_init_path, foo_module.file().path(&db));
// Delete `foo/__init__.py` and the `foo` folder. `foo` should now resolve to `foo.py`
db.memory_file_system().remove_file(&foo_init_path)?;
db.memory_file_system()
.remove_directory(foo_init_path.parent().unwrap())?;
VfsFile::touch_path(&mut db, &VfsPath::FileSystem(foo_init_path.clone()));
let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve");
assert_eq!(&foo_path, foo_module.file().path(&db));
Ok(())
}
}