Port Pyright's import resolver to Rust (#5381)

## Summary

This PR contains the first step towards enabling robust first-party,
third-party, and standard library import resolution in Ruff (including
support for `typeshed`, stub files, native modules, etc.) by porting
Pyright's import resolver to Rust.

The strategy taken here was to start with a more-or-less direct port of
Pyright's TypeScript resolver. The code is intentionally similar,
and the test suite is effectively a superset of Pyright's test suite for
its own resolver. Due to the nature of the port, the code is very, very
non-idiomatic for Rust. The code is also entirely unused outside of the
test suite, and no effort has been made to integrate it with the rest of
the codebase.

Future work will include:

- Refactoring the code (now that it works) to match Rust and Ruff
idioms.
- Further testing, in practice, to ensure that the resolver can resolve
imports in a complex project, when provided with a virtual environment
path.
- Caching, to minimize filesystem lookups and redundant resolutions.
- Integration into Ruff itself (use Ruff's existing settings, find rules
that can make use of robust resolution, etc.)
This commit is contained in:
Charlie Marsh 2023-06-27 12:15:07 -04:00 committed by GitHub
parent 502e15585d
commit 1ed227a1e0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 2343 additions and 1 deletions

View file

@ -0,0 +1,21 @@
[package]
name = "ruff_python_resolver"
version = "0.0.0"
description = "A Python module resolver for Ruff"
publish = false
authors = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
homepage = { workspace = true }
documentation = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
[lib]
[dependencies]
log = { workspace = true }
[dev-dependencies]
env_logger = "0.10.0"
tempfile = "3.6.0"

View file

@ -0,0 +1,26 @@
use std::path::PathBuf;
use crate::python_version::PythonVersion;
/// Settings that tell the resolver where to look for the interpreter,
/// `typeshed`, stub files, and virtual environments.
///
/// Every field is optional; an unset field is `None`.
pub(crate) struct Config {
/// Path to the Python interpreter.
pub(crate) python_path: Option<PathBuf>,
/// Path to use for typeshed definitions.
pub(crate) typeshed_path: Option<PathBuf>,
/// Path to custom typings (stub) modules.
pub(crate) stub_path: Option<PathBuf>,
/// Path to a directory containing one or more virtual environment
/// directories. This is used in conjunction with the "venv" name in
/// the config file to identify the python environment used for resolving
/// third-party modules.
pub(crate) venv_path: Option<PathBuf>,
/// Default virtual environment. The search-path lookup joins this onto
/// `venv_path` to locate the environment's directory.
pub(crate) venv: Option<PathBuf>,
/// Default Python version. Can be overridden by `ExecutionEnvironment`.
pub(crate) default_python_version: Option<PythonVersion>,
}

View file

@ -0,0 +1,19 @@
use std::path::PathBuf;
use crate::python_platform::PythonPlatform;
use crate::python_version::PythonVersion;
/// Describes a single execution environment: its root directory, the Python
/// version and platform it targets, and any extra search paths.
#[derive(Debug)]
pub(crate) struct ExecutionEnvironment {
/// The root directory of the execution environment.
pub(crate) root: PathBuf,
/// The Python version of the execution environment.
pub(crate) python_version: PythonVersion,
/// The Python platform of the execution environment.
pub(crate) python_platform: PythonPlatform,
/// The extra search paths of the execution environment.
pub(crate) extra_paths: Vec<PathBuf>,
}

View file

@ -0,0 +1,43 @@
//! Expose the host environment to the resolver.
use std::path::PathBuf;
use crate::python_platform::PythonPlatform;
use crate::python_version::PythonVersion;
/// A trait to expose the host environment to the resolver.
///
/// Implementors supply the search paths, Python version, and platform that
/// the resolver cannot determine from its configuration alone.
pub(crate) trait Host {
/// The search paths to use when resolving Python modules.
///
/// Returned by value, so each call hands the caller its own `Vec`.
fn python_search_paths(&self) -> Vec<PathBuf>;
/// The Python version to use when resolving Python modules.
fn python_version(&self) -> PythonVersion;
/// The OS platform to use when resolving Python modules.
fn python_platform(&self) -> PythonPlatform;
}
/// A host that exposes a fixed set of search paths.
///
/// The Python version and platform it reports are hard-coded (see the `Host`
/// implementation below); only the search paths are configurable.
pub(crate) struct StaticHost {
/// The search paths handed back verbatim by `python_search_paths`.
search_paths: Vec<PathBuf>,
}
impl StaticHost {
/// Create a host that always reports the given search paths.
pub(crate) fn new(search_paths: Vec<PathBuf>) -> Self {
Self { search_paths }
}
}
impl Host for StaticHost {
/// Returns a clone of the search paths supplied at construction time.
fn python_search_paths(&self) -> Vec<PathBuf> {
self.search_paths.clone()
}
/// Always reports `PythonVersion::Py312`, regardless of the actual host.
fn python_version(&self) -> PythonVersion {
PythonVersion::Py312
}
/// Always reports `PythonPlatform::Darwin`, regardless of the actual host.
fn python_platform(&self) -> PythonPlatform {
PythonPlatform::Darwin
}
}

View file

@ -0,0 +1,150 @@
use std::collections::HashMap;
use std::ffi::OsStr;
use std::fs;
use std::path::{Path, PathBuf};
use crate::{native_module, py_typed};
/// A module discovered by scanning a directory, rather than by resolving an
/// explicit import statement.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct ImplicitImport {
/// Whether the implicit import is a stub file.
pub(crate) is_stub_file: bool,
/// Whether the implicit import is a native module.
pub(crate) is_native_lib: bool,
/// The name of the implicit import (e.g., `os`).
pub(crate) name: String,
/// The path to the implicit import.
pub(crate) path: PathBuf,
/// The `py.typed` information for the implicit import, if any.
pub(crate) py_typed: Option<py_typed::PyTypedInfo>,
}
/// Find the "implicit" imports within the namespace package at the given path.
///
/// Scans the immediate children of `dir_path` and returns a map from module name to
/// [`ImplicitImport`] for:
///
/// - `.py` and `.pyi` files (`foo.py` is named `foo`);
/// - native extension modules that have no same-named `.py`/`.pyi` sibling
///   (`foo.so` and `foo.abi3.so` are both named `foo`);
/// - directories containing an `__init__.py` or `__init__.pyi`, named after the
///   directory itself.
///
/// Entries whose path appears in `exclusions` are skipped. An unreadable `dir_path`
/// yields an empty map, and unreadable entries are ignored.
pub(crate) fn find(dir_path: &Path, exclusions: &[&Path]) -> HashMap<String, ImplicitImport> {
    let mut implicit_imports = HashMap::new();

    // Enumerate all files and directories in the path, expanding links.
    let Ok(entries) = fs::read_dir(dir_path) else {
        return implicit_imports;
    };

    for entry in entries.flatten() {
        let path = entry.path();
        if exclusions.contains(&path.as_path()) {
            continue;
        }

        let Ok(file_type) = entry.file_type() else {
            continue;
        };

        // TODO(charlie): Support symlinks.
        if file_type.is_file() {
            // Add implicit file-based modules.
            let Some(extension) = path.extension() else {
                continue;
            };

            // Checks for `<path><suffix>` (e.g., `foo.abi3.so.py`) without requiring the
            // path to be valid UTF-8.
            let has_source_sibling = |suffix: &str| {
                let mut sibling = path.as_os_str().to_os_string();
                sibling.push(suffix);
                Path::new(&sibling).exists()
            };

            let (file_stem, is_native_lib) = if extension == "py" || extension == "pyi" {
                // E.g., `foo.py` becomes `foo`.
                (path.file_stem().and_then(OsStr::to_str), false)
            } else if native_module::is_native_module_file_extension(extension)
                && !has_source_sibling(".py")
                && !has_source_sibling(".pyi")
            {
                // E.g., `foo.abi3.so` becomes `foo`. A stem without any further dot
                // (e.g., `foo.so`) is already the module name, so keep it as-is rather
                // than discarding the module.
                let file_stem = path
                    .file_stem()
                    .and_then(OsStr::to_str)
                    .map(|file_stem| {
                        file_stem
                            .split_once('.')
                            .map_or(file_stem, |(module_name, _)| module_name)
                    });
                (file_stem, true)
            } else {
                continue;
            };

            let Some(name) = file_stem else {
                continue;
            };

            let implicit_import = ImplicitImport {
                is_stub_file: extension == "pyi",
                is_native_lib,
                name: name.to_string(),
                path: path.clone(),
                py_typed: None,
            };

            // Always prefer stub files over non-stub files.
            if implicit_imports
                .get(&implicit_import.name)
                .map_or(true, |existing| !existing.is_stub_file)
            {
                implicit_imports.insert(implicit_import.name.clone(), implicit_import);
            }
        } else if file_type.is_dir() {
            // Add implicit directory-based modules.
            let py_file_path = path.join("__init__.py");
            let pyi_file_path = path.join("__init__.pyi");

            let (init_path, is_stub_file) = if py_file_path.exists() {
                (py_file_path, false)
            } else if pyi_file_path.exists() {
                (pyi_file_path, true)
            } else {
                continue;
            };

            // The module is named after the package directory (e.g., `foo` for
            // `foo/__init__.py`), not after the `__init__` file inside it.
            let Some(name) = path.file_name().and_then(OsStr::to_str) else {
                continue;
            };

            let implicit_import = ImplicitImport {
                is_stub_file,
                is_native_lib: false,
                name: name.to_string(),
                // `py.typed` lives in the package directory, alongside `__init__.py[i]`.
                py_typed: py_typed::get_py_typed_info(&path),
                path: init_path,
            };
            implicit_imports.insert(implicit_import.name.clone(), implicit_import);
        }
    }

    implicit_imports
}
/// Narrow a map of implicit imports down to those named in `imported_symbols`.
///
/// Returns `None` when there is nothing to narrow: either input is empty, or every
/// implicit import was retained (so the narrowed map would match the input in size).
pub(crate) fn filter(
    implicit_imports: &HashMap<String, ImplicitImport>,
    imported_symbols: &[String],
) -> Option<HashMap<String, ImplicitImport>> {
    if implicit_imports.is_empty() || imported_symbols.is_empty() {
        return None;
    }

    let retained: HashMap<String, ImplicitImport> = implicit_imports
        .values()
        .filter(|candidate| imported_symbols.contains(&candidate.name))
        .map(|candidate| (candidate.name.clone(), candidate.clone()))
        .collect();

    // Only report a result if the filtering actually removed something.
    (retained.len() != implicit_imports.len()).then_some(retained)
}

View file

@ -0,0 +1,122 @@
//! Interface that describes the output of the import resolver.
use crate::implicit_imports::ImplicitImport;
use std::collections::HashMap;
use std::path::PathBuf;
use crate::py_typed::PyTypedInfo;
/// The outcome of resolving a single import, including where it was found
/// and what kind of file it resolved to.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(clippy::struct_excessive_bools)]
pub(crate) struct ImportResult {
/// Whether the import name was relative (e.g., ".foo").
pub(crate) is_relative: bool,
/// Whether the import was resolved to a file or module.
pub(crate) is_import_found: bool,
/// The path was partially resolved, but the specific submodule
/// defining the import was not found. For example, `foo.bar` was
/// not found, but `foo` was found.
pub(crate) is_partly_resolved: bool,
/// The import refers to a namespace package (i.e., a folder without
/// an `__init__.py[i]` file at the final level of resolution). By
/// convention, we insert empty `PathBuf` segments into the resolved
/// paths vector to indicate intermediary namespace packages.
pub(crate) is_namespace_package: bool,
/// The final resolved directory contains an `__init__.py[i]` file.
pub(crate) is_init_file_present: bool,
/// The import resolved to a stub (`.pyi`) file within a stub package.
pub(crate) is_stub_package: bool,
/// The import resolved to a built-in, local, or third-party module.
pub(crate) import_type: ImportType,
/// A vector of resolved absolute paths for each file in the module
/// name. Typically includes a sequence of `__init__.py` files, followed
/// by the Python file defining the import itself, though the exact
/// structure can vary. For example, namespace packages will be represented
/// by empty `PathBuf` segments in the vector.
///
/// For example, resolving `import foo.bar` might yield `./foo/__init__.py` and `./foo/bar.py`,
/// or `./foo/__init__.py` and `./foo/bar/__init__.py`.
pub(crate) resolved_paths: Vec<PathBuf>,
/// The search path used to resolve the module.
pub(crate) search_path: Option<PathBuf>,
/// The resolved file is a type hint (i.e., a `.pyi` file), rather
/// than a Python (`.py`) file.
pub(crate) is_stub_file: bool,
/// The resolved file is a native library.
pub(crate) is_native_lib: bool,
/// The resolved file is a type hint (i.e., a `.pyi` file) from
/// `typeshed` in the standard library.
pub(crate) is_stdlib_typeshed_file: bool,
/// The resolved file is a type hint (i.e., a `.pyi` file) from
/// `typeshed` in third-party stubs.
pub(crate) is_third_party_typeshed_file: bool,
/// The resolved file is a type hint (i.e., a `.pyi` file) from
/// the configured typing directory.
pub(crate) is_local_typings_file: bool,
/// A map from file to resolved path, for all implicitly imported
/// modules that are part of a namespace package.
pub(crate) implicit_imports: HashMap<String, ImplicitImport>,
/// Any implicit imports whose symbols were explicitly imported (i.e., via
/// a `from x import y` statement).
pub(crate) filtered_implicit_imports: HashMap<String, ImplicitImport>,
/// If the import resolved to a type hint (i.e., a `.pyi` file), then
/// a non-type-hint resolution will be stored here.
pub(crate) non_stub_import_result: Option<Box<ImportResult>>,
/// Information extracted from the `py.typed` in the package used to
/// resolve the import, if any.
pub(crate) py_typed_info: Option<PyTypedInfo>,
/// The directory of the package, if any.
pub(crate) package_directory: Option<PathBuf>,
}
impl ImportResult {
/// An import result that indicates that the import was not found.
///
/// Every flag is `false`, every collection is empty, and every optional
/// field is `None`. `import_type` is set to `ImportType::Local`, since the
/// field is not optional and needs some value.
pub(crate) fn not_found() -> Self {
Self {
is_relative: false,
is_import_found: false,
is_partly_resolved: false,
is_namespace_package: false,
is_init_file_present: false,
is_stub_package: false,
import_type: ImportType::Local,
resolved_paths: vec![],
search_path: None,
is_stub_file: false,
is_native_lib: false,
is_stdlib_typeshed_file: false,
is_third_party_typeshed_file: false,
is_local_typings_file: false,
implicit_imports: HashMap::default(),
filtered_implicit_imports: HashMap::default(),
non_stub_import_result: None,
py_typed_info: None,
package_directory: None,
}
}
}
/// The category of module that an import resolved to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ImportType {
/// The import resolved to a built-in module.
BuiltIn,
/// The import resolved to a third-party module.
ThirdParty,
/// The import resolved to a local module. Also the value used by
/// `ImportResult::not_found`.
Local,
}

View file

@ -0,0 +1,16 @@
#![allow(dead_code)]
mod config;
mod execution_environment;
mod host;
mod implicit_imports;
mod import_result;
mod module_descriptor;
mod native_module;
mod py_typed;
mod python_platform;
mod python_version;
mod resolver;
mod search;
/// The name of the `site-packages` directory that the search-path logic looks
/// for inside a Python library directory.
pub(crate) const SITE_PACKAGES: &str = "site-packages";

View file

@ -0,0 +1,16 @@
/// Describes the module targeted by an import: the number of leading dots, the
/// dot-separated name segments, and the symbols imported from it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct ImportModuleDescriptor {
    /// The number of leading dots in the module name.
    pub(crate) leading_dots: usize,
    /// The segments of the module name, without the separating dots.
    pub(crate) name_parts: Vec<String>,
    /// The symbols imported from the module.
    pub(crate) imported_symbols: Vec<String>,
}

impl ImportModuleDescriptor {
    /// Render the module name: `leading_dots` dots followed by the name segments
    /// joined with `.`.
    pub(crate) fn name(&self) -> String {
        let mut rendered = ".".repeat(self.leading_dots);
        rendered.push_str(&self.name_parts.join("."));
        rendered
    }
}

View file

@ -0,0 +1,14 @@
//! Support for native Python extension modules.
use std::ffi::OsStr;
use std::path::Path;
/// Returns `true` if the given file extension is that of a native module.
///
/// The recognized extensions are `so`, `pyd`, and `dylib`; the comparison is exact
/// (case-sensitive, no leading dot).
pub(crate) fn is_native_module_file_extension(file_extension: &OsStr) -> bool {
    const NATIVE_EXTENSIONS: [&str; 3] = ["so", "pyd", "dylib"];
    NATIVE_EXTENSIONS
        .iter()
        .any(|candidate| file_extension == *candidate)
}
/// Returns `true` if the given file name is that of a native module.
///
/// Not yet implemented: both arguments are currently ignored, and calling this
/// function panics via `todo!()`.
pub(crate) fn is_native_module_file_name(_module_name: &Path, _file_name: &Path) -> bool {
todo!()
}

View file

@ -0,0 +1,40 @@
//! Support for [PEP 561] (`py.typed` files).
//!
//! [PEP 561]: https://peps.python.org/pep-0561/
use std::path::{Path, PathBuf};
/// What was learned from a package's `py.typed` marker file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct PyTypedInfo {
    /// The path to the `py.typed` file.
    py_typed_path: PathBuf,
    /// Whether the package is partially typed (as opposed to fully typed).
    is_partially_typed: bool,
}

/// Returns the `py.typed` information for the given directory, if any.
///
/// Yields `None` when `dir_path` has no `py.typed` file, when that file is 64 KiB or
/// larger, or when its metadata or contents cannot be read.
pub(crate) fn get_py_typed_info(dir_path: &Path) -> Option<PyTypedInfo> {
    /// Files at or above this size are assumed not to be genuine `py.typed` markers.
    const MAX_MARKER_LEN: u64 = 64 * 1024;

    let py_typed_path = dir_path.join("py.typed");
    if !py_typed_path.is_file() {
        return None;
    }

    // The marker should be tiny (typically empty), so check its size before reading it.
    if py_typed_path.metadata().ok()?.len() >= MAX_MARKER_LEN {
        return None;
    }

    // PEP 561 only says that a partial stub package "MUST include partial\n in a top
    // level py.typed file", so look for that marker with either line ending.
    let contents = std::fs::read_to_string(&py_typed_path).ok()?;
    let is_partially_typed = ["partial\n", "partial\r\n"]
        .iter()
        .any(|marker| contents.contains(*marker));

    Some(PyTypedInfo {
        py_typed_path,
        is_partially_typed,
    })
}

View file

@ -0,0 +1,7 @@
/// Enum to represent a Python platform.
///
/// This is the value reported by `Host::python_platform` and stored in
/// `ExecutionEnvironment::python_platform`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub(crate) enum PythonPlatform {
Darwin,
Linux,
Windows,
}

View file

@ -0,0 +1,24 @@
/// Enum to represent a Python version.
///
/// Covers Python 3.7 through 3.12.
#[derive(Debug, Copy, Clone)]
pub(crate) enum PythonVersion {
    Py37,
    Py38,
    Py39,
    Py310,
    Py311,
    Py312,
}

impl PythonVersion {
    /// The directory name (e.g., in a virtual environment) for this Python version.
    ///
    /// Always of the form `python3.X`.
    pub(crate) fn dir(self) -> &'static str {
        match self {
            Self::Py37 => "python3.7",
            Self::Py38 => "python3.8",
            Self::Py39 => "python3.9",
            Self::Py310 => "python3.10",
            Self::Py311 => "python3.11",
            Self::Py312 => "python3.12",
        }
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,282 @@
//! Determine the appropriate search paths for the Python environment.
use std::collections::HashMap;
use std::ffi::OsStr;
use std::fs;
use std::path::{Path, PathBuf};
use log::debug;
use crate::config::Config;
use crate::module_descriptor::ImportModuleDescriptor;
use crate::python_version::PythonVersion;
use crate::{host, SITE_PACKAGES};
/// Find the `site-packages` directory for the specified Python version.
///
/// Looks for `lib_path/site-packages` first. If that does not exist, scans `lib_path`
/// for `python3.X` directories (or symlinks to such directories) that contain a
/// `site-packages` directory, preferring the one that matches `python_version` and
/// otherwise falling back to the first one found.
///
/// Returns `None` if `lib_path` cannot be read or no candidate is found.
fn find_site_packages_path(
    lib_path: &Path,
    python_version: Option<PythonVersion>,
) -> Option<PathBuf> {
    if lib_path.is_dir() {
        debug!(
            "Found path `{}`; looking for site-packages",
            lib_path.display()
        );
    } else {
        debug!("Did not find `{}`", lib_path.display());
    }

    let site_packages_path = lib_path.join(SITE_PACKAGES);
    if site_packages_path.is_dir() {
        debug!("Found path `{}`", site_packages_path.display());
        return Some(site_packages_path);
    }

    debug!(
        "Did not find `{}`, so looking for Python subdirectory",
        site_packages_path.display()
    );

    // There's no `site-packages` directory in the library directory; look for a `python3.X`
    // directory instead.
    let candidate_dirs: Vec<PathBuf> = fs::read_dir(lib_path)
        .ok()?
        .filter_map(|entry| {
            let entry = entry.ok()?;
            let metadata = entry.metadata().ok()?;

            let dir_path = if metadata.file_type().is_dir() {
                entry.path()
            } else if metadata.file_type().is_symlink() {
                // `read_link` returns the target exactly as stored in the link, which may
                // be relative to the directory containing the link (i.e., `lib_path`),
                // not to the current working directory. `Path::join` leaves absolute
                // targets untouched.
                lib_path.join(fs::read_link(entry.path()).ok()?)
            } else {
                return None;
            };

            let is_python_dir = dir_path
                .file_name()
                .and_then(OsStr::to_str)?
                .starts_with("python3.");
            (is_python_dir && dir_path.join(SITE_PACKAGES).is_dir()).then_some(dir_path)
        })
        .collect();

    // If a `python3.X` directory does exist (and `3.X` matches the current Python version),
    // prefer it over any other Python directories.
    if let Some(python_version) = python_version {
        if let Some(preferred_dir) = candidate_dirs.iter().find(|dir| {
            dir.file_name()
                .and_then(OsStr::to_str)
                .map_or(false, |name| name == python_version.dir())
        }) {
            debug!("Found path `{}`", preferred_dir.display());
            return Some(preferred_dir.join(SITE_PACKAGES));
        }
    }

    // Fallback to the first `python3.X` directory that we found.
    let default_dir = candidate_dirs.first()?;
    debug!("Found path `{}`", default_dir.display());
    Some(default_dir.join(SITE_PACKAGES))
}
/// Collect the directories listed in the `*.pth` files found directly inside `parent_dir`.
///
/// Each non-empty line of a `.pth` file that is neither a comment (`#...`) nor an
/// `import` statement (`import` followed by whitespace) is interpreted as a path
/// relative to `parent_dir`; every such path that is an existing directory is returned.
///
/// Empty `.pth` files and files of 64 KiB or more are skipped, as are files that cannot
/// be read. If `parent_dir` itself cannot be read, no paths are returned.
fn get_paths_from_pth_files(parent_dir: &Path) -> Vec<PathBuf> {
    let Ok(entries) = fs::read_dir(parent_dir) else {
        return Vec::new();
    };

    entries
        .flatten()
        .filter(|entry| {
            // Collect all *.pth files.
            entry
                .file_type()
                .map_or(false, |file_type| file_type.is_file() || file_type.is_symlink())
        })
        .map(|entry| entry.path())
        .filter(|path| path.extension() == Some(OsStr::new("pth")))
        .filter(|path| {
            // Skip all files that are much larger than expected.
            path.metadata().map_or(false, |metadata| {
                let file_len = metadata.len();
                file_len > 0 && file_len < 64 * 1024
            })
        })
        .filter_map(|path| fs::read_to_string(path).ok())
        .flat_map(|data| {
            // A single `.pth` file may list any number of directories, one per line.
            data.lines()
                .map(str::trim)
                .filter(|line| {
                    // Only `import` followed by whitespace marks an executable line; a
                    // path that merely starts with those letters (e.g., `imports`) is
                    // still a path.
                    let is_import_statement = line
                        .strip_prefix("import")
                        .map_or(false, |rest| rest.starts_with(char::is_whitespace));
                    !line.is_empty() && !line.starts_with('#') && !is_import_statement
                })
                .map(|line| parent_dir.join(line))
                .filter(|pth_path| pth_path.is_dir())
                .collect::<Vec<_>>()
        })
        .collect()
}
/// Find the Python search paths for the given virtual environment.
///
/// When both `venv_path` and `venv` are configured, probes the environment's `lib`,
/// `Lib`, and `lib64` directories for `site-packages` directories, and returns them
/// (plus any directories referenced from their `.pth` files) sorted and de-duplicated.
/// If nothing is found that way, falls back to the search paths reported by the host.
pub(crate) fn find_python_search_paths<Host: host::Host>(
    config: &Config,
    host: &Host,
) -> Vec<PathBuf> {
    if let (Some(venv_path), Some(venv)) = (config.venv_path.as_ref(), config.venv.as_ref()) {
        let venv_root = venv_path.join(venv);
        let mut found_paths: Vec<PathBuf> = Vec::new();

        for lib_name in ["lib", "Lib", "lib64"] {
            let lib_path = venv_root.join(lib_name);
            let Some(site_packages_path) =
                find_site_packages_path(&lib_path, config.default_python_version)
            else {
                continue;
            };

            // Paths from `.pth` files come first, followed by `site-packages` itself.
            found_paths.extend(get_paths_from_pth_files(&site_packages_path));
            found_paths.push(site_packages_path);
        }

        if !found_paths.is_empty() {
            found_paths.sort();
            found_paths.dedup();

            debug!("Found the following `site-packages` dirs");
            for path in &found_paths {
                debug!(" {}", path.display());
            }

            return found_paths;
        }
    }

    // Fall back to the Python interpreter.
    host.python_search_paths()
}
/// Determine the relevant Python search paths.
///
/// Currently a thin wrapper around `find_python_search_paths`: the paths are
/// recomputed on every call.
fn get_python_search_paths<Host: host::Host>(config: &Config, host: &Host) -> Vec<PathBuf> {
// TODO(charlie): Cache search paths.
find_python_search_paths(config, host)
}
/// Determine the root of the `typeshed` directory.
///
/// A configured `typeshed_path` is used if (and only if) it is a directory; there is
/// no fallback when it is not. Without a configured path, the first Python search path
/// that contains a `typeshed` directory wins.
pub(crate) fn get_typeshed_root<Host: host::Host>(config: &Config, host: &Host) -> Option<PathBuf> {
    match config.typeshed_path.as_ref() {
        // The user specified a typeshed path; honor it only if it actually exists.
        Some(typeshed_path) => typeshed_path.is_dir().then(|| typeshed_path.clone()),
        // Otherwise, look in the Python search paths.
        None => get_python_search_paths(config, host)
            .into_iter()
            .map(|python_search_path| python_search_path.join("typeshed"))
            .find(|possible_typeshed_path| possible_typeshed_path.is_dir()),
    }
}
/// Format the expected `typeshed` subdirectory.
///
/// Appends `stdlib` to `typeshed_path` when `is_stdlib` is set, and `stubs` otherwise.
/// The result is not checked for existence.
fn format_typeshed_subdirectory(typeshed_path: &Path, is_stdlib: bool) -> PathBuf {
    let mut subdirectory = typeshed_path.to_path_buf();
    subdirectory.push(if is_stdlib { "stdlib" } else { "stubs" });
    subdirectory
}
/// Determine the current `typeshed` subdirectory.
///
/// Resolves the `typeshed` root, appends the standard-library or third-party
/// subdirectory (depending on `is_stdlib`), and returns it only if it is a directory.
fn get_typeshed_subdirectory<Host: host::Host>(
    is_stdlib: bool,
    config: &Config,
    host: &Host,
) -> Option<PathBuf> {
    let typeshed_root = get_typeshed_root(config, host)?;
    Some(format_typeshed_subdirectory(&typeshed_root, is_stdlib))
        .filter(|subdirectory| subdirectory.is_dir())
}
/// Determine the current `typeshed` subdirectory for the standard library.
///
/// Delegates to `get_typeshed_subdirectory` with `is_stdlib` set to `true`, so the
/// result is `None` when that lookup finds no directory.
pub(crate) fn get_stdlib_typeshed_path<Host: host::Host>(
config: &Config,
host: &Host,
) -> Option<PathBuf> {
get_typeshed_subdirectory(true, config, host)
}
/// Generate a map from PyPI-registered package name to a list of paths
/// containing the package's stubs.
///
/// `third_party_dir` is expected to contain one directory per stub distribution. Every
/// subdirectory of a distribution, and every `.pyi` file directly inside it (keyed by
/// its file stem), is mapped back to the distribution directory that contains it.
///
/// The scan is best-effort: a `third_party_dir` that cannot be read yields an empty
/// map, and individual entries that cannot be read are skipped rather than causing a
/// panic.
fn build_typeshed_third_party_package_map(third_party_dir: &Path) -> HashMap<String, Vec<PathBuf>> {
    let mut package_map: HashMap<String, Vec<PathBuf>> = HashMap::new();

    let Ok(outer_entries) = fs::read_dir(third_party_dir) else {
        return package_map;
    };

    // Iterate over every directory.
    for outer_entry in outer_entries.flatten() {
        if !outer_entry
            .file_type()
            .map_or(false, |file_type| file_type.is_dir())
        {
            continue;
        }

        let distribution_dir = outer_entry.path();
        let Ok(inner_entries) = fs::read_dir(&distribution_dir) else {
            continue;
        };

        // Iterate over any subdirectory children.
        for inner_entry in inner_entries.flatten() {
            let Ok(file_type) = inner_entry.file_type() else {
                continue;
            };

            let package_name = if file_type.is_dir() {
                inner_entry.file_name().to_string_lossy().into_owned()
            } else if file_type.is_file() {
                // Only stub files contribute a (single-file) package.
                let inner_path = inner_entry.path();
                if inner_path
                    .extension()
                    .map_or(true, |extension| extension != "pyi")
                {
                    continue;
                }
                let Some(file_stem) = inner_path.file_stem() else {
                    continue;
                };
                file_stem.to_string_lossy().into_owned()
            } else {
                continue;
            };

            package_map
                .entry(package_name)
                .or_default()
                .push(distribution_dir.clone());
        }
    }

    package_map
}
/// Determine the current `typeshed` subdirectory for a third-party package.
///
/// Looks up the first segment of the module name in the map of third-party stub
/// packages. Returns `None` if there is no third-party `typeshed` directory, if the
/// module name has no segments, or if no stubs are registered under that name.
pub(crate) fn get_third_party_typeshed_package_paths<Host: host::Host>(
    module_descriptor: &ImportModuleDescriptor,
    config: &Config,
    host: &Host,
) -> Option<Vec<PathBuf>> {
    let third_party_dir = get_typeshed_subdirectory(false, config, host)?;
    let mut package_paths = build_typeshed_third_party_package_map(&third_party_dir);
    let first_name_part = module_descriptor.name_parts.first()?;
    // The map is local to this call, so take the entry out of it instead of cloning.
    package_paths.remove(first_name_part.as_str())
}