Add a new script to generate builtin module names (#12696)

This commit is contained in:
Alex Waygood 2024-08-05 21:33:36 +01:00 committed by GitHub
parent 2393d19f91
commit 7ee7c68f36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 198 additions and 50 deletions

View file

@ -1,7 +1,6 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::iter::FusedIterator; use std::iter::FusedIterator;
use once_cell::sync::Lazy;
use ruff_db::files::{File, FilePath, FileRootKind}; use ruff_db::files::{File, FilePath, FileRootKind};
use ruff_db::program::{Program, SearchPathSettings, TargetVersion}; use ruff_db::program::{Program, SearchPathSettings, TargetVersion};
use ruff_db::system::{DirectoryEntry, System, SystemPath, SystemPathBuf}; use ruff_db::system::{DirectoryEntry, System, SystemPath, SystemPathBuf};
@ -447,60 +446,21 @@ struct ModuleNameIngredient<'db> {
pub(super) name: ModuleName, pub(super) name: ModuleName,
} }
/// Modules that are builtin to the Python interpreter itself.
///
/// When these module names are imported, standard module resolution is bypassed:
/// the module name always resolves to the stdlib module,
/// even if there's a module of the same name in the workspace root
/// (which would normally result in the stdlib module being overridden).
///
/// TODO(Alex): write a script to generate this list,
/// similar to what we do in `crates/ruff_python_stdlib/src/sys.rs`
static BUILTIN_MODULES: Lazy<FxHashSet<&str>> = Lazy::new(|| {
const BUILTIN_MODULE_NAMES: &[&str] = &[
"_abc",
"_ast",
"_codecs",
"_collections",
"_functools",
"_imp",
"_io",
"_locale",
"_operator",
"_signal",
"_sre",
"_stat",
"_string",
"_symtable",
"_thread",
"_tokenize",
"_tracemalloc",
"_typing",
"_warnings",
"_weakref",
"atexit",
"builtins",
"errno",
"faulthandler",
"gc",
"itertools",
"marshal",
"posix",
"pwd",
"sys",
"time",
];
BUILTIN_MODULE_NAMES.iter().copied().collect()
});
/// Given a module name and a list of search paths in which to lookup modules, /// Given a module name and a list of search paths in which to lookup modules,
/// attempt to resolve the module name /// attempt to resolve the module name
fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(SearchPath, File, ModuleKind)> { fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(SearchPath, File, ModuleKind)> {
let resolver_settings = module_resolution_settings(db); let resolver_settings = module_resolution_settings(db);
let resolver_state = ResolverState::new(db, resolver_settings.target_version()); let target_version = resolver_settings.target_version();
let is_builtin_module = BUILTIN_MODULES.contains(&name.as_str()); let resolver_state = ResolverState::new(db, target_version);
let (_, minor_version) = target_version.as_tuple();
let is_builtin_module =
ruff_python_stdlib::sys::is_builtin_module(minor_version, name.as_str());
for search_path in resolver_settings.search_paths(db) { for search_path in resolver_settings.search_paths(db) {
// When a builtin module is imported, standard module resolution is bypassed:
// the module name always resolves to the stdlib module,
// even if there's a module of the same name in the workspace root
// (which would normally result in the stdlib module being overridden).
if is_builtin_module && !search_path.is_standard_library() { if is_builtin_module && !search_path.is_standard_library() {
continue; continue;
} }

View file

@ -39,6 +39,18 @@ pub enum TargetVersion {
} }
impl TargetVersion { impl TargetVersion {
/// Return this target version as a `(major, minor)` pair,
/// e.g. `Py312` yields `(3, 12)`.
pub const fn as_tuple(self) -> (u8, u8) {
    // Every variant is a Python 3 release, so only the minor part varies.
    let minor = match self {
        Self::Py37 => 7,
        Self::Py38 => 8,
        Self::Py39 => 9,
        Self::Py310 => 10,
        Self::Py311 => 11,
        Self::Py312 => 12,
        Self::Py313 => 13,
    };
    (3, minor)
}
const fn as_str(self) -> &'static str { const fn as_str(self) -> &'static str {
match self { match self {
Self::Py37 => "py37", Self::Py37 => "py37",

View file

@ -0,0 +1,55 @@
//! This file is generated by `scripts/generate_builtin_modules.py`
/// Report whether `module` is a [builtin module] on Python 3.`minor_version`.
///
/// "Builtin modules" are modules that are compiled directly into the
/// Python interpreter. A first-party module can never shadow one of them;
/// the normal rules of module resolution do not apply to these modules.
///
/// [builtin module]: https://docs.python.org/3/library/sys.html#sys.builtin_module_names
pub fn is_builtin_module(minor_version: u8, module: &str) -> bool {
    match module {
        // Builtin regardless of the minor version.
        "_abc" | "_ast" | "_codecs" | "_collections" | "_functools" | "_imp" | "_io"
        | "_locale" | "_operator" | "_signal" | "_sre" | "_stat" | "_string" | "_symtable"
        | "_thread" | "_tracemalloc" | "_warnings" | "_weakref" | "atexit" | "builtins"
        | "errno" | "faulthandler" | "gc" | "itertools" | "marshal" | "posix" | "pwd" | "sys"
        | "time" => true,

        // Builtin only on some minor versions.
        "zipimport" => minor_version == 7,
        "xxsubtype" => matches!(minor_version, 7..=11),
        "_peg_parser" => minor_version == 9,
        "_tokenize" => matches!(minor_version, 11..=13),
        "_typing" => matches!(minor_version, 12 | 13),
        "_suggestions" | "_sysconfig" => minor_version == 13,

        _ => false,
    }
}

View file

@ -0,0 +1,5 @@
mod builtin_modules;
mod known_stdlib;
pub use builtin_modules::is_builtin_module;
pub use known_stdlib::is_known_standard_library;

View file

@ -0,0 +1,116 @@
"""Script to generate `crates/ruff_python_stdlib/src/sys/builtin_modules.rs`.
This script requires the following executables to be callable via a subprocess:
- `python3.7`
- `python3.8`
- `python3.9`
- `python3.10`
- `python3.11`
- `python3.12`
- `python3.13`
"""
from __future__ import annotations
import builtins
import subprocess
import textwrap
from functools import partial
from pathlib import Path
# Crate that owns the generated file; also the package handed to `cargo fmt`.
MODULE_CRATE = "ruff_python_stdlib"
# Destination of the generated Rust source, relative to the repository root.
MODULE_PATH = Path("crates", MODULE_CRATE, "src", "sys", "builtin_modules.rs")
type Version = tuple[int, int]
# Python 3.7 through 3.13 inclusive, as (major, minor) pairs in ascending order.
PYTHON_VERSIONS: list[Version] = [(3, minor) for minor in range(7, 14)]
def builtin_modules_on_version(major_version: int, minor_version: int) -> set[str]:
    """Return the builtin module names reported by one Python interpreter.

    Runs the ``python{major_version}.{minor_version}`` executable in a
    subprocess and reads back its ``sys.builtin_module_names``.

    Raises:
        subprocess.CalledProcessError: if the interpreter exits with a
            non-zero status; its captured stdout/stderr are printed first.
        ValueError, SyntaxError: if the interpreter's output is not a plain
            Python literal.
    """
    # Function-scope import so this block does not depend on the file header.
    import ast

    executable = f"python{major_version}.{minor_version}"
    try:
        proc = subprocess.run(
            [executable, "-c", "import sys; print(sys.builtin_module_names)"],
            check=True,
            text=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as e:
        # Surface the child's output before re-raising, to aid debugging.
        print(e.stdout)
        print(e.stderr)
        raise
    # The child prints the repr of a tuple of strings. Parse it as a literal
    # instead of evaluating arbitrary subprocess output with `eval`.
    return set(ast.literal_eval(proc.stdout))
def generate_module(
    script_destination: Path, crate_name: str, python_versions: list[Version]
) -> None:
    """Write the Rust source defining `is_builtin_module`, then format it.

    Args:
        script_destination: Path of the Rust file to (over)write.
        crate_name: Cargo package passed to `cargo fmt --package`.
        python_versions: `(major, minor)` pairs of the interpreters to query.
            Results are keyed by minor version only.

    The emitted text is not laid out carefully; `cargo fmt` is run on the
    crate afterwards to normalize it.

    Raises:
        subprocess.CalledProcessError: if an interpreter or `cargo fmt`
            exits with a non-zero status.
    """
    # Query every interpreter BEFORE opening the destination: opening with "w"
    # truncates the file, so a missing or failing interpreter must not leave
    # a half-written module behind.
    modules_by_version = {
        minor_version: builtin_modules_on_version(major_version, minor_version)
        for major_version, minor_version in python_versions
    }

    # Modules that are builtin on all versions share one wildcard-version arm.
    ubiquitous_modules = set.intersection(*modules_by_version.values())

    def or_pattern(modules: set[str]) -> str:
        # Render module names as a sorted Rust or-pattern of string literals.
        return " | ".join(f'"{module}"' for module in sorted(modules))

    arms = []
    if ubiquitous_modules:
        arms.append(f"(_, {or_pattern(ubiquitous_modules)})")

    # Next, add an arm per version for its version-specific modules.
    for _major_version, minor_version in python_versions:
        version_modules = modules_by_version[minor_version] - ubiquitous_modules
        # A version with nothing extra would render as `(N, )`, which is not
        # a valid arm for a two-element tuple, so skip it.
        if version_modules:
            arms.append(f"({minor_version}, {or_pattern(version_modules)})")

    header = textwrap.dedent(
        """\
        //! This file is generated by `scripts/generate_builtin_modules.py`
        /// Return `true` if `module` is a [builtin module] on the given
        /// Python 3 version.
        ///
        /// "Builtin modules" are modules that are compiled directly into the
        /// Python interpreter. These can never be shadowed by first-party
        /// modules; the normal rules of module resolution do not apply to these
        /// modules.
        ///
        /// [builtin module]: https://docs.python.org/3/library/sys.html#sys.builtin_module_names
        #[allow(clippy::unnested_or_patterns)]
        pub fn is_builtin_module(minor_version: u8, module: &str) -> bool {
            matches!((minor_version, module),
        """,
    )

    with script_destination.open("w") as f:
        f.write(header)
        f.write("\n| ".join(arms))
        # Close the `matches!` invocation and the function body.
        f.write("\n)\n}\n")

    # Format only after the file has been closed (and therefore flushed).
    subprocess.run(["cargo", "fmt", "--package", crate_name], check=True)
if __name__ == "__main__":
generate_module(MODULE_PATH, MODULE_CRATE, PYTHON_VERSIONS)

View file

@ -4,7 +4,7 @@ from pathlib import Path
from stdlibs import stdlib_module_names from stdlibs import stdlib_module_names
PATH = Path("crates") / "ruff_python_stdlib" / "src" / "sys.rs" PATH = Path("crates") / "ruff_python_stdlib" / "src" / "sys" / "known_stdlib.rs"
VERSIONS: list[tuple[int, int]] = [ VERSIONS: list[tuple[int, int]] = [
(3, 7), (3, 7),
(3, 8), (3, 8),