Remove HashMap and HashSet for known-standard-library detection (#5345)

## Summary

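This is a lot more concise and probably much more performant (fewer
instructions): the lazily-initialized `FxHashMap` of `FxHashSet`s is replaced by
a single `matches!` expression over `(minor_version, module)`, which also lets
us drop the `once_cell` and `rustc-hash` dependencies from `ruff_python_stdlib`.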
This commit is contained in:
Charlie Marsh 2023-06-23 15:59:03 -04:00 committed by GitHub
parent 4b65446de6
commit f45d1c2b84
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 326 additions and 1149 deletions

4
Cargo.lock generated
View file

@ -2072,10 +2072,6 @@ dependencies = [
[[package]]
name = "ruff_python_stdlib"
version = "0.0.0"
dependencies = [
"once_cell",
"rustc-hash",
]
[[package]]
name = "ruff_python_whitespace"

View file

@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize};
use strum_macros::EnumIter;
use ruff_macros::CacheKey;
use ruff_python_stdlib::sys::KNOWN_STANDARD_LIBRARY;
use ruff_python_stdlib::sys::is_known_standard_library;
use crate::settings::types::PythonVersion;
use crate::warn_user_once;
@ -82,11 +82,7 @@ pub(crate) fn categorize<'a>(
(&ImportSection::Known(ImportType::Future), Reason::Future)
} else if let Some((import_type, reason)) = known_modules.categorize(module_name) {
(import_type, reason)
} else if KNOWN_STANDARD_LIBRARY
.get(&target_version.as_tuple())
.unwrap()
.contains(module_base)
{
} else if is_known_standard_library(target_version.minor(), module_base) {
(
&ImportSection::Known(ImportType::StandardLibrary),
Reason::KnownStandardLibrary,

View file

@ -52,6 +52,14 @@ impl PythonVersion {
}
}
/// Returns the major component of this version, i.e. the first element of
/// the tuple produced by `as_tuple`.
pub const fn major(&self) -> u32 {
    let (major, _) = self.as_tuple();
    major
}
/// Returns the minor component of this version, i.e. the second element of
/// the tuple produced by `as_tuple`.
pub const fn minor(&self) -> u32 {
    let (_, minor) = self.as_tuple();
    minor
}
pub fn get_minimum_supported_version(requires_version: &VersionSpecifiers) -> Option<Self> {
let mut minimum_version = None;
for python_version in PythonVersion::iter() {

View file

@ -13,5 +13,3 @@ license = { workspace = true }
[lib]
[dependencies]
once_cell = { workspace = true }
rustc-hash = { workspace = true }

File diff suppressed because it is too large Load diff

View file

@ -12,13 +12,14 @@ from pathlib import Path
from sphinx.ext.intersphinx import fetch_inventory
URL = "https://docs.python.org/{}/objects.inv"
PATH = Path("crates") / "ruff_python" / "src" / "sys.rs"
PATH = Path("crates") / "ruff_python_stdlib" / "src" / "sys.rs"
VERSIONS: list[tuple[int, int]] = [
(3, 7),
(3, 8),
(3, 9),
(3, 10),
(3, 11),
(3, 12),
]
@ -37,18 +38,16 @@ with PATH.open("w") as f:
f.write(
"""\
//! This file is generated by `scripts/generate_known_standard_library.py`
use once_cell::sync::Lazy;
use rustc_hash::{FxHashMap, FxHashSet};
// See: https://pycqa.github.io/isort/docs/configuration/options.html#known-standard-library
pub static KNOWN_STANDARD_LIBRARY: Lazy<FxHashMap<(u32, u32), FxHashSet<&'static str>>> =
Lazy::new(|| {
FxHashMap::from_iter([
""", # noqa: E501
pub fn is_known_standard_library(minor_version: u32, module: &str) -> bool {
matches!((minor_version, module),
""",
)
for major, minor in VERSIONS:
version = f"{major}.{minor}"
url = URL.format(version)
modules_by_version = {}
for major_version, minor_version in VERSIONS:
url = URL.format(f"{major_version}.{minor_version}")
invdata = fetch_inventory(FakeApp(), "", url)
modules = {
@ -60,33 +59,44 @@ pub static KNOWN_STANDARD_LIBRARY: Lazy<FxHashMap<(u32, u32), FxHashSet<&'static
"sre_compile",
"sre",
}
for module in invdata["py:module"]:
root, *_ = module.split(".")
if root not in ["__future__", "__main__"]:
modules.add(root)
f.write(
f"""\
(
({major}, {minor}),
FxHashSet::from_iter([
""",
)
for module in sorted(modules):
f.write(
f"""\
"{module}",
""",
)
f.write(
"""\
]),
),
""",
modules_by_version[minor_version] = modules
# First, add a case for the modules that are in all versions.
ubiquitous_modules = set.intersection(*modules_by_version.values())
f.write("(_, ")
for i, module in enumerate(sorted(ubiquitous_modules)):
if i > 0:
f.write(" | ")
f.write(f'"{module}"')
f.write(")")
f.write("\n")
# Next, add any version-specific modules.
for _major_version, minor_version in VERSIONS:
version_modules = set.difference(
modules_by_version[minor_version],
ubiquitous_modules,
)
f.write(" | ")
f.write(f"({minor_version}, ")
for i, module in enumerate(sorted(version_modules)):
if i > 0:
f.write(" | ")
f.write(f'"{module}"')
f.write(")")
f.write("\n")
f.write(
"""\
])
});
""",
)
}
""",
)