Generate ImportMap from module path to imported dependencies (#3243)

This commit is contained in:
Chris Chan 2023-04-04 11:31:37 +08:00 committed by GitHub
parent 76e111c874
commit 10504eb9ed
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 269 additions and 89 deletions

2
Cargo.lock generated
View file

@ -2087,6 +2087,7 @@ dependencies = [
"ruff",
"ruff_cache",
"ruff_diagnostics",
"ruff_python_ast",
"ruff_python_stdlib",
"rustc-hash",
"serde",
@ -2179,6 +2180,7 @@ dependencies = [
"rustc-hash",
"rustpython-common",
"rustpython-parser",
"serde",
"smallvec",
]

View file

@ -1,10 +1,12 @@
//! Lint rules based on import analysis.
use std::borrow::Cow;
use std::path::Path;
use rustpython_parser::ast::Suite;
use rustpython_parser::ast::{StmtKind, Suite};
use ruff_diagnostics::Diagnostic;
use ruff_python_ast::helpers::to_module_path;
use ruff_python_ast::imports::{ImportMap, ModuleImport};
use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
use ruff_python_ast::visitor::Visitor;
@ -14,6 +16,66 @@ use crate::rules::isort;
use crate::rules::isort::track::{Block, ImportTracker};
use crate::settings::{flags, Settings};
/// Build an [`ImportMap`] entry for the module at `path`, mapping its
/// dot-delimited module name to every dependency imported in `blocks`.
///
/// Returns `None` when the file has no containing `package`, or when its
/// path cannot be converted into a module path.
fn extract_import_map(path: &Path, package: Option<&Path>, blocks: &[&Block]) -> Option<ImportMap> {
    // Idiomatic early-exit: `?` on `Option` replaces `let … else { return None }`.
    let package = package?;
    let module_path = to_module_path(package, path)?;

    // Pre-size the buffer: every tracked statement contributes at least one import.
    let num_imports = blocks.iter().map(|block| block.imports.len()).sum();
    let mut module_imports = Vec::with_capacity(num_imports);
    for stmt in blocks.iter().flat_map(|block| &block.imports) {
        match &stmt.node {
            StmtKind::Import { names } => {
                // `import a.b, c` introduces one dependency per alias.
                module_imports.extend(names.iter().map(|name| {
                    ModuleImport::new(
                        name.node.name.clone(),
                        stmt.location,
                        stmt.end_location.unwrap(),
                    )
                }));
            }
            StmtKind::ImportFrom {
                module,
                names,
                level,
            } => {
                let level = level.unwrap_or(0);
                // Resolve the source module to an absolute, dot-delimited name.
                let module = if let Some(module) = module {
                    if level == 0 {
                        // Absolute `from a.b import x`: use the module as-is.
                        Cow::Borrowed(module)
                    } else {
                        // Relative `from ..pkg import x`: strip `level` trailing
                        // segments from our own module path, then append `module`.
                        if module_path.len() <= level {
                            // The import reaches above the package root; skip it.
                            continue;
                        }
                        let prefix = module_path[..module_path.len() - level].join(".");
                        Cow::Owned(format!("{prefix}.{module}"))
                    }
                } else {
                    // Bare relative import (`from . import x`).
                    if module_path.len() <= level {
                        continue;
                    }
                    Cow::Owned(module_path[..module_path.len() - level].join("."))
                };
                module_imports.extend(names.iter().map(|name| {
                    ModuleImport::new(
                        format!("{}.{}", module, name.node.name),
                        name.location,
                        name.end_location.unwrap(),
                    )
                }));
            }
            // `ImportTracker` only records import statements, so any other
            // statement kind here indicates a bug upstream.
            _ => panic!("Expected StmtKind::Import | StmtKind::ImportFrom"),
        }
    }

    let mut import_map = ImportMap::default();
    import_map.insert(module_path.join("."), module_imports);
    Some(import_map)
}
#[allow(clippy::too_many_arguments)]
pub fn check_imports(
python_ast: &Suite,
@ -25,7 +87,7 @@ pub fn check_imports(
autofix: flags::Autofix,
path: &Path,
package: Option<&Path>,
) -> Vec<Diagnostic> {
) -> (Vec<Diagnostic>, Option<ImportMap>) {
// Extract all imports from the AST.
let tracker = {
let mut tracker = ImportTracker::new(locator, directives, path);
@ -52,5 +114,9 @@ pub fn check_imports(
&blocks, python_ast, locator, stylist, settings, autofix,
));
}
diagnostics
// Extract import map.
let imports = extract_import_map(path, package, &blocks);
(diagnostics, imports)
}

View file

@ -9,6 +9,7 @@ use rustpython_parser::lexer::LexResult;
use rustpython_parser::ParseError;
use ruff_diagnostics::Diagnostic;
use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
use ruff_python_stdlib::path::is_python_stub_file;
@ -51,6 +52,15 @@ impl<T> LinterResult<T> {
pub type FixTable = FxHashMap<Rule, usize>;
/// The output of a fix-applying lint run: the final diagnostics and import
/// map, the rewritten source, and a tally of the fixes that were applied.
pub struct FixerResult<'a> {
    /// The result returned by the linter, after applying any fixes.
    pub result: LinterResult<(Vec<Message>, Option<ImportMap>)>,
    /// The resulting source code, after applying any fixes.
    pub transformed: Cow<'a, str>,
    /// The number of fixes applied for each [`Rule`].
    pub fixed: FixTable,
}
/// Generate `Diagnostic`s from the source code contents at the
/// given `Path`.
#[allow(clippy::too_many_arguments)]
@ -66,9 +76,10 @@ pub fn check_path(
settings: &Settings,
noqa: flags::Noqa,
autofix: flags::Autofix,
) -> LinterResult<Vec<Diagnostic>> {
) -> LinterResult<(Vec<Diagnostic>, Option<ImportMap>)> {
// Aggregate all diagnostics.
let mut diagnostics = vec![];
let mut imports = None;
let mut error = None;
// Collect doc lines. This requires a rare mix of tokens (for comments) and AST
@ -142,7 +153,7 @@ pub fn check_path(
));
}
if use_imports {
diagnostics.extend(check_imports(
let (import_diagnostics, module_imports) = check_imports(
&python_ast,
locator,
indexer,
@ -152,7 +163,9 @@ pub fn check_path(
autofix,
path,
package,
));
);
imports = module_imports;
diagnostics.extend(import_diagnostics);
}
if use_doc_lines {
doc_lines.extend(doc_lines_from_ast(&python_ast));
@ -240,7 +253,7 @@ pub fn check_path(
}
}
LinterResult::new(diagnostics, error)
LinterResult::new((diagnostics, imports), error)
}
const MAX_ITERATIONS: usize = 100;
@ -297,7 +310,7 @@ pub fn add_noqa_to_path(path: &Path, package: Option<&Path>, settings: &Settings
// Add any missing `# noqa` pragmas.
add_noqa(
path,
&diagnostics,
&diagnostics.0,
&contents,
indexer.commented_lines(),
&directives.noqa_line_for,
@ -314,7 +327,7 @@ pub fn lint_only(
settings: &Settings,
noqa: flags::Noqa,
autofix: flags::Autofix,
) -> LinterResult<Vec<Message>> {
) -> LinterResult<(Vec<Message>, Option<ImportMap>)> {
// Tokenize once.
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
@ -348,20 +361,23 @@ pub fn lint_only(
// Convert from diagnostics to messages.
let path_lossy = path.to_string_lossy();
result.map(|diagnostics| {
diagnostics
.into_iter()
.map(|diagnostic| {
let source = if settings.show_source {
Some(Source::from_diagnostic(&diagnostic, &locator))
} else {
None
};
let lineno = diagnostic.location.row();
let noqa_row = *directives.noqa_line_for.get(&lineno).unwrap_or(&lineno);
Message::from_diagnostic(diagnostic, path_lossy.to_string(), source, noqa_row)
})
.collect()
result.map(|(messages, imports)| {
(
messages
.into_iter()
.map(|diagnostic| {
let source = if settings.show_source {
Some(Source::from_diagnostic(&diagnostic, &locator))
} else {
None
};
let lineno = diagnostic.location.row();
let noqa_row = *directives.noqa_line_for.get(&lineno).unwrap_or(&lineno);
Message::from_diagnostic(diagnostic, path_lossy.to_string(), source, noqa_row)
})
.collect(),
imports,
)
})
}
@ -373,7 +389,7 @@ pub fn lint_fix<'a>(
package: Option<&Path>,
noqa: flags::Noqa,
settings: &Settings,
) -> Result<(LinterResult<Vec<Message>>, Cow<'a, str>, FixTable)> {
) -> Result<FixerResult<'a>> {
let mut transformed = Cow::Borrowed(contents);
// Track the number of fixed errors across iterations.
@ -448,7 +464,7 @@ This indicates a bug in `{}`. If you could open an issue at:
}
// Apply autofix.
if let Some((fixed_contents, applied)) = fix_file(&result.data, &locator) {
if let Some((fixed_contents, applied)) = fix_file(&result.data.0, &locator) {
if iterations < MAX_ITERATIONS {
// Count the number of fixed errors.
for (rule, count) in applied {
@ -488,29 +504,33 @@ This indicates a bug in `{}`. If you could open an issue at:
// Convert to messages.
let path_lossy = path.to_string_lossy();
return Ok((
result.map(|diagnostics| {
diagnostics
.into_iter()
.map(|diagnostic| {
let source = if settings.show_source {
Some(Source::from_diagnostic(&diagnostic, &locator))
} else {
None
};
let lineno = diagnostic.location.row();
let noqa_row = *directives.noqa_line_for.get(&lineno).unwrap_or(&lineno);
Message::from_diagnostic(
diagnostic,
path_lossy.to_string(),
source,
noqa_row,
)
})
.collect()
return Ok(FixerResult {
result: result.map(|(messages, imports)| {
(
messages
.into_iter()
.map(|diagnostic| {
let source = if settings.show_source {
Some(Source::from_diagnostic(&diagnostic, &locator))
} else {
None
};
let lineno = diagnostic.location.row();
let noqa_row =
*directives.noqa_line_for.get(&lineno).unwrap_or(&lineno);
Message::from_diagnostic(
diagnostic,
path_lossy.to_string(),
source,
noqa_row,
)
})
.collect(),
imports,
)
}),
transformed,
fixed,
));
});
}
}

View file

@ -31,7 +31,8 @@ mod tests {
let directives =
directives::extract_directives(&tokens, directives::Flags::from_settings(&settings));
let LinterResult {
data: diagnostics, ..
data: (diagnostics, _imports),
..
} = check_path(
Path::new("<filename>"),
None,

View file

@ -258,7 +258,7 @@ mod tests {
let directives =
directives::extract_directives(&tokens, directives::Flags::from_settings(&settings));
let LinterResult {
data: mut diagnostics,
data: (mut diagnostics, _imports),
..
} = check_path(
Path::new("<filename>"),

View file

@ -31,7 +31,7 @@ pub fn test_path(path: impl AsRef<Path>, settings: &Settings) -> Result<Vec<Diag
let directives =
directives::extract_directives(&tokens, directives::Flags::from_settings(settings));
let LinterResult {
data: mut diagnostics,
data: (mut diagnostics, _imports),
..
} = check_path(
&path,
@ -66,7 +66,8 @@ pub fn test_path(path: impl AsRef<Path>, settings: &Settings) -> Result<Vec<Diag
let directives =
directives::extract_directives(&tokens, directives::Flags::from_settings(settings));
let LinterResult {
data: diagnostics, ..
data: (diagnostics, _imports),
..
} = check_path(
&path,
None,

View file

@ -25,6 +25,7 @@ doc = false
ruff = { path = "../ruff", features = ["clap"] }
ruff_cache = { path = "../ruff_cache" }
ruff_diagnostics = { path = "../ruff_diagnostics" }
ruff_python_ast = { path = "../ruff_python_ast" }
annotate-snippets = { version = "0.9.1", features = ["color"] }
anyhow = { workspace = true }

View file

@ -10,6 +10,7 @@ use path_absolutize::Absolutize;
use ruff::message::Message;
use ruff::settings::{flags, AllSettings, Settings};
use ruff_cache::{CacheKey, CacheKeyHasher};
use ruff_python_ast::imports::ImportMap;
use serde::{Deserialize, Serialize};
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
@ -19,11 +20,13 @@ const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
/// Borrowed form of a cache entry, used when serializing a result to the cache.
#[derive(Serialize)]
struct CheckResultRef<'a> {
    messages: &'a [Message],
    imports: &'a ImportMap,
}
/// Owned form of a cache entry, used when deserializing a result from the cache.
#[derive(Deserialize)]
struct CheckResult {
    messages: Vec<Message>,
    imports: ImportMap,
}
fn content_dir() -> &'static Path {
@ -95,14 +98,14 @@ pub fn get<P: AsRef<Path>>(
metadata: &fs::Metadata,
settings: &AllSettings,
autofix: flags::Autofix,
) -> Option<Vec<Message>> {
) -> Option<(Vec<Message>, ImportMap)> {
let encoded = read_sync(
&settings.cli.cache_dir,
cache_key(path, package, metadata, &settings.lib, autofix),
)
.ok()?;
match bincode::deserialize::<CheckResult>(&encoded[..]) {
Ok(CheckResult { messages }) => Some(messages),
Ok(CheckResult { messages, imports }) => Some((messages, imports)),
Err(e) => {
error!("Failed to deserialize encoded cache entry: {e:?}");
None
@ -118,8 +121,9 @@ pub fn set<P: AsRef<Path>>(
settings: &AllSettings,
autofix: flags::Autofix,
messages: &[Message],
imports: &ImportMap,
) {
let check_result = CheckResultRef { messages };
let check_result = CheckResultRef { messages, imports };
if let Err(e) = write_sync(
&settings.cli.cache_dir,
cache_key(path, package, metadata, &settings.lib, autofix),

View file

@ -9,17 +9,18 @@ use log::{debug, error, warn};
#[cfg(not(target_family = "wasm"))]
use rayon::prelude::*;
use crate::panic::catch_unwind;
use ruff::message::{Location, Message};
use ruff::registry::Rule;
use ruff::resolver::PyprojectDiscovery;
use ruff::settings::{flags, AllSettings};
use ruff::{fs, packaging, resolver, warn_user_once, IOError, Range};
use ruff_diagnostics::Diagnostic;
use ruff_python_ast::imports::ImportMap;
use crate::args::Overrides;
use crate::cache;
use crate::diagnostics::Diagnostics;
use crate::panic::catch_unwind;
/// Run the linter over a collection of files.
pub fn run(
@ -43,7 +44,7 @@ pub fn run(
// Initialize the cache.
if cache.into() {
fn init_cache(path: &std::path::Path) {
fn init_cache(path: &Path) {
if let Err(e) = cache::init(path) {
error!("Failed to initialize cache at {}: {e:?}", path.display());
}
@ -115,15 +116,18 @@ pub fn run(
);
let settings = resolver.resolve(path, pyproject_strategy);
if settings.rules.enabled(Rule::IOError) {
Diagnostics::new(vec![Message::from_diagnostic(
Diagnostic::new(
IOError { message },
Range::new(Location::default(), Location::default()),
),
format!("{}", path.display()),
None,
1,
)])
Diagnostics::new(
vec![Message::from_diagnostic(
Diagnostic::new(
IOError { message },
Range::new(Location::default(), Location::default()),
),
format!("{}", path.display()),
None,
1,
)],
ImportMap::default(),
)
} else {
Diagnostics::default()
}
@ -137,7 +141,8 @@ pub fn run(
acc += item;
acc
});
// TODO(chris): actually check the imports?
debug!("{:#?}", diagnostics.imports);
diagnostics.messages.sort_unstable();
let duration = start.elapsed();
debug!("Checked {:?} files in: {:?}", paths.len(), duration);

View file

@ -14,9 +14,10 @@ use similar::TextDiff;
use ruff::fs;
use ruff::jupyter::{is_jupyter_notebook, JupyterIndex, JupyterNotebook};
use ruff::linter::{lint_fix, lint_only, FixTable, LinterResult};
use ruff::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult};
use ruff::message::Message;
use ruff::settings::{flags, AllSettings, Settings};
use ruff_python_ast::imports::ImportMap;
use crate::cache;
@ -24,16 +25,18 @@ use crate::cache;
pub struct Diagnostics {
pub messages: Vec<Message>,
pub fixed: FxHashMap<String, FixTable>,
pub imports: ImportMap,
/// Jupyter notebook indexing table for each input file that is a jupyter notebook
/// so we can rewrite the diagnostics in the end
pub jupyter_index: FxHashMap<String, JupyterIndex>,
}
impl Diagnostics {
pub fn new(messages: Vec<Message>) -> Self {
pub fn new(messages: Vec<Message>, imports: ImportMap) -> Self {
Self {
messages,
fixed: FxHashMap::default(),
imports,
jupyter_index: FxHashMap::default(),
}
}
@ -42,6 +45,7 @@ impl Diagnostics {
impl AddAssign for Diagnostics {
fn add_assign(&mut self, other: Self) {
self.messages.extend(other.messages);
self.imports.extend(other.imports);
for (filename, fixed) in other.fixed {
if fixed.is_empty() {
continue;
@ -58,7 +62,7 @@ impl AddAssign for Diagnostics {
}
/// Returns either an indexed python jupyter notebook or a diagnostic (which is empty if we skip)
fn load_jupyter_notebook(path: &Path) -> Result<(String, JupyterIndex), Diagnostics> {
fn load_jupyter_notebook(path: &Path) -> Result<(String, JupyterIndex), Box<Diagnostics>> {
let notebook = match JupyterNotebook::read(path) {
Ok(notebook) => {
if !notebook
@ -72,13 +76,13 @@ fn load_jupyter_notebook(path: &Path) -> Result<(String, JupyterIndex), Diagnost
"Skipping {} because it's not a Python notebook",
path.display()
);
return Err(Diagnostics::default());
return Err(Box::default());
}
notebook
}
Err(diagnostic) => {
// Failed to read the jupyter notebook
return Err(Diagnostics {
return Err(Box::new(Diagnostics {
messages: vec![Message::from_diagnostic(
*diagnostic,
path.to_string_lossy().to_string(),
@ -86,7 +90,7 @@ fn load_jupyter_notebook(path: &Path) -> Result<(String, JupyterIndex), Diagnost
1,
)],
..Diagnostics::default()
});
}));
}
};
@ -113,11 +117,11 @@ pub fn lint_path(
&& matches!(autofix, flags::FixMode::None | flags::FixMode::Generate)
{
let metadata = path.metadata()?;
if let Some(messages) =
if let Some((messages, imports)) =
cache::get(path, package.as_ref(), &metadata, settings, autofix.into())
{
debug!("Cache hit for: {}", path.display());
return Ok(Diagnostics::new(messages));
return Ok(Diagnostics::new(messages, imports));
}
Some(metadata)
} else {
@ -130,7 +134,7 @@ pub fn lint_path(
let (contents, jupyter_index) = if is_jupyter_notebook(path) {
match load_jupyter_notebook(path) {
Ok((contents, jupyter_index)) => (contents, Some(jupyter_index)),
Err(diagnostics) => return Ok(diagnostics),
Err(diagnostics) => return Ok(*diagnostics),
}
} else {
(std::fs::read_to_string(path)?, None)
@ -139,13 +143,16 @@ pub fn lint_path(
// Lint the file.
let (
LinterResult {
data: messages,
data: (messages, imports),
error: parse_error,
},
fixed,
) = if matches!(autofix, flags::FixMode::Apply | flags::FixMode::Diff) {
if let Ok((result, transformed, fixed)) =
lint_fix(&contents, path, package, noqa, &settings.lib)
if let Ok(FixerResult {
result,
transformed,
fixed,
}) = lint_fix(&contents, path, package, noqa, &settings.lib)
{
if !fixed.is_empty() {
if matches!(autofix, flags::FixMode::Apply) {
@ -187,6 +194,8 @@ pub fn lint_path(
(result, fixed)
};
let imports = imports.unwrap_or_default();
if let Some(err) = parse_error {
// Notify the user of any parse errors.
error!(
@ -210,6 +219,7 @@ pub fn lint_path(
settings,
autofix.into(),
&messages,
&imports,
);
}
}
@ -231,6 +241,7 @@ pub fn lint_path(
Ok(Diagnostics {
messages,
fixed: FxHashMap::from_iter([(fs::relativize_path(path), fixed)]),
imports,
jupyter_index,
})
}
@ -248,12 +259,16 @@ pub fn lint_stdin(
// Lint the inputs.
let (
LinterResult {
data: messages,
data: (messages, imports),
error: parse_error,
},
fixed,
) = if matches!(autofix, flags::FixMode::Apply | flags::FixMode::Diff) {
if let Ok((result, transformed, fixed)) = lint_fix(
if let Ok(FixerResult {
result,
transformed,
fixed,
}) = lint_fix(
contents,
path.unwrap_or_else(|| Path::new("-")),
package,
@ -312,6 +327,8 @@ pub fn lint_stdin(
(result, fixed)
};
let imports = imports.unwrap_or_default();
if let Some(err) = parse_error {
error!(
"Failed to parse {}: {err}",
@ -325,6 +342,7 @@ pub fn lint_stdin(
fs::relativize_path(path.unwrap_or_else(|| Path::new("-"))),
fixed,
)]),
imports,
jupyter_index: FxHashMap::default(),
})
}
@ -341,7 +359,7 @@ mod test {
// No diagnostics is used as skip signal
assert_eq!(
load_jupyter_notebook(path).unwrap_err(),
Diagnostics::default()
Box::<Diagnostics>::default()
);
}
}

View file

@ -24,4 +24,5 @@ regex = { workspace = true }
rustc-hash = { workspace = true }
rustpython-common = { workspace = true }
rustpython-parser = { workspace = true }
serde = { workspace = true }
smallvec = { version = "1.10.0" }

View file

@ -1,4 +1,8 @@
use std::fmt;
use rustc_hash::FxHashMap;
use rustpython_parser::ast::Location;
use serde::{Deserialize, Serialize};
use crate::types::Range;
/// A representation of an individual name imported via any import statement.
#[derive(Debug, Clone, PartialEq, Eq)]
@ -38,8 +42,8 @@ impl<'a> Import<'a> {
}
}
impl fmt::Display for AnyImport<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
impl std::fmt::Display for AnyImport<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
AnyImport::Import(import) => write!(f, "{import}"),
AnyImport::ImportFrom(import_from) => write!(f, "{import_from}"),
@ -47,8 +51,8 @@ impl fmt::Display for AnyImport<'_> {
}
}
impl fmt::Display for Import<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
impl std::fmt::Display for Import<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "import {}", self.name.name)?;
if let Some(as_name) = self.name.as_name {
write!(f, " as {as_name}")?;
@ -57,8 +61,8 @@ impl fmt::Display for Import<'_> {
}
}
impl fmt::Display for ImportFrom<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
impl std::fmt::Display for ImportFrom<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "from ")?;
if let Some(level) = self.level {
write!(f, "{}", ".".repeat(level))?;
@ -70,3 +74,59 @@ impl fmt::Display for ImportFrom<'_> {
Ok(())
}
}
/// A representation of a module reference in an import statement.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ModuleImport {
    /// The dot-delimited name of the imported module.
    module: String,
    /// The source location at which the import starts.
    location: Location,
    /// The source location at which the import ends.
    end_location: Location,
}
impl ModuleImport {
pub fn new(module: String, location: Location, end_location: Location) -> Self {
Self {
module,
location,
end_location,
}
}
}
/// Convert an import reference into the source [`Range`] it spans.
impl From<&ModuleImport> for Range {
    fn from(import: &ModuleImport) -> Range {
        Range::new(import.location, import.end_location)
    }
}
/// A representation of the import dependencies between modules, keyed by the
/// importing module's dot-delimited name.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct ImportMap {
    /// A map from dot-delimited module name to the list of imports in that module.
    module_to_imports: FxHashMap<String, Vec<ModuleImport>>,
}
impl ImportMap {
pub fn new() -> Self {
Self {
module_to_imports: FxHashMap::default(),
}
}
pub fn insert(&mut self, module: String, imports_vec: Vec<ModuleImport>) {
self.module_to_imports.insert(module, imports_vec);
}
pub fn extend(&mut self, other: Self) {
self.module_to_imports.extend(other.module_to_imports);
}
}
/// Iterate over `(module name, imports)` pairs by reference, in arbitrary
/// (hash-map) order.
impl<'a> IntoIterator for &'a ImportMap {
    type Item = (&'a String, &'a Vec<ModuleImport>);
    type IntoIter = std::collections::hash_map::Iter<'a, String, Vec<ModuleImport>>;

    fn into_iter(self) -> Self::IntoIter {
        self.module_to_imports.iter()
    }
}

View file

@ -185,7 +185,8 @@ pub fn check(contents: &str, options: JsValue) -> Result<JsValue, JsValue> {
// Generate checks.
let LinterResult {
data: diagnostics, ..
data: (diagnostics, _imports),
..
} = check_path(
Path::new("<filename>"),
None,