# Remove ImportMap (#11234)

## Summary

This PR removes the `ImportMap` implementation and all of the plumbing that threaded it through Ruff.

The import map was added in https://github.com/astral-sh/ruff/pull/3243, but we never ended up using it for cross-file analysis.

We are now working on adding multi-file analysis to Ruff and will revisit import resolution as part of that work.
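
Because the map was threaded through the linter's return types, the removal also simplifies the public signatures: per the diff below, `check_path` now returns `LinterResult<Vec<Diagnostic>>` and `lint_only` returns `LinterResult<Vec<Message>>` instead of pairing the diagnostics with an `Option<ImportMap>`. A minimal sketch of the call-site change, using a simplified stand-in for `LinterResult` (the real type also carries a parse error, elided here):

```rust
// Simplified stand-in for ruff_linter's `LinterResult`.
struct LinterResult<T> {
    data: T,
}

fn main() {
    // Before this PR, callers destructured a tuple and usually discarded
    // the map:
    //     let LinterResult { data: (diagnostics, _imports), .. } = ...;
    //
    // After, `data` is the diagnostics themselves:
    let result = LinterResult {
        data: vec!["F401", "I001"],
    };
    let LinterResult { data: diagnostics } = result;
    for code in diagnostics {
        println!("{code}");
    }
}
```

Dropping the map also means we no longer build it in `check_imports` or store it in the cache, which shows up as a small speedup. Benchmarked against CPython: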


```
hyperfine --warmup 10 --runs 20 --setup "./target/release/ruff clean" \
              "./target/release/ruff check crates/ruff_linter/resources/test/cpython -e -s --extend-select=I" \
              "./target/release/ruff-import check crates/ruff_linter/resources/test/cpython -e -s --extend-select=I" 
Benchmark 1: ./target/release/ruff check crates/ruff_linter/resources/test/cpython -e -s --extend-select=I
  Time (mean ± σ):      37.6 ms ±   0.9 ms    [User: 52.2 ms, System: 63.7 ms]
  Range (min … max):    35.8 ms …  39.8 ms    20 runs
 
Benchmark 2: ./target/release/ruff-import check crates/ruff_linter/resources/test/cpython -e -s --extend-select=I
  Time (mean ± σ):      36.0 ms ±   0.7 ms    [User: 50.3 ms, System: 58.4 ms]
  Range (min … max):    34.5 ms …  37.6 ms    20 runs
 
Summary
  ./target/release/ruff-import check crates/ruff_linter/resources/test/cpython -e -s --extend-select=I ran
    1.04 ± 0.03 times faster than ./target/release/ruff check crates/ruff_linter/resources/test/cpython -e -s --extend-select=I
```

I suspect the performance improvement is even more significant for users who otherwise don't have any diagnostics. The same comparison, run against Airflow:


```
hyperfine --warmup 10 --runs 20 --setup "cd ../ecosystem/airflow && ../../ruff/target/release/ruff clean" \
              "./target/release/ruff check ../ecosystem/airflow -e -s --extend-select=I" \
              "./target/release/ruff-import check ../ecosystem/airflow -e -s --extend-select=I" 
Benchmark 1: ./target/release/ruff check ../ecosystem/airflow -e -s --extend-select=I
  Time (mean ± σ):      53.7 ms ±   1.8 ms    [User: 68.4 ms, System: 63.0 ms]
  Range (min … max):    51.1 ms …  58.7 ms    20 runs
 
Benchmark 2: ./target/release/ruff-import check ../ecosystem/airflow -e -s --extend-select=I
  Time (mean ± σ):      50.8 ms ±   1.4 ms    [User: 50.7 ms, System: 60.9 ms]
  Range (min … max):    48.5 ms …  55.3 ms    20 runs
 
Summary
  ./target/release/ruff-import check ../ecosystem/airflow -e -s --extend-select=I ran
    1.06 ± 0.05 times faster than ./target/release/ruff check ../ecosystem/airflow -e -s --extend-select=I
```

## Test Plan

`cargo test`

## Changes

11 changed files with 29 additions and 181 deletions.

@ -23,7 +23,6 @@ use ruff_linter::message::Message;
use ruff_linter::{warn_user, VERSION};
use ruff_macros::CacheKey;
use ruff_notebook::NotebookIndex;
use ruff_python_ast::imports::ImportMap;
use ruff_source_file::SourceFileBuilder;
use ruff_text_size::{TextRange, TextSize};
use ruff_workspace::resolver::Resolver;
@ -348,7 +347,7 @@ impl FileCache {
} else {
FxHashMap::default()
};
Diagnostics::new(messages, lint.imports.clone(), notebook_indexes)
Diagnostics::new(messages, notebook_indexes)
})
}
}
@ -394,7 +393,7 @@ pub(crate) fn init(path: &Path) -> Result<()> {
#[derive(Deserialize, Debug, Serialize, PartialEq)]
pub(crate) struct LintCacheData {
/// Imports made.
pub(super) imports: ImportMap,
/// Diagnostic messages.
pub(super) messages: Vec<CacheMessage>,
/// Source code of the file.
@ -410,7 +409,6 @@ pub(crate) struct LintCacheData {
impl LintCacheData {
pub(crate) fn from_messages(
messages: &[Message],
imports: ImportMap,
notebook_index: Option<NotebookIndex>,
) -> Self {
let source = if let Some(msg) = messages.first() {
@ -438,7 +436,6 @@ impl LintCacheData {
.collect();
Self {
imports,
messages,
source,
notebook_index,


@ -17,7 +17,6 @@ use ruff_linter::registry::Rule;
use ruff_linter::settings::types::UnsafeFixes;
use ruff_linter::settings::{flags, LinterSettings};
use ruff_linter::{fs, warn_user_once, IOError};
use ruff_python_ast::imports::ImportMap;
use ruff_source_file::SourceFileBuilder;
use ruff_text_size::{TextRange, TextSize};
use ruff_workspace::resolver::{
@ -134,7 +133,6 @@ pub(crate) fn check(
dummy,
TextSize::default(),
)],
ImportMap::default(),
FxHashMap::default(),
)
} else {


@ -23,7 +23,6 @@ use ruff_linter::settings::{flags, LinterSettings};
use ruff_linter::source_kind::{SourceError, SourceKind};
use ruff_linter::{fs, IOError, SyntaxError};
use ruff_notebook::{Notebook, NotebookError, NotebookIndex};
use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::{PySourceType, SourceType, TomlSourceType};
use ruff_source_file::SourceFileBuilder;
use ruff_text_size::{TextRange, TextSize};
@ -35,20 +34,17 @@ use crate::cache::{Cache, FileCacheKey, LintCacheData};
pub(crate) struct Diagnostics {
pub(crate) messages: Vec<Message>,
pub(crate) fixed: FixMap,
pub(crate) imports: ImportMap,
pub(crate) notebook_indexes: FxHashMap<String, NotebookIndex>,
}
impl Diagnostics {
pub(crate) fn new(
messages: Vec<Message>,
imports: ImportMap,
notebook_indexes: FxHashMap<String, NotebookIndex>,
) -> Self {
Self {
messages,
fixed: FixMap::default(),
imports,
notebook_indexes,
}
}
@ -92,7 +88,6 @@ impl Diagnostics {
dummy,
TextSize::default(),
)],
ImportMap::default(),
FxHashMap::default(),
)
} else {
@ -127,7 +122,6 @@ impl Add for Diagnostics {
impl AddAssign for Diagnostics {
fn add_assign(&mut self, other: Self) {
self.messages.extend(other.messages);
self.imports.extend(other.imports);
self.fixed += other.fixed;
self.notebook_indexes.extend(other.notebook_indexes);
}
@ -267,7 +261,7 @@ pub(crate) fn lint_path(
// Lint the file.
let (
LinterResult {
data: (messages, imports),
data: messages,
error: parse_error,
},
transformed,
@ -335,8 +329,6 @@ pub(crate) fn lint_path(
(result, transformed, fixed)
};
let imports = imports.unwrap_or_default();
if let Some((cache, relative_path, key)) = caching {
// We don't cache parsing errors.
if parse_error.is_none() {
@ -354,7 +346,6 @@ pub(crate) fn lint_path(
&key,
LintCacheData::from_messages(
&messages,
imports.clone(),
transformed.as_ipy_notebook().map(Notebook::index).cloned(),
),
);
@ -378,7 +369,6 @@ pub(crate) fn lint_path(
Ok(Diagnostics {
messages,
fixed: FixMap::from_iter([(fs::relativize_path(path), fixed)]),
imports,
notebook_indexes,
})
}
@ -416,7 +406,7 @@ pub(crate) fn lint_stdin(
// Lint the inputs.
let (
LinterResult {
data: (messages, imports),
data: messages,
error: parse_error,
},
transformed,
@ -494,8 +484,6 @@ pub(crate) fn lint_stdin(
(result, transformed, fixed)
};
let imports = imports.unwrap_or_default();
if let Some(error) = parse_error {
error!(
"{}",
@ -518,7 +506,6 @@ pub(crate) fn lint_stdin(
fs::relativize_path(path.unwrap_or_else(|| Path::new("-"))),
fixed,
)]),
imports,
notebook_indexes,
})
}


@ -1,17 +1,13 @@
//! Lint rules based on import analysis.
use std::borrow::Cow;
use std::path::Path;
use ruff_diagnostics::Diagnostic;
use ruff_notebook::CellOffsets;
use ruff_python_ast::helpers::to_module_path;
use ruff_python_ast::imports::{ImportMap, ModuleImport};
use ruff_python_ast::statement_visitor::StatementVisitor;
use ruff_python_ast::{self as ast, PySourceType, Stmt, Suite};
use ruff_python_ast::{PySourceType, Suite};
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_source_file::Locator;
use ruff_text_size::Ranged;
use crate::directives::IsortDirectives;
use crate::registry::Rule;
@ -19,57 +15,6 @@ use crate::rules::isort;
use crate::rules::isort::block::{Block, BlockBuilder};
use crate::settings::LinterSettings;
fn extract_import_map(path: &Path, package: Option<&Path>, blocks: &[&Block]) -> Option<ImportMap> {
let module_path = to_module_path(package?, path)?;
let num_imports = blocks.iter().map(|block| block.imports.len()).sum();
let mut module_imports = Vec::with_capacity(num_imports);
for stmt in blocks.iter().flat_map(|block| &block.imports) {
match stmt {
Stmt::Import(ast::StmtImport { names, range: _ }) => {
module_imports.extend(
names
.iter()
.map(|name| ModuleImport::new(name.name.to_string(), stmt.range())),
);
}
Stmt::ImportFrom(ast::StmtImportFrom {
module,
names,
level,
range: _,
}) => {
let level = *level as usize;
let module = if let Some(module) = module {
let module: &String = module.as_ref();
if level == 0 {
Cow::Borrowed(module)
} else {
if module_path.len() <= level {
continue;
}
let prefix = module_path[..module_path.len() - level].join(".");
Cow::Owned(format!("{prefix}.{module}"))
}
} else {
if module_path.len() <= level {
continue;
}
Cow::Owned(module_path[..module_path.len() - level].join("."))
};
module_imports.extend(names.iter().map(|name| {
ModuleImport::new(format!("{}.{}", module, name.name), name.range())
}));
}
_ => panic!("Expected Stmt::Import | Stmt::ImportFrom"),
}
}
let mut import_map = ImportMap::default();
import_map.insert(module_path.join("."), module_imports);
Some(import_map)
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn check_imports(
python_ast: &Suite,
@ -78,11 +23,10 @@ pub(crate) fn check_imports(
directives: &IsortDirectives,
settings: &LinterSettings,
stylist: &Stylist,
path: &Path,
package: Option<&Path>,
source_type: PySourceType,
cell_offsets: Option<&CellOffsets>,
) -> (Vec<Diagnostic>, Option<ImportMap>) {
) -> Vec<Diagnostic> {
// Extract all import blocks from the AST.
let tracker = {
let mut tracker =
@ -122,8 +66,5 @@ pub(crate) fn check_imports(
));
}
// Extract import map.
let imports = extract_import_map(path, package, &blocks);
(diagnostics, imports)
diagnostics
}


@ -10,7 +10,6 @@ use rustc_hash::FxHashMap;
use ruff_diagnostics::Diagnostic;
use ruff_notebook::Notebook;
use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::{PySourceType, Suite};
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
@ -62,7 +61,7 @@ pub type FixTable = FxHashMap<Rule, usize>;
pub struct FixerResult<'a> {
/// The result returned by the linter, after applying any fixes.
pub result: LinterResult<(Vec<Message>, Option<ImportMap>)>,
pub result: LinterResult<Vec<Message>>,
/// The resulting source code, after applying any fixes.
pub transformed: Cow<'a, SourceKind>,
/// The number of fixes applied for each [`Rule`].
@ -84,10 +83,9 @@ pub fn check_path(
source_kind: &SourceKind,
source_type: PySourceType,
tokens: TokenSource,
) -> LinterResult<(Vec<Diagnostic>, Option<ImportMap>)> {
) -> LinterResult<Vec<Diagnostic>> {
// Aggregate all diagnostics.
let mut diagnostics = vec![];
let mut imports = None;
let mut error = None;
// Collect doc lines. This requires a rare mix of tokens (for comments) and AST
@ -169,19 +167,18 @@ pub fn check_path(
));
}
if use_imports {
let (import_diagnostics, module_imports) = check_imports(
let import_diagnostics = check_imports(
&python_ast,
locator,
indexer,
&directives.isort,
settings,
stylist,
path,
package,
source_type,
cell_offsets,
);
imports = module_imports;
diagnostics.extend(import_diagnostics);
}
if use_doc_lines {
@ -340,7 +337,7 @@ pub fn check_path(
}
}
LinterResult::new((diagnostics, imports), error)
LinterResult::new(diagnostics, error)
}
const MAX_ITERATIONS: usize = 100;
@ -410,7 +407,7 @@ pub fn add_noqa_to_path(
// TODO(dhruvmanila): Add support for Jupyter Notebooks
add_noqa(
path,
&diagnostics.0,
&diagnostics,
&locator,
indexer.comment_ranges(),
&settings.external,
@ -429,7 +426,7 @@ pub fn lint_only(
source_kind: &SourceKind,
source_type: PySourceType,
data: ParseSource,
) -> LinterResult<(Vec<Message>, Option<ImportMap>)> {
) -> LinterResult<Vec<Message>> {
// Tokenize once.
let tokens = data.into_token_source(source_kind, source_type);
@ -465,12 +462,7 @@ pub fn lint_only(
tokens,
);
result.map(|(diagnostics, imports)| {
(
diagnostics_to_messages(diagnostics, path, &locator, &directives),
imports,
)
})
result.map(|diagnostics| diagnostics_to_messages(diagnostics, path, &locator, &directives))
}
/// Convert from diagnostics to messages.
@ -583,7 +575,7 @@ pub fn lint_fix<'a>(
code: fixed_contents,
fixes: applied,
source_map,
}) = fix_file(&result.data.0, &locator, unsafe_fixes)
}) = fix_file(&result.data, &locator, unsafe_fixes)
{
if iterations < MAX_ITERATIONS {
// Count the number of fixed errors.
@ -600,15 +592,12 @@ pub fn lint_fix<'a>(
continue;
}
report_failed_to_converge_error(path, transformed.source_code(), &result.data.0);
report_failed_to_converge_error(path, transformed.source_code(), &result.data);
}
return Ok(FixerResult {
result: result.map(|(diagnostics, imports)| {
(
diagnostics_to_messages(diagnostics, path, &locator, &directives),
imports,
)
result: result.map(|diagnostics| {
diagnostics_to_messages(diagnostics, path, &locator, &directives)
}),
transformed,
fixed,


@ -611,7 +611,7 @@ mod tests {
&indexer,
);
let LinterResult {
data: (mut diagnostics, ..),
data: mut diagnostics,
..
} = check_path(
Path::new("<filename>"),


@ -10,6 +10,9 @@ use itertools::Itertools;
use rustc_hash::FxHashMap;
use ruff_diagnostics::{Applicability, Diagnostic, FixAvailability};
use ruff_notebook::Notebook;
#[cfg(not(fuzzing))]
use ruff_notebook::NotebookError;
use ruff_python_ast::PySourceType;
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
@ -29,9 +32,6 @@ use crate::rules::pycodestyle::rules::syntax_error;
use crate::settings::types::UnsafeFixes;
use crate::settings::{flags, LinterSettings};
use crate::source_kind::SourceKind;
use ruff_notebook::Notebook;
#[cfg(not(fuzzing))]
use ruff_notebook::NotebookError;
#[cfg(not(fuzzing))]
pub(crate) fn test_resource_path(path: impl AsRef<Path>) -> std::path::PathBuf {
@ -123,7 +123,7 @@ pub(crate) fn test_contents<'a>(
&indexer,
);
let LinterResult {
data: (diagnostics, _imports),
data: diagnostics,
error,
} = check_path(
path,
@ -190,7 +190,7 @@ pub(crate) fn test_contents<'a>(
);
let LinterResult {
data: (fixed_diagnostics, _),
data: fixed_diagnostics,
error: fixed_error,
} = check_path(
path,


@ -1,8 +1,3 @@
use ruff_text_size::TextRange;
use rustc_hash::FxHashMap;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// A representation of an individual name imported via any import statement.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AnyImport<'a> {
@ -117,60 +112,3 @@ impl FutureImport for AnyImport<'_> {
}
}
}
/// A representation of a module reference in an import statement.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ModuleImport {
module: String,
range: TextRange,
}
impl ModuleImport {
pub fn new(module: String, range: TextRange) -> Self {
Self { module, range }
}
}
impl From<&ModuleImport> for TextRange {
fn from(import: &ModuleImport) -> TextRange {
import.range
}
}
/// A representation of the import dependencies between modules.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ImportMap {
/// A map from dot-delimited module name to the list of imports in that module.
module_to_imports: FxHashMap<String, Vec<ModuleImport>>,
}
impl ImportMap {
pub fn new() -> Self {
Self {
module_to_imports: FxHashMap::default(),
}
}
pub fn insert(&mut self, module: String, imports_vec: Vec<ModuleImport>) {
self.module_to_imports.insert(module, imports_vec);
}
pub fn extend(&mut self, other: Self) {
self.module_to_imports.extend(other.module_to_imports);
}
pub fn iter(&self) -> std::collections::hash_map::Iter<String, Vec<ModuleImport>> {
self.module_to_imports.iter()
}
}
impl<'a> IntoIterator for &'a ImportMap {
type IntoIter = std::collections::hash_map::Iter<'a, String, Vec<ModuleImport>>;
type Item = (&'a String, &'a Vec<ModuleImport>);
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}


@ -79,8 +79,7 @@ pub(crate) fn check(
// Generate checks.
let LinterResult {
data: (diagnostics, _imports),
..
data: diagnostics, ..
} = check_path(
&document_path,
package,


@ -179,8 +179,7 @@ impl Workspace {
// Generate checks.
let LinterResult {
data: (diagnostics, _imports),
..
data: diagnostics, ..
} = check_path(
Path::new("<filename>"),
None,


@ -43,7 +43,7 @@ fn do_fuzz(case: &[u8]) -> Corpus {
let mut warnings = HashMap::new();
for msg in linter_results.data.0 {
for msg in linter_results.data {
let count: &mut usize = warnings.entry(msg.kind.name).or_default();
*count += 1;
}
@ -67,7 +67,7 @@ fn do_fuzz(case: &[u8]) -> Corpus {
"formatter introduced a parse error"
);
for msg in linter_results.data.0 {
for msg in linter_results.data {
if let Some(count) = warnings.get_mut(&msg.kind.name) {
if let Some(decremented) = count.checked_sub(1) {
*count = decremented;