Fix NFKC normalization bug when removing unused imports (#12571)

This commit is contained in:
Alex Waygood 2024-07-30 10:54:35 +01:00 committed by GitHub
parent f3c14a4276
commit aaa56eb0bd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 76 additions and 31 deletions

1
Cargo.lock generated
View file

@ -2319,6 +2319,7 @@ dependencies = [
"thiserror",
"toml",
"typed-arena",
"unicode-normalization",
"unicode-width",
"unicode_names2",
"url",

View file

@ -69,6 +69,7 @@ toml = { workspace = true }
typed-arena = { workspace = true }
unicode-width = { workspace = true }
unicode_names2 = { workspace = true }
unicode-normalization = { workspace = true }
url = { workspace = true }
[dev-dependencies]

View file

@ -0,0 +1,6 @@
"""
Test: ensure we're able to correctly remove unused imports
even if they have characters in them that undergo NFKC normalization
"""
from .main import MaµToMan

View file

@ -1,13 +1,16 @@
//! Interface for editing code snippets. These functions take statements or expressions as input,
//! and return the modified code snippet as output.
use std::borrow::Cow;
use anyhow::{bail, Result};
use libcst_native::{
Codegen, CodegenState, Expression, ImportNames, NameOrAttribute, ParenthesizableWhitespace,
SmallStatement, Statement,
};
use ruff_python_ast::name::UnqualifiedName;
use smallvec::{smallvec, SmallVec};
use unicode_normalization::UnicodeNormalization;
use ruff_python_ast::name::UnqualifiedName;
use ruff_python_ast::Stmt;
use ruff_python_codegen::Stylist;
use ruff_source_file::Locator;
@ -167,39 +170,55 @@ pub(crate) fn retain_imports(
Ok(tree.codegen_stylist(stylist))
}
fn collect_segments<'a>(expr: &'a Expression, parts: &mut SmallVec<[&'a str; 8]>) {
match expr {
Expression::Call(expr) => {
collect_segments(&expr.func, parts);
}
Expression::Attribute(expr) => {
collect_segments(&expr.value, parts);
parts.push(expr.attr.value);
}
Expression::Name(expr) => {
parts.push(expr.value);
}
_ => {}
}
}
fn unqualified_name_from_expression<'a>(expr: &'a Expression<'a>) -> Option<UnqualifiedName<'a>> {
let mut segments = smallvec![];
collect_segments(expr, &mut segments);
if segments.is_empty() {
None
} else {
Some(segments.into_iter().collect())
}
}
/// Create an NFKC-normalized qualified name from a libCST node.
fn qualified_name_from_name_or_attribute(module: &NameOrAttribute) -> String {
match module {
NameOrAttribute::N(name) => name.value.to_string(),
fn collect_segments<'a>(expr: &'a Expression, parts: &mut SmallVec<[&'a str; 8]>) {
match expr {
Expression::Call(expr) => {
collect_segments(&expr.func, parts);
}
Expression::Attribute(expr) => {
collect_segments(&expr.value, parts);
parts.push(expr.attr.value);
}
Expression::Name(expr) => {
parts.push(expr.value);
}
_ => {}
}
}
/// Attempt to create an [`UnqualifiedName`] from a libCST expression.
///
/// Strictly speaking, the `UnqualifiedName` returned by this function may be invalid,
/// since it hasn't been NFKC-normalized. In order for an `UnqualifiedName` to be
/// comparable to one constructed from a `ruff_python_ast` node, it has to undergo
/// NFKC normalization. As a local function, however, this is fine;
/// the outer function always performs NFKC normalization before returning the
/// qualified name to the caller.
fn unqualified_name_from_expression<'a>(
expr: &'a Expression<'a>,
) -> Option<UnqualifiedName<'a>> {
let mut segments = smallvec![];
collect_segments(expr, &mut segments);
if segments.is_empty() {
None
} else {
Some(segments.into_iter().collect())
}
}
let unnormalized = match module {
NameOrAttribute::N(name) => Cow::Borrowed(name.value),
NameOrAttribute::A(attr) => {
let name = attr.attr.value;
let prefix = unqualified_name_from_expression(&attr.value);
prefix.map_or_else(|| name.to_string(), |prefix| format!("{prefix}.{name}"))
prefix.map_or_else(
|| Cow::Borrowed(name),
|prefix| Cow::Owned(format!("{prefix}.{name}")),
)
}
}
};
unnormalized.nfkc().collect()
}

View file

@ -258,6 +258,7 @@ mod tests {
#[test_case(Rule::UnusedImport, Path::new("F401_27__all_mistyped/__init__.py"))]
#[test_case(Rule::UnusedImport, Path::new("F401_28__all_multiple/__init__.py"))]
#[test_case(Rule::UnusedImport, Path::new("F401_29__all_conditional/__init__.py"))]
#[test_case(Rule::UnusedImport, Path::new("F401_30.py"))]
fn f401_deprecated_option(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"{}_deprecated_option_{}",

View file

@ -0,0 +1,17 @@
---
source: crates/ruff_linter/src/rules/pyflakes/mod.rs
---
F401_30.py:6:19: F401 [*] `.main.MaμToMan` imported but unused
|
4 | """
5 |
6 | from .main import MaµToMan
| ^^^^^^^^ F401
|
= help: Remove unused import: `.main.MaμToMan`
Safe fix
3 3 | even if they have characters in them that undergo NFKC normalization
4 4 | """
5 5 |
6 |-from .main import MaµToMan