Fix is_module_name() and improve perf of is_identifier() (#3795)

This commit is contained in:
Jonathan Plasse 2023-03-31 21:15:36 +02:00 committed by GitHub
parent fe38597279
commit 968c7df770
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 135 additions and 29 deletions

View file

@ -41,9 +41,10 @@ mod tests {
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/no_module/test.txt"); "N999_8")] #[test_case(Rule::InvalidModuleName, Path::new("N999/module/no_module/test.txt"); "N999_8")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/file-with-dashes.py"); "N999_9")] #[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/file-with-dashes.py"); "N999_9")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/__main__.py"); "N999_10")] #[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/__main__.py"); "N999_10")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/0001_initial.py"); "N999_11")] #[test_case(Rule::InvalidModuleName, Path::new("N999/module/invalid_name/0001_initial.py"); "N999_11")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/__setup__.py"); "N999_12")] #[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/__setup__.py"); "N999_12")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/file-with-dashes"); "N999_13")] #[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/file-with-dashes"); "N999_13")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/invalid_name/import.py"); "N999_14")]
fn rules(rule_code: Rule, path: &Path) -> Result<()> { fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path( let diagnostics = test_path(

View file

@ -1,13 +1,14 @@
use std::ffi::OsStr;
use std::path::Path; use std::path::Path;
use ruff_diagnostics::{Diagnostic, Violation}; use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::types::Range; use ruff_python_ast::types::Range;
use ruff_python_stdlib::identifiers::is_module_name; use ruff_python_stdlib::identifiers::{is_migration_name, is_module_name};
/// ## What it does /// ## What it does
/// Checks for module names that do not follow the `snake_case` naming /// Checks for module names that do not follow the `snake_case` naming
/// convention. /// convention or are otherwise invalid.
/// ///
/// ## Why is this bad? /// ## Why is this bad?
/// [PEP 8] recommends the use of the `snake_case` naming convention for /// [PEP 8] recommends the use of the `snake_case` naming convention for
@ -21,6 +22,10 @@ use ruff_python_stdlib::identifiers::is_module_name;
/// > provides a higher level (e.g. more object oriented) interface, the C/C++ module has /// > provides a higher level (e.g. more object oriented) interface, the C/C++ module has
/// > a leading underscore (e.g. `_socket`). /// > a leading underscore (e.g. `_socket`).
/// ///
/// Further, in order for Python modules to be importable, they must be valid
/// identifiers. As such, they cannot start with a digit, or collide with hard
/// keywords, like `import` or `class`.
///
/// ## Example /// ## Example
/// - Instead of `example-module-name` or `example module name`, use `example_module_name`. /// - Instead of `example-module-name` or `example module name`, use `example_module_name`.
/// - Instead of `ExampleModule`, use `example_module`. /// - Instead of `ExampleModule`, use `example_module`.
@ -49,18 +54,21 @@ pub fn invalid_module_name(path: &Path, package: Option<&Path>) -> Option<Diagno
} }
if let Some(package) = package { if let Some(package) = package {
let module_name = if path.file_name().map_or(false, |file_name| { let module_name = if is_module_file(path) {
file_name == "__init__.py"
|| file_name == "__init__.pyi"
|| file_name == "__main__.py"
|| file_name == "__main__.pyi"
}) {
package.file_name().unwrap().to_string_lossy() package.file_name().unwrap().to_string_lossy()
} else { } else {
path.file_stem().unwrap().to_string_lossy() path.file_stem().unwrap().to_string_lossy()
}; };
if !is_module_name(&module_name) { // As a special case, we allow files in `versions` and `migrations` directories to start
// with a digit (e.g., `0001_initial.py`), to support common conventions used by Django
// and other frameworks.
let is_valid_module_name = if is_migration_file(path) {
is_migration_name(&module_name)
} else {
is_module_name(&module_name)
};
if !is_valid_module_name {
return Some(Diagnostic::new( return Some(Diagnostic::new(
InvalidModuleName { InvalidModuleName {
name: module_name.to_string(), name: module_name.to_string(),
@ -72,3 +80,21 @@ pub fn invalid_module_name(path: &Path, package: Option<&Path>) -> Option<Diagno
None None
} }
/// Return `true` if a [`Path`] should use the name of its parent directory as its module name.
fn is_module_file(path: &Path) -> bool {
path.file_name().map_or(false, |file_name| {
file_name == "__init__.py"
|| file_name == "__init__.pyi"
|| file_name == "__main__.py"
|| file_name == "__main__.pyi"
})
}
/// Return `true` if a [`Path`] refers to a migration file.
fn is_migration_file(path: &Path) -> bool {
path.parent()
.and_then(Path::file_name)
.and_then(OsStr::to_str)
.map_or(false, |parent| matches!(parent, "versions" | "migrations"))
}

View file

@ -0,0 +1,19 @@
---
source: crates/ruff/src/rules/pep8_naming/mod.rs
expression: diagnostics
---
- kind:
name: InvalidModuleName
body: "Invalid module name: '0001_initial'"
suggestion: ~
fixable: false
location:
row: 1
column: 0
end_location:
row: 1
column: 0
fix:
edits: []
parent: ~

View file

@ -0,0 +1,19 @@
---
source: crates/ruff/src/rules/pep8_naming/mod.rs
expression: diagnostics
---
- kind:
name: InvalidModuleName
body: "Invalid module name: 'import'"
suggestion: ~
fixable: false
location:
row: 1
column: 0
end_location:
row: 1
column: 0
fix:
edits: []
parent: ~

View file

@ -1,6 +0,0 @@
---
source: crates/ruff/src/rules/pep8_naming/mod.rs
expression: diagnostics
---
[]

View file

@ -1,9 +1,11 @@
use crate::keyword::KWLIST;
/// Returns `true` if a string is a valid Python identifier (e.g., variable /// Returns `true` if a string is a valid Python identifier (e.g., variable
/// name). /// name).
pub fn is_identifier(s: &str) -> bool { pub fn is_identifier(name: &str) -> bool {
// Is the first character a letter or underscore? // Is the first character a letter or underscore?
if !s let mut chars = name.chars();
.chars() if !chars
.next() .next()
.map_or(false, |c| c.is_alphabetic() || c == '_') .map_or(false, |c| c.is_alphabetic() || c == '_')
{ {
@ -11,7 +13,7 @@ pub fn is_identifier(s: &str) -> bool {
} }
// Are the rest of the characters letters, digits, or underscores? // Are the rest of the characters letters, digits, or underscores?
s.chars().skip(1).all(|c| c.is_alphanumeric() || c == '_') chars.all(|c| c.is_alphanumeric() || c == '_')
} }
/// Returns `true` if a string is a private identifier, such that, when the /// Returns `true` if a string is a private identifier, such that, when the
@ -24,26 +26,71 @@ pub fn is_mangled_private(id: &str) -> bool {
} }
/// Returns `true` if a string is a PEP 8-compliant module name (i.e., consists of lowercase /// Returns `true` if a string is a PEP 8-compliant module name (i.e., consists of lowercase
/// letters, numbers, and underscores). /// letters, numbers, underscores, and is not a keyword).
pub fn is_module_name(s: &str) -> bool { pub fn is_module_name(name: &str) -> bool {
s.chars() // Is the string a keyword?
.all(|c| c.is_lowercase() || c.is_numeric() || c == '_') if KWLIST.contains(&name) {
return false;
}
// Is the first character a letter or underscore?
let mut chars = name.chars();
if !chars
.next()
.map_or(false, |c| c.is_ascii_lowercase() || c == '_')
{
return false;
}
// Are the rest of the characters letters, digits, or underscores?
chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
}
/// Returns `true` if a string appears to be a valid migration file name (e.g., `0001_initial.py`).
pub fn is_migration_name(name: &str) -> bool {
// Is the string a keyword?
if KWLIST.contains(&name) {
return false;
}
// Are characters letters, digits, or underscores?
name.chars()
.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::identifiers::is_module_name; use crate::identifiers::{is_migration_name, is_module_name};
#[test] #[test]
fn test_is_module_name() { fn module_name() {
assert!(is_module_name("_abc"));
assert!(is_module_name("a")); assert!(is_module_name("a"));
assert!(is_module_name("a_b_c"));
assert!(is_module_name("abc")); assert!(is_module_name("abc"));
assert!(is_module_name("abc0")); assert!(is_module_name("abc0"));
assert!(is_module_name("abc_")); assert!(is_module_name("abc_"));
assert!(is_module_name("a_b_c")); assert!(!is_module_name("0001_initial"));
assert!(is_module_name("0abc")); assert!(!is_module_name("0abc"));
assert!(is_module_name("_abc"));
assert!(!is_module_name("a-b-c")); assert!(!is_module_name("a-b-c"));
assert!(!is_module_name("a_B_c")); assert!(!is_module_name("a_B_c"));
assert!(!is_module_name("class"));
assert!(!is_module_name("δ"));
}
#[test]
fn migration_name() {
assert!(is_migration_name("0001_initial"));
assert!(is_migration_name("0abc"));
assert!(is_migration_name("_abc"));
assert!(is_migration_name("a"));
assert!(is_migration_name("a_b_c"));
assert!(is_migration_name("abc"));
assert!(is_migration_name("abc0"));
assert!(is_migration_name("abc_"));
assert!(!is_migration_name("a-b-c"));
assert!(!is_migration_name("a_B_c"));
assert!(!is_migration_name("class"));
assert!(!is_migration_name("δ"));
} }
} }