Fix is_module_name() and improve perf of is_identifier() (#3795)

This commit is contained in:
Jonathan Plasse 2023-03-31 21:15:36 +02:00 committed by GitHub
parent fe38597279
commit 968c7df770
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 135 additions and 29 deletions

View file

@ -41,9 +41,10 @@ mod tests {
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/no_module/test.txt"); "N999_8")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/file-with-dashes.py"); "N999_9")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/__main__.py"); "N999_10")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/0001_initial.py"); "N999_11")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/invalid_name/0001_initial.py"); "N999_11")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/__setup__.py"); "N999_12")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/file-with-dashes"); "N999_13")]
#[test_case(Rule::InvalidModuleName, Path::new("N999/module/invalid_name/import.py"); "N999_14")]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(

View file

@ -1,13 +1,14 @@
use std::ffi::OsStr;
use std::path::Path;
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::types::Range;
use ruff_python_stdlib::identifiers::is_module_name;
use ruff_python_stdlib::identifiers::{is_migration_name, is_module_name};
/// ## What it does
/// Checks for module names that do not follow the `snake_case` naming
/// convention.
/// convention or are otherwise invalid.
///
/// ## Why is this bad?
/// [PEP 8] recommends the use of the `snake_case` naming convention for
@ -21,6 +22,10 @@ use ruff_python_stdlib::identifiers::is_module_name;
/// > provides a higher level (e.g. more object oriented) interface, the C/C++ module has
/// > a leading underscore (e.g. `_socket`).
///
/// Further, in order for Python modules to be importable, they must be valid
/// identifiers. As such, they cannot start with a digit, or collide with hard
/// keywords, like `import` or `class`.
///
/// ## Example
/// - Instead of `example-module-name` or `example module name`, use `example_module_name`.
/// - Instead of `ExampleModule`, use `example_module`.
@ -49,18 +54,21 @@ pub fn invalid_module_name(path: &Path, package: Option<&Path>) -> Option<Diagno
}
if let Some(package) = package {
let module_name = if path.file_name().map_or(false, |file_name| {
file_name == "__init__.py"
|| file_name == "__init__.pyi"
|| file_name == "__main__.py"
|| file_name == "__main__.pyi"
}) {
let module_name = if is_module_file(path) {
package.file_name().unwrap().to_string_lossy()
} else {
path.file_stem().unwrap().to_string_lossy()
};
if !is_module_name(&module_name) {
// As a special case, we allow files in `versions` and `migrations` directories to start
// with a digit (e.g., `0001_initial.py`), to support common conventions used by Django
// and other frameworks.
let is_valid_module_name = if is_migration_file(path) {
is_migration_name(&module_name)
} else {
is_module_name(&module_name)
};
if !is_valid_module_name {
return Some(Diagnostic::new(
InvalidModuleName {
name: module_name.to_string(),
@ -72,3 +80,21 @@ pub fn invalid_module_name(path: &Path, package: Option<&Path>) -> Option<Diagno
None
}
/// Return `true` if a [`Path`] should use the name of its parent directory as its module name.
fn is_module_file(path: &Path) -> bool {
path.file_name().map_or(false, |file_name| {
file_name == "__init__.py"
|| file_name == "__init__.pyi"
|| file_name == "__main__.py"
|| file_name == "__main__.pyi"
})
}
/// Return `true` if a [`Path`] refers to a migration file.
fn is_migration_file(path: &Path) -> bool {
path.parent()
.and_then(Path::file_name)
.and_then(OsStr::to_str)
.map_or(false, |parent| matches!(parent, "versions" | "migrations"))
}

View file

@ -0,0 +1,19 @@
---
source: crates/ruff/src/rules/pep8_naming/mod.rs
expression: diagnostics
---
- kind:
name: InvalidModuleName
body: "Invalid module name: '0001_initial'"
suggestion: ~
fixable: false
location:
row: 1
column: 0
end_location:
row: 1
column: 0
fix:
edits: []
parent: ~

View file

@ -0,0 +1,19 @@
---
source: crates/ruff/src/rules/pep8_naming/mod.rs
expression: diagnostics
---
- kind:
name: InvalidModuleName
body: "Invalid module name: 'import'"
suggestion: ~
fixable: false
location:
row: 1
column: 0
end_location:
row: 1
column: 0
fix:
edits: []
parent: ~

View file

@ -1,6 +0,0 @@
---
source: crates/ruff/src/rules/pep8_naming/mod.rs
expression: diagnostics
---
[]

View file

@ -1,9 +1,11 @@
use crate::keyword::KWLIST;
/// Returns `true` if a string is a valid Python identifier (e.g., variable
/// name).
pub fn is_identifier(s: &str) -> bool {
pub fn is_identifier(name: &str) -> bool {
// Is the first character a letter or underscore?
if !s
.chars()
let mut chars = name.chars();
if !chars
.next()
.map_or(false, |c| c.is_alphabetic() || c == '_')
{
@ -11,7 +13,7 @@ pub fn is_identifier(s: &str) -> bool {
}
// Are the rest of the characters letters, digits, or underscores?
s.chars().skip(1).all(|c| c.is_alphanumeric() || c == '_')
chars.all(|c| c.is_alphanumeric() || c == '_')
}
/// Returns `true` if a string is a private identifier, such that, when the
@ -24,26 +26,71 @@ pub fn is_mangled_private(id: &str) -> bool {
}
/// Returns `true` if a string is a PEP 8-compliant module name (i.e., consists of lowercase
/// letters, numbers, and underscores).
pub fn is_module_name(s: &str) -> bool {
s.chars()
.all(|c| c.is_lowercase() || c.is_numeric() || c == '_')
/// letters, numbers, underscores, and is not a keyword).
pub fn is_module_name(name: &str) -> bool {
// Is the string a keyword?
if KWLIST.contains(&name) {
return false;
}
// Is the first character a letter or underscore?
let mut chars = name.chars();
if !chars
.next()
.map_or(false, |c| c.is_ascii_lowercase() || c == '_')
{
return false;
}
// Are the rest of the characters letters, digits, or underscores?
chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
}
/// Returns `true` if a string appears to be a valid migration file name (e.g., `0001_initial.py`).
pub fn is_migration_name(name: &str) -> bool {
// Is the string a keyword?
if KWLIST.contains(&name) {
return false;
}
// Are characters letters, digits, or underscores?
name.chars()
.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
}
#[cfg(test)]
mod tests {
use crate::identifiers::is_module_name;
use crate::identifiers::{is_migration_name, is_module_name};
#[test]
fn test_is_module_name() {
fn module_name() {
assert!(is_module_name("_abc"));
assert!(is_module_name("a"));
assert!(is_module_name("a_b_c"));
assert!(is_module_name("abc"));
assert!(is_module_name("abc0"));
assert!(is_module_name("abc_"));
assert!(is_module_name("a_b_c"));
assert!(is_module_name("0abc"));
assert!(is_module_name("_abc"));
assert!(!is_module_name("0001_initial"));
assert!(!is_module_name("0abc"));
assert!(!is_module_name("a-b-c"));
assert!(!is_module_name("a_B_c"));
assert!(!is_module_name("class"));
assert!(!is_module_name("δ"));
}
#[test]
fn migration_name() {
assert!(is_migration_name("0001_initial"));
assert!(is_migration_name("0abc"));
assert!(is_migration_name("_abc"));
assert!(is_migration_name("a"));
assert!(is_migration_name("a_b_c"));
assert!(is_migration_name("abc"));
assert!(is_migration_name("abc0"));
assert!(is_migration_name("abc_"));
assert!(!is_migration_name("a-b-c"));
assert!(!is_migration_name("a_B_c"));
assert!(!is_migration_name("class"));
assert!(!is_migration_name("δ"));
}
}