Fix is_module_name() and improve perf of is_identifier() (#3795)

This commit is contained in:
Jonathan Plasse 2023-03-31 21:15:36 +02:00 committed by GitHub
parent fe38597279
commit 968c7df770
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 135 additions and 29 deletions

View file

@ -1,9 +1,11 @@
use crate::keyword::KWLIST;
/// Returns `true` if a string is a valid Python identifier (e.g., variable
/// name).
pub fn is_identifier(s: &str) -> bool {
pub fn is_identifier(name: &str) -> bool {
// Is the first character a letter or underscore?
if !s
.chars()
let mut chars = name.chars();
if !chars
.next()
.map_or(false, |c| c.is_alphabetic() || c == '_')
{
@ -11,7 +13,7 @@ pub fn is_identifier(s: &str) -> bool {
}
// Are the rest of the characters letters, digits, or underscores?
s.chars().skip(1).all(|c| c.is_alphanumeric() || c == '_')
chars.all(|c| c.is_alphanumeric() || c == '_')
}
/// Returns `true` if a string is a private identifier, such that, when the
@ -24,26 +26,71 @@ pub fn is_mangled_private(id: &str) -> bool {
}
/// Returns `true` if a string is a PEP 8-compliant module name (i.e., consists of lowercase
/// letters, numbers, and underscores).
pub fn is_module_name(s: &str) -> bool {
s.chars()
.all(|c| c.is_lowercase() || c.is_numeric() || c == '_')
/// letters, numbers, underscores, and is not a keyword).
pub fn is_module_name(name: &str) -> bool {
    let mut rest = name.chars();

    // The leading character must be a lowercase ASCII letter or an underscore;
    // an empty string has no leading character and is rejected here as well.
    let leading_ok = matches!(rest.next(), Some('a'..='z' | '_'));

    // Every following character must be a lowercase ASCII letter, an ASCII
    // digit, or an underscore. Finally, the whole name must not be a keyword.
    leading_ok
        && rest.all(|c| matches!(c, 'a'..='z' | '0'..='9' | '_'))
        && !KWLIST.contains(&name)
}
/// Returns `true` if a string looks like a migration module name (e.g., `0001_initial`).
///
/// Unlike a regular module name, the first character may be a digit. The name
/// must consist solely of lowercase ASCII letters, ASCII digits, and
/// underscores, and must not be a keyword. Any other character (including `.`,
/// so a name with a file extension such as `0001_initial.py`) is rejected.
pub fn is_migration_name(name: &str) -> bool {
    // Character check: lowercase ASCII letters, ASCII digits, or underscores only.
    let only_allowed_chars = name
        .chars()
        .all(|c| matches!(c, 'a'..='z' | '0'..='9' | '_'));

    // Keyword check: a keyword is never accepted.
    only_allowed_chars && !KWLIST.contains(&name)
}
#[cfg(test)]
mod tests {
use crate::identifiers::is_module_name;
use crate::identifiers::{is_migration_name, is_module_name};
#[test]
fn test_is_module_name() {
fn module_name() {
assert!(is_module_name("_abc"));
assert!(is_module_name("a"));
assert!(is_module_name("a_b_c"));
assert!(is_module_name("abc"));
assert!(is_module_name("abc0"));
assert!(is_module_name("abc_"));
assert!(is_module_name("a_b_c"));
assert!(is_module_name("0abc"));
assert!(is_module_name("_abc"));
assert!(!is_module_name("0001_initial"));
assert!(!is_module_name("0abc"));
assert!(!is_module_name("a-b-c"));
assert!(!is_module_name("a_B_c"));
assert!(!is_module_name("class"));
assert!(!is_module_name("δ"));
}
#[test]
fn migration_name() {
    // Names built only from lowercase ASCII letters, digits, and underscores
    // are accepted, including ones that start with a digit.
    for accepted in [
        "0001_initial",
        "0abc",
        "_abc",
        "a",
        "a_b_c",
        "abc",
        "abc0",
        "abc_",
    ] {
        assert!(is_migration_name(accepted));
    }

    // Hyphens, uppercase letters, keywords, and non-ASCII letters are rejected.
    for rejected in ["a-b-c", "a_B_c", "class", "δ"] {
        assert!(!is_migration_name(rejected));
    }
}
}