Avoid allocations in lowercase comparisons (#5137)

## Summary

I noticed that we have a few hot comparisons that involve calling
`s.to_lowercase()`. We can avoid an allocation by comparing characters
directly.
This commit is contained in:
Charlie Marsh 2023-06-16 08:57:43 -04:00 committed by GitHub
parent 3af9dfeb0a
commit 307f7a735c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 108 additions and 57 deletions

View file

@ -4,7 +4,7 @@ use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers::RaiseStatementVisitor;
use ruff_python_ast::statement_visitor::StatementVisitor;
use ruff_python_stdlib::str::is_lower;
use ruff_python_stdlib::str::is_cased_lowercase;
use crate::checkers::ast::Checker;
@ -33,7 +33,7 @@ pub(crate) fn raise_without_from_inside_except(checker: &mut Checker, body: &[St
if cause.is_none() {
if let Some(exc) = exc {
match exc {
Expr::Name(ast::ExprName { id, .. }) if is_lower(id) => {}
Expr::Name(ast::ExprName { id, .. }) if is_cased_lowercase(id) => {}
_ => {
checker
.diagnostics

View file

@ -71,10 +71,10 @@ impl Violation for YodaConditions {
/// Return `true` if an [`Expr`] is a constant or a constant-like name.
fn is_constant_like(expr: &Expr) -> bool {
match expr {
Expr::Attribute(ast::ExprAttribute { attr, .. }) => str::is_upper(attr),
Expr::Attribute(ast::ExprAttribute { attr, .. }) => str::is_cased_uppercase(attr),
Expr::Constant(_) => true,
Expr::Tuple(ast::ExprTuple { elts, .. }) => elts.iter().all(is_constant_like),
Expr::Name(ast::ExprName { id, .. }) => str::is_upper(id),
Expr::Name(ast::ExprName { id, .. }) => str::is_cased_uppercase(id),
Expr::UnaryOp(ast::ExprUnaryOp {
op: Unaryop::UAdd | Unaryop::USub | Unaryop::Invert,
operand,

View file

@ -32,7 +32,7 @@ fn prefix(
} else if variables.contains(name) {
// Ex) `variable`
Prefix::Variables
} else if name.len() > 1 && str::is_upper(name) {
} else if name.len() > 1 && str::is_cased_uppercase(name) {
// Ex) `CONSTANT`
Prefix::Constants
} else if name.chars().next().map_or(false, char::is_uppercase) {

View file

@ -2,14 +2,14 @@ use itertools::Itertools;
use rustpython_parser::ast::{self, Expr, Stmt};
use ruff_python_semantic::SemanticModel;
use ruff_python_stdlib::str::{is_lower, is_upper};
use ruff_python_stdlib::str::{is_cased_lowercase, is_cased_uppercase};
pub(super) fn is_camelcase(name: &str) -> bool {
!is_lower(name) && !is_upper(name) && !name.contains('_')
!is_cased_lowercase(name) && !is_cased_uppercase(name) && !name.contains('_')
}
pub(super) fn is_mixed_case(name: &str) -> bool {
!is_lower(name)
!is_cased_lowercase(name)
&& name
.strip_prefix('_')
.unwrap_or(name)

View file

@ -63,8 +63,8 @@ pub(crate) fn camelcase_imported_as_acronym(
}
if helpers::is_camelcase(name)
&& !str::is_lower(asname)
&& str::is_upper(asname)
&& !str::is_cased_lowercase(asname)
&& str::is_cased_uppercase(asname)
&& helpers::is_acronym(name, asname)
{
let mut diagnostic = Diagnostic::new(

View file

@ -60,8 +60,8 @@ pub(crate) fn camelcase_imported_as_constant(
}
if helpers::is_camelcase(name)
&& !str::is_lower(asname)
&& str::is_upper(asname)
&& !str::is_cased_lowercase(asname)
&& str::is_cased_uppercase(asname)
&& !helpers::is_acronym(name, asname)
{
let mut diagnostic = Diagnostic::new(

View file

@ -58,7 +58,7 @@ pub(crate) fn camelcase_imported_as_lowercase(
return None;
}
if helpers::is_camelcase(name) && ruff_python_stdlib::str::is_lower(asname) {
if helpers::is_camelcase(name) && ruff_python_stdlib::str::is_cased_lowercase(asname) {
let mut diagnostic = Diagnostic::new(
CamelcaseImportedAsLowercase {
name: name.to_string(),

View file

@ -59,7 +59,7 @@ pub(crate) fn constant_imported_as_non_constant(
return None;
}
if str::is_upper(name) && !str::is_upper(asname) {
if str::is_cased_uppercase(name) && !str::is_cased_uppercase(asname) {
let mut diagnostic = Diagnostic::new(
ConstantImportedAsNonConstant {
name: name.to_string(),

View file

@ -2,6 +2,7 @@ use rustpython_parser::ast::{Arg, Ranged};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_stdlib::str;
use crate::settings::types::IdentifierPattern;
@ -58,7 +59,7 @@ pub(crate) fn invalid_argument_name(
{
return None;
}
if name.to_lowercase() != name {
if !str::is_lowercase(name) {
return Some(Diagnostic::new(
InvalidArgumentName {
name: name.to_string(),

View file

@ -6,6 +6,7 @@ use ruff_python_ast::identifier::Identifier;
use ruff_python_ast::source_code::Locator;
use ruff_python_semantic::analyze::visibility;
use ruff_python_semantic::SemanticModel;
use ruff_python_stdlib::str;
use crate::settings::types::IdentifierPattern;
@ -67,7 +68,7 @@ pub(crate) fn invalid_function_name(
}
// Ignore any function names that are already lowercase.
if name.to_lowercase() == name {
if str::is_lowercase(name) {
return None;
}

View file

@ -58,7 +58,8 @@ pub(crate) fn lowercase_imported_as_non_lowercase(
return None;
}
if !str::is_upper(name) && str::is_lower(name) && asname.to_lowercase() != asname {
if !str::is_cased_uppercase(name) && str::is_cased_lowercase(name) && !str::is_lowercase(asname)
{
let mut diagnostic = Diagnostic::new(
LowercaseImportedAsNonLowercase {
name: name.to_string(),

View file

@ -2,6 +2,7 @@ use rustpython_parser::ast::{Expr, Ranged, Stmt};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_stdlib::str;
use crate::checkers::ast::Checker;
use crate::rules::pep8_naming::helpers;
@ -65,7 +66,7 @@ pub(crate) fn non_lowercase_variable_in_function(
return;
}
if name.to_lowercase() != name
if !str::is_lowercase(name)
&& !helpers::is_named_tuple_assignment(stmt, checker.semantic())
&& !helpers::is_typed_dict_assignment(stmt, checker.semantic())
&& !helpers::is_type_var_assignment(stmt, checker.semantic())

View file

@ -22,7 +22,7 @@ use ruff_python_ast::source_code::Locator;
/// regex = r"\.png$"
/// ```
#[violation]
pub struct InvalidEscapeSequence(pub char);
pub struct InvalidEscapeSequence(char);
impl AlwaysAutofixableViolation for InvalidEscapeSequence {
#[derive_message_formats]

View file

@ -1,14 +1,67 @@
/// Opening delimiters for triple-quoted string literals: each of the `u`, `r`, `U`, and `R`
/// prefixes combined with `"""` and `'''`, followed by the two bare triple-quote forms.
///
/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals>
pub const TRIPLE_QUOTE_PREFIXES: &[&str] = &[
"u\"\"\"", "u'''", "r\"\"\"", "r'''", "U\"\"\"", "U'''", "R\"\"\"", "R'''", "\"\"\"", "'''",
];
/// Opening delimiters for single-quoted string literals: each of the `u`, `r`, `U`, and `R`
/// prefixes combined with `"` and `'`, followed by the two bare quote characters.
pub const SINGLE_QUOTE_PREFIXES: &[&str] = &[
"u\"", "u'", "r\"", "r'", "U\"", "U'", "R\"", "R'", "\"", "'",
];
/// Closing delimiters for triple-quoted string literals (`"""` and `'''`).
pub const TRIPLE_QUOTE_SUFFIXES: &[&str] = &["\"\"\"", "'''"];
/// Closing delimiters for single-quoted string literals (`"` and `'`).
pub const SINGLE_QUOTE_SUFFIXES: &[&str] = &["\"", "'"];
/// Return `true` if a string is lowercase.
///
/// A string is lowercase if lowercasing it would leave it unchanged, i.e., if none of its
/// characters has a distinct lowercase form. Characters without case (digits, underscores, and
/// uncased letters such as CJK ideographs) never disqualify a string.
///
/// This gives the same answer as `s.to_lowercase() == s`, but without allocating the
/// lowercased copy.
///
/// ## Examples
///
/// ```rust
/// use ruff_python_stdlib::str::is_lowercase;
///
/// assert!(is_lowercase("abc"));
/// assert!(is_lowercase("a_b_c"));
/// assert!(is_lowercase("a2c"));
/// assert!(!is_lowercase("aBc"));
/// assert!(!is_lowercase("ABC"));
/// assert!(is_lowercase(""));
/// assert!(is_lowercase("_"));
/// assert!(is_lowercase("变量"));
/// ```
pub fn is_lowercase(s: &str) -> bool {
    // Compare every character with its own lowercase mapping instead of testing
    // `is_alphabetic() && !is_lowercase()`: the latter rejects letters that have no case at all
    // (e.g., CJK ideographs), which a `to_lowercase()` round-trip would accept.
    s.chars().all(|c| c.to_lowercase().eq(std::iter::once(c)))
}
pub fn is_lower(s: &str) -> bool {
/// Return `true` if a string is uppercase.
///
/// A string is uppercase if uppercasing it would leave it unchanged, i.e., if none of its
/// characters has a distinct uppercase form. Characters without case (digits, underscores, and
/// uncased letters such as CJK ideographs) never disqualify a string.
///
/// This gives the same answer as `s.to_uppercase() == s`, but without allocating the
/// uppercased copy.
///
/// ## Examples
///
/// ```rust
/// use ruff_python_stdlib::str::is_uppercase;
///
/// assert!(is_uppercase("ABC"));
/// assert!(is_uppercase("A_B_C"));
/// assert!(is_uppercase("A2C"));
/// assert!(!is_uppercase("aBc"));
/// assert!(!is_uppercase("abc"));
/// assert!(is_uppercase(""));
/// assert!(is_uppercase("_"));
/// assert!(is_uppercase("常量"));
/// ```
pub fn is_uppercase(s: &str) -> bool {
    // Compare every character with its own uppercase mapping instead of testing
    // `is_alphabetic() && !is_uppercase()`: the latter rejects letters that have no case at all
    // (e.g., CJK ideographs), which a `to_uppercase()` round-trip would accept.
    s.chars().all(|c| c.to_uppercase().eq(std::iter::once(c)))
}
/// Return `true` if a string is _cased_ as lowercase.
///
/// A string is cased as lowercase if it contains at least one lowercase character and no uppercase
/// characters.
///
/// This differs from `str::is_lowercase` in that it returns `false` for empty strings and strings
/// that contain only underscores or other non-alphabetic characters.
///
/// ## Examples
///
/// ```rust
/// use ruff_python_stdlib::str::is_cased_lowercase;
///
/// assert!(is_cased_lowercase("abc"));
/// assert!(is_cased_lowercase("a_b_c"));
/// assert!(is_cased_lowercase("a2c"));
/// assert!(!is_cased_lowercase("aBc"));
/// assert!(!is_cased_lowercase("ABC"));
/// assert!(!is_cased_lowercase(""));
/// assert!(!is_cased_lowercase("_"));
/// ```
pub fn is_cased_lowercase(s: &str) -> bool {
let mut cased = false;
for c in s.chars() {
if c.is_uppercase() {
@ -20,7 +73,28 @@ pub fn is_lower(s: &str) -> bool {
cased
}
pub fn is_upper(s: &str) -> bool {
/// Return `true` if a string is _cased_ as uppercase.
///
/// A string is cased as uppercase if it contains at least one uppercase character and no lowercase
/// characters.
///
/// This differs from `str::is_uppercase` in that it returns `false` for empty strings and strings
/// that contain only underscores or other non-alphabetic characters.
///
/// ## Examples
///
/// ```rust
/// use ruff_python_stdlib::str::is_cased_uppercase;
///
/// assert!(is_cased_uppercase("ABC"));
/// assert!(is_cased_uppercase("A_B_C"));
/// assert!(is_cased_uppercase("A2C"));
/// assert!(!is_cased_uppercase("aBc"));
/// assert!(!is_cased_uppercase("abc"));
/// assert!(!is_cased_uppercase(""));
/// assert!(!is_cased_uppercase("_"));
/// ```
pub fn is_cased_uppercase(s: &str) -> bool {
let mut cased = false;
for c in s.chars() {
if c.is_lowercase() {
@ -31,30 +105,3 @@ pub fn is_upper(s: &str) -> bool {
}
cased
}
// Unit tests for the `is_lower` / `is_upper` helpers imported below.
#[cfg(test)]
mod tests {
use crate::str::{is_lower, is_upper};
// `is_lower` accepts strings whose cased characters are all lowercase, and rejects mixed-case
// and all-uppercase strings as well as inputs with no cased characters (`""` and `"_"`).
#[test]
fn test_is_lower() {
assert!(is_lower("abc"));
assert!(is_lower("a_b_c"));
assert!(is_lower("a2c"));
assert!(!is_lower("aBc"));
assert!(!is_lower("ABC"));
assert!(!is_lower(""));
assert!(!is_lower("_"));
}
// `is_upper` accepts strings whose cased characters are all uppercase, and rejects mixed-case
// and all-lowercase strings as well as inputs with no cased characters (`""` and `"_"`).
#[test]
fn test_is_upper() {
assert!(is_upper("ABC"));
assert!(is_upper("A_B_C"));
assert!(is_upper("A2C"));
assert!(!is_upper("aBc"));
assert!(!is_upper("abc"));
assert!(!is_upper(""));
assert!(!is_upper("_"));
}
}