From 2b76fa8fa1947a9948b4135588c636e5e9f85886 Mon Sep 17 00:00:00 2001 From: Dylan <53534755+dylwil3@users.noreply.github.com> Date: Tue, 5 Nov 2024 13:33:04 -0600 Subject: [PATCH] [refurb] Parse more exotic decimal strings in `verbose-decimal-constructor (FURB157)` (#14098) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FURB157 suggests replacing expressions like `Decimal("123")` with `Decimal(123)`. This PR extends the rule to cover cases where the input string to `Decimal` can be easily transformed into an integer literal. For example: ```python Decimal("1__000") # fix: `Decimal(1000)` ``` Note: we do not implement the full decimal parsing logic from CPython on the grounds that certain acceptable string inputs to the `Decimal` constructor may be presumed purposeful on the part of the developer. For example, as in the linked issue, `Decimal("١٢٣")` is valid and equal to `Decimal(123)`, but we do not suggest a replacement in this case. Closes #13807 --- .../resources/test/fixtures/refurb/FURB157.py | 20 +++++++++ .../rules/verbose_decimal_constructor.rs | 41 ++++++++++++++++--- ...es__refurb__tests__FURB157_FURB157.py.snap | 40 ++++++++++++++++++ 3 files changed, 96 insertions(+), 5 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/refurb/FURB157.py b/crates/ruff_linter/resources/test/fixtures/refurb/FURB157.py index 2d4b3aa089..56c1f00aba 100644 --- a/crates/ruff_linter/resources/test/fixtures/refurb/FURB157.py +++ b/crates/ruff_linter/resources/test/fixtures/refurb/FURB157.py @@ -15,3 +15,23 @@ decimal.Decimal("0") Decimal(0) Decimal("Infinity") decimal.Decimal(0) + +# Handle Python's Decimal parsing +# See https://github.com/astral-sh/ruff/issues/13807 + +# Errors +Decimal("1_000") +Decimal("__1____000") + +# Ok +Decimal("2e-4") +Decimal("2E-4") +Decimal("_1.234__") +Decimal("2e4") +Decimal("2e+4") +Decimal("2E4") +Decimal("1.2") +# Ok: even though this is equal to `Decimal(123)`, +# we assume that a developer would +# only write it this way if they meant to. +Decimal("١٢٣") \ No newline at end of file diff --git a/crates/ruff_linter/src/rules/refurb/rules/verbose_decimal_constructor.rs b/crates/ruff_linter/src/rules/refurb/rules/verbose_decimal_constructor.rs index 9d97047db1..6aecfadbd4 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/verbose_decimal_constructor.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/verbose_decimal_constructor.rs @@ -3,6 +3,7 @@ use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::{self as ast, Expr}; use ruff_python_trivia::PythonWhitespace; use ruff_text_size::Ranged; +use std::borrow::Cow; use crate::checkers::ast::Checker; @@ -75,13 +76,20 @@ pub(crate) fn verbose_decimal_constructor(checker: &mut Checker, call: &ast::Exp value: str_literal, .. }) => { // Parse the inner string as an integer. - let trimmed = str_literal.to_str().trim_whitespace(); - + // + // For reference, a string argument to `Decimal` is parsed in CPython + // using this regex: + // https://github.com/python/cpython/blob/ac556a2ad1213b8bb81372fe6fb762f5fcb076de/Lib/_pydecimal.py#L6060-L6077 + // _after_ trimming whitespace from the string and removing all occurrences of "_". + let mut trimmed = Cow::from(str_literal.to_str().trim_whitespace()); + if memchr::memchr(b'_', trimmed.as_bytes()).is_some() { + trimmed = Cow::from(trimmed.replace('_', "")); + } // Extract the unary sign, if any. let (unary, rest) = if let Some(trimmed) = trimmed.strip_prefix('+') { - ("+", trimmed) + ("+", Cow::from(trimmed)) } else if let Some(trimmed) = trimmed.strip_prefix('-') { - ("-", trimmed) + ("-", Cow::from(trimmed)) } else { ("", trimmed) }; @@ -90,7 +98,7 @@ pub(crate) fn verbose_decimal_constructor(checker: &mut Checker, call: &ast::Exp let rest = rest.trim_start_matches('0'); // Verify that the rest of the string is a valid integer. - if !rest.chars().all(|c| c.is_ascii_digit()) { + if !rest.bytes().all(|c| c.is_ascii_digit()) { return; }; @@ -159,3 +167,26 @@ pub(crate) fn verbose_decimal_constructor(checker: &mut Checker, call: &ast::Exp checker.diagnostics.push(diagnostic); } + +// // Slightly modified from [CPython regex] to ignore https://github.com/python/cpython/blob/ac556a2ad1213b8bb81372fe6fb762f5fcb076de/Lib/_pydecimal.py#L6060-L6077 +// static DECIMAL_PARSER_REGEX: LazyLock = LazyLock::new(|| { +// Regex::new( +// r"(?x) # Verbose mode for comments +// ^ # Start of string +// (?P[-+])? # Optional sign +// (?: +// (?P\d*) # Integer part (can be empty) +// (\.(?P\d+))? # Optional fractional part +// (E(?P[-+]?\d+))? # Optional exponent +// | +// Inf(inity)? # Infinity +// | +// (?Ps)? # Optional signal +// NaN # NaN +// (?P\d*) # Optional diagnostic info +// ) +// $ # End of string +// ", +// ) +// .unwrap() +// }); diff --git a/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB157_FURB157.py.snap b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB157_FURB157.py.snap index ef8680e366..e735d59f3a 100644 --- a/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB157_FURB157.py.snap +++ b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB157_FURB157.py.snap @@ -166,3 +166,43 @@ FURB157.py:12:17: FURB157 [*] Verbose expression in `Decimal` constructor 13 13 | 14 14 | # OK 15 15 | Decimal(0) + +FURB157.py:23:9: FURB157 [*] Verbose expression in `Decimal` constructor + | +22 | # Errors +23 | Decimal("1_000") + | ^^^^^^^ FURB157 +24 | Decimal("__1____000") + | + = help: Replace with `1000` + +ℹ Safe fix +20 20 | # See https://github.com/astral-sh/ruff/issues/13807 +21 21 | +22 22 | # Errors +23 |-Decimal("1_000") + 23 |+Decimal(1000) +24 24 | Decimal("__1____000") +25 25 | +26 26 | # Ok + +FURB157.py:24:9: FURB157 [*] Verbose expression in `Decimal` constructor + | +22 | # Errors +23 | Decimal("1_000") +24 | Decimal("__1____000") + | ^^^^^^^^^^^^ FURB157 +25 | +26 | # Ok + | + = help: Replace with `1000` + +ℹ Safe fix +21 21 | +22 22 | # Errors +23 23 | Decimal("1_000") +24 |-Decimal("__1____000") + 24 |+Decimal(1000) +25 25 | +26 26 | # Ok +27 27 | Decimal("2e-4")