From 1a368b0bf97c3d0246390679166bbd2d589acf39 Mon Sep 17 00:00:00 2001 From: Dan Parizher <105245560+danparizher@users.noreply.github.com> Date: Sun, 3 Aug 2025 12:31:28 -0400 Subject: [PATCH] [`flake8-simplify`] Fix raw string handling in `SIM905` for embedded quotes (#19591) ## Summary When splitting triple-quoted, raw strings one has to take care before attempting to make each item have single-quotes. Fixes #19577 --------- Co-authored-by: dylwil3 --- .../test/fixtures/flake8_simplify/SIM905.py | 33 ++++++- .../rules/split_static_string.rs | 47 +++++++--- ...ke8_simplify__tests__SIM905_SIM905.py.snap | 90 +++++++++++++++++++ 3 files changed, 158 insertions(+), 12 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM905.py b/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM905.py index 049b60dfaf..0dbabcb64c 100644 --- a/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM905.py +++ b/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM905.py @@ -129,4 +129,35 @@ print(" x ".rsplit(maxsplit=0)) print(" x ".rsplit(maxsplit=0)) print(" x ".rsplit(sep=None, maxsplit=0)) print(" x ".rsplit(maxsplit=0)) -print(" x ".rsplit(sep=None, maxsplit=0)) \ No newline at end of file +print(" x ".rsplit(sep=None, maxsplit=0)) + +# https://github.com/astral-sh/ruff/issues/19581 - embedded quotes in raw strings +r"""simple@example.com +very.common@example.com +FirstName.LastName@EasierReading.org +x@example.com +long.email-address-with-hyphens@and.subdomains.example.com +user.name+tag+sorting@example.com +name/surname@example.com +xample@s.example +" "@example.org +"john..doe"@example.org +mailhost!username@example.org +"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com +user%example.com@example.org +user-@example.org +I❤️CHOCOLATE@example.com +this\ still\"not\\allowed@example.com +stellyamburrr985@example.com +Abc.123@example.com +user+mailbox/department=shipping@example.com 
+!#$%&'*+-/=?^_`.{|}~@example.com +"Abc@def"@example.com +"Fred\ Bloggs"@example.com +"Joe.\\Blow"@example.com""".split("\n") + + +r"""first +'no need' to escape +"swap" quote style +"use' ugly triple quotes""".split("\n") diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/split_static_string.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/split_static_string.rs index c298e82f05..e2da14e16a 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/rules/split_static_string.rs +++ b/crates/ruff_linter/src/rules/flake8_simplify/rules/split_static_string.rs @@ -1,6 +1,7 @@ use std::cmp::Ordering; use ruff_macros::{ViolationMetadata, derive_message_formats}; +use ruff_python_ast::StringFlags; use ruff_python_ast::{ Expr, ExprCall, ExprContext, ExprList, ExprUnaryOp, StringLiteral, StringLiteralFlags, StringLiteralValue, UnaryOp, str::TripleQuotes, @@ -116,26 +117,50 @@ pub(crate) fn split_static_string( } } +fn replace_flags(elt: &str, flags: StringLiteralFlags) -> StringLiteralFlags { + // In the ideal case we can wrap the element in _single_ quotes of the same + // style. For example, both of these are okay: + // + // ```python + // """itemA + // itemB + // itemC""".split() # -> ["itemA", "itemB", "itemC"] + // ``` + // + // ```python + // r"""itemA + // 'single'quoted + // """.split() # -> [r"itemA", r"'single'quoted"] + // ``` + if !flags.prefix().is_raw() || !elt.contains(flags.quote_style().as_char()) { + flags.with_triple_quotes(TripleQuotes::No) + } + // If we have a raw string containing a quotation mark of the same style, + // then we have to swap the style of quotation marks used + else if !elt.contains(flags.quote_style().opposite().as_char()) { + flags + .with_quote_style(flags.quote_style().opposite()) + .with_triple_quotes(TripleQuotes::No) + } else + // If both types of quotes are used in the raw, triple-quoted string, then + // we are forced to either add escapes or keep the triple quotes. We opt for + // the latter. 
+ { + flags + } +} + fn construct_replacement(elts: &[&str], flags: StringLiteralFlags) -> Expr { Expr::List(ExprList { elts: elts .iter() .map(|elt| { + let element_flags = replace_flags(elt, flags); Expr::from(StringLiteral { value: Box::from(*elt), range: TextRange::default(), node_index: ruff_python_ast::AtomicNodeIndex::dummy(), - // intentionally omit the triple quote flag, if set, to avoid strange - // replacements like - // - // ```python - // """ - // itemA - // itemB - // itemC - // """.split() # -> ["""itemA""", """itemB""", """itemC"""] - // ``` - flags: flags.with_triple_quotes(TripleQuotes::No), + flags: element_flags, }) }) .collect(), diff --git a/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM905_SIM905.py.snap b/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM905_SIM905.py.snap index ae93115007..7f5b3f6456 100644 --- a/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM905_SIM905.py.snap +++ b/crates/ruff_linter/src/rules/flake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM905_SIM905.py.snap @@ -1226,6 +1226,7 @@ SIM905.py:130:7: SIM905 [*] Consider using a list literal instead of `str.split` 130 |+print([" x"]) 131 131 | print(" x ".rsplit(maxsplit=0)) 132 132 | print(" x ".rsplit(sep=None, maxsplit=0)) +133 133 | SIM905.py:131:7: SIM905 [*] Consider using a list literal instead of `str.split` | @@ -1244,6 +1245,8 @@ SIM905.py:131:7: SIM905 [*] Consider using a list literal instead of `str.split` 131 |-print(" x ".rsplit(maxsplit=0)) 131 |+print([" x"]) 132 132 | print(" x ".rsplit(sep=None, maxsplit=0)) +133 133 | +134 134 | # https://github.com/astral-sh/ruff/issues/19581 - embedded quotes in raw strings SIM905.py:132:7: SIM905 [*] Consider using a list literal instead of `str.split` | @@ -1251,6 +1254,8 @@ SIM905.py:132:7: SIM905 [*] Consider using a list literal 
instead of `str.split` 131 | print(" x ".rsplit(maxsplit=0)) 132 | print(" x ".rsplit(sep=None, maxsplit=0)) | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ SIM905 +133 | +134 | # https://github.com/astral-sh/ruff/issues/19581 - embedded quotes in raw strings | = help: Replace with list literal @@ -1260,3 +1265,88 @@ SIM905.py:132:7: SIM905 [*] Consider using a list literal instead of `str.split` 131 131 | print(" x ".rsplit(maxsplit=0)) 132 |-print(" x ".rsplit(sep=None, maxsplit=0)) 132 |+print([" x"]) +133 133 | +134 134 | # https://github.com/astral-sh/ruff/issues/19581 - embedded quotes in raw strings +135 135 | r"""simple@example.com + +SIM905.py:135:1: SIM905 [*] Consider using a list literal instead of `str.split` + | +134 | # https://github.com/astral-sh/ruff/issues/19581 - embedded quotes in raw strings +135 | / r"""simple@example.com +136 | | very.common@example.com +137 | | FirstName.LastName@EasierReading.org +138 | | x@example.com +139 | | long.email-address-with-hyphens@and.subdomains.example.com +140 | | user.name+tag+sorting@example.com +141 | | name/surname@example.com +142 | | xample@s.example +143 | | " "@example.org +144 | | "john..doe"@example.org +145 | | mailhost!username@example.org +146 | | "very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com +147 | | user%example.com@example.org +148 | | user-@example.org +149 | | I❤️CHOCOLATE@example.com +150 | | this\ still\"not\\allowed@example.com +151 | | stellyamburrr985@example.com +152 | | Abc.123@example.com +153 | | user+mailbox/department=shipping@example.com +154 | | !#$%&'*+-/=?^_`.{|}~@example.com +155 | | "Abc@def"@example.com +156 | | "Fred\ Bloggs"@example.com +157 | | "Joe.\\Blow"@example.com""".split("\n") + | |_______________________________________^ SIM905 + | + = help: Replace with list literal + +ℹ Safe fix +132 132 | print(" x ".rsplit(sep=None, maxsplit=0)) +133 133 | +134 134 | # https://github.com/astral-sh/ruff/issues/19581 - embedded quotes in raw strings +135 
|-r"""simple@example.com +136 |-very.common@example.com +137 |-FirstName.LastName@EasierReading.org +138 |-x@example.com +139 |-long.email-address-with-hyphens@and.subdomains.example.com +140 |-user.name+tag+sorting@example.com +141 |-name/surname@example.com +142 |-xample@s.example +143 |-" "@example.org +144 |-"john..doe"@example.org +145 |-mailhost!username@example.org +146 |-"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com +147 |-user%example.com@example.org +148 |-user-@example.org +149 |-I❤️CHOCOLATE@example.com +150 |-this\ still\"not\\allowed@example.com +151 |-stellyamburrr985@example.com +152 |-Abc.123@example.com +153 |-user+mailbox/department=shipping@example.com +154 |-!#$%&'*+-/=?^_`.{|}~@example.com +155 |-"Abc@def"@example.com +156 |-"Fred\ Bloggs"@example.com +157 |-"Joe.\\Blow"@example.com""".split("\n") + 135 |+[r"simple@example.com", r"very.common@example.com", r"FirstName.LastName@EasierReading.org", r"x@example.com", r"long.email-address-with-hyphens@and.subdomains.example.com", r"user.name+tag+sorting@example.com", r"name/surname@example.com", r"xample@s.example", r'" "@example.org', r'"john..doe"@example.org', r"mailhost!username@example.org", r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com', r"user%example.com@example.org", r"user-@example.org", r"I❤️CHOCOLATE@example.com", r'this\ still\"not\\allowed@example.com', r"stellyamburrr985@example.com", r"Abc.123@example.com", r"user+mailbox/department=shipping@example.com", r"!#$%&'*+-/=?^_`.{|}~@example.com", r'"Abc@def"@example.com', r'"Fred\ Bloggs"@example.com', r'"Joe.\\Blow"@example.com'] +158 136 | +159 137 | +160 138 | r"""first + +SIM905.py:160:1: SIM905 [*] Consider using a list literal instead of `str.split` + | +160 | / r"""first +161 | | 'no need' to escape +162 | | "swap" quote style +163 | | "use' ugly triple quotes""".split("\n") + | |_______________________________________^ SIM905 + | + = help: Replace with list literal + +ℹ Safe 
fix +157 157 | "Joe.\\Blow"@example.com""".split("\n") +158 158 | +159 159 | +160 |-r"""first +161 |-'no need' to escape +162 |-"swap" quote style +163 |-"use' ugly triple quotes""".split("\n") + 160 |+[r"first", r"'no need' to escape", r'"swap" quote style', r""""use' ugly triple quotes"""]