From ff2c0dd4911e9af26229d9d3877f85b7848a4680 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Wed, 8 Mar 2023 13:21:59 -0500 Subject: [PATCH] Use shared `leading_quote` implementation in ruff_python_formatter (#3396) --- Cargo.lock | 1 + crates/ruff/src/checkers/ast/mod.rs | 4 +- .../pydocstyle/rules/ends_with_period.rs | 2 +- .../pydocstyle/rules/ends_with_punctuation.rs | 2 +- .../rules/multi_line_summary_start.rs | 5 +- .../rules/no_surrounding_whitespace.rs | 2 +- .../src/rules/pydocstyle/rules/one_liner.rs | 2 +- crates/ruff/src/rules/pyflakes/fixes.rs | 2 +- .../pylint/rules/bad_string_format_type.rs | 2 +- .../src/rules/pyupgrade/rules/f_strings.rs | 2 +- .../rules/printf_string_formatting.rs | 2 +- crates/ruff_python_ast/src/lib.rs | 2 +- .../src/source_code/stylist.rs | 2 +- crates/ruff_python_ast/src/str.rs | 97 +++++++++++++++++++ crates/ruff_python_ast/src/strings.rs | 38 -------- crates/ruff_python_formatter/Cargo.toml | 1 + .../ruff_python_formatter/src/core/helpers.rs | 52 ---------- .../src/format/strings.rs | 2 +- crates/ruff_python_stdlib/src/bytes.rs | 10 -- crates/ruff_python_stdlib/src/lib.rs | 1 - crates/ruff_python_stdlib/src/str.rs | 6 -- 21 files changed, 114 insertions(+), 123 deletions(-) create mode 100644 crates/ruff_python_ast/src/str.rs delete mode 100644 crates/ruff_python_ast/src/strings.rs delete mode 100644 crates/ruff_python_stdlib/src/bytes.rs diff --git a/Cargo.lock b/Cargo.lock index 0cd1d6e902..e11a2d247f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2154,6 +2154,7 @@ dependencies = [ "itertools", "once_cell", "ruff_formatter", + "ruff_python_ast", "ruff_python_stdlib", "ruff_rustpython", "ruff_testing_macros", diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index 8f96c869e5..1402a71ad3 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -27,7 +27,7 @@ use ruff_python_ast::types::{ use ruff_python_ast::typing::{match_annotated_subscript, 
Callable, SubscriptKind}; use ruff_python_ast::visitor::{walk_excepthandler, walk_pattern, Visitor}; use ruff_python_ast::{ - branch_detection, cast, helpers, operations, strings, typing, visibility, visitor, + branch_detection, cast, helpers, operations, str, typing, visibility, visitor, }; use ruff_python_stdlib::builtins::{BUILTINS, MAGIC_GLOBALS}; use ruff_python_stdlib::path::is_python_stub_file; @@ -5292,7 +5292,7 @@ impl<'a> Checker<'a> { Location::new(expr.location.row(), expr.location.column()), )); - let body = strings::raw_contents(contents); + let body = str::raw_contents(contents); let docstring = Docstring { kind: definition.kind, expr, diff --git a/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs b/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs index 22e6c0bf55..fbbd7ec8ea 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs @@ -1,7 +1,7 @@ use strum::IntoEnumIterator; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::strings::leading_quote; +use ruff_python_ast::str::leading_quote; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; diff --git a/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs b/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs index d79669318a..23d6d0f51c 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs @@ -1,7 +1,7 @@ use strum::IntoEnumIterator; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::strings::leading_quote; +use ruff_python_ast::str::leading_quote; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; diff --git a/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs b/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs index 2f897429bf..21d7a60c76 100644 --- 
a/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs @@ -1,8 +1,7 @@ use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::strings::leading_quote; +use ruff_python_ast::str::{is_triple_quote, leading_quote}; use ruff_python_ast::types::Range; use ruff_python_ast::whitespace::LinesWithTrailingNewline; -use ruff_python_stdlib::str::TRIPLE_QUOTE_PREFIXES; use crate::checkers::ast::Checker; use crate::docstrings::definition::{DefinitionKind, Docstring}; @@ -54,7 +53,7 @@ pub fn multi_line_summary_start(checker: &mut Checker, docstring: &Docstring) { { return; }; - if TRIPLE_QUOTE_PREFIXES.contains(&first_line) { + if is_triple_quote(first_line) { if checker .settings .rules diff --git a/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs b/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs index 5290bd075c..c5f7959a30 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs @@ -1,5 +1,5 @@ use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::strings::leading_quote; +use ruff_python_ast::str::leading_quote; use ruff_python_ast::types::Range; use ruff_python_ast::whitespace::LinesWithTrailingNewline; diff --git a/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs b/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs index 73edd95eef..d2e775ffac 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs @@ -1,5 +1,5 @@ use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::strings::{leading_quote, trailing_quote}; +use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_python_ast::types::Range; use ruff_python_ast::whitespace::LinesWithTrailingNewline; diff --git a/crates/ruff/src/rules/pyflakes/fixes.rs 
b/crates/ruff/src/rules/pyflakes/fixes.rs index 5e562053ec..ab6a104cc2 100644 --- a/crates/ruff/src/rules/pyflakes/fixes.rs +++ b/crates/ruff/src/rules/pyflakes/fixes.rs @@ -4,7 +4,7 @@ use rustpython_parser::ast::{Excepthandler, Expr}; use rustpython_parser::{lexer, Mode, Tok}; use ruff_python_ast::source_code::{Locator, Stylist}; -use ruff_python_ast::strings::raw_contents; +use ruff_python_ast::str::raw_contents; use ruff_python_ast::types::Range; use crate::cst::matchers::{match_expr, match_module}; diff --git a/crates/ruff/src/rules/pylint/rules/bad_string_format_type.rs b/crates/ruff/src/rules/pylint/rules/bad_string_format_type.rs index c8c31afa61..41e6e7d9aa 100644 --- a/crates/ruff/src/rules/pylint/rules/bad_string_format_type.rs +++ b/crates/ruff/src/rules/pylint/rules/bad_string_format_type.rs @@ -6,7 +6,7 @@ use rustpython_parser::ast::{Constant, Expr, ExprKind, Location, Operator}; use rustpython_parser::{lexer, Mode, Tok}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::strings::{leading_quote, trailing_quote}; +use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; diff --git a/crates/ruff/src/rules/pyupgrade/rules/f_strings.rs b/crates/ruff/src/rules/pyupgrade/rules/f_strings.rs index 0ea7f6d121..4c25069ab3 100644 --- a/crates/ruff/src/rules/pyupgrade/rules/f_strings.rs +++ b/crates/ruff/src/rules/pyupgrade/rules/f_strings.rs @@ -6,7 +6,7 @@ use rustpython_parser::ast::{Constant, Expr, ExprKind, KeywordData}; use rustpython_parser::{lexer, Mode, Tok}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::strings::{leading_quote, trailing_quote}; +use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; diff --git a/crates/ruff/src/rules/pyupgrade/rules/printf_string_formatting.rs b/crates/ruff/src/rules/pyupgrade/rules/printf_string_formatting.rs index 
23d0615a05..89aacc68a3 100644 --- a/crates/ruff/src/rules/pyupgrade/rules/printf_string_formatting.rs +++ b/crates/ruff/src/rules/pyupgrade/rules/printf_string_formatting.rs @@ -7,7 +7,7 @@ use rustpython_parser::ast::{Constant, Expr, ExprKind, Location}; use rustpython_parser::{lexer, Mode, Tok}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::strings::{leading_quote, trailing_quote}; +use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_python_ast::types::Range; use ruff_python_ast::whitespace::indentation; use ruff_python_stdlib::identifiers::is_identifier; diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs index 625bc8a98e..529034a98e 100644 --- a/crates/ruff_python_ast/src/lib.rs +++ b/crates/ruff_python_ast/src/lib.rs @@ -9,7 +9,7 @@ pub mod logging; pub mod operations; pub mod relocate; pub mod source_code; -pub mod strings; +pub mod str; pub mod types; pub mod typing; pub mod visibility; diff --git a/crates/ruff_python_ast/src/source_code/stylist.rs b/crates/ruff_python_ast/src/source_code/stylist.rs index e986345b3c..d6b8eb921c 100644 --- a/crates/ruff_python_ast/src/source_code/stylist.rs +++ b/crates/ruff_python_ast/src/source_code/stylist.rs @@ -10,7 +10,7 @@ use rustpython_parser::{lexer, Mode, Tok}; use crate::source_code::Locator; use ruff_rustpython::vendor; -use crate::strings::leading_quote; +use crate::str::leading_quote; use crate::types::Range; pub struct Stylist<'a> { diff --git a/crates/ruff_python_ast/src/str.rs b/crates/ruff_python_ast/src/str.rs new file mode 100644 index 0000000000..1a332b474c --- /dev/null +++ b/crates/ruff_python_ast/src/str.rs @@ -0,0 +1,97 @@ +/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals> +const TRIPLE_QUOTE_STR_PREFIXES: &[&str] = &[ + "u\"\"\"", "u'''", "r\"\"\"", "r'''", "U\"\"\"", "U'''", "R\"\"\"", "R'''", "\"\"\"", "'''", +]; +const SINGLE_QUOTE_STR_PREFIXES: &[&str] = &[ + "u\"", "u'", "r\"", "r'", "U\"", "U'", "R\"", "R'", "\"", "'", +]; +pub const
TRIPLE_QUOTE_BYTE_PREFIXES: &[&str] = &[ + "br'''", "rb'''", "bR'''", "Rb'''", "Br'''", "rB'''", "RB'''", "BR'''", "b'''", "br\"\"\"", + "rb\"\"\"", "bR\"\"\"", "Rb\"\"\"", "Br\"\"\"", "rB\"\"\"", "RB\"\"\"", "BR\"\"\"", "b\"\"\"", + "B\"\"\"", +]; +pub const SINGLE_QUOTE_BYTE_PREFIXES: &[&str] = &[ + "br'", "rb'", "bR'", "Rb'", "Br'", "rB'", "RB'", "BR'", "b'", "br\"", "rb\"", "bR\"", "Rb\"", + "Br\"", "rB\"", "RB\"", "BR\"", "b\"", "B\"", +]; +const TRIPLE_QUOTE_SUFFIXES: &[&str] = &["\"\"\"", "'''"]; +const SINGLE_QUOTE_SUFFIXES: &[&str] = &["\"", "'"]; + +/// Strip the leading and trailing quotes from a docstring. +pub fn raw_contents(contents: &str) -> &str { + for pattern in TRIPLE_QUOTE_STR_PREFIXES + .iter() + .chain(TRIPLE_QUOTE_BYTE_PREFIXES) + { + if contents.starts_with(pattern) { + return &contents[pattern.len()..contents.len() - 3]; + } + } + for pattern in SINGLE_QUOTE_STR_PREFIXES + .iter() + .chain(SINGLE_QUOTE_BYTE_PREFIXES) + { + if contents.starts_with(pattern) { + return &contents[pattern.len()..contents.len() - 1]; + } + } + unreachable!("Expected docstring to start with a valid triple- or single-quote prefix") +} + +/// Return the leading quote for a string or byte literal (e.g., `"""`). +pub fn leading_quote(content: &str) -> Option<&str> { + if let Some(first_line) = content.lines().next() { + for pattern in TRIPLE_QUOTE_STR_PREFIXES + .iter() + .chain(TRIPLE_QUOTE_BYTE_PREFIXES) + .chain(SINGLE_QUOTE_STR_PREFIXES) + .chain(SINGLE_QUOTE_BYTE_PREFIXES) + { + if first_line.starts_with(pattern) { + return Some(pattern); + } + } + } + None +} + +/// Return the trailing quote string for a string or byte literal (e.g., `"""`). +pub fn trailing_quote(content: &str) -> Option<&&str> { + TRIPLE_QUOTE_SUFFIXES + .iter() + .chain(SINGLE_QUOTE_SUFFIXES) + .find(|&pattern| content.ends_with(pattern)) +} + +/// Return `true` if the string is a triple-quote string or byte prefix. 
+pub fn is_triple_quote(content: &str) -> bool { + TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content) +} + +#[cfg(test)] +mod tests { + use super::{ + SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES, + TRIPLE_QUOTE_STR_PREFIXES, + }; + + #[test] + fn test_prefixes() { + let prefixes = TRIPLE_QUOTE_STR_PREFIXES + .iter() + .chain(TRIPLE_QUOTE_BYTE_PREFIXES) + .chain(SINGLE_QUOTE_STR_PREFIXES) + .chain(SINGLE_QUOTE_BYTE_PREFIXES) + .collect::<Vec<_>>(); + for (i, prefix_i) in prefixes.iter().enumerate() { + for (j, prefix_j) in prefixes.iter().enumerate() { + if i > j { + assert!( + !prefix_i.starts_with(*prefix_j), + "Prefixes are not unique: {prefix_i} starts with {prefix_j}", + ); + } + } + } + } +} diff --git a/crates/ruff_python_ast/src/strings.rs b/crates/ruff_python_ast/src/strings.rs deleted file mode 100644 index c20de07deb..0000000000 --- a/crates/ruff_python_ast/src/strings.rs +++ /dev/null @@ -1,38 +0,0 @@ -use ruff_python_stdlib::str::{ - SINGLE_QUOTE_PREFIXES, SINGLE_QUOTE_SUFFIXES, TRIPLE_QUOTE_PREFIXES, TRIPLE_QUOTE_SUFFIXES, -}; - -/// Strip the leading and trailing quotes from a docstring. -pub fn raw_contents(contents: &str) -> &str { - for pattern in TRIPLE_QUOTE_PREFIXES { - if contents.starts_with(pattern) { - return &contents[pattern.len()..contents.len() - 3]; - } - } - for pattern in SINGLE_QUOTE_PREFIXES { - if contents.starts_with(pattern) { - return &contents[pattern.len()..contents.len() - 1]; - } - } - unreachable!("Expected docstring to start with a valid triple- or single-quote prefix") -} - -/// Return the leading quote string for a docstring (e.g., `"""`).
-pub fn leading_quote(content: &str) -> Option<&str> { - if let Some(first_line) = content.lines().next() { - for pattern in TRIPLE_QUOTE_PREFIXES.iter().chain(SINGLE_QUOTE_PREFIXES) { - if first_line.starts_with(pattern) { - return Some(pattern); - } - } - } - None -} - -/// Return the trailing quote string for a docstring (e.g., `"""`). -pub fn trailing_quote(content: &str) -> Option<&&str> { - TRIPLE_QUOTE_SUFFIXES - .iter() - .chain(SINGLE_QUOTE_SUFFIXES) - .find(|&pattern| content.ends_with(pattern)) -} diff --git a/crates/ruff_python_formatter/Cargo.toml b/crates/ruff_python_formatter/Cargo.toml index 7a745c65e5..a4d1a93d45 100644 --- a/crates/ruff_python_formatter/Cargo.toml +++ b/crates/ruff_python_formatter/Cargo.toml @@ -7,6 +7,7 @@ rust-version = { workspace = true } [dependencies] ruff_formatter = { path = "../ruff_formatter" } +ruff_python_ast = { path = "../ruff_python_ast" } ruff_python_stdlib = { path = "../ruff_python_stdlib" } ruff_rustpython = { path = "../ruff_rustpython" } ruff_text_size = { path = "../ruff_text_size" } diff --git a/crates/ruff_python_formatter/src/core/helpers.rs b/crates/ruff_python_formatter/src/core/helpers.rs index a4ff965e0c..032ba06fa8 100644 --- a/crates/ruff_python_formatter/src/core/helpers.rs +++ b/crates/ruff_python_formatter/src/core/helpers.rs @@ -3,31 +3,6 @@ use rustpython_parser::ast::Location; use crate::core::locator::Locator; use crate::core::types::Range; -/// Return the leading quote for a string or byte literal (e.g., `"""`). 
-pub fn leading_quote(content: &str) -> Option<&str> { - if let Some(first_line) = content.lines().next() { - for pattern in ruff_python_stdlib::str::TRIPLE_QUOTE_PREFIXES - .iter() - .chain(ruff_python_stdlib::bytes::TRIPLE_QUOTE_PREFIXES) - .chain(ruff_python_stdlib::str::SINGLE_QUOTE_PREFIXES) - .chain(ruff_python_stdlib::bytes::SINGLE_QUOTE_PREFIXES) - { - if first_line.starts_with(pattern) { - return Some(pattern); - } - } - } - None -} - -/// Return the trailing quote string for a string or byte literal (e.g., `"""`). -pub fn trailing_quote(content: &str) -> Option<&&str> { - ruff_python_stdlib::str::TRIPLE_QUOTE_SUFFIXES - .iter() - .chain(ruff_python_stdlib::str::SINGLE_QUOTE_SUFFIXES) - .find(|&pattern| content.ends_with(pattern)) -} - /// Return `true` if the given string is a radix literal (e.g., `0b101`). pub fn is_radix_literal(content: &str) -> bool { content.starts_with("0b") @@ -155,30 +130,3 @@ pub fn is_elif(orelse: &[rustpython_parser::ast::Stmt], locator: &Locator) -> bo } false } - -#[cfg(test)] -mod tests { - #[test] - fn test_prefixes() { - let prefixes = ruff_python_stdlib::str::TRIPLE_QUOTE_PREFIXES - .iter() - .chain(ruff_python_stdlib::bytes::TRIPLE_QUOTE_PREFIXES) - .chain(ruff_python_stdlib::str::SINGLE_QUOTE_PREFIXES) - .chain(ruff_python_stdlib::bytes::SINGLE_QUOTE_PREFIXES) - .collect::<Vec<_>>(); - for i in 1..prefixes.len() { - for j in 0..i - 1 { - if i != j { - if prefixes[i].starts_with(prefixes[j]) { - assert!( - !prefixes[i].starts_with(prefixes[j]), - "Prefixes are not unique: {} starts with {}", - prefixes[i], - prefixes[j] - ); - } - } - } - } - } -} diff --git a/crates/ruff_python_formatter/src/format/strings.rs b/crates/ruff_python_formatter/src/format/strings.rs index 633efc076d..668cafc456 100644 --- a/crates/ruff_python_formatter/src/format/strings.rs +++ b/crates/ruff_python_formatter/src/format/strings.rs @@ -2,10 +2,10 @@ use rustpython_parser::{Mode, Tok}; use ruff_formatter::prelude::*; use ruff_formatter::{write,
Format}; +use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_text_size::TextSize; use crate::context::ASTFormatContext; -use crate::core::helpers::{leading_quote, trailing_quote}; use crate::core::types::Range; use crate::cst::Expr; diff --git a/crates/ruff_python_stdlib/src/bytes.rs b/crates/ruff_python_stdlib/src/bytes.rs deleted file mode 100644 index 9ec4e0d604..0000000000 --- a/crates/ruff_python_stdlib/src/bytes.rs +++ /dev/null @@ -1,10 +0,0 @@ -/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals> -pub const TRIPLE_QUOTE_PREFIXES: &[&str] = &[ - "br'''", "rb'''", "bR'''", "Rb'''", "Br'''", "rB'''", "RB'''", "BR'''", "b'''", "br\"\"\"", - "rb\"\"\"", "bR\"\"\"", "Rb\"\"\"", "Br\"\"\"", "rB\"\"\"", "RB\"\"\"", "BR\"\"\"", "b\"\"\"", - "B\"\"\"", -]; -pub const SINGLE_QUOTE_PREFIXES: &[&str] = &[ - "br'", "rb'", "bR'", "Rb'", "Br'", "rB'", "RB'", "BR'", "b'", "br\"", "rb\"", "bR\"", "Rb\"", - "Br\"", "rB\"", "RB\"", "BR\"", "b\"", "B\"", -]; diff --git a/crates/ruff_python_stdlib/src/lib.rs b/crates/ruff_python_stdlib/src/lib.rs index daa9776582..e64bb5c4c3 100644 --- a/crates/ruff_python_stdlib/src/lib.rs +++ b/crates/ruff_python_stdlib/src/lib.rs @@ -1,5 +1,4 @@ pub mod builtins; -pub mod bytes; pub mod future; pub mod identifiers; pub mod keyword; diff --git a/crates/ruff_python_stdlib/src/str.rs b/crates/ruff_python_stdlib/src/str.rs index f62d04a200..1c1e6ffae7 100644 --- a/crates/ruff_python_stdlib/src/str.rs +++ b/crates/ruff_python_stdlib/src/str.rs @@ -1,6 +1,3 @@ -use once_cell::sync::Lazy; -use regex::Regex; - /// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals> pub const TRIPLE_QUOTE_PREFIXES: &[&str] = &[ "u\"\"\"", "u'''", "r\"\"\"", "r'''", "U\"\"\"", "U'''", "R\"\"\"", "R'''", "\"\"\"", "'''", @@ -11,9 +8,6 @@ pub const SINGLE_QUOTE_PREFIXES: &[&str] = &[ pub const TRIPLE_QUOTE_SUFFIXES: &[&str] = &["\"\"\"", "'''"]; pub const SINGLE_QUOTE_SUFFIXES: &[&str] = &["\"", "'"]; -pub static STRING_QUOTE_PREFIX_REGEX: Lazy<Regex> = - Lazy::new(|| Regex::new(r#"^(?i)[urb]*['"](?P<raw>.*)['"]$"#).unwrap()); - pub fn is_lower(s: &str)
-> bool { let mut cased = false; for c in s.chars() {