mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-24 13:34:40 +00:00
Simplify formatting of strings by using flags from the AST nodes (#10489)
This commit is contained in:
parent
fc792d1d2e
commit
7caf0d064a
22 changed files with 725 additions and 809 deletions
|
@ -25,7 +25,6 @@ ruff_python_parser = { path = "../ruff_python_parser" }
|
|||
ruff_text_size = { path = "../ruff_text_size" }
|
||||
|
||||
anyhow = { workspace = true }
|
||||
bitflags = { workspace = true }
|
||||
clap = { workspace = true }
|
||||
countme = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
use ruff_python_ast::BytesLiteral;
|
||||
use ruff_text_size::Ranged;
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::string::{StringNormalizer, StringPart};
|
||||
use crate::string::StringNormalizer;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct FormatBytesLiteral;
|
||||
|
@ -13,7 +12,7 @@ impl FormatNodeRule<BytesLiteral> for FormatBytesLiteral {
|
|||
|
||||
StringNormalizer::from_context(f.context())
|
||||
.with_preferred_quote_style(f.options().quote_style())
|
||||
.normalize(&StringPart::from_source(item.range(), &locator), &locator)
|
||||
.normalize(item.into(), &locator)
|
||||
.fmt(f)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
use ruff_formatter::write;
|
||||
use ruff_python_ast::FString;
|
||||
use ruff_python_ast::{AnyStringKind, FString};
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::Ranged;
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::preview::is_f_string_formatting_enabled;
|
||||
use crate::string::{Quoting, StringNormalizer, StringPart, StringPrefix, StringQuotes};
|
||||
use crate::string::{Quoting, StringNormalizer, StringQuotes};
|
||||
|
||||
use super::f_string_element::FormatFStringElement;
|
||||
|
||||
|
@ -30,8 +29,6 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
|||
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
let locator = f.context().locator();
|
||||
|
||||
let string = StringPart::from_source(self.value.range(), &locator);
|
||||
|
||||
let normalizer = StringNormalizer::from_context(f.context())
|
||||
.with_quoting(self.quoting)
|
||||
.with_preferred_quote_style(f.options().quote_style());
|
||||
|
@ -39,7 +36,7 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
|||
// If f-string formatting is disabled (not in preview), then we will
|
||||
// fall back to the previous behavior of normalizing the f-string.
|
||||
if !is_f_string_formatting_enabled(f.context()) {
|
||||
let result = normalizer.normalize(&string, &locator).fmt(f);
|
||||
let result = normalizer.normalize(self.value.into(), &locator).fmt(f);
|
||||
let comments = f.context().comments();
|
||||
self.value.elements.iter().for_each(|value| {
|
||||
comments.mark_verbatim_node_comments_formatted(value.into());
|
||||
|
@ -59,16 +56,16 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
|||
return result;
|
||||
}
|
||||
|
||||
let quote_selection = normalizer.choose_quotes(&string, &locator);
|
||||
let string_kind = normalizer.choose_quotes(self.value.into(), &locator).kind();
|
||||
|
||||
let context = FStringContext::new(
|
||||
string.prefix(),
|
||||
quote_selection.quotes(),
|
||||
string_kind,
|
||||
FStringLayout::from_f_string(self.value, &locator),
|
||||
);
|
||||
|
||||
// Starting prefix and quote
|
||||
write!(f, [string.prefix(), quote_selection.quotes()])?;
|
||||
let quotes = StringQuotes::from(string_kind);
|
||||
write!(f, [string_kind.prefix(), quotes])?;
|
||||
|
||||
f.join()
|
||||
.entries(
|
||||
|
@ -80,32 +77,23 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
|||
.finish()?;
|
||||
|
||||
// Ending quote
|
||||
quote_selection.quotes().fmt(f)
|
||||
quotes.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) struct FStringContext {
|
||||
prefix: StringPrefix,
|
||||
quotes: StringQuotes,
|
||||
kind: AnyStringKind,
|
||||
layout: FStringLayout,
|
||||
}
|
||||
|
||||
impl FStringContext {
|
||||
const fn new(prefix: StringPrefix, quotes: StringQuotes, layout: FStringLayout) -> Self {
|
||||
Self {
|
||||
prefix,
|
||||
quotes,
|
||||
layout,
|
||||
}
|
||||
const fn new(kind: AnyStringKind, layout: FStringLayout) -> Self {
|
||||
Self { kind, layout }
|
||||
}
|
||||
|
||||
pub(crate) const fn quotes(self) -> StringQuotes {
|
||||
self.quotes
|
||||
}
|
||||
|
||||
pub(crate) const fn prefix(self) -> StringPrefix {
|
||||
self.prefix
|
||||
pub(crate) fn kind(self) -> AnyStringKind {
|
||||
self.kind
|
||||
}
|
||||
|
||||
pub(crate) const fn layout(self) -> FStringLayout {
|
||||
|
|
|
@ -56,13 +56,7 @@ impl<'a> FormatFStringLiteralElement<'a> {
|
|||
impl Format<PyFormatContext<'_>> for FormatFStringLiteralElement<'_> {
|
||||
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
let literal_content = f.context().locator().slice(self.element.range());
|
||||
let normalized = normalize_string(
|
||||
literal_content,
|
||||
0,
|
||||
self.context.quotes(),
|
||||
self.context.prefix(),
|
||||
true,
|
||||
);
|
||||
let normalized = normalize_string(literal_content, 0, self.context.kind(), true);
|
||||
match &normalized {
|
||||
Cow::Borrowed(_) => source_text_slice(self.element.range()).fmt(f),
|
||||
Cow::Owned(normalized) => text(normalized).fmt(f),
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
use ruff_python_ast::StringLiteral;
|
||||
use ruff_text_size::Ranged;
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::string::{docstring, Quoting, StringNormalizer, StringPart};
|
||||
use crate::string::{docstring, Quoting, StringNormalizer};
|
||||
use crate::QuoteStyle;
|
||||
|
||||
pub(crate) struct FormatStringLiteral<'a> {
|
||||
|
@ -61,10 +60,7 @@ impl Format<PyFormatContext<'_>> for FormatStringLiteral<'_> {
|
|||
let normalized = StringNormalizer::from_context(f.context())
|
||||
.with_quoting(self.layout.quoting())
|
||||
.with_preferred_quote_style(quote_style)
|
||||
.normalize(
|
||||
&StringPart::from_source(self.value.range(), &locator),
|
||||
&locator,
|
||||
);
|
||||
.normalize(self.value.into(), &locator);
|
||||
|
||||
if self.layout.is_docstring() {
|
||||
docstring::format(&normalized, f)
|
||||
|
|
|
@ -3,17 +3,17 @@ use std::iter::FusedIterator;
|
|||
use memchr::memchr2;
|
||||
|
||||
use ruff_python_ast::{
|
||||
self as ast, AnyNodeRef, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
|
||||
StringLiteral,
|
||||
self as ast, AnyNodeRef, AnyStringKind, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral,
|
||||
ExpressionRef, StringLiteral,
|
||||
};
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange};
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
use crate::expression::expr_f_string::f_string_quoting;
|
||||
use crate::other::f_string::FormatFString;
|
||||
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
|
||||
use crate::prelude::*;
|
||||
use crate::string::{Quoting, StringPrefix, StringQuotes};
|
||||
use crate::string::Quoting;
|
||||
|
||||
/// Represents any kind of string expression. This could be either a string,
|
||||
/// bytes or f-string.
|
||||
|
@ -70,14 +70,10 @@ impl<'a> AnyString<'a> {
|
|||
pub(crate) fn is_multiline(self, source: &str) -> bool {
|
||||
match self {
|
||||
AnyString::String(_) | AnyString::Bytes(_) => {
|
||||
let contents = &source[self.range()];
|
||||
let prefix = StringPrefix::parse(contents);
|
||||
let quotes = StringQuotes::parse(
|
||||
&contents[TextRange::new(prefix.text_len(), contents.text_len())],
|
||||
);
|
||||
|
||||
quotes.is_some_and(StringQuotes::is_triple)
|
||||
&& memchr2(b'\n', b'\r', contents.as_bytes()).is_some()
|
||||
self.parts(Quoting::default())
|
||||
.next()
|
||||
.is_some_and(|part| part.kind().is_triple_quoted())
|
||||
&& memchr2(b'\n', b'\r', source[self.range()].as_bytes()).is_some()
|
||||
}
|
||||
AnyString::FString(fstring) => {
|
||||
memchr2(b'\n', b'\r', source[fstring.range].as_bytes()).is_some()
|
||||
|
@ -179,6 +175,16 @@ pub(super) enum AnyStringPart<'a> {
|
|||
},
|
||||
}
|
||||
|
||||
impl AnyStringPart<'_> {
|
||||
fn kind(&self) -> AnyStringKind {
|
||||
match self {
|
||||
Self::String { part, .. } => part.flags.into(),
|
||||
Self::Bytes(bytes_literal) => bytes_literal.flags.into(),
|
||||
Self::FString { part, .. } => part.flags.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
|
||||
fn from(value: &AnyStringPart<'a>) -> Self {
|
||||
match value {
|
||||
|
|
|
@ -18,6 +18,7 @@ use {
|
|||
ruff_text_size::{Ranged, TextLen, TextRange, TextSize},
|
||||
};
|
||||
|
||||
use crate::string::StringQuotes;
|
||||
use crate::{prelude::*, DocstringCodeLineWidth, FormatModuleError};
|
||||
|
||||
use super::NormalizedString;
|
||||
|
@ -126,7 +127,9 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
|||
let mut lines = docstring.split('\n').peekable();
|
||||
|
||||
// Start the string
|
||||
write!(f, [normalized.prefix(), normalized.quotes()])?;
|
||||
let kind = normalized.kind();
|
||||
let quotes = StringQuotes::from(kind);
|
||||
write!(f, [kind.prefix(), quotes])?;
|
||||
// We track where in the source docstring we are (in source code byte offsets)
|
||||
let mut offset = normalized.start();
|
||||
|
||||
|
@ -142,7 +145,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
|||
|
||||
// Edge case: The first line is `""" "content`, so we need to insert a chaperone space that keeps
|
||||
// inner quotes and closing quotes from getting too close, to avoid `""""content`
|
||||
if trim_both.starts_with(normalized.quotes().quote_char.as_char()) {
|
||||
if trim_both.starts_with(quotes.quote_char.as_char()) {
|
||||
space().fmt(f)?;
|
||||
}
|
||||
|
||||
|
@ -169,7 +172,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
|||
{
|
||||
space().fmt(f)?;
|
||||
}
|
||||
normalized.quotes().fmt(f)?;
|
||||
quotes.fmt(f)?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
|
@ -195,7 +198,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
|||
offset,
|
||||
stripped_indentation,
|
||||
already_normalized,
|
||||
quote_char: normalized.quotes().quote_char,
|
||||
quote_char: quotes.quote_char,
|
||||
code_example: CodeExample::default(),
|
||||
}
|
||||
.add_iter(lines)?;
|
||||
|
@ -208,7 +211,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
|||
space().fmt(f)?;
|
||||
}
|
||||
|
||||
write!(f, [normalized.quotes()])
|
||||
write!(f, [quotes])
|
||||
}
|
||||
|
||||
fn contains_unescaped_newline(haystack: &str) -> bool {
|
||||
|
@ -1570,7 +1573,7 @@ fn docstring_format_source(
|
|||
/// that avoids `content""""` and `content\"""`. This only applies to un-escaped backslashes,
|
||||
/// so `content\\ """` doesn't need a space while `content\\\ """` does.
|
||||
fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool {
|
||||
trim_end.ends_with(normalized.quotes().quote_char.as_char())
|
||||
trim_end.ends_with(normalized.kind().quote_style().as_char())
|
||||
|| trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
|
||||
}
|
||||
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
use bitflags::bitflags;
|
||||
|
||||
pub(crate) use any::AnyString;
|
||||
pub(crate) use normalize::{normalize_string, NormalizedString, StringNormalizer};
|
||||
use ruff_formatter::format_args;
|
||||
use ruff_python_ast::str::Quote;
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
use ruff_python_ast::{self as ast, AnyStringKind, AnyStringPrefix};
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
use crate::comments::{leading_comments, trailing_comments};
|
||||
use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space;
|
||||
|
@ -55,132 +53,17 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct StringPart {
|
||||
/// The prefix.
|
||||
prefix: StringPrefix,
|
||||
|
||||
/// The actual quotes of the string in the source
|
||||
quotes: StringQuotes,
|
||||
|
||||
/// The range of the string's content (full range minus quotes and prefix)
|
||||
content_range: TextRange,
|
||||
}
|
||||
|
||||
impl StringPart {
|
||||
pub(crate) fn from_source(range: TextRange, locator: &Locator) -> Self {
|
||||
let string_content = locator.slice(range);
|
||||
|
||||
let prefix = StringPrefix::parse(string_content);
|
||||
let after_prefix = &string_content[usize::from(prefix.text_len())..];
|
||||
|
||||
let quotes =
|
||||
StringQuotes::parse(after_prefix).expect("Didn't find string quotes after prefix");
|
||||
let relative_raw_content_range = TextRange::new(
|
||||
prefix.text_len() + quotes.text_len(),
|
||||
string_content.text_len() - quotes.text_len(),
|
||||
);
|
||||
let raw_content_range = relative_raw_content_range + range.start();
|
||||
|
||||
Self {
|
||||
prefix,
|
||||
content_range: raw_content_range,
|
||||
quotes,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the prefix of the string part.
|
||||
pub(crate) const fn prefix(&self) -> StringPrefix {
|
||||
self.prefix
|
||||
}
|
||||
|
||||
/// Returns the surrounding quotes of the string part.
|
||||
pub(crate) const fn quotes(&self) -> StringQuotes {
|
||||
self.quotes
|
||||
}
|
||||
|
||||
/// Returns the range of the string's content in the source (minus prefix and quotes).
|
||||
pub(crate) const fn content_range(&self) -> TextRange {
|
||||
self.content_range
|
||||
}
|
||||
}
|
||||
|
||||
bitflags! {
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct StringPrefix: u8 {
|
||||
const UNICODE = 0b0000_0001;
|
||||
/// `r"test"`
|
||||
const RAW = 0b0000_0010;
|
||||
/// `R"test"`
|
||||
const RAW_UPPER = 0b0000_0100;
|
||||
const BYTE = 0b0000_1000;
|
||||
const F_STRING = 0b0001_0000;
|
||||
}
|
||||
}
|
||||
|
||||
impl StringPrefix {
|
||||
pub(crate) fn parse(input: &str) -> StringPrefix {
|
||||
let chars = input.chars();
|
||||
let mut prefix = StringPrefix::empty();
|
||||
|
||||
for c in chars {
|
||||
let flag = match c {
|
||||
'u' | 'U' => StringPrefix::UNICODE,
|
||||
'f' | 'F' => StringPrefix::F_STRING,
|
||||
'b' | 'B' => StringPrefix::BYTE,
|
||||
'r' => StringPrefix::RAW,
|
||||
'R' => StringPrefix::RAW_UPPER,
|
||||
'\'' | '"' => break,
|
||||
c => {
|
||||
unreachable!(
|
||||
"Unexpected character '{c}' terminating the prefix of a string literal"
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
prefix |= flag;
|
||||
}
|
||||
|
||||
prefix
|
||||
}
|
||||
|
||||
pub(crate) const fn text_len(self) -> TextSize {
|
||||
TextSize::new(self.bits().count_ones())
|
||||
}
|
||||
|
||||
pub(super) const fn is_raw_string(self) -> bool {
|
||||
self.contains(StringPrefix::RAW) || self.contains(StringPrefix::RAW_UPPER)
|
||||
}
|
||||
|
||||
pub(super) const fn is_fstring(self) -> bool {
|
||||
self.contains(StringPrefix::F_STRING)
|
||||
}
|
||||
|
||||
pub(super) const fn is_byte(self) -> bool {
|
||||
self.contains(StringPrefix::BYTE)
|
||||
}
|
||||
}
|
||||
|
||||
impl Format<PyFormatContext<'_>> for StringPrefix {
|
||||
impl Format<PyFormatContext<'_>> for AnyStringPrefix {
|
||||
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
// Retain the casing for the raw prefix:
|
||||
// https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings
|
||||
if self.contains(StringPrefix::RAW) {
|
||||
token("r").fmt(f)?;
|
||||
} else if self.contains(StringPrefix::RAW_UPPER) {
|
||||
token("R").fmt(f)?;
|
||||
}
|
||||
|
||||
if self.contains(StringPrefix::BYTE) {
|
||||
token("b").fmt(f)?;
|
||||
}
|
||||
|
||||
if self.contains(StringPrefix::F_STRING) {
|
||||
token("f").fmt(f)?;
|
||||
}
|
||||
|
||||
// Remove the unicode prefix `u` if any because it is meaningless in Python 3+.
|
||||
|
||||
if !matches!(
|
||||
self,
|
||||
AnyStringPrefix::Regular(
|
||||
ast::StringLiteralPrefix::Empty | ast::StringLiteralPrefix::Unicode
|
||||
)
|
||||
) {
|
||||
token(self.as_str()).fmt(f)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -191,34 +74,6 @@ pub(crate) struct StringQuotes {
|
|||
quote_char: Quote,
|
||||
}
|
||||
|
||||
impl StringQuotes {
|
||||
pub(crate) fn parse(input: &str) -> Option<StringQuotes> {
|
||||
let mut chars = input.chars();
|
||||
|
||||
let quote_char = chars.next()?;
|
||||
let quote = Quote::try_from(quote_char).ok()?;
|
||||
|
||||
let triple = chars.next() == Some(quote_char) && chars.next() == Some(quote_char);
|
||||
|
||||
Some(Self {
|
||||
triple,
|
||||
quote_char: quote,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) const fn is_triple(self) -> bool {
|
||||
self.triple
|
||||
}
|
||||
|
||||
const fn text_len(self) -> TextSize {
|
||||
if self.triple {
|
||||
TextSize::new(3)
|
||||
} else {
|
||||
TextSize::new(1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Format<PyFormatContext<'_>> for StringQuotes {
|
||||
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
let quotes = match (self.quote_char, self.triple) {
|
||||
|
@ -232,6 +87,15 @@ impl Format<PyFormatContext<'_>> for StringQuotes {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<AnyStringKind> for StringQuotes {
|
||||
fn from(value: AnyStringKind) -> Self {
|
||||
Self {
|
||||
triple: value.is_triple_quoted(),
|
||||
quote_char: value.quote_style(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<QuoteStyle> for Quote {
|
||||
type Error = ();
|
||||
|
||||
|
@ -252,3 +116,58 @@ impl From<Quote> for QuoteStyle {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub(crate) struct StringPart {
|
||||
kind: AnyStringKind,
|
||||
range: TextRange,
|
||||
}
|
||||
|
||||
impl Ranged for StringPart {
|
||||
fn range(&self) -> TextRange {
|
||||
self.range
|
||||
}
|
||||
}
|
||||
|
||||
impl StringPart {
|
||||
/// Returns the [`AnyStringKind`] of this string part, which holds its prefix and quote information.
|
||||
fn kind(self) -> AnyStringKind {
|
||||
self.kind
|
||||
}
|
||||
|
||||
/// Returns the range of the string's content in the source (minus prefix and quotes).
|
||||
fn content_range(self) -> TextRange {
|
||||
let kind = self.kind();
|
||||
TextRange::new(
|
||||
self.start() + kind.opener_len(),
|
||||
self.end() - kind.closer_len(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&ast::StringLiteral> for StringPart {
|
||||
fn from(value: &ast::StringLiteral) -> Self {
|
||||
Self {
|
||||
range: value.range,
|
||||
kind: value.flags.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&ast::BytesLiteral> for StringPart {
|
||||
fn from(value: &ast::BytesLiteral) -> Self {
|
||||
Self {
|
||||
range: value.range,
|
||||
kind: value.flags.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&ast::FString> for StringPart {
|
||||
fn from(value: &ast::FString) -> Self {
|
||||
Self {
|
||||
range: value.range,
|
||||
kind: value.flags.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@ use std::borrow::Cow;
|
|||
use std::iter::FusedIterator;
|
||||
|
||||
use ruff_formatter::FormatContext;
|
||||
use ruff_python_ast::str::Quote;
|
||||
use ruff_python_ast::{str::Quote, AnyStringKind};
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
|
@ -10,7 +10,7 @@ use crate::context::FStringState;
|
|||
use crate::options::PythonVersion;
|
||||
use crate::prelude::*;
|
||||
use crate::preview::is_f_string_formatting_enabled;
|
||||
use crate::string::{Quoting, StringPart, StringPrefix, StringQuotes};
|
||||
use crate::string::{Quoting, StringPart, StringQuotes};
|
||||
use crate::QuoteStyle;
|
||||
|
||||
pub(crate) struct StringNormalizer {
|
||||
|
@ -44,7 +44,7 @@ impl StringNormalizer {
|
|||
self
|
||||
}
|
||||
|
||||
fn quoting(&self, string: &StringPart) -> Quoting {
|
||||
fn quoting(&self, string: StringPart) -> Quoting {
|
||||
if let FStringState::InsideExpressionElement(context) = self.f_string_state {
|
||||
// If we're inside an f-string, we need to make sure to preserve the
|
||||
// existing quotes unless we're inside a triple-quoted f-string and
|
||||
|
@ -60,7 +60,7 @@ impl StringNormalizer {
|
|||
// The reason to preserve the quotes is based on the assumption that
|
||||
// the original f-string is valid in terms of quoting, and we don't
|
||||
// want to change that to make it invalid.
|
||||
if (context.quotes().is_triple() && !string.quotes().is_triple())
|
||||
if (context.kind().is_triple_quoted() && !string.kind().is_triple_quoted())
|
||||
|| self.target_version.supports_pep_701()
|
||||
{
|
||||
self.quoting
|
||||
|
@ -73,18 +73,19 @@ impl StringNormalizer {
|
|||
}
|
||||
|
||||
/// Computes the string's preferred quotes.
|
||||
pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> QuoteSelection {
|
||||
pub(crate) fn choose_quotes(&self, string: StringPart, locator: &Locator) -> QuoteSelection {
|
||||
let raw_content = locator.slice(string.content_range());
|
||||
let first_quote_or_normalized_char_offset = raw_content
|
||||
.bytes()
|
||||
.position(|b| matches!(b, b'\\' | b'"' | b'\'' | b'\r' | b'{'));
|
||||
let string_kind = string.kind();
|
||||
|
||||
let quotes = match self.quoting(string) {
|
||||
Quoting::Preserve => string.quotes(),
|
||||
let new_kind = match self.quoting(string) {
|
||||
Quoting::Preserve => string_kind,
|
||||
Quoting::CanChange => {
|
||||
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
||||
// Except when using quote-style-preserve.
|
||||
let preferred_style = if string.quotes().triple {
|
||||
let preferred_style = if string_kind.is_triple_quoted() {
|
||||
// ... unless we're formatting a code snippet inside a docstring,
|
||||
// then we specifically want to invert our quote style to avoid
|
||||
// writing out invalid Python.
|
||||
|
@ -145,33 +146,30 @@ impl StringNormalizer {
|
|||
if let Some(first_quote_or_normalized_char_offset) =
|
||||
first_quote_or_normalized_char_offset
|
||||
{
|
||||
if string.prefix().is_raw_string() {
|
||||
if string_kind.is_raw_string() {
|
||||
choose_quotes_for_raw_string(
|
||||
&raw_content[first_quote_or_normalized_char_offset..],
|
||||
string.quotes(),
|
||||
string_kind,
|
||||
preferred_quote,
|
||||
)
|
||||
} else {
|
||||
choose_quotes_impl(
|
||||
&raw_content[first_quote_or_normalized_char_offset..],
|
||||
string.quotes(),
|
||||
string_kind,
|
||||
preferred_quote,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
StringQuotes {
|
||||
quote_char: preferred_quote,
|
||||
triple: string.quotes().is_triple(),
|
||||
}
|
||||
string_kind.with_quote_style(preferred_quote)
|
||||
}
|
||||
} else {
|
||||
string.quotes()
|
||||
string_kind
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
QuoteSelection {
|
||||
quotes,
|
||||
kind: new_kind,
|
||||
first_quote_or_normalized_char_offset,
|
||||
}
|
||||
}
|
||||
|
@ -179,11 +177,10 @@ impl StringNormalizer {
|
|||
/// Computes the string's preferred quotes and normalizes its content.
|
||||
pub(crate) fn normalize<'a>(
|
||||
&self,
|
||||
string: &StringPart,
|
||||
string: StringPart,
|
||||
locator: &'a Locator,
|
||||
) -> NormalizedString<'a> {
|
||||
let raw_content = locator.slice(string.content_range());
|
||||
|
||||
let quote_selection = self.choose_quotes(string, locator);
|
||||
|
||||
let normalized = if let Some(first_quote_or_escape_offset) =
|
||||
|
@ -192,8 +189,7 @@ impl StringNormalizer {
|
|||
normalize_string(
|
||||
raw_content,
|
||||
first_quote_or_escape_offset,
|
||||
quote_selection.quotes,
|
||||
string.prefix(),
|
||||
quote_selection.kind,
|
||||
// TODO: Remove the `b'{'` in `choose_quotes` when promoting the
|
||||
// `format_fstring` preview style
|
||||
self.format_fstring,
|
||||
|
@ -203,34 +199,31 @@ impl StringNormalizer {
|
|||
};
|
||||
|
||||
NormalizedString {
|
||||
prefix: string.prefix(),
|
||||
kind: quote_selection.kind,
|
||||
content_range: string.content_range(),
|
||||
text: normalized,
|
||||
quotes: quote_selection.quotes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct QuoteSelection {
|
||||
quotes: StringQuotes,
|
||||
kind: AnyStringKind,
|
||||
|
||||
/// Offset to the first quote character or character that needs special handling in [`normalize_string`].
|
||||
first_quote_or_normalized_char_offset: Option<usize>,
|
||||
}
|
||||
|
||||
impl QuoteSelection {
|
||||
pub(crate) fn quotes(&self) -> StringQuotes {
|
||||
self.quotes
|
||||
pub(crate) fn kind(&self) -> AnyStringKind {
|
||||
self.kind
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct NormalizedString<'a> {
|
||||
prefix: crate::string::StringPrefix,
|
||||
|
||||
/// The quotes of the normalized string (preferred quotes)
|
||||
quotes: StringQuotes,
|
||||
/// Holds data about the quotes and prefix of the string
|
||||
kind: AnyStringKind,
|
||||
|
||||
/// The range of the string's content in the source (minus prefix and quotes).
|
||||
content_range: TextRange,
|
||||
|
@ -244,12 +237,8 @@ impl<'a> NormalizedString<'a> {
|
|||
&self.text
|
||||
}
|
||||
|
||||
pub(crate) fn quotes(&self) -> StringQuotes {
|
||||
self.quotes
|
||||
}
|
||||
|
||||
pub(crate) fn prefix(&self) -> StringPrefix {
|
||||
self.prefix
|
||||
pub(crate) fn kind(&self) -> AnyStringKind {
|
||||
self.kind
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -261,7 +250,8 @@ impl Ranged for NormalizedString<'_> {
|
|||
|
||||
impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
||||
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
|
||||
ruff_formatter::write!(f, [self.prefix, self.quotes])?;
|
||||
let quotes = StringQuotes::from(self.kind);
|
||||
ruff_formatter::write!(f, [self.kind.prefix(), quotes])?;
|
||||
match &self.text {
|
||||
Cow::Borrowed(_) => {
|
||||
source_text_slice(self.range()).fmt(f)?;
|
||||
|
@ -270,7 +260,7 @@ impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
|||
text(normalized).fmt(f)?;
|
||||
}
|
||||
}
|
||||
self.quotes.fmt(f)
|
||||
quotes.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -281,9 +271,9 @@ impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
|||
/// style is double quotes.
|
||||
fn choose_quotes_for_raw_string(
|
||||
input: &str,
|
||||
quotes: StringQuotes,
|
||||
kind: AnyStringKind,
|
||||
preferred_quote: Quote,
|
||||
) -> StringQuotes {
|
||||
) -> AnyStringKind {
|
||||
let preferred_quote_char = preferred_quote.as_char();
|
||||
let mut chars = input.chars().peekable();
|
||||
let contains_unescaped_configured_quotes = loop {
|
||||
|
@ -294,7 +284,7 @@ fn choose_quotes_for_raw_string(
|
|||
}
|
||||
// `"` or `'`
|
||||
Some(c) if c == preferred_quote_char => {
|
||||
if !quotes.triple {
|
||||
if !kind.is_triple_quoted() {
|
||||
break true;
|
||||
}
|
||||
|
||||
|
@ -319,14 +309,10 @@ fn choose_quotes_for_raw_string(
|
|||
None => break false,
|
||||
}
|
||||
};
|
||||
|
||||
StringQuotes {
|
||||
triple: quotes.triple,
|
||||
quote_char: if contains_unescaped_configured_quotes {
|
||||
quotes.quote_char
|
||||
} else {
|
||||
preferred_quote
|
||||
},
|
||||
if contains_unescaped_configured_quotes {
|
||||
kind
|
||||
} else {
|
||||
kind.with_quote_style(preferred_quote)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -338,8 +324,8 @@ fn choose_quotes_for_raw_string(
|
|||
/// For triple quoted strings, the preferred quote style is always used, unless the string contains
|
||||
/// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
|
||||
/// used unless the string contains `"""`).
|
||||
fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote) -> StringQuotes {
|
||||
let quote = if quotes.triple {
|
||||
fn choose_quotes_impl(input: &str, kind: AnyStringKind, preferred_quote: Quote) -> AnyStringKind {
|
||||
let quote = if kind.is_triple_quoted() {
|
||||
// True if the string contains a triple quote sequence of the configured quote style.
|
||||
let mut uses_triple_quotes = false;
|
||||
let mut chars = input.chars().peekable();
|
||||
|
@ -393,7 +379,7 @@ fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote)
|
|||
if uses_triple_quotes {
|
||||
// String contains a triple quote sequence of the configured quote style.
|
||||
// Keep the existing quote style.
|
||||
quotes.quote_char
|
||||
kind.quote_style()
|
||||
} else {
|
||||
preferred_quote
|
||||
}
|
||||
|
@ -433,10 +419,7 @@ fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote)
|
|||
}
|
||||
};
|
||||
|
||||
StringQuotes {
|
||||
triple: quotes.triple,
|
||||
quote_char: quote,
|
||||
}
|
||||
kind.with_quote_style(quote)
|
||||
}
|
||||
|
||||
/// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
|
||||
|
@ -446,8 +429,7 @@ fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote)
|
|||
pub(crate) fn normalize_string(
|
||||
input: &str,
|
||||
start_offset: usize,
|
||||
quotes: StringQuotes,
|
||||
prefix: StringPrefix,
|
||||
kind: AnyStringKind,
|
||||
format_fstring: bool,
|
||||
) -> Cow<str> {
|
||||
// The normalized string if `input` is not yet normalized.
|
||||
|
@ -457,14 +439,14 @@ pub(crate) fn normalize_string(
|
|||
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
|
||||
let mut last_index = 0;
|
||||
|
||||
let quote = quotes.quote_char;
|
||||
let quote = kind.quote_style();
|
||||
let preferred_quote = quote.as_char();
|
||||
let opposite_quote = quote.opposite().as_char();
|
||||
|
||||
let mut chars = CharIndicesWithOffset::new(input, start_offset).peekable();
|
||||
|
||||
let is_raw = prefix.is_raw_string();
|
||||
let is_fstring = !format_fstring && prefix.is_fstring();
|
||||
let is_raw = kind.is_raw_string();
|
||||
let is_fstring = !format_fstring && kind.is_f_string();
|
||||
let mut formatted_value_nesting = 0u32;
|
||||
|
||||
while let Some((index, c)) = chars.next() {
|
||||
|
@ -502,7 +484,7 @@ pub(crate) fn normalize_string(
|
|||
} else {
|
||||
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
|
||||
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
|
||||
if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
|
||||
if let Some(normalised) = UnicodeEscape::new(next, !kind.is_byte_string())
|
||||
.and_then(|escape| escape.normalize(&input[index + escape_start_len..]))
|
||||
{
|
||||
let escape_start_offset = index + escape_start_len;
|
||||
|
@ -521,7 +503,7 @@ pub(crate) fn normalize_string(
|
|||
}
|
||||
}
|
||||
|
||||
if !quotes.triple {
|
||||
if !kind.is_triple_quoted() {
|
||||
#[allow(clippy::if_same_then_else)]
|
||||
if next == opposite_quote && formatted_value_nesting == 0 {
|
||||
// Remove the escape by ending before the backslash and starting again with the quote
|
||||
|
@ -534,7 +516,10 @@ pub(crate) fn normalize_string(
|
|||
}
|
||||
}
|
||||
}
|
||||
} else if !quotes.triple && c == preferred_quote && formatted_value_nesting == 0 {
|
||||
} else if !kind.is_triple_quoted()
|
||||
&& c == preferred_quote
|
||||
&& formatted_value_nesting == 0
|
||||
{
|
||||
// Escape the quote
|
||||
output.push_str(&input[last_index..index]);
|
||||
output.push('\\');
|
||||
|
@ -704,9 +689,7 @@ impl UnicodeEscape {
|
|||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
|
||||
use ruff_python_ast::str::Quote;
|
||||
|
||||
use crate::string::{StringPrefix, StringQuotes};
|
||||
use ruff_python_ast::{str::Quote, AnyStringKind, AnyStringPrefix, ByteStringPrefix};
|
||||
|
||||
use super::{normalize_string, UnicodeEscape};
|
||||
|
||||
|
@ -727,11 +710,11 @@ mod tests {
|
|||
let normalized = normalize_string(
|
||||
input,
|
||||
0,
|
||||
StringQuotes {
|
||||
triple: false,
|
||||
quote_char: Quote::Double,
|
||||
},
|
||||
StringPrefix::BYTE,
|
||||
AnyStringKind::new(
|
||||
AnyStringPrefix::Bytes(ByteStringPrefix::Regular),
|
||||
Quote::Double,
|
||||
false,
|
||||
),
|
||||
true,
|
||||
);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue