mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-26 20:10:09 +00:00
Separate `StringNormalizer` from `StringPart` (#9954)
## Summary

This PR is a small refactor that extracts the logic for normalizing strings in the formatter out of the `StringPart` struct. It also moves quote selection into a separate method on the new `StringNormalizer`. Both changes will help the f-string formatting use `StringPart` and `choose_quotes` irrespective of normalization.

The reason for having separate quote selection and normalization steps is so that the f-string formatting can perform quote selection on its own. Unlike string and byte literals, the f-string formatting requires that normalization happens only for its literal elements, i.e., the "foo" and "bar" in `f"foo {x + y} bar"`. This will automatically be handled by the already separate `normalize_string` function.

Another use case in the f-string formatting is to extract the relevant information from the `StringPart`, such as the quotes and prefix, which is then passed as context while formatting each element of an f-string.

## Test Plan

Ensure that clippy is happy and all tests pass.
This commit is contained in:
parent
6380c90031
commit
6f9c128d77
4 changed files with 96 additions and 59 deletions
|
@ -2,8 +2,7 @@ use ruff_python_ast::BytesLiteral;
|
||||||
use ruff_text_size::Ranged;
|
use ruff_text_size::Ranged;
|
||||||
|
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
|
use crate::string::{StringNormalizer, StringPart};
|
||||||
use crate::string::{Quoting, StringPart};
|
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct FormatBytesLiteral;
|
pub struct FormatBytesLiteral;
|
||||||
|
@ -12,14 +11,9 @@ impl FormatNodeRule<BytesLiteral> for FormatBytesLiteral {
|
||||||
fn fmt_fields(&self, item: &BytesLiteral, f: &mut PyFormatter) -> FormatResult<()> {
|
fn fmt_fields(&self, item: &BytesLiteral, f: &mut PyFormatter) -> FormatResult<()> {
|
||||||
let locator = f.context().locator();
|
let locator = f.context().locator();
|
||||||
|
|
||||||
StringPart::from_source(item.range(), &locator)
|
StringNormalizer::from_context(f.context())
|
||||||
.normalize(
|
.with_preferred_quote_style(f.options().quote_style())
|
||||||
Quoting::CanChange,
|
.normalize(&StringPart::from_source(item.range(), &locator), &locator)
|
||||||
&locator,
|
|
||||||
f.options().quote_style(),
|
|
||||||
f.context().docstring(),
|
|
||||||
is_hex_codes_in_unicode_sequences_enabled(f.context()),
|
|
||||||
)
|
|
||||||
.fmt(f)
|
.fmt(f)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,8 +2,7 @@ use ruff_python_ast::FString;
|
||||||
use ruff_text_size::Ranged;
|
use ruff_text_size::Ranged;
|
||||||
|
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
|
use crate::string::{Quoting, StringNormalizer, StringPart};
|
||||||
use crate::string::{Quoting, StringPart};
|
|
||||||
|
|
||||||
/// Formats an f-string which is part of a larger f-string expression.
|
/// Formats an f-string which is part of a larger f-string expression.
|
||||||
///
|
///
|
||||||
|
@ -26,13 +25,12 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
||||||
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
||||||
let locator = f.context().locator();
|
let locator = f.context().locator();
|
||||||
|
|
||||||
let result = StringPart::from_source(self.value.range(), &locator)
|
let result = StringNormalizer::from_context(f.context())
|
||||||
|
.with_quoting(self.quoting)
|
||||||
|
.with_preferred_quote_style(f.options().quote_style())
|
||||||
.normalize(
|
.normalize(
|
||||||
self.quoting,
|
&StringPart::from_source(self.value.range(), &locator),
|
||||||
&locator,
|
&locator,
|
||||||
f.options().quote_style(),
|
|
||||||
f.context().docstring(),
|
|
||||||
is_hex_codes_in_unicode_sequences_enabled(f.context()),
|
|
||||||
)
|
)
|
||||||
.fmt(f);
|
.fmt(f);
|
||||||
|
|
||||||
|
|
|
@ -2,8 +2,7 @@ use ruff_python_ast::StringLiteral;
|
||||||
use ruff_text_size::Ranged;
|
use ruff_text_size::Ranged;
|
||||||
|
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
|
use crate::string::{docstring, Quoting, StringNormalizer, StringPart};
|
||||||
use crate::string::{docstring, Quoting, StringPart};
|
|
||||||
use crate::QuoteStyle;
|
use crate::QuoteStyle;
|
||||||
|
|
||||||
pub(crate) struct FormatStringLiteral<'a> {
|
pub(crate) struct FormatStringLiteral<'a> {
|
||||||
|
@ -59,13 +58,13 @@ impl Format<PyFormatContext<'_>> for FormatStringLiteral<'_> {
|
||||||
quote_style
|
quote_style
|
||||||
};
|
};
|
||||||
|
|
||||||
let normalized = StringPart::from_source(self.value.range(), &locator).normalize(
|
let normalized = StringNormalizer::from_context(f.context())
|
||||||
self.layout.quoting(),
|
.with_quoting(self.layout.quoting())
|
||||||
&locator,
|
.with_preferred_quote_style(quote_style)
|
||||||
quote_style,
|
.normalize(
|
||||||
f.context().docstring(),
|
&StringPart::from_source(self.value.range(), &locator),
|
||||||
is_hex_codes_in_unicode_sequences_enabled(f.context()),
|
&locator,
|
||||||
);
|
);
|
||||||
|
|
||||||
if self.layout.is_docstring() {
|
if self.layout.is_docstring() {
|
||||||
docstring::format(&normalized, f)
|
docstring::format(&normalized, f)
|
||||||
|
|
|
@ -18,6 +18,7 @@ use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space
|
||||||
use crate::other::f_string::FormatFString;
|
use crate::other::f_string::FormatFString;
|
||||||
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
|
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
|
use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
|
||||||
use crate::QuoteStyle;
|
use crate::QuoteStyle;
|
||||||
|
|
||||||
pub(crate) mod docstring;
|
pub(crate) mod docstring;
|
||||||
|
@ -291,23 +292,54 @@ impl StringPart {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Computes the strings preferred quotes and normalizes its content.
|
/// Returns the prefix of the string part.
|
||||||
///
|
pub(crate) const fn prefix(&self) -> StringPrefix {
|
||||||
/// The parent docstring quote style should be set when formatting a code
|
self.prefix
|
||||||
/// snippet within the docstring. The quote style should correspond to the
|
}
|
||||||
/// style of quotes used by said docstring. Normalization will ensure the
|
|
||||||
/// quoting styles don't conflict.
|
/// Returns the surrounding quotes of the string part.
|
||||||
pub(crate) fn normalize<'a>(
|
pub(crate) const fn quotes(&self) -> StringQuotes {
|
||||||
self,
|
self.quotes
|
||||||
quoting: Quoting,
|
}
|
||||||
locator: &'a Locator,
|
|
||||||
configured_style: QuoteStyle,
|
/// Returns the range of the string's content in the source (minus prefix and quotes).
|
||||||
parent_docstring_quote_char: Option<QuoteChar>,
|
pub(crate) const fn content_range(&self) -> TextRange {
|
||||||
normalize_hex: bool,
|
self.content_range
|
||||||
) -> NormalizedString<'a> {
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) struct StringNormalizer {
|
||||||
|
quoting: Quoting,
|
||||||
|
preferred_quote_style: QuoteStyle,
|
||||||
|
parent_docstring_quote_char: Option<QuoteChar>,
|
||||||
|
normalize_hex: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StringNormalizer {
|
||||||
|
pub(crate) fn from_context(context: &PyFormatContext<'_>) -> Self {
|
||||||
|
Self {
|
||||||
|
quoting: Quoting::default(),
|
||||||
|
preferred_quote_style: QuoteStyle::default(),
|
||||||
|
parent_docstring_quote_char: context.docstring(),
|
||||||
|
normalize_hex: is_hex_codes_in_unicode_sequences_enabled(context),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn with_preferred_quote_style(mut self, quote_style: QuoteStyle) -> Self {
|
||||||
|
self.preferred_quote_style = quote_style;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn with_quoting(mut self, quoting: Quoting) -> Self {
|
||||||
|
self.quoting = quoting;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes the strings preferred quotes.
|
||||||
|
pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> StringQuotes {
|
||||||
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
||||||
// Except when using quote-style-preserve.
|
// Except when using quote-style-preserve.
|
||||||
let preferred_style = if self.quotes.triple {
|
let preferred_style = if string.quotes().triple {
|
||||||
// ... unless we're formatting a code snippet inside a docstring,
|
// ... unless we're formatting a code snippet inside a docstring,
|
||||||
// then we specifically want to invert our quote style to avoid
|
// then we specifically want to invert our quote style to avoid
|
||||||
// writing out invalid Python.
|
// writing out invalid Python.
|
||||||
|
@ -353,39 +385,49 @@ impl StringPart {
|
||||||
// Overall this is a bit of a corner case and just inverting the
|
// Overall this is a bit of a corner case and just inverting the
|
||||||
// style from what the parent ultimately decided upon works, even
|
// style from what the parent ultimately decided upon works, even
|
||||||
// if it doesn't have perfect alignment with PEP8.
|
// if it doesn't have perfect alignment with PEP8.
|
||||||
if let Some(quote) = parent_docstring_quote_char {
|
if let Some(quote) = self.parent_docstring_quote_char {
|
||||||
QuoteStyle::from(quote.invert())
|
QuoteStyle::from(quote.invert())
|
||||||
} else if configured_style.is_preserve() {
|
} else if self.preferred_quote_style.is_preserve() {
|
||||||
QuoteStyle::Preserve
|
QuoteStyle::Preserve
|
||||||
} else {
|
} else {
|
||||||
QuoteStyle::Double
|
QuoteStyle::Double
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
configured_style
|
self.preferred_quote_style
|
||||||
};
|
};
|
||||||
|
|
||||||
let raw_content = &locator.slice(self.content_range);
|
match self.quoting {
|
||||||
|
Quoting::Preserve => string.quotes(),
|
||||||
let quotes = match quoting {
|
|
||||||
Quoting::Preserve => self.quotes,
|
|
||||||
Quoting::CanChange => {
|
Quoting::CanChange => {
|
||||||
if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
|
if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
|
||||||
if self.prefix.is_raw_string() {
|
let raw_content = locator.slice(string.content_range());
|
||||||
choose_quotes_raw(raw_content, self.quotes, preferred_quote)
|
if string.prefix().is_raw_string() {
|
||||||
|
choose_quotes_for_raw_string(raw_content, string.quotes(), preferred_quote)
|
||||||
} else {
|
} else {
|
||||||
choose_quotes(raw_content, self.quotes, preferred_quote)
|
choose_quotes_impl(raw_content, string.quotes(), preferred_quote)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
self.quotes
|
string.quotes()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let normalized = normalize_string(raw_content, quotes, self.prefix, normalize_hex);
|
/// Computes the strings preferred quotes and normalizes its content.
|
||||||
|
pub(crate) fn normalize<'a>(
|
||||||
|
&self,
|
||||||
|
string: &StringPart,
|
||||||
|
locator: &'a Locator,
|
||||||
|
) -> NormalizedString<'a> {
|
||||||
|
let raw_content = locator.slice(string.content_range());
|
||||||
|
|
||||||
|
let quotes = self.choose_quotes(string, locator);
|
||||||
|
|
||||||
|
let normalized = normalize_string(raw_content, quotes, string.prefix(), self.normalize_hex);
|
||||||
|
|
||||||
NormalizedString {
|
NormalizedString {
|
||||||
prefix: self.prefix,
|
prefix: string.prefix(),
|
||||||
content_range: self.content_range,
|
content_range: string.content_range(),
|
||||||
text: normalized,
|
text: normalized,
|
||||||
quotes,
|
quotes,
|
||||||
}
|
}
|
||||||
|
@ -512,7 +554,7 @@ impl Format<PyFormatContext<'_>> for StringPrefix {
|
||||||
/// The preferred quote style is chosen unless the string contains unescaped quotes of the
|
/// The preferred quote style is chosen unless the string contains unescaped quotes of the
|
||||||
/// preferred style. For example, `r"foo"` is chosen over `r'foo'` if the preferred quote
|
/// preferred style. For example, `r"foo"` is chosen over `r'foo'` if the preferred quote
|
||||||
/// style is double quotes.
|
/// style is double quotes.
|
||||||
fn choose_quotes_raw(
|
fn choose_quotes_for_raw_string(
|
||||||
input: &str,
|
input: &str,
|
||||||
quotes: StringQuotes,
|
quotes: StringQuotes,
|
||||||
preferred_quote: QuoteChar,
|
preferred_quote: QuoteChar,
|
||||||
|
@ -571,7 +613,11 @@ fn choose_quotes_raw(
|
||||||
/// For triple quoted strings, the preferred quote style is always used, unless the string contains
|
/// For triple quoted strings, the preferred quote style is always used, unless the string contains
|
||||||
/// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
|
/// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
|
||||||
/// used unless the string contains `"""`).
|
/// used unless the string contains `"""`).
|
||||||
fn choose_quotes(input: &str, quotes: StringQuotes, preferred_quote: QuoteChar) -> StringQuotes {
|
fn choose_quotes_impl(
|
||||||
|
input: &str,
|
||||||
|
quotes: StringQuotes,
|
||||||
|
preferred_quote: QuoteChar,
|
||||||
|
) -> StringQuotes {
|
||||||
let quote = if quotes.triple {
|
let quote = if quotes.triple {
|
||||||
// True if the string contains a triple quote sequence of the configured quote style.
|
// True if the string contains a triple quote sequence of the configured quote style.
|
||||||
let mut uses_triple_quotes = false;
|
let mut uses_triple_quotes = false;
|
||||||
|
@ -780,7 +826,7 @@ impl TryFrom<char> for QuoteChar {
|
||||||
/// with the provided [`StringQuotes`] style.
|
/// with the provided [`StringQuotes`] style.
|
||||||
///
|
///
|
||||||
/// Returns the normalized string and whether it contains new lines.
|
/// Returns the normalized string and whether it contains new lines.
|
||||||
fn normalize_string(
|
pub(crate) fn normalize_string(
|
||||||
input: &str,
|
input: &str,
|
||||||
quotes: StringQuotes,
|
quotes: StringQuotes,
|
||||||
prefix: StringPrefix,
|
prefix: StringPrefix,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue