mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-29 13:24:57 +00:00
Perf: Skip string normalization when possible (#10116)
This commit is contained in:
parent
15b87ea8be
commit
8dc22d5793
3 changed files with 170 additions and 88 deletions
|
@ -59,16 +59,16 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
let quotes = normalizer.choose_quotes(&string, &locator);
|
let quote_selection = normalizer.choose_quotes(&string, &locator);
|
||||||
|
|
||||||
let context = FStringContext::new(
|
let context = FStringContext::new(
|
||||||
string.prefix(),
|
string.prefix(),
|
||||||
quotes,
|
quote_selection.quotes(),
|
||||||
FStringLayout::from_f_string(self.value, &locator),
|
FStringLayout::from_f_string(self.value, &locator),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Starting prefix and quote
|
// Starting prefix and quote
|
||||||
write!(f, [string.prefix(), quotes])?;
|
write!(f, [string.prefix(), quote_selection.quotes()])?;
|
||||||
|
|
||||||
f.join()
|
f.join()
|
||||||
.entries(
|
.entries(
|
||||||
|
@ -80,7 +80,7 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
||||||
.finish()?;
|
.finish()?;
|
||||||
|
|
||||||
// Ending quote
|
// Ending quote
|
||||||
quotes.fmt(f)
|
quote_selection.quotes().fmt(f)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -59,6 +59,7 @@ impl Format<PyFormatContext<'_>> for FormatFStringLiteralElement<'_> {
|
||||||
let literal_content = f.context().locator().slice(self.element.range());
|
let literal_content = f.context().locator().slice(self.element.range());
|
||||||
let normalized = normalize_string(
|
let normalized = normalize_string(
|
||||||
literal_content,
|
literal_content,
|
||||||
|
0,
|
||||||
self.context.quotes(),
|
self.context.quotes(),
|
||||||
self.context.prefix(),
|
self.context.prefix(),
|
||||||
is_hex_codes_in_unicode_sequences_enabled(f.context()),
|
is_hex_codes_in_unicode_sequences_enabled(f.context()),
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::iter::FusedIterator;
|
||||||
|
|
||||||
use ruff_formatter::FormatContext;
|
use ruff_formatter::FormatContext;
|
||||||
use ruff_source_file::Locator;
|
use ruff_source_file::Locator;
|
||||||
|
@ -44,68 +45,8 @@ impl StringNormalizer {
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Computes the string's preferred quotes.
|
fn quoting(&self, string: &StringPart) -> Quoting {
|
||||||
pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> StringQuotes {
|
if let FStringState::InsideExpressionElement(context) = self.f_string_state {
|
||||||
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
|
||||||
// Except when using quote-style-preserve.
|
|
||||||
let preferred_style = if string.quotes().triple {
|
|
||||||
// ... unless we're formatting a code snippet inside a docstring,
|
|
||||||
// then we specifically want to invert our quote style to avoid
|
|
||||||
// writing out invalid Python.
|
|
||||||
//
|
|
||||||
// It's worth pointing out that we can actually wind up being
|
|
||||||
// somewhat out of sync with PEP8 in this case. Consider this
|
|
||||||
// example:
|
|
||||||
//
|
|
||||||
// def foo():
|
|
||||||
// '''
|
|
||||||
// Something.
|
|
||||||
//
|
|
||||||
// >>> """tricksy"""
|
|
||||||
// '''
|
|
||||||
// pass
|
|
||||||
//
|
|
||||||
// Ideally, this would be reformatted as:
|
|
||||||
//
|
|
||||||
// def foo():
|
|
||||||
// """
|
|
||||||
// Something.
|
|
||||||
//
|
|
||||||
// >>> '''tricksy'''
|
|
||||||
// """
|
|
||||||
// pass
|
|
||||||
//
|
|
||||||
// But the logic here results in the original quoting being
|
|
||||||
// preserved. This is because the quoting style of the outer
|
|
||||||
// docstring is determined, in part, by looking at its contents. In
|
|
||||||
// this case, it notices that it contains a `"""` and thus infers
|
|
||||||
// that using `'''` would overall read better because it avoids
|
|
||||||
// the need to escape the interior `"""`. Except... in this case,
|
|
||||||
// the `"""` is actually part of a code snippet that could get
|
|
||||||
// reformatted to using a different quoting style itself.
|
|
||||||
//
|
|
||||||
// Fixing this would, I believe, require some fairly seismic
|
|
||||||
// changes to how formatting strings works. Namely, we would need
|
|
||||||
// to look for code snippets before normalizing the docstring, and
|
|
||||||
// then figure out the quoting style more holistically by looking
|
|
||||||
// at the various kinds of quotes used in the code snippets and
|
|
||||||
// what reformatting them might look like.
|
|
||||||
//
|
|
||||||
// Overall this is a bit of a corner case and just inverting the
|
|
||||||
// style from what the parent ultimately decided upon works, even
|
|
||||||
// if it doesn't have perfect alignment with PEP8.
|
|
||||||
if let Some(quote) = self.parent_docstring_quote_char {
|
|
||||||
QuoteStyle::from(quote.invert())
|
|
||||||
} else if self.preferred_quote_style.is_preserve() {
|
|
||||||
QuoteStyle::Preserve
|
|
||||||
} else {
|
|
||||||
QuoteStyle::Double
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
self.preferred_quote_style
|
|
||||||
};
|
|
||||||
|
|
||||||
let quoting = if let FStringState::InsideExpressionElement(context) = self.f_string_state {
|
|
||||||
// If we're inside an f-string, we need to make sure to preserve the
|
// If we're inside an f-string, we need to make sure to preserve the
|
||||||
// existing quotes unless we're inside a triple-quoted f-string and
|
// existing quotes unless we're inside a triple-quoted f-string and
|
||||||
// the inner string itself isn't triple-quoted. For example:
|
// the inner string itself isn't triple-quoted. For example:
|
||||||
|
@ -129,22 +70,110 @@ impl StringNormalizer {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
self.quoting
|
self.quoting
|
||||||
};
|
}
|
||||||
|
}
|
||||||
|
|
||||||
match quoting {
|
/// Computes the string's preferred quotes.
|
||||||
|
pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> QuoteSelection {
|
||||||
|
let raw_content = locator.slice(string.content_range());
|
||||||
|
let first_quote_or_normalized_char_offset = raw_content
|
||||||
|
.bytes()
|
||||||
|
.position(|b| matches!(b, b'\\' | b'"' | b'\'' | b'\r' | b'{'));
|
||||||
|
|
||||||
|
let quotes = match self.quoting(string) {
|
||||||
Quoting::Preserve => string.quotes(),
|
Quoting::Preserve => string.quotes(),
|
||||||
Quoting::CanChange => {
|
Quoting::CanChange => {
|
||||||
if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
|
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
||||||
let raw_content = locator.slice(string.content_range());
|
// Except when using quote-style-preserve.
|
||||||
if string.prefix().is_raw_string() {
|
let preferred_style = if string.quotes().triple {
|
||||||
choose_quotes_for_raw_string(raw_content, string.quotes(), preferred_quote)
|
// ... unless we're formatting a code snippet inside a docstring,
|
||||||
|
// then we specifically want to invert our quote style to avoid
|
||||||
|
// writing out invalid Python.
|
||||||
|
//
|
||||||
|
// It's worth pointing out that we can actually wind up being
|
||||||
|
// somewhat out of sync with PEP8 in this case. Consider this
|
||||||
|
// example:
|
||||||
|
//
|
||||||
|
// def foo():
|
||||||
|
// '''
|
||||||
|
// Something.
|
||||||
|
//
|
||||||
|
// >>> """tricksy"""
|
||||||
|
// '''
|
||||||
|
// pass
|
||||||
|
//
|
||||||
|
// Ideally, this would be reformatted as:
|
||||||
|
//
|
||||||
|
// def foo():
|
||||||
|
// """
|
||||||
|
// Something.
|
||||||
|
//
|
||||||
|
// >>> '''tricksy'''
|
||||||
|
// """
|
||||||
|
// pass
|
||||||
|
//
|
||||||
|
// But the logic here results in the original quoting being
|
||||||
|
// preserved. This is because the quoting style of the outer
|
||||||
|
// docstring is determined, in part, by looking at its contents. In
|
||||||
|
// this case, it notices that it contains a `"""` and thus infers
|
||||||
|
// that using `'''` would overall read better because it avoids
|
||||||
|
// the need to escape the interior `"""`. Except... in this case,
|
||||||
|
// the `"""` is actually part of a code snippet that could get
|
||||||
|
// reformatted to using a different quoting style itself.
|
||||||
|
//
|
||||||
|
// Fixing this would, I believe, require some fairly seismic
|
||||||
|
// changes to how formatting strings works. Namely, we would need
|
||||||
|
// to look for code snippets before normalizing the docstring, and
|
||||||
|
// then figure out the quoting style more holistically by looking
|
||||||
|
// at the various kinds of quotes used in the code snippets and
|
||||||
|
// what reformatting them might look like.
|
||||||
|
//
|
||||||
|
// Overall this is a bit of a corner case and just inverting the
|
||||||
|
// style from what the parent ultimately decided upon works, even
|
||||||
|
// if it doesn't have perfect alignment with PEP8.
|
||||||
|
if let Some(quote) = self.parent_docstring_quote_char {
|
||||||
|
QuoteStyle::from(quote.invert())
|
||||||
|
} else if self.preferred_quote_style.is_preserve() {
|
||||||
|
QuoteStyle::Preserve
|
||||||
} else {
|
} else {
|
||||||
choose_quotes_impl(raw_content, string.quotes(), preferred_quote)
|
QuoteStyle::Double
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.preferred_quote_style
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
|
||||||
|
if let Some(first_quote_or_normalized_char_offset) =
|
||||||
|
first_quote_or_normalized_char_offset
|
||||||
|
{
|
||||||
|
if string.prefix().is_raw_string() {
|
||||||
|
choose_quotes_for_raw_string(
|
||||||
|
&raw_content[first_quote_or_normalized_char_offset..],
|
||||||
|
string.quotes(),
|
||||||
|
preferred_quote,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
choose_quotes_impl(
|
||||||
|
&raw_content[first_quote_or_normalized_char_offset..],
|
||||||
|
string.quotes(),
|
||||||
|
preferred_quote,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
StringQuotes {
|
||||||
|
quote_char: preferred_quote,
|
||||||
|
triple: string.quotes().is_triple(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
string.quotes()
|
string.quotes()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
QuoteSelection {
|
||||||
|
quotes,
|
||||||
|
first_quote_or_normalized_char_offset,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,25 +185,48 @@ impl StringNormalizer {
|
||||||
) -> NormalizedString<'a> {
|
) -> NormalizedString<'a> {
|
||||||
let raw_content = locator.slice(string.content_range());
|
let raw_content = locator.slice(string.content_range());
|
||||||
|
|
||||||
let quotes = self.choose_quotes(string, locator);
|
let quote_selection = self.choose_quotes(string, locator);
|
||||||
|
|
||||||
let normalized = normalize_string(
|
let normalized = if let Some(first_quote_or_escape_offset) =
|
||||||
raw_content,
|
quote_selection.first_quote_or_normalized_char_offset
|
||||||
quotes,
|
{
|
||||||
string.prefix(),
|
normalize_string(
|
||||||
self.normalize_hex,
|
raw_content,
|
||||||
self.format_fstring,
|
first_quote_or_escape_offset,
|
||||||
);
|
quote_selection.quotes,
|
||||||
|
string.prefix(),
|
||||||
|
self.normalize_hex,
|
||||||
|
// TODO: Remove the `b'{'` in `choose_quotes` when promoting the
|
||||||
|
// `format_fstring` preview style
|
||||||
|
self.format_fstring,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
Cow::Borrowed(raw_content)
|
||||||
|
};
|
||||||
|
|
||||||
NormalizedString {
|
NormalizedString {
|
||||||
prefix: string.prefix(),
|
prefix: string.prefix(),
|
||||||
content_range: string.content_range(),
|
content_range: string.content_range(),
|
||||||
text: normalized,
|
text: normalized,
|
||||||
quotes,
|
quotes: quote_selection.quotes,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct QuoteSelection {
|
||||||
|
quotes: StringQuotes,
|
||||||
|
|
||||||
|
/// Offset to the first quote character or character that needs special handling in [`normalize_string`].
|
||||||
|
first_quote_or_normalized_char_offset: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QuoteSelection {
|
||||||
|
pub(crate) fn quotes(&self) -> StringQuotes {
|
||||||
|
self.quotes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) struct NormalizedString<'a> {
|
pub(crate) struct NormalizedString<'a> {
|
||||||
prefix: crate::string::StringPrefix,
|
prefix: crate::string::StringPrefix,
|
||||||
|
@ -399,6 +451,7 @@ fn choose_quotes_impl(
|
||||||
/// Returns the normalized string.
|
/// Returns the normalized string.
|
||||||
pub(crate) fn normalize_string(
|
pub(crate) fn normalize_string(
|
||||||
input: &str,
|
input: &str,
|
||||||
|
start_offset: usize,
|
||||||
quotes: StringQuotes,
|
quotes: StringQuotes,
|
||||||
prefix: StringPrefix,
|
prefix: StringPrefix,
|
||||||
normalize_hex: bool,
|
normalize_hex: bool,
|
||||||
|
@ -415,7 +468,7 @@ pub(crate) fn normalize_string(
|
||||||
let preferred_quote = quote.as_char();
|
let preferred_quote = quote.as_char();
|
||||||
let opposite_quote = quote.invert().as_char();
|
let opposite_quote = quote.invert().as_char();
|
||||||
|
|
||||||
let mut chars = input.char_indices().peekable();
|
let mut chars = CharIndicesWithOffset::new(input, start_offset).peekable();
|
||||||
|
|
||||||
let is_raw = prefix.is_raw_string();
|
let is_raw = prefix.is_raw_string();
|
||||||
let is_fstring = !format_fstring && prefix.is_fstring();
|
let is_fstring = !format_fstring && prefix.is_fstring();
|
||||||
|
@ -454,13 +507,11 @@ pub(crate) fn normalize_string(
|
||||||
// Skip over escaped backslashes
|
// Skip over escaped backslashes
|
||||||
chars.next();
|
chars.next();
|
||||||
} else if normalize_hex {
|
} else if normalize_hex {
|
||||||
|
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
|
||||||
|
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
|
||||||
if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
|
if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
|
||||||
.and_then(|escape| {
|
.and_then(|escape| escape.normalize(&input[index + escape_start_len..]))
|
||||||
escape.normalize(&input[index + c.len_utf8() + next.len_utf8()..])
|
|
||||||
})
|
|
||||||
{
|
{
|
||||||
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
|
|
||||||
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
|
|
||||||
let escape_start_offset = index + escape_start_len;
|
let escape_start_offset = index + escape_start_len;
|
||||||
if let Cow::Owned(normalised) = &normalised {
|
if let Cow::Owned(normalised) = &normalised {
|
||||||
output.push_str(&input[last_index..escape_start_offset]);
|
output.push_str(&input[last_index..escape_start_offset]);
|
||||||
|
@ -510,6 +561,35 @@ pub(crate) fn normalize_string(
|
||||||
normalized
|
normalized
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct CharIndicesWithOffset<'str> {
|
||||||
|
chars: std::str::Chars<'str>,
|
||||||
|
next_offset: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'str> CharIndicesWithOffset<'str> {
|
||||||
|
fn new(input: &'str str, start_offset: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
chars: input[start_offset..].chars(),
|
||||||
|
next_offset: start_offset,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'str> Iterator for CharIndicesWithOffset<'str> {
|
||||||
|
type Item = (usize, char);
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
self.chars.next().map(|c| {
|
||||||
|
let index = self.next_offset;
|
||||||
|
self.next_offset += c.len_utf8();
|
||||||
|
(index, c)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FusedIterator for CharIndicesWithOffset<'_> {}
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||||
enum UnicodeEscape {
|
enum UnicodeEscape {
|
||||||
/// A hex escape sequence of either 2 (`\x`), 4 (`\u`) or 8 (`\U`) hex characters.
|
/// A hex escape sequence of either 2 (`\x`), 4 (`\u`) or 8 (`\U`) hex characters.
|
||||||
|
@ -651,6 +731,7 @@ mod tests {
|
||||||
|
|
||||||
let normalized = normalize_string(
|
let normalized = normalize_string(
|
||||||
input,
|
input,
|
||||||
|
0,
|
||||||
StringQuotes {
|
StringQuotes {
|
||||||
triple: false,
|
triple: false,
|
||||||
quote_char: QuoteChar::Double,
|
quote_char: QuoteChar::Double,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue