mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-29 21:34:57 +00:00
Perf: Skip string normalization when possible (#10116)
This commit is contained in:
parent
15b87ea8be
commit
8dc22d5793
3 changed files with 170 additions and 88 deletions
|
@ -59,16 +59,16 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
let quotes = normalizer.choose_quotes(&string, &locator);
|
let quote_selection = normalizer.choose_quotes(&string, &locator);
|
||||||
|
|
||||||
let context = FStringContext::new(
|
let context = FStringContext::new(
|
||||||
string.prefix(),
|
string.prefix(),
|
||||||
quotes,
|
quote_selection.quotes(),
|
||||||
FStringLayout::from_f_string(self.value, &locator),
|
FStringLayout::from_f_string(self.value, &locator),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Starting prefix and quote
|
// Starting prefix and quote
|
||||||
write!(f, [string.prefix(), quotes])?;
|
write!(f, [string.prefix(), quote_selection.quotes()])?;
|
||||||
|
|
||||||
f.join()
|
f.join()
|
||||||
.entries(
|
.entries(
|
||||||
|
@ -80,7 +80,7 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
||||||
.finish()?;
|
.finish()?;
|
||||||
|
|
||||||
// Ending quote
|
// Ending quote
|
||||||
quotes.fmt(f)
|
quote_selection.quotes().fmt(f)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -59,6 +59,7 @@ impl Format<PyFormatContext<'_>> for FormatFStringLiteralElement<'_> {
|
||||||
let literal_content = f.context().locator().slice(self.element.range());
|
let literal_content = f.context().locator().slice(self.element.range());
|
||||||
let normalized = normalize_string(
|
let normalized = normalize_string(
|
||||||
literal_content,
|
literal_content,
|
||||||
|
0,
|
||||||
self.context.quotes(),
|
self.context.quotes(),
|
||||||
self.context.prefix(),
|
self.context.prefix(),
|
||||||
is_hex_codes_in_unicode_sequences_enabled(f.context()),
|
is_hex_codes_in_unicode_sequences_enabled(f.context()),
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::iter::FusedIterator;
|
||||||
|
|
||||||
use ruff_formatter::FormatContext;
|
use ruff_formatter::FormatContext;
|
||||||
use ruff_source_file::Locator;
|
use ruff_source_file::Locator;
|
||||||
|
@ -44,8 +45,44 @@ impl StringNormalizer {
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn quoting(&self, string: &StringPart) -> Quoting {
|
||||||
|
if let FStringState::InsideExpressionElement(context) = self.f_string_state {
|
||||||
|
// If we're inside an f-string, we need to make sure to preserve the
|
||||||
|
// existing quotes unless we're inside a triple-quoted f-string and
|
||||||
|
// the inner string itself isn't triple-quoted. For example:
|
||||||
|
//
|
||||||
|
// ```python
|
||||||
|
// f"""outer {"inner"}""" # Valid
|
||||||
|
// f"""outer {"""inner"""}""" # Invalid
|
||||||
|
// ```
|
||||||
|
//
|
||||||
|
// Or, if the target version supports PEP 701.
|
||||||
|
//
|
||||||
|
// The reason to preserve the quotes is based on the assumption that
|
||||||
|
// the original f-string is valid in terms of quoting, and we don't
|
||||||
|
// want to change that to make it invalid.
|
||||||
|
if (context.quotes().is_triple() && !string.quotes().is_triple())
|
||||||
|
|| self.target_version.supports_pep_701()
|
||||||
|
{
|
||||||
|
self.quoting
|
||||||
|
} else {
|
||||||
|
Quoting::Preserve
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.quoting
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Computes the string's preferred quotes.
|
/// Computes the string's preferred quotes.
|
||||||
pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> StringQuotes {
|
pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> QuoteSelection {
|
||||||
|
let raw_content = locator.slice(string.content_range());
|
||||||
|
let first_quote_or_normalized_char_offset = raw_content
|
||||||
|
.bytes()
|
||||||
|
.position(|b| matches!(b, b'\\' | b'"' | b'\'' | b'\r' | b'{'));
|
||||||
|
|
||||||
|
let quotes = match self.quoting(string) {
|
||||||
|
Quoting::Preserve => string.quotes(),
|
||||||
|
Quoting::CanChange => {
|
||||||
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
||||||
// Except when using quote-style-preserve.
|
// Except when using quote-style-preserve.
|
||||||
let preferred_style = if string.quotes().triple {
|
let preferred_style = if string.quotes().triple {
|
||||||
|
@ -105,46 +142,38 @@ impl StringNormalizer {
|
||||||
self.preferred_quote_style
|
self.preferred_quote_style
|
||||||
};
|
};
|
||||||
|
|
||||||
let quoting = if let FStringState::InsideExpressionElement(context) = self.f_string_state {
|
if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
|
||||||
// If we're inside an f-string, we need to make sure to preserve the
|
if let Some(first_quote_or_normalized_char_offset) =
|
||||||
// existing quotes unless we're inside a triple-quoted f-string and
|
first_quote_or_normalized_char_offset
|
||||||
// the inner string itself isn't triple-quoted. For example:
|
|
||||||
//
|
|
||||||
// ```python
|
|
||||||
// f"""outer {"inner"}""" # Valid
|
|
||||||
// f"""outer {"""inner"""}""" # Invalid
|
|
||||||
// ```
|
|
||||||
//
|
|
||||||
// Or, if the target version supports PEP 701.
|
|
||||||
//
|
|
||||||
// The reason to preserve the quotes is based on the assumption that
|
|
||||||
// the original f-string is valid in terms of quoting, and we don't
|
|
||||||
// want to change that to make it invalid.
|
|
||||||
if (context.quotes().is_triple() && !string.quotes().is_triple())
|
|
||||||
|| self.target_version.supports_pep_701()
|
|
||||||
{
|
{
|
||||||
self.quoting
|
if string.prefix().is_raw_string() {
|
||||||
|
choose_quotes_for_raw_string(
|
||||||
|
&raw_content[first_quote_or_normalized_char_offset..],
|
||||||
|
string.quotes(),
|
||||||
|
preferred_quote,
|
||||||
|
)
|
||||||
} else {
|
} else {
|
||||||
Quoting::Preserve
|
choose_quotes_impl(
|
||||||
|
&raw_content[first_quote_or_normalized_char_offset..],
|
||||||
|
string.quotes(),
|
||||||
|
preferred_quote,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
self.quoting
|
StringQuotes {
|
||||||
};
|
quote_char: preferred_quote,
|
||||||
|
triple: string.quotes().is_triple(),
|
||||||
match quoting {
|
}
|
||||||
Quoting::Preserve => string.quotes(),
|
|
||||||
Quoting::CanChange => {
|
|
||||||
if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
|
|
||||||
let raw_content = locator.slice(string.content_range());
|
|
||||||
if string.prefix().is_raw_string() {
|
|
||||||
choose_quotes_for_raw_string(raw_content, string.quotes(), preferred_quote)
|
|
||||||
} else {
|
|
||||||
choose_quotes_impl(raw_content, string.quotes(), preferred_quote)
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
string.quotes()
|
string.quotes()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
QuoteSelection {
|
||||||
|
quotes,
|
||||||
|
first_quote_or_normalized_char_offset,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,25 +185,48 @@ impl StringNormalizer {
|
||||||
) -> NormalizedString<'a> {
|
) -> NormalizedString<'a> {
|
||||||
let raw_content = locator.slice(string.content_range());
|
let raw_content = locator.slice(string.content_range());
|
||||||
|
|
||||||
let quotes = self.choose_quotes(string, locator);
|
let quote_selection = self.choose_quotes(string, locator);
|
||||||
|
|
||||||
let normalized = normalize_string(
|
let normalized = if let Some(first_quote_or_escape_offset) =
|
||||||
|
quote_selection.first_quote_or_normalized_char_offset
|
||||||
|
{
|
||||||
|
normalize_string(
|
||||||
raw_content,
|
raw_content,
|
||||||
quotes,
|
first_quote_or_escape_offset,
|
||||||
|
quote_selection.quotes,
|
||||||
string.prefix(),
|
string.prefix(),
|
||||||
self.normalize_hex,
|
self.normalize_hex,
|
||||||
|
// TODO: Remove the `b'{'` in `choose_quotes` when promoting the
|
||||||
|
// `format_fstring` preview style
|
||||||
self.format_fstring,
|
self.format_fstring,
|
||||||
);
|
)
|
||||||
|
} else {
|
||||||
|
Cow::Borrowed(raw_content)
|
||||||
|
};
|
||||||
|
|
||||||
NormalizedString {
|
NormalizedString {
|
||||||
prefix: string.prefix(),
|
prefix: string.prefix(),
|
||||||
content_range: string.content_range(),
|
content_range: string.content_range(),
|
||||||
text: normalized,
|
text: normalized,
|
||||||
quotes,
|
quotes: quote_selection.quotes,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct QuoteSelection {
|
||||||
|
quotes: StringQuotes,
|
||||||
|
|
||||||
|
/// Offset to the first quote character or character that needs special handling in [`normalize_string`].
|
||||||
|
first_quote_or_normalized_char_offset: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QuoteSelection {
|
||||||
|
pub(crate) fn quotes(&self) -> StringQuotes {
|
||||||
|
self.quotes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) struct NormalizedString<'a> {
|
pub(crate) struct NormalizedString<'a> {
|
||||||
prefix: crate::string::StringPrefix,
|
prefix: crate::string::StringPrefix,
|
||||||
|
@ -399,6 +451,7 @@ fn choose_quotes_impl(
|
||||||
/// Returns the normalized string and whether it contains new lines.
|
/// Returns the normalized string and whether it contains new lines.
|
||||||
pub(crate) fn normalize_string(
|
pub(crate) fn normalize_string(
|
||||||
input: &str,
|
input: &str,
|
||||||
|
start_offset: usize,
|
||||||
quotes: StringQuotes,
|
quotes: StringQuotes,
|
||||||
prefix: StringPrefix,
|
prefix: StringPrefix,
|
||||||
normalize_hex: bool,
|
normalize_hex: bool,
|
||||||
|
@ -415,7 +468,7 @@ pub(crate) fn normalize_string(
|
||||||
let preferred_quote = quote.as_char();
|
let preferred_quote = quote.as_char();
|
||||||
let opposite_quote = quote.invert().as_char();
|
let opposite_quote = quote.invert().as_char();
|
||||||
|
|
||||||
let mut chars = input.char_indices().peekable();
|
let mut chars = CharIndicesWithOffset::new(input, start_offset).peekable();
|
||||||
|
|
||||||
let is_raw = prefix.is_raw_string();
|
let is_raw = prefix.is_raw_string();
|
||||||
let is_fstring = !format_fstring && prefix.is_fstring();
|
let is_fstring = !format_fstring && prefix.is_fstring();
|
||||||
|
@ -454,13 +507,11 @@ pub(crate) fn normalize_string(
|
||||||
// Skip over escaped backslashes
|
// Skip over escaped backslashes
|
||||||
chars.next();
|
chars.next();
|
||||||
} else if normalize_hex {
|
} else if normalize_hex {
|
||||||
if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
|
|
||||||
.and_then(|escape| {
|
|
||||||
escape.normalize(&input[index + c.len_utf8() + next.len_utf8()..])
|
|
||||||
})
|
|
||||||
{
|
|
||||||
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
|
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
|
||||||
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
|
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
|
||||||
|
if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
|
||||||
|
.and_then(|escape| escape.normalize(&input[index + escape_start_len..]))
|
||||||
|
{
|
||||||
let escape_start_offset = index + escape_start_len;
|
let escape_start_offset = index + escape_start_len;
|
||||||
if let Cow::Owned(normalised) = &normalised {
|
if let Cow::Owned(normalised) = &normalised {
|
||||||
output.push_str(&input[last_index..escape_start_offset]);
|
output.push_str(&input[last_index..escape_start_offset]);
|
||||||
|
@ -510,6 +561,35 @@ pub(crate) fn normalize_string(
|
||||||
normalized
|
normalized
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Iterator over the characters of the tail of a string, paired with their byte offsets
/// in the *whole* string.
///
/// Iteration starts at `start_offset`, but every yielded index is relative to the start of
/// the input that was passed to [`CharIndicesWithOffset::new`]. The indices can therefore be
/// used to slice that input directly, just like the ones produced by `str::char_indices`.
#[derive(Clone, Debug)]
struct CharIndicesWithOffset<'str> {
    /// The characters that have not been yielded yet.
    chars: std::str::Chars<'str>,
    /// Byte offset, relative to the start of the input, of the character yielded next.
    next_offset: usize,
}

impl<'str> CharIndicesWithOffset<'str> {
    /// Creates an iterator over the characters of `input[start_offset..]`.
    ///
    /// # Panics
    ///
    /// Panics if `start_offset` is past the end of `input` or does not lie on a `char`
    /// boundary, because `input` is sliced at that offset.
    fn new(input: &'str str, start_offset: usize) -> Self {
        Self {
            chars: input[start_offset..].chars(),
            next_offset: start_offset,
        }
    }
}

impl Iterator for CharIndicesWithOffset<'_> {
    type Item = (usize, char);

    fn next(&mut self) -> Option<Self::Item> {
        let c = self.chars.next()?;
        let index = self.next_offset;
        // Advance by the encoded width so that the next index is a valid byte offset.
        self.next_offset += c.len_utf8();
        Some((index, c))
    }

    /// One item is yielded per remaining character, so the bounds of the inner iterator
    /// apply unchanged.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.chars.size_hint()
    }
}

impl FusedIterator for CharIndicesWithOffset<'_> {}
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||||
enum UnicodeEscape {
|
enum UnicodeEscape {
|
||||||
/// A hex escape sequence of either 2 (`\x`), 4 (`\u`) or 8 (`\U`) hex characters.
|
/// A hex escape sequence of either 2 (`\x`), 4 (`\u`) or 8 (`\U`) hex characters.
|
||||||
|
@ -651,6 +731,7 @@ mod tests {
|
||||||
|
|
||||||
let normalized = normalize_string(
|
let normalized = normalize_string(
|
||||||
input,
|
input,
|
||||||
|
0,
|
||||||
StringQuotes {
|
StringQuotes {
|
||||||
triple: false,
|
triple: false,
|
||||||
quote_char: QuoteChar::Double,
|
quote_char: QuoteChar::Double,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue