mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-29 13:24:57 +00:00
Perf: Skip string normalization when possible (#10116)
This commit is contained in:
parent
15b87ea8be
commit
8dc22d5793
3 changed files with 170 additions and 88 deletions
|
@ -59,16 +59,16 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
|||
return result;
|
||||
}
|
||||
|
||||
let quotes = normalizer.choose_quotes(&string, &locator);
|
||||
let quote_selection = normalizer.choose_quotes(&string, &locator);
|
||||
|
||||
let context = FStringContext::new(
|
||||
string.prefix(),
|
||||
quotes,
|
||||
quote_selection.quotes(),
|
||||
FStringLayout::from_f_string(self.value, &locator),
|
||||
);
|
||||
|
||||
// Starting prefix and quote
|
||||
write!(f, [string.prefix(), quotes])?;
|
||||
write!(f, [string.prefix(), quote_selection.quotes()])?;
|
||||
|
||||
f.join()
|
||||
.entries(
|
||||
|
@ -80,7 +80,7 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
|||
.finish()?;
|
||||
|
||||
// Ending quote
|
||||
quotes.fmt(f)
|
||||
quote_selection.quotes().fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -59,6 +59,7 @@ impl Format<PyFormatContext<'_>> for FormatFStringLiteralElement<'_> {
|
|||
let literal_content = f.context().locator().slice(self.element.range());
|
||||
let normalized = normalize_string(
|
||||
literal_content,
|
||||
0,
|
||||
self.context.quotes(),
|
||||
self.context.prefix(),
|
||||
is_hex_codes_in_unicode_sequences_enabled(f.context()),
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use std::borrow::Cow;
|
||||
use std::iter::FusedIterator;
|
||||
|
||||
use ruff_formatter::FormatContext;
|
||||
use ruff_source_file::Locator;
|
||||
|
@ -44,8 +45,44 @@ impl StringNormalizer {
|
|||
self
|
||||
}
|
||||
|
||||
/// Returns the [`Quoting`] policy to apply to `string`.
///
/// Outside of an f-string expression element this is the configured `self.quoting`.
/// Inside one, `self.quoting` is only used when the enclosing f-string is triple-quoted
/// and `string` is not, or when the target version supports PEP 701; in every other
/// case the existing quotes are preserved.
fn quoting(&self, string: &StringPart) -> Quoting {
|
||||
if let FStringState::InsideExpressionElement(context) = self.f_string_state {
|
||||
// If we're inside an f-string, we need to make sure to preserve the
|
||||
// existing quotes unless we're inside a triple-quoted f-string and
|
||||
// the inner string itself isn't triple-quoted. For example:
|
||||
//
|
||||
// ```python
|
||||
// f"""outer {"inner"}""" # Valid
|
||||
// f"""outer {"""inner"""}""" # Invalid
|
||||
// ```
|
||||
//
|
||||
// Or, if the target version supports PEP 701.
|
||||
//
|
||||
// The reason to preserve the quotes is based on the assumption that
|
||||
// the original f-string is valid in terms of quoting, and we don't
|
||||
// want to change that to make it invalid.
|
||||
if (context.quotes().is_triple() && !string.quotes().is_triple())
|
||||
|| self.target_version.supports_pep_701()
|
||||
{
|
||||
self.quoting
|
||||
} else {
|
||||
Quoting::Preserve
|
||||
}
|
||||
} else {
|
||||
self.quoting
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the string's preferred quotes.
|
||||
pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> StringQuotes {
|
||||
pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> QuoteSelection {
|
||||
let raw_content = locator.slice(string.content_range());
|
||||
let first_quote_or_normalized_char_offset = raw_content
|
||||
.bytes()
|
||||
.position(|b| matches!(b, b'\\' | b'"' | b'\'' | b'\r' | b'{'));
|
||||
|
||||
let quotes = match self.quoting(string) {
|
||||
Quoting::Preserve => string.quotes(),
|
||||
Quoting::CanChange => {
|
||||
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
||||
// Except when using quote-style-preserve.
|
||||
let preferred_style = if string.quotes().triple {
|
||||
|
@ -105,46 +142,38 @@ impl StringNormalizer {
|
|||
self.preferred_quote_style
|
||||
};
|
||||
|
||||
let quoting = if let FStringState::InsideExpressionElement(context) = self.f_string_state {
|
||||
// If we're inside an f-string, we need to make sure to preserve the
|
||||
// existing quotes unless we're inside a triple-quoted f-string and
|
||||
// the inner string itself isn't triple-quoted. For example:
|
||||
//
|
||||
// ```python
|
||||
// f"""outer {"inner"}""" # Valid
|
||||
// f"""outer {"""inner"""}""" # Invalid
|
||||
// ```
|
||||
//
|
||||
// Or, if the target version supports PEP 701.
|
||||
//
|
||||
// The reason to preserve the quotes is based on the assumption that
|
||||
// the original f-string is valid in terms of quoting, and we don't
|
||||
// want to change that to make it invalid.
|
||||
if (context.quotes().is_triple() && !string.quotes().is_triple())
|
||||
|| self.target_version.supports_pep_701()
|
||||
if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
|
||||
if let Some(first_quote_or_normalized_char_offset) =
|
||||
first_quote_or_normalized_char_offset
|
||||
{
|
||||
self.quoting
|
||||
if string.prefix().is_raw_string() {
|
||||
choose_quotes_for_raw_string(
|
||||
&raw_content[first_quote_or_normalized_char_offset..],
|
||||
string.quotes(),
|
||||
preferred_quote,
|
||||
)
|
||||
} else {
|
||||
Quoting::Preserve
|
||||
choose_quotes_impl(
|
||||
&raw_content[first_quote_or_normalized_char_offset..],
|
||||
string.quotes(),
|
||||
preferred_quote,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
self.quoting
|
||||
};
|
||||
|
||||
match quoting {
|
||||
Quoting::Preserve => string.quotes(),
|
||||
Quoting::CanChange => {
|
||||
if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
|
||||
let raw_content = locator.slice(string.content_range());
|
||||
if string.prefix().is_raw_string() {
|
||||
choose_quotes_for_raw_string(raw_content, string.quotes(), preferred_quote)
|
||||
} else {
|
||||
choose_quotes_impl(raw_content, string.quotes(), preferred_quote)
|
||||
StringQuotes {
|
||||
quote_char: preferred_quote,
|
||||
triple: string.quotes().is_triple(),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
string.quotes()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
QuoteSelection {
|
||||
quotes,
|
||||
first_quote_or_normalized_char_offset,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -156,25 +185,48 @@ impl StringNormalizer {
|
|||
) -> NormalizedString<'a> {
|
||||
let raw_content = locator.slice(string.content_range());
|
||||
|
||||
let quotes = self.choose_quotes(string, locator);
|
||||
let quote_selection = self.choose_quotes(string, locator);
|
||||
|
||||
let normalized = normalize_string(
|
||||
let normalized = if let Some(first_quote_or_escape_offset) =
|
||||
quote_selection.first_quote_or_normalized_char_offset
|
||||
{
|
||||
normalize_string(
|
||||
raw_content,
|
||||
quotes,
|
||||
first_quote_or_escape_offset,
|
||||
quote_selection.quotes,
|
||||
string.prefix(),
|
||||
self.normalize_hex,
|
||||
// TODO: Remove the `b'{'` in `choose_quotes` when promoting the
|
||||
// `format_fstring` preview style
|
||||
self.format_fstring,
|
||||
);
|
||||
)
|
||||
} else {
|
||||
Cow::Borrowed(raw_content)
|
||||
};
|
||||
|
||||
NormalizedString {
|
||||
prefix: string.prefix(),
|
||||
content_range: string.content_range(),
|
||||
text: normalized,
|
||||
quotes,
|
||||
quotes: quote_selection.quotes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The result of `StringNormalizer::choose_quotes`: the quotes selected for a string
/// together with the offset at which normalization has to start looking at its content.
#[derive(Debug)]
|
||||
pub(crate) struct QuoteSelection {
|
||||
/// The quotes selected for the string.
quotes: StringQuotes,
|
||||
|
||||
/// Offset to the first quote character or character that needs special handling in [`normalize_string`].
|
||||
// `None` means no such character was found; `normalize` then borrows the raw content as is.
first_quote_or_normalized_char_offset: Option<usize>,
|
||||
}
|
||||
|
||||
impl QuoteSelection {
|
||||
/// Returns the quotes selected for the string.
pub(crate) fn quotes(&self) -> StringQuotes {
|
||||
self.quotes
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct NormalizedString<'a> {
|
||||
prefix: crate::string::StringPrefix,
|
||||
|
@ -399,6 +451,7 @@ fn choose_quotes_impl(
|
|||
/// Returns the normalized string.
///
/// Scanning for characters that need normalization begins at byte offset `start_offset` of `input`.
|
||||
pub(crate) fn normalize_string(
|
||||
input: &str,
|
||||
start_offset: usize,
|
||||
quotes: StringQuotes,
|
||||
prefix: StringPrefix,
|
||||
normalize_hex: bool,
|
||||
|
@ -415,7 +468,7 @@ pub(crate) fn normalize_string(
|
|||
let preferred_quote = quote.as_char();
|
||||
let opposite_quote = quote.invert().as_char();
|
||||
|
||||
let mut chars = input.char_indices().peekable();
|
||||
let mut chars = CharIndicesWithOffset::new(input, start_offset).peekable();
|
||||
|
||||
let is_raw = prefix.is_raw_string();
|
||||
let is_fstring = !format_fstring && prefix.is_fstring();
|
||||
|
@ -454,13 +507,11 @@ pub(crate) fn normalize_string(
|
|||
// Skip over escaped backslashes
|
||||
chars.next();
|
||||
} else if normalize_hex {
|
||||
if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
|
||||
.and_then(|escape| {
|
||||
escape.normalize(&input[index + c.len_utf8() + next.len_utf8()..])
|
||||
})
|
||||
{
|
||||
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
|
||||
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
|
||||
if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
|
||||
.and_then(|escape| escape.normalize(&input[index + escape_start_len..]))
|
||||
{
|
||||
let escape_start_offset = index + escape_start_len;
|
||||
if let Cow::Owned(normalised) = &normalised {
|
||||
output.push_str(&input[last_index..escape_start_offset]);
|
||||
|
@ -510,6 +561,35 @@ pub(crate) fn normalize_string(
|
|||
normalized
|
||||
}
|
||||
|
||||
/// Iterator over the characters of a string that starts at a given byte offset and
/// yields each character together with its byte index relative to the *whole* string.
///
/// This behaves like `input.char_indices()` with every character before `start_offset`
/// skipped, without having to decode the skipped prefix.
#[derive(Clone, Debug)]
struct CharIndicesWithOffset<'str> {
    /// The characters that have not been yielded yet.
    chars: std::str::Chars<'str>,
    /// Byte index (into the original input) of the next character returned by `chars`.
    next_offset: usize,
}

impl<'str> CharIndicesWithOffset<'str> {
    /// Creates an iterator over `input[start_offset..]` whose indices are offsets into `input`.
    ///
    /// # Panics
    ///
    /// Panics if `start_offset` is greater than `input.len()` or does not lie on a
    /// character boundary, because the input is sliced at that offset.
    fn new(input: &'str str, start_offset: usize) -> Self {
        Self {
            chars: input[start_offset..].chars(),
            next_offset: start_offset,
        }
    }
}

impl Iterator for CharIndicesWithOffset<'_> {
    type Item = (usize, char);

    fn next(&mut self) -> Option<Self::Item> {
        let c = self.chars.next()?;
        let index = self.next_offset;
        // Advance by the encoded width so that the next index is again a byte offset.
        self.next_offset += c.len_utf8();
        Some((index, c))
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // One item is yielded per remaining character, so the inner bounds apply unchanged.
        self.chars.size_hint()
    }
}

impl FusedIterator for CharIndicesWithOffset<'_> {}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
enum UnicodeEscape {
|
||||
/// A hex escape sequence of either 2 (`\x`), 4 (`\u`) or 8 (`\U`) hex characters.
|
||||
|
@ -651,6 +731,7 @@ mod tests {
|
|||
|
||||
let normalized = normalize_string(
|
||||
input,
|
||||
0,
|
||||
StringQuotes {
|
||||
triple: false,
|
||||
quote_char: QuoteChar::Double,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue