split string module (#9987)

2025-09-26 11:59:10 +00:00 · 2024-02-14 18:54:55 +01:00 · 2024-02-14 18:54:55 +01:00 · fe79798c12
commit fe79798c12
parent bb8d2034e2
4 changed files with 847 additions and 813 deletions
--- a/crates/ruff_python_formatter/src/string/any.rs
+++ b/crates/ruff_python_formatter/src/string/any.rs
@ -0,0 +1,212 @@
 use std::iter::FusedIterator;
 use memchr::memchr2;
 use ruff_python_ast::{
    self as ast, AnyNodeRef, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
    StringLiteral,
 };
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextLen, TextRange};
 use crate::expression::expr_f_string::f_string_quoting;
 use crate::other::f_string::FormatFString;
 use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
 use crate::prelude::*;
 use crate::string::{Quoting, StringPrefix, StringQuotes};
 /// Represents any kind of string expression. This could be either a string,
 /// bytes or f-string.
 #[derive(Copy, Clone, Debug)]
 pub(crate) enum AnyString<'a> {
    /// A string literal expression.
    String(&'a ExprStringLiteral),
    /// A bytes literal expression.
    Bytes(&'a ExprBytesLiteral),
    /// An f-string expression.
    FString(&'a ExprFString),
 }
 impl<'a> AnyString<'a> {
    /// Creates a new [`AnyString`] from the given [`Expr`].
    ///
    /// Returns `None` if the expression is not either a string, bytes or f-string.
    pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> {
        match expression {
            Expr::StringLiteral(string) => Some(AnyString::String(string)),
            Expr::BytesLiteral(bytes) => Some(AnyString::Bytes(bytes)),
            Expr::FString(fstring) => Some(AnyString::FString(fstring)),
            _ => None,
        }
    }
    /// Returns `true` if the string is implicitly concatenated.
    pub(crate) fn is_implicit_concatenated(self) -> bool {
        match self {
            Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(),
            Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(),
            Self::FString(ExprFString { value, .. }) => value.is_implicit_concatenated(),
        }
    }
    /// Returns the quoting to be used for this string.
    pub(super) fn quoting(self, locator: &Locator<'_>) -> Quoting {
        match self {
            Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
            Self::FString(f_string) => f_string_quoting(f_string, locator),
        }
    }
    /// Returns a vector of all the [`AnyStringPart`] of this string.
    pub(super) fn parts(self, quoting: Quoting) -> AnyStringPartsIter<'a> {
        match self {
            Self::String(ExprStringLiteral { value, .. }) => {
                AnyStringPartsIter::String(value.iter())
            }
            Self::Bytes(ExprBytesLiteral { value, .. }) => AnyStringPartsIter::Bytes(value.iter()),
            Self::FString(ExprFString { value, .. }) => {
                AnyStringPartsIter::FString(value.iter(), quoting)
            }
        }
    }
    pub(crate) fn is_multiline(self, source: &str) -> bool {
        match self {
            AnyString::String(_) | AnyString::Bytes(_) => {
                let contents = &source[self.range()];
                let prefix = StringPrefix::parse(contents);
                let quotes = StringQuotes::parse(
                    &contents[TextRange::new(prefix.text_len(), contents.text_len())],
                );
                quotes.is_some_and(StringQuotes::is_triple)
                    && memchr2(b'\n', b'\r', contents.as_bytes()).is_some()
            }
            AnyString::FString(fstring) => {
                memchr2(b'\n', b'\r', source[fstring.range].as_bytes()).is_some()
            }
        }
    }
 }
 impl Ranged for AnyString<'_> {
    fn range(&self) -> TextRange {
        match self {
            Self::String(expr) => expr.range(),
            Self::Bytes(expr) => expr.range(),
            Self::FString(expr) => expr.range(),
        }
    }
 }
 impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> {
    /// Converts the wrapped expression into the matching expression-level node reference.
    fn from(value: &AnyString<'a>) -> Self {
        match *value {
            AnyString::String(string) => Self::ExprStringLiteral(string),
            AnyString::Bytes(bytes) => Self::ExprBytesLiteral(bytes),
            AnyString::FString(fstring) => Self::ExprFString(fstring),
        }
    }
 }
 impl<'a> From<AnyString<'a>> for AnyNodeRef<'a> {
    fn from(value: AnyString<'a>) -> Self {
        AnyNodeRef::from(&value)
    }
 }
 impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
    /// Converts the wrapped expression into the matching [`ExpressionRef`] variant.
    fn from(value: &AnyString<'a>) -> Self {
        match *value {
            AnyString::String(string) => Self::StringLiteral(string),
            AnyString::Bytes(bytes) => Self::BytesLiteral(bytes),
            AnyString::FString(fstring) => Self::FString(fstring),
        }
    }
 }
 /// Iterator over the parts of an [`AnyString`], yielding [`AnyStringPart`]s.
 pub(super) enum AnyStringPartsIter<'a> {
    /// Iterates over the literals of a string expression.
    String(std::slice::Iter<'a, StringLiteral>),
    /// Iterates over the literals of a bytes expression.
    Bytes(std::slice::Iter<'a, ast::BytesLiteral>),
    /// Iterates over the parts of an f-string expression; the [`Quoting`] is attached to
    /// every yielded part.
    FString(std::slice::Iter<'a, ast::FStringPart>, Quoting),
 }
 impl<'a> Iterator for AnyStringPartsIter<'a> {
    type Item = AnyStringPart<'a>;
    /// Advances the wrapped slice iterator and wraps the next element in an [`AnyStringPart`].
    fn next(&mut self) -> Option<Self::Item> {
        match self {
            Self::String(literals) => literals.next().map(|part| AnyStringPart::String {
                part,
                layout: StringLiteralKind::String,
            }),
            Self::Bytes(literals) => literals.next().map(AnyStringPart::Bytes),
            Self::FString(parts, quoting) => {
                let quoting = *quoting;
                parts.next().map(|part| match part {
                    // A plain string literal that is part of an implicitly concatenated f-string.
                    ast::FStringPart::Literal(string_literal) => AnyStringPart::String {
                        part: string_literal,
                        layout: StringLiteralKind::InImplicitlyConcatenatedFString(quoting),
                    },
                    ast::FStringPart::FString(f_string) => AnyStringPart::FString {
                        part: f_string,
                        quoting,
                    },
                })
            }
        }
    }
 }
 // Every variant wraps a slice iterator, which keeps returning `None` once it is exhausted.
 impl FusedIterator for AnyStringPartsIter<'_> {}
 /// Represents any kind of string which is part of an implicitly concatenated
 /// string. This could be either a string, bytes or f-string.
 ///
 /// This is constructed from the [`AnyString::parts`] method on [`AnyString`].
 #[derive(Clone, Debug)]
 pub(super) enum AnyStringPart<'a> {
    /// A string literal, together with the layout used to format it.
    String {
        part: &'a ast::StringLiteral,
        layout: StringLiteralKind,
    },
    /// A bytes literal.
    Bytes(&'a ast::BytesLiteral),
    /// An f-string, together with the quoting used to format it.
    FString {
        part: &'a ast::FString,
        quoting: Quoting,
    },
 }
 impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
    /// Converts the part into the node reference of the literal it wraps.
    fn from(value: &AnyStringPart<'a>) -> Self {
        match value {
            AnyStringPart::String { part: literal, .. } => Self::StringLiteral(literal),
            AnyStringPart::Bytes(literal) => Self::BytesLiteral(literal),
            AnyStringPart::FString { part: f_string, .. } => Self::FString(f_string),
        }
    }
 }
 impl Ranged for AnyStringPart<'_> {
    fn range(&self) -> TextRange {
        match self {
            Self::String { part, .. } => part.range(),
            Self::Bytes(part) => part.range(),
            Self::FString { part, .. } => part.range(),
        }
    }
 }
 impl Format<PyFormatContext<'_>> for AnyStringPart<'_> {
    /// Formats the part with the formatter that matches its kind.
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        match self {
            Self::String { part, layout } => FormatStringLiteral::new(part, *layout).fmt(f),
            Self::Bytes(literal) => literal.format().fmt(f),
            Self::FString { part, quoting } => FormatFString::new(part, *quoting).fmt(f),
        }
    }
 }
--- a/crates/ruff_python_formatter/src/string/docstring.rs
+++ b/crates/ruff_python_formatter/src/string/docstring.rs
@ -109,7 +109,7 @@ use super::{NormalizedString, QuoteChar};
 /// `indent-width * spaces` to tabs because doing so could break ASCII art and other docstrings
 /// that use spaces for alignment.
 pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
-    let docstring = &normalized.text;
+    let docstring = &normalized.text();
    // Black doesn't change the indentation of docstrings that contain an escaped newline
    if contains_unescaped_newline(docstring) {
@ -125,7 +125,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
    let mut lines = docstring.split('\n').peekable();
    // Start the string
-    write!(f, [normalized.prefix, normalized.quotes])?;
+    write!(f, [normalized.prefix(), normalized.quotes()])?;
    // We track where in the source docstring we are (in source code byte offsets)
    let mut offset = normalized.start();
@ -141,7 +141,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
    // Edge case: The first line is `""" "content`, so we need to insert a chaperone space that
    // keeps inner quotes and closing quotes from getting too close to avoid `""""content`
-    if trim_both.starts_with(normalized.quotes.quote_char.as_char()) {
+    if trim_both.starts_with(normalized.quotes().quote_char.as_char()) {
        space().fmt(f)?;
    }
@ -168,7 +168,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
        {
            space().fmt(f)?;
        }
-        normalized.quotes.fmt(f)?;
+        normalized.quotes().fmt(f)?;
        return Ok(());
    }
@ -194,7 +194,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
        offset,
        stripped_indentation,
        already_normalized,
-        quote_char: normalized.quotes.quote_char,
+        quote_char: normalized.quotes().quote_char,
        code_example: CodeExample::default(),
    }
    .add_iter(lines)?;
@ -207,7 +207,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
        space().fmt(f)?;
    }
-    write!(f, [normalized.quotes])
+    write!(f, [normalized.quotes()])
 }
 fn contains_unescaped_newline(haystack: &str) -> bool {
@ -1569,7 +1569,7 @@ fn docstring_format_source(
 /// that avoids `content""""` and `content\"""`. This only applies to un-escaped backslashes,
 /// so `content\\ """` doesn't need a space while `content\\\ """` does.
 fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool {
-    trim_end.ends_with(normalized.quotes.quote_char.as_char())
+    trim_end.ends_with(normalized.quotes().quote_char.as_char())
        || trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
 }
--- a/crates/ruff_python_formatter/src/string/mod.rs
+++ b/crates/ruff_python_formatter/src/string/mod.rs
@ -1,27 +1,19 @@
 use std::borrow::Cow;
 use std::iter::FusedIterator;
 use bitflags::bitflags;
 use memchr::memchr2;
-use ruff_formatter::{format_args, write};
+pub(crate) use any::AnyString;
-use ruff_python_ast::{
+pub(crate) use normalize::{NormalizedString, StringNormalizer};
-    self as ast, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
+use ruff_formatter::format_args;
 };
 use ruff_python_ast::{AnyNodeRef, StringLiteral};
 use ruff_source_file::Locator;
-use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
+use ruff_text_size::{TextLen, TextRange, TextSize};
 use crate::comments::{leading_comments, trailing_comments};
 use crate::expression::expr_f_string::f_string_quoting;
 use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space;
 use crate::other::f_string::FormatFString;
 use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
 use crate::prelude::*;
 use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
 use crate::QuoteStyle;
 mod any;
 pub(crate) mod docstring;
 mod normalize;
 #[derive(Copy, Clone, Debug, Default)]
 pub(crate) enum Quoting {
@ -30,202 +22,6 @@ pub(crate) enum Quoting {
    Preserve,
 }
 /// Represents any kind of string expression. This could be either a string,
 /// bytes or f-string.
 #[derive(Copy, Clone, Debug)]
 pub(crate) enum AnyString<'a> {
    /// A string literal expression.
    String(&'a ExprStringLiteral),
    /// A bytes literal expression.
    Bytes(&'a ExprBytesLiteral),
    /// An f-string expression.
    FString(&'a ExprFString),
 }
 impl<'a> AnyString<'a> {
    /// Creates a new [`AnyString`] from the given [`Expr`].
    ///
    /// Returns `None` if the expression is not either a string, bytes or f-string.
    pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> {
        match expression {
            Expr::StringLiteral(string) => Some(AnyString::String(string)),
            Expr::BytesLiteral(bytes) => Some(AnyString::Bytes(bytes)),
            Expr::FString(fstring) => Some(AnyString::FString(fstring)),
            _ => None,
        }
    }
    /// Returns `true` if the string is implicitly concatenated.
    pub(crate) fn is_implicit_concatenated(self) -> bool {
        match self {
            Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(),
            Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(),
            Self::FString(ExprFString { value, .. }) => value.is_implicit_concatenated(),
        }
    }
    /// Returns the quoting to be used for this string.
    fn quoting(self, locator: &Locator<'_>) -> Quoting {
        match self {
            Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
            Self::FString(f_string) => f_string_quoting(f_string, locator),
        }
    }
    /// Returns a vector of all the [`AnyStringPart`] of this string.
    fn parts(self, quoting: Quoting) -> AnyStringPartsIter<'a> {
        match self {
            Self::String(ExprStringLiteral { value, .. }) => {
                AnyStringPartsIter::String(value.iter())
            }
            Self::Bytes(ExprBytesLiteral { value, .. }) => AnyStringPartsIter::Bytes(value.iter()),
            Self::FString(ExprFString { value, .. }) => {
                AnyStringPartsIter::FString(value.iter(), quoting)
            }
        }
    }
    pub(crate) fn is_multiline(self, source: &str) -> bool {
        match self {
            AnyString::String(_) | AnyString::Bytes(_) => {
                let contents = &source[self.range()];
                let prefix = StringPrefix::parse(contents);
                let quotes = StringQuotes::parse(
                    &contents[TextRange::new(prefix.text_len(), contents.text_len())],
                );
                quotes.is_some_and(StringQuotes::is_triple)
                    && memchr2(b'\n', b'\r', contents.as_bytes()).is_some()
            }
            AnyString::FString(fstring) => {
                memchr2(b'\n', b'\r', source[fstring.range].as_bytes()).is_some()
            }
        }
    }
 }
 impl Ranged for AnyString<'_> {
    fn range(&self) -> TextRange {
        match self {
            Self::String(expr) => expr.range(),
            Self::Bytes(expr) => expr.range(),
            Self::FString(expr) => expr.range(),
        }
    }
 }
 impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> {
    /// Converts the wrapped expression into the matching expression-level node reference.
    fn from(value: &AnyString<'a>) -> Self {
        match *value {
            AnyString::String(string) => Self::ExprStringLiteral(string),
            AnyString::Bytes(bytes) => Self::ExprBytesLiteral(bytes),
            AnyString::FString(fstring) => Self::ExprFString(fstring),
        }
    }
 }
 impl<'a> From<AnyString<'a>> for AnyNodeRef<'a> {
    fn from(value: AnyString<'a>) -> Self {
        AnyNodeRef::from(&value)
    }
 }
 impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
    /// Converts the wrapped expression into the matching [`ExpressionRef`] variant.
    fn from(value: &AnyString<'a>) -> Self {
        match *value {
            AnyString::String(string) => Self::StringLiteral(string),
            AnyString::Bytes(bytes) => Self::BytesLiteral(bytes),
            AnyString::FString(fstring) => Self::FString(fstring),
        }
    }
 }
 /// Iterator over the parts of an [`AnyString`], yielding [`AnyStringPart`]s.
 enum AnyStringPartsIter<'a> {
    /// Iterates over the literals of a string expression.
    String(std::slice::Iter<'a, StringLiteral>),
    /// Iterates over the literals of a bytes expression.
    Bytes(std::slice::Iter<'a, ast::BytesLiteral>),
    /// Iterates over the parts of an f-string expression; the [`Quoting`] is attached to
    /// every yielded part.
    FString(std::slice::Iter<'a, ast::FStringPart>, Quoting),
 }
 impl<'a> Iterator for AnyStringPartsIter<'a> {
    type Item = AnyStringPart<'a>;
    /// Advances the wrapped slice iterator and wraps the next element in an [`AnyStringPart`].
    fn next(&mut self) -> Option<Self::Item> {
        match self {
            Self::String(literals) => literals.next().map(|part| AnyStringPart::String {
                part,
                layout: StringLiteralKind::String,
            }),
            Self::Bytes(literals) => literals.next().map(AnyStringPart::Bytes),
            Self::FString(parts, quoting) => {
                let quoting = *quoting;
                parts.next().map(|part| match part {
                    // A plain string literal that is part of an implicitly concatenated f-string.
                    ast::FStringPart::Literal(string_literal) => AnyStringPart::String {
                        part: string_literal,
                        layout: StringLiteralKind::InImplicitlyConcatenatedFString(quoting),
                    },
                    ast::FStringPart::FString(f_string) => AnyStringPart::FString {
                        part: f_string,
                        quoting,
                    },
                })
            }
        }
    }
 }
 // Every variant wraps a slice iterator, which keeps returning `None` once it is exhausted.
 impl FusedIterator for AnyStringPartsIter<'_> {}
 /// Represents any kind of string which is part of an implicitly concatenated
 /// string. This could be either a string, bytes or f-string.
 ///
 /// This is constructed from the [`AnyString::parts`] method on [`AnyString`].
 #[derive(Clone, Debug)]
 enum AnyStringPart<'a> {
    /// A string literal, together with the layout used to format it.
    String {
        part: &'a ast::StringLiteral,
        layout: StringLiteralKind,
    },
    /// A bytes literal.
    Bytes(&'a ast::BytesLiteral),
    /// An f-string, together with the quoting used to format it.
    FString {
        part: &'a ast::FString,
        quoting: Quoting,
    },
 }
 impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
    /// Converts the part into the node reference of the literal it wraps.
    fn from(value: &AnyStringPart<'a>) -> Self {
        match value {
            AnyStringPart::String { part: literal, .. } => Self::StringLiteral(literal),
            AnyStringPart::Bytes(literal) => Self::BytesLiteral(literal),
            AnyStringPart::FString { part: f_string, .. } => Self::FString(f_string),
        }
    }
 }
 impl Ranged for AnyStringPart<'_> {
    fn range(&self) -> TextRange {
        match self {
            Self::String { part, .. } => part.range(),
            Self::Bytes(part) => part.range(),
            Self::FString { part, .. } => part.range(),
        }
    }
 }
 impl Format<PyFormatContext<'_>> for AnyStringPart<'_> {
    /// Formats the part with the formatter that matches its kind.
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        match self {
            Self::String { part, layout } => FormatStringLiteral::new(part, *layout).fmt(f),
            Self::Bytes(literal) => literal.format().fmt(f),
            Self::FString { part, quoting } => FormatFString::new(part, *quoting).fmt(f),
        }
    }
 }
 /// Formats any implicitly concatenated string. This could be any valid combination
 /// of string, bytes or f-string literals.
 pub(crate) struct FormatStringContinuation<'a> {
@ -308,167 +104,6 @@ impl StringPart {
    }
 }
 /// Chooses the quotes for a string part and normalizes its content.
 pub(crate) struct StringNormalizer {
    /// Whether the existing quotes may be changed or must be preserved.
    quoting: Quoting,
    /// The quote style to prefer when the quotes are allowed to change.
    preferred_quote_style: QuoteStyle,
    /// The docstring quote character reported by the format context, if any.
    parent_docstring_quote_char: Option<QuoteChar>,
    /// Flag handed to the content normalization; presumably controls whether hex codes in
    /// escape sequences are normalized (verify against `normalize_string`).
    normalize_hex: bool,
 }
 impl StringNormalizer {
    /// Creates a normalizer with the default quoting and quote style, taking the docstring
    /// quote character and the hex-normalization flag from `context`.
    pub(crate) fn from_context(context: &PyFormatContext<'_>) -> Self {
        let quoting = Quoting::default();
        let preferred_quote_style = QuoteStyle::default();
        let parent_docstring_quote_char = context.docstring();
        let normalize_hex = is_hex_codes_in_unicode_sequences_enabled(context);
        Self {
            quoting,
            preferred_quote_style,
            parent_docstring_quote_char,
            normalize_hex,
        }
    }
    /// Returns the normalizer with its preferred quote style replaced by `quote_style`.
    pub(crate) fn with_preferred_quote_style(self, quote_style: QuoteStyle) -> Self {
        Self {
            preferred_quote_style: quote_style,
            ..self
        }
    }
    /// Returns the normalizer with its quoting replaced by `quoting`.
    pub(crate) fn with_quoting(self, quoting: Quoting) -> Self {
        Self { quoting, ..self }
    }
    /// Computes the string's preferred quotes.
    pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> StringQuotes {
        let original_quotes = string.quotes();
        // Per PEP 8, always prefer double quotes for triple-quoted strings.
        // Except when using quote-style-preserve.
        let preferred_style = if original_quotes.triple {
            // ... unless we're formatting a code snippet inside a docstring,
            // then we specifically want to invert our quote style to avoid
            // writing out invalid Python.
            //
            // It's worth pointing out that we can actually wind up being
            // somewhat out of sync with PEP8 in this case. Consider this
            // example:
            //
            //     def foo():
            //         '''
            //         Something.
            //
            //         >>> """tricksy"""
            //         '''
            //         pass
            //
            // Ideally, this would be reformatted as:
            //
            //     def foo():
            //         """
            //         Something.
            //
            //         >>> '''tricksy'''
            //         """
            //         pass
            //
            // But the logic here results in the original quoting being
            // preserved. This is because the quoting style of the outer
            // docstring is determined, in part, by looking at its contents. In
            // this case, it notices that it contains a `"""` and thus infers
            // that using `'''` would overall read better because it avoids
            // the need to escape the interior `"""`. Except... in this case,
            // the `"""` is actually part of a code snippet that could get
            // reformatted to using a different quoting style itself.
            //
            // Fixing this would, I believe, require some fairly seismic
            // changes to how formatting strings works. Namely, we would need
            // to look for code snippets before normalizing the docstring, and
            // then figure out the quoting style more holistically by looking
            // at the various kinds of quotes used in the code snippets and
            // what reformatting them might look like.
            //
            // Overall this is a bit of a corner case and just inverting the
            // style from what the parent ultimately decided upon works, even
            // if it doesn't have perfect alignment with PEP8.
            match self.parent_docstring_quote_char {
                Some(quote) => QuoteStyle::from(quote.invert()),
                None if self.preferred_quote_style.is_preserve() => QuoteStyle::Preserve,
                None => QuoteStyle::Double,
            }
        } else {
            self.preferred_quote_style
        };
        match self.quoting {
            Quoting::Preserve => original_quotes,
            Quoting::CanChange => {
                // A style without a concrete quote character leaves the quotes untouched.
                let Some(preferred_quote) = QuoteChar::from_style(preferred_style) else {
                    return original_quotes;
                };
                let raw_content = locator.slice(string.content_range());
                if string.prefix().is_raw_string() {
                    choose_quotes_for_raw_string(raw_content, original_quotes, preferred_quote)
                } else {
                    choose_quotes_impl(raw_content, original_quotes, preferred_quote)
                }
            }
        }
    }
    /// Computes the string's preferred quotes and normalizes its content.
    pub(crate) fn normalize<'a>(
        &self,
        string: &StringPart,
        locator: &'a Locator,
    ) -> NormalizedString<'a> {
        let content_range = string.content_range();
        let raw_content = locator.slice(content_range);
        let quotes = self.choose_quotes(string, locator);
        let prefix = string.prefix();
        let normalized_text = normalize_string(raw_content, quotes, prefix, self.normalize_hex);
        NormalizedString {
            prefix,
            content_range,
            text: normalized_text,
            quotes,
        }
    }
 }
 /// A string part after quote selection and content normalization.
 #[derive(Debug)]
 pub(crate) struct NormalizedString<'a> {
    /// The prefix of the string.
    prefix: StringPrefix,
    /// The quotes of the normalized string (preferred quotes)
    quotes: StringQuotes,
    /// The range of the string's content in the source (minus prefix and quotes).
    content_range: TextRange,
    /// The normalized text
    text: Cow<'a, str>,
 }
 impl Ranged for NormalizedString<'_> {
    /// Returns the range of the string's content in the source, i.e. without the prefix
    /// and the quotes.
    fn range(&self) -> TextRange {
        self.content_range
    }
 }
 impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
    /// Writes the prefix, the opening quotes, the content and the closing quotes.
    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
        write!(f, [self.prefix, self.quotes])?;
        if let Cow::Owned(normalized) = &self.text {
            // The content was rewritten: emit the normalized text.
            text(normalized).fmt(f)?;
        } else {
            // The content is unchanged: emit the slice of the source as is.
            source_text_slice(self.range()).fmt(f)?;
        }
        self.quotes.fmt(f)
    }
 }
 bitflags! {
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    pub(crate) struct StringPrefix: u8 {
@ -549,175 +184,6 @@ impl Format<PyFormatContext<'_>> for StringPrefix {
    }
 }
 /// Choose the appropriate quote style for a raw string.
 ///
 /// Raw strings cannot gain new escapes, so the preferred quote style is only used when the
 /// content contains no unescaped quote of that style that would end the string early.
 /// For example, `r"foo"` is chosen over `r'foo'` if the preferred quote style is double
 /// quotes. Otherwise the existing quotes are kept.
 fn choose_quotes_for_raw_string(
    input: &str,
    quotes: StringQuotes,
    preferred_quote: QuoteChar,
 ) -> StringQuotes {
    let preferred_quote_char = preferred_quote.as_char();
    let mut chars = input.chars().peekable();
    let mut contains_unescaped_configured_quotes = false;
    while let Some(c) = chars.next() {
        if c == '\\' {
            // Ignore escaped characters
            chars.next();
            continue;
        }
        if c != preferred_quote_char {
            continue;
        }
        // `"` or `'`
        if !quotes.triple {
            contains_unescaped_configured_quotes = true;
            break;
        }
        match chars.peek() {
            // We can't turn `r'''\""'''` into `r"""\"""""`, this would confuse the parser
            // about where the closing triple quotes start
            None => {
                contains_unescaped_configured_quotes = true;
                break;
            }
            Some(next) if *next == preferred_quote_char => {
                // `""` or `''`
                chars.next();
                // We can't turn `r'''""'''` into `r""""""""`, nor can we have
                // `"""` or `'''` respectively inside the string
                if chars.peek().is_none() || chars.peek() == Some(&preferred_quote_char) {
                    contains_unescaped_configured_quotes = true;
                    break;
                }
            }
            _ => {}
        }
    }
    let quote_char = if contains_unescaped_configured_quotes {
        quotes.quote_char
    } else {
        preferred_quote
    };
    StringQuotes {
        triple: quotes.triple,
        quote_char,
    }
 }
 /// Choose the appropriate quote style for a string.
 ///
 /// Single quoted strings use the preferred quote style, unless the alternative quote style
 /// would require fewer escapes.
 ///
 /// Triple quoted strings always use the preferred quote style, unless the string contains
 /// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will
 /// be used unless the string contains `"""`) or ends with that quote character.
 fn choose_quotes_impl(
    input: &str,
    quotes: StringQuotes,
    preferred_quote: QuoteChar,
 ) -> StringQuotes {
    let quote_char = if quotes.triple {
        let preferred_quote_char = preferred_quote.as_char();
        let mut remaining = input.chars().peekable();
        // True if the string contains a triple quote sequence of the configured quote style.
        let uses_triple_quotes = loop {
            let Some(current) = remaining.next() else {
                break false;
            };
            if current == '\\' {
                if matches!(remaining.peek(), Some('"' | '\\')) {
                    remaining.next();
                }
                continue;
            }
            if current != preferred_quote_char {
                continue;
            }
            // `"` or `'`
            match remaining.peek().copied() {
                // Trailing quote at the end of the string
                None => break true,
                Some(second) if second == preferred_quote_char => {
                    // `""` or `''`
                    remaining.next();
                    match remaining.peek().copied() {
                        // Handle `''' ""'''`. At this point we have consumed both
                        // double quotes, so on the next iteration the iterator is empty
                        // and we'd miss the string ending with a preferred quote
                        None => break true,
                        // `"""` or `'''`
                        Some(third) if third == preferred_quote_char => break true,
                        Some(_) => {}
                    }
                }
                // A single quote char, this is ok
                Some(_) => {}
            }
        };
        if uses_triple_quotes {
            // String contains a triple quote sequence of the configured quote style.
            // Keep the existing quote style.
            quotes.quote_char
        } else {
            preferred_quote
        }
    } else {
        // Count both kinds of quotes in a single pass over the content.
        let (single_quotes, double_quotes) =
            input
                .chars()
                .fold((0u32, 0u32), |(single, double), c| match c {
                    '\'' => (single + 1, double),
                    '"' => (single, double + 1),
                    _ => (single, double),
                });
        // Only switch away from the preferred quote if it occurs strictly more often.
        match preferred_quote {
            QuoteChar::Single if single_quotes > double_quotes => QuoteChar::Double,
            QuoteChar::Double if double_quotes > single_quotes => QuoteChar::Single,
            QuoteChar::Single | QuoteChar::Double => preferred_quote,
        }
    };
    StringQuotes {
        triple: quotes.triple,
        quote_char,
    }
 }
 #[derive(Copy, Clone, Debug)]
 pub(crate) struct StringQuotes {
    triple: bool,
@ -821,269 +287,3 @@ impl TryFrom<char> for QuoteChar {
        }
    }
 }
 /// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
 /// with the provided [`StringQuotes`] style.
 ///
 /// In addition, `\r\n` and lone `\r` line endings are rewritten to `\n` and, when
 /// `normalize_hex` is set, the `\x..`, `\u..`, `\U..` and `\N{..}` escapes of non-raw strings
 /// are rewritten to their normalized spelling.
 ///
 /// Returns `input` itself (borrowed) if it is already normalized, otherwise an owned,
 /// normalized copy.
 pub(crate) fn normalize_string(
    input: &str,
    quotes: StringQuotes,
    prefix: StringPrefix,
    normalize_hex: bool,
 ) -> Cow<str> {
    // The normalized string if `input` is not yet normalized.
    // `output` must remain empty if `input` is already normalized.
    let mut output = String::new();
    // Tracks the last index of `input` that has been written to `output`.
    // If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
    let mut last_index = 0;
    let quote = quotes.quote_char;
    let preferred_quote = quote.as_char();
    let opposite_quote = quote.invert().as_char();
    let mut chars = input.char_indices().peekable();
    let is_raw = prefix.is_raw_string();
    let is_fstring = prefix.is_fstring();
    // Number of currently open `{` of an f-string; quotes are only rewritten at depth `0`.
    let mut formatted_value_nesting = 0u32;
    while let Some((index, c)) = chars.next() {
        if is_fstring && matches!(c, '{' | '}') {
            if chars.peek().copied().is_some_and(|(_, next)| next == c) {
                // Skip over the second character of the double braces
                chars.next();
            } else if c == '{' {
                formatted_value_nesting += 1;
            } else {
                // Safe to assume that `c == '}'` here because of the matched pattern above
                formatted_value_nesting = formatted_value_nesting.saturating_sub(1);
            }
            continue;
        }
        if c == '\r' {
            output.push_str(&input[last_index..index]);
            // Skip over the '\r' character, keep the `\n`
            if chars.peek().copied().is_some_and(|(_, next)| next == '\n') {
                chars.next();
            }
            // Replace the `\r` with a `\n`
            else {
                output.push('\n');
            }
            last_index = index + '\r'.len_utf8();
        } else if !is_raw {
            if c == '\\' {
                // Look at the escaped character without consuming it yet.
                if let Some((_, next)) = chars.peek().copied() {
                    if next == '\\' {
                        // Skip over escaped backslashes
                        chars.next();
                    } else if normalize_hex {
                        if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
                            .and_then(|escape| {
                                escape.normalize(&input[index + c.len_utf8() + next.len_utf8()..])
                            })
                        {
                            // Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
                            let escape_start_len = '\\'.len_utf8() + next.len_utf8();
                            let escape_start_offset = index + escape_start_len;
                            // Only an owned result differs from the source and needs to be written.
                            if let Cow::Owned(normalised) = &normalised {
                                output.push_str(&input[last_index..escape_start_offset]);
                                output.push_str(normalised);
                                last_index = escape_start_offset + normalised.len();
                            };
                            // Move the `chars` iterator past the escape sequence.
                            // Simply reassigning `chars` doesn't work because the indices would
                            // then be off.
                            for _ in 0..next.len_utf8() + normalised.len() {
                                chars.next();
                            }
                        }
                    }
                    if !quotes.triple {
                        if next == opposite_quote && formatted_value_nesting == 0 {
                            // Remove the escape by ending before the backslash and starting again with the quote
                            chars.next();
                            output.push_str(&input[last_index..index]);
                            last_index = index + '\\'.len_utf8();
                        } else if next == preferred_quote {
                            // Quote is already escaped, skip over it.
                            chars.next();
                        }
                    }
                }
            } else if !quotes.triple && c == preferred_quote && formatted_value_nesting == 0 {
                // Escape the quote
                output.push_str(&input[last_index..index]);
                output.push('\\');
                output.push(c);
                last_index = index + preferred_quote.len_utf8();
            }
        }
    }
    if last_index == 0 {
        Cow::Borrowed(input)
    } else {
        output.push_str(&input[last_index..]);
        Cow::Owned(output)
    }
 }
 /// An escape sequence whose spelling can be normalized: a fixed-width hex escape or a
 /// named unicode character.
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 enum UnicodeEscape {
    /// A hex escape sequence of either 2 (`\x`), 4 (`\u`) or 8 (`\U`) hex characters.
    Hex(usize),
    /// An escaped unicode name (`\N{name}`)
    CharacterName,
 }
 impl UnicodeEscape {
    /// Maps the character following the backslash to the escape kind it introduces.
    ///
    /// `\x` is always recognized; `\u`, `\U` and `\N` only when `allow_unicode` is `true`.
    /// Returns `None` for every other character.
    fn new(first: char, allow_unicode: bool) -> Option<UnicodeEscape> {
        match (first, allow_unicode) {
            ('x', _) => Some(UnicodeEscape::Hex(2)),
            ('u', true) => Some(UnicodeEscape::Hex(4)),
            ('U', true) => Some(UnicodeEscape::Hex(8)),
            ('N', true) => Some(UnicodeEscape::CharacterName),
            _ => None,
        }
    }

    /// Normalises the payload of a `\u..`, `\U..`, `\x..` or `\N{..}` escape sequence.
    ///
    /// `input` is the text directly after the escape's introducer (`x`, `u`, `U` or `N`):
    ///
    /// * `\u`, `\U` and `\x`: the hex digits are rewritten to use lower case `a-f`.
    /// * `\N`: the `{name}` part is rewritten to use uppercase letters.
    ///
    /// Returns `None` if `input` doesn't start with a valid payload for this escape. Otherwise
    /// returns exactly the payload: borrowed if it already has the normalized spelling, owned
    /// if it had to be rewritten.
    fn normalize(self, input: &str) -> Option<Cow<str>> {
        match self {
            UnicodeEscape::Hex(digits) => {
                // Too short to hold the required number of digits.
                if input.len() < digits {
                    return None;
                }
                let mut has_uppercase = false;
                for digit in input.chars().take(digits) {
                    if !digit.is_ascii_hexdigit() {
                        return None;
                    }
                    has_uppercase |= digit.is_ascii_uppercase();
                }
                // Every inspected character is an ASCII hex digit, so the first `digits` bytes
                // are exactly the escape's payload.
                let payload = &input[..digits];
                Some(if has_uppercase {
                    Cow::Owned(payload.to_ascii_lowercase())
                } else {
                    Cow::Borrowed(payload)
                })
            }
            UnicodeEscape::CharacterName => {
                let after_brace = input.strip_prefix('{')?;
                // The name runs up to the first character that can't be part of a name. Running
                // out of input before that means the escape is unterminated.
                let name_len = after_brace.find(|c: char| {
                    !matches!(c, '0'..='9' | 'A'..='Z' | 'a'..='z' | ' ' | '-')
                })?;
                // The name must be closed by `}` and be at least two characters long.
                if !after_brace[name_len..].starts_with('}') || name_len < 2 {
                    return None;
                }
                let payload = &input[..'{'.len_utf8() + name_len + '}'.len_utf8()];
                Some(if payload.bytes().any(|byte| byte.is_ascii_lowercase()) {
                    Cow::Owned(payload.to_ascii_uppercase())
                } else {
                    Cow::Borrowed(payload)
                })
            }
        }
    }
 }
 // Unit tests for escape-sequence normalization.
 #[cfg(test)]
 mod tests {
    use crate::string::{normalize_string, QuoteChar, StringPrefix, StringQuotes, UnicodeEscape};
    use std::borrow::Cow;
    // The eight hex digits of a `\U` escape are rewritten to lower case.
    #[test]
    fn normalize_32_escape() {
        let escape_sequence = UnicodeEscape::new('U', true).unwrap();
        assert_eq!(
            Some(Cow::Owned("0001f60e".to_string())),
            escape_sequence.normalize("0001F60E")
        );
    }
    // `\x` escapes are normalized in byte strings too: only the upper case digits change.
    #[test]
    fn normalize_hex_in_byte_string() {
        let input = r"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
        let normalized = normalize_string(
            input,
            StringQuotes {
                triple: false,
                quote_char: QuoteChar::Double,
            },
            StringPrefix::BYTE,
            true,
        );
        assert_eq!(r"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a", &normalized);
    }
 }
--- a/crates/ruff_python_formatter/src/string/normalize.rs
+++ b/crates/ruff_python_formatter/src/string/normalize.rs
@ -0,0 +1,622 @@
 use std::borrow::Cow;
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextRange};
 use crate::prelude::*;
 use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
 use crate::string::{QuoteChar, Quoting, StringPart, StringPrefix, StringQuotes};
 use crate::QuoteStyle;
 /// Chooses the quotes for a string part and normalizes its content for those quotes.
 pub(crate) struct StringNormalizer {
    /// Whether the string's existing quotes must be kept (`Quoting::Preserve`) or may be
    /// replaced (`Quoting::CanChange`).
    quoting: Quoting,
    /// The quote style to use for non-triple-quoted strings. For triple-quoted strings it is
    /// only consulted to check whether the style is "preserve".
    preferred_quote_style: QuoteStyle,
    /// The value of `context.docstring()` at construction time. When set, triple-quoted
    /// strings prefer the inverse of this quote character.
    parent_docstring_quote_char: Option<QuoteChar>,
    /// Forwarded to `normalize_string`: whether hex/unicode escape sequences are normalized.
    normalize_hex: bool,
 }
 impl StringNormalizer {
    pub(crate) fn from_context(context: &PyFormatContext<'_>) -> Self {
        Self {
            quoting: Quoting::default(),
            preferred_quote_style: QuoteStyle::default(),
            parent_docstring_quote_char: context.docstring(),
            normalize_hex: is_hex_codes_in_unicode_sequences_enabled(context),
        }
    }
    pub(crate) fn with_preferred_quote_style(mut self, quote_style: QuoteStyle) -> Self {
        self.preferred_quote_style = quote_style;
        self
    }
    pub(crate) fn with_quoting(mut self, quoting: Quoting) -> Self {
        self.quoting = quoting;
        self
    }
    /// Computes the strings preferred quotes.
    pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> StringQuotes {
        // Per PEP 8, always prefer double quotes for triple-quoted strings.
        // Except when using quote-style-preserve.
        let preferred_style = if string.quotes().triple {
            // ... unless we're formatting a code snippet inside a docstring,
            // then we specifically want to invert our quote style to avoid
            // writing out invalid Python.
            //
            // It's worth pointing out that we can actually wind up being
            // somewhat out of sync with PEP8 in this case. Consider this
            // example:
            //
            //     def foo():
            //         '''
            //         Something.
            //
            //         >>> """tricksy"""
            //         '''
            //         pass
            //
            // Ideally, this would be reformatted as:
            //
            //     def foo():
            //         """
            //         Something.
            //
            //         >>> '''tricksy'''
            //         """
            //         pass
            //
            // But the logic here results in the original quoting being
            // preserved. This is because the quoting style of the outer
            // docstring is determined, in part, by looking at its contents. In
            // this case, it notices that it contains a `"""` and thus infers
            // that using `'''` would overall read better because it avoids
            // the need to escape the interior `"""`. Except... in this case,
            // the `"""` is actually part of a code snippet that could get
            // reformatted to using a different quoting style itself.
            //
            // Fixing this would, I believe, require some fairly seismic
            // changes to how formatting strings works. Namely, we would need
            // to look for code snippets before normalizing the docstring, and
            // then figure out the quoting style more holistically by looking
            // at the various kinds of quotes used in the code snippets and
            // what reformatting them might look like.
            //
            // Overall this is a bit of a corner case and just inverting the
            // style from what the parent ultimately decided upon works, even
            // if it doesn't have perfect alignment with PEP8.
            if let Some(quote) = self.parent_docstring_quote_char {
                QuoteStyle::from(quote.invert())
            } else if self.preferred_quote_style.is_preserve() {
                QuoteStyle::Preserve
            } else {
                QuoteStyle::Double
            }
        } else {
            self.preferred_quote_style
        };
        match self.quoting {
            Quoting::Preserve => string.quotes(),
            Quoting::CanChange => {
                if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
                    let raw_content = locator.slice(string.content_range());
                    if string.prefix().is_raw_string() {
                        choose_quotes_for_raw_string(raw_content, string.quotes(), preferred_quote)
                    } else {
                        choose_quotes_impl(raw_content, string.quotes(), preferred_quote)
                    }
                } else {
                    string.quotes()
                }
            }
        }
    }
    /// Computes the strings preferred quotes and normalizes its content.
    pub(crate) fn normalize<'a>(
        &self,
        string: &StringPart,
        locator: &'a Locator,
    ) -> NormalizedString<'a> {
        let raw_content = locator.slice(string.content_range());
        let quotes = self.choose_quotes(string, locator);
        let normalized = normalize_string(raw_content, quotes, string.prefix(), self.normalize_hex);
        NormalizedString {
            prefix: string.prefix(),
            content_range: string.content_range(),
            text: normalized,
            quotes,
        }
    }
 }
 /// A string part together with the quotes chosen for it and its content normalized for
 /// those quotes.
 #[derive(Debug)]
 pub(crate) struct NormalizedString<'a> {
    /// The prefix of the string part this was created from.
    prefix: crate::string::StringPrefix,
    /// The quotes of the normalized string (preferred quotes)
    quotes: StringQuotes,
    /// The range of the string's content in the source (minus prefix and quotes).
    content_range: TextRange,
    /// The normalized text
    text: Cow<'a, str>,
 }
 impl<'a> NormalizedString<'a> {
    /// Returns the normalized text (without prefix and quotes).
    pub(crate) fn text(&self) -> &Cow<'a, str> {
        &self.text
    }
    /// Returns the quotes chosen for the normalized string.
    pub(crate) fn quotes(&self) -> StringQuotes {
        self.quotes
    }
    /// Returns the string's prefix.
    pub(crate) fn prefix(&self) -> StringPrefix {
        self.prefix
    }
 }
 impl Ranged for NormalizedString<'_> {
    /// Returns the range of the string's content in the source, i.e. `content_range`
    /// (prefix and quotes are not included).
    fn range(&self) -> TextRange {
        self.content_range
    }
 }
 impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
    /// Writes the prefix, the opening quotes, the string's text and the closing quotes.
    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
        ruff_formatter::write!(f, [self.prefix, self.quotes])?;
        if let Cow::Owned(normalized) = &self.text {
            // The content was rewritten: emit the normalized text.
            text(normalized).fmt(f)?;
        } else {
            // The content is unchanged: emit it as a slice of the source.
            source_text_slice(self.range()).fmt(f)?;
        }
        self.quotes.fmt(f)
    }
 }
 /// Choose the appropriate quote style for a raw string.
 ///
 /// Quotes can't be escaped or unescaped in a raw string without changing its value, so the
 /// preferred quote style is only chosen if the content allows it; otherwise the existing
 /// quotes are kept. For example, `r"foo"` is chosen over `r'foo'` if the preferred quote
 /// style is double quotes. The `triple` flag of `quotes` is always carried over.
 fn choose_quotes_for_raw_string(
    input: &str,
    quotes: StringQuotes,
    preferred_quote: QuoteChar,
 ) -> StringQuotes {
    let keep_existing =
        raw_string_conflicts_with_quote(input, preferred_quote.as_char(), quotes.triple);
    let quote_char = if keep_existing {
        quotes.quote_char
    } else {
        preferred_quote
    };
    StringQuotes {
        triple: quotes.triple,
        quote_char,
    }
 }

 /// Returns `true` if the content of a raw string can't be enclosed in `quote` quotes.
 ///
 /// For a single-quoted string, that's any `quote` that isn't preceded by a backslash. For a
 /// triple-quoted string, it's three such quotes in a row, or one or two of them at the very
 /// end of the content.
 fn raw_string_conflicts_with_quote(input: &str, quote: char, triple: bool) -> bool {
    let mut rest = input.chars().peekable();
    while let Some(current) = rest.next() {
        if current == '\\' {
            // Ignore escaped characters
            rest.next();
            continue;
        }
        if current != quote {
            continue;
        }
        if !triple {
            return true;
        }
        // We can't turn `r'''\""'''` into `r"""\"""""`, this would confuse the parser
        // about where the closing triple quotes start
        if rest.peek().is_none() {
            return true;
        }
        // `""` or `''`
        if rest.next_if_eq(&quote).is_some() {
            // We can't turn `r'''""'''` into `r""""""""`, nor can we have
            // `"""` or `'''` respectively inside the string
            match rest.peek() {
                None => return true,
                Some(&third) if third == quote => return true,
                Some(_) => {}
            }
        }
    }
    false
 }
 /// Choose the appropriate quote style for a string.
 ///
 /// For single quoted strings, the preferred quote style is used, unless the alternative quote
 /// style would require fewer escapes (i.e. the content contains more preferred quotes than
 /// alternative quotes).
 ///
 /// For triple quoted strings, the preferred quote style is always used, unless the string
 /// contains a triplet of the quote character (e.g., if double quotes are preferred, double
 /// quotes will be used unless the string contains `"""`) or ends with the quote character.
 /// In that case the existing quotes are kept.
 fn choose_quotes_impl(
    input: &str,
    quotes: StringQuotes,
    preferred_quote: QuoteChar,
 ) -> StringQuotes {
    let quote_char = if quotes.triple {
        if contains_preferred_triple_quote(input, preferred_quote.as_char()) {
            // Keep the existing quote style.
            quotes.quote_char
        } else {
            preferred_quote
        }
    } else {
        let (single_quotes, double_quotes) =
            input
                .chars()
                .fold((0u32, 0u32), |(single, double), c| match c {
                    '\'' => (single + 1, double),
                    '"' => (single, double + 1),
                    _ => (single, double),
                });
        // Only switch away from the preferred quote if it occurs strictly more often.
        match preferred_quote {
            QuoteChar::Single if single_quotes > double_quotes => QuoteChar::Double,
            QuoteChar::Double if double_quotes > single_quotes => QuoteChar::Single,
            unchanged => unchanged,
        }
    };
    StringQuotes {
        triple: quotes.triple,
        quote_char,
    }
 }

 /// Returns `true` if the content of a triple quoted string can't be enclosed in triple
 /// `quote` quotes: it contains three `quote` characters in a row, or it ends with one or
 /// two of them.
 ///
 /// A backslash only shields a directly following `"` or `\`.
 fn contains_preferred_triple_quote(input: &str, quote: char) -> bool {
    let mut rest = input.chars().peekable();
    while let Some(current) = rest.next() {
        if current == '\\' {
            rest.next_if(|&escaped| matches!(escaped, '"' | '\\'));
            continue;
        }
        if current != quote {
            continue;
        }
        if rest.next_if_eq(&quote).is_some() {
            // `""` or `''`: a third quote completes the triplet. Running out of input
            // (e.g. `''' ""'''`) means the string ends with the preferred quote.
            match rest.peek() {
                None => return true,
                Some(&third) if third == quote => return true,
                Some(_) => {}
            }
        } else if rest.peek().is_none() {
            // Trailing quote at the end of the string
            return true;
        }
        // Otherwise: a single quote char, this is ok
    }
    false
 }
 /// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
 /// with the provided [`StringQuotes`] style.
 ///
 /// In addition, `\r\n` and lone `\r` line endings are rewritten to `\n` and, when
 /// `normalize_hex` is set, the `\x..`, `\u..`, `\U..` and `\N{..}` escapes of non-raw strings
 /// are rewritten to their normalized spelling.
 ///
 /// Returns `input` itself (borrowed) if it is already normalized, otherwise an owned,
 /// normalized copy.
 pub(crate) fn normalize_string(
    input: &str,
    quotes: StringQuotes,
    prefix: StringPrefix,
    normalize_hex: bool,
 ) -> Cow<str> {
    // The normalized string if `input` is not yet normalized.
    // `output` must remain empty if `input` is already normalized.
    let mut output = String::new();
    // Tracks the last index of `input` that has been written to `output`.
    // If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
    let mut last_index = 0;
    let quote = quotes.quote_char;
    let preferred_quote = quote.as_char();
    let opposite_quote = quote.invert().as_char();
    let mut chars = input.char_indices().peekable();
    let is_raw = prefix.is_raw_string();
    let is_fstring = prefix.is_fstring();
    // Number of currently open `{` of an f-string; quotes are only rewritten at depth `0`.
    let mut formatted_value_nesting = 0u32;
    while let Some((index, c)) = chars.next() {
        if is_fstring && matches!(c, '{' | '}') {
            if chars.peek().copied().is_some_and(|(_, next)| next == c) {
                // Skip over the second character of the double braces
                chars.next();
            } else if c == '{' {
                formatted_value_nesting += 1;
            } else {
                // Safe to assume that `c == '}'` here because of the matched pattern above
                formatted_value_nesting = formatted_value_nesting.saturating_sub(1);
            }
            continue;
        }
        if c == '\r' {
            output.push_str(&input[last_index..index]);
            // Skip over the '\r' character, keep the `\n`
            if chars.peek().copied().is_some_and(|(_, next)| next == '\n') {
                chars.next();
            }
            // Replace the `\r` with a `\n`
            else {
                output.push('\n');
            }
            last_index = index + '\r'.len_utf8();
        } else if !is_raw {
            if c == '\\' {
                // Look at the escaped character without consuming it yet.
                if let Some((_, next)) = chars.peek().copied() {
                    if next == '\\' {
                        // Skip over escaped backslashes
                        chars.next();
                    } else if normalize_hex {
                        if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
                            .and_then(|escape| {
                                escape.normalize(&input[index + c.len_utf8() + next.len_utf8()..])
                            })
                        {
                            // Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
                            let escape_start_len = '\\'.len_utf8() + next.len_utf8();
                            let escape_start_offset = index + escape_start_len;
                            // Only an owned result differs from the source and needs to be written.
                            if let Cow::Owned(normalised) = &normalised {
                                output.push_str(&input[last_index..escape_start_offset]);
                                output.push_str(normalised);
                                last_index = escape_start_offset + normalised.len();
                            };
                            // Move the `chars` iterator past the escape sequence.
                            // Simply reassigning `chars` doesn't work because the indices would
                            // then be off.
                            for _ in 0..next.len_utf8() + normalised.len() {
                                chars.next();
                            }
                        }
                    }
                    if !quotes.triple {
                        if next == opposite_quote && formatted_value_nesting == 0 {
                            // Remove the escape by ending before the backslash and starting again with the quote
                            chars.next();
                            output.push_str(&input[last_index..index]);
                            last_index = index + '\\'.len_utf8();
                        } else if next == preferred_quote {
                            // Quote is already escaped, skip over it.
                            chars.next();
                        }
                    }
                }
            } else if !quotes.triple && c == preferred_quote && formatted_value_nesting == 0 {
                // Escape the quote
                output.push_str(&input[last_index..index]);
                output.push('\\');
                output.push(c);
                last_index = index + preferred_quote.len_utf8();
            }
        }
    }
    if last_index == 0 {
        Cow::Borrowed(input)
    } else {
        output.push_str(&input[last_index..]);
        Cow::Owned(output)
    }
 }
 /// An escape sequence whose spelling can be normalized: a fixed-width hex escape or a
 /// named unicode character.
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 enum UnicodeEscape {
    /// A hex escape sequence of either 2 (`\x`), 4 (`\u`) or 8 (`\U`) hex characters.
    Hex(usize),
    /// An escaped unicode name (`\N{name}`)
    CharacterName,
 }
 impl UnicodeEscape {
    /// Maps the character following the backslash to the escape kind it introduces.
    ///
    /// `\x` is always recognized; `\u`, `\U` and `\N` only when `allow_unicode` is `true`.
    /// Returns `None` for every other character.
    fn new(first: char, allow_unicode: bool) -> Option<UnicodeEscape> {
        match (first, allow_unicode) {
            ('x', _) => Some(UnicodeEscape::Hex(2)),
            ('u', true) => Some(UnicodeEscape::Hex(4)),
            ('U', true) => Some(UnicodeEscape::Hex(8)),
            ('N', true) => Some(UnicodeEscape::CharacterName),
            _ => None,
        }
    }

    /// Normalises the payload of a `\u..`, `\U..`, `\x..` or `\N{..}` escape sequence.
    ///
    /// `input` is the text directly after the escape's introducer (`x`, `u`, `U` or `N`):
    ///
    /// * `\u`, `\U` and `\x`: the hex digits are rewritten to use lower case `a-f`.
    /// * `\N`: the `{name}` part is rewritten to use uppercase letters.
    ///
    /// Returns `None` if `input` doesn't start with a valid payload for this escape. Otherwise
    /// returns exactly the payload: borrowed if it already has the normalized spelling, owned
    /// if it had to be rewritten.
    fn normalize(self, input: &str) -> Option<Cow<str>> {
        match self {
            UnicodeEscape::Hex(digits) => {
                // Too short to hold the required number of digits.
                if input.len() < digits {
                    return None;
                }
                let mut has_uppercase = false;
                for digit in input.chars().take(digits) {
                    if !digit.is_ascii_hexdigit() {
                        return None;
                    }
                    has_uppercase |= digit.is_ascii_uppercase();
                }
                // Every inspected character is an ASCII hex digit, so the first `digits` bytes
                // are exactly the escape's payload.
                let payload = &input[..digits];
                Some(if has_uppercase {
                    Cow::Owned(payload.to_ascii_lowercase())
                } else {
                    Cow::Borrowed(payload)
                })
            }
            UnicodeEscape::CharacterName => {
                let after_brace = input.strip_prefix('{')?;
                // The name runs up to the first character that can't be part of a name. Running
                // out of input before that means the escape is unterminated.
                let name_len = after_brace.find(|c: char| {
                    !matches!(c, '0'..='9' | 'A'..='Z' | 'a'..='z' | ' ' | '-')
                })?;
                // The name must be closed by `}` and be at least two characters long.
                if !after_brace[name_len..].starts_with('}') || name_len < 2 {
                    return None;
                }
                let payload = &input[..'{'.len_utf8() + name_len + '}'.len_utf8()];
                Some(if payload.bytes().any(|byte| byte.is_ascii_lowercase()) {
                    Cow::Owned(payload.to_ascii_uppercase())
                } else {
                    Cow::Borrowed(payload)
                })
            }
        }
    }
 }
 // Unit tests for escape-sequence normalization.
 #[cfg(test)]
 mod tests {
    use std::borrow::Cow;
    use crate::string::{QuoteChar, StringPrefix, StringQuotes};
    use super::{normalize_string, UnicodeEscape};
    // The eight hex digits of a `\U` escape are rewritten to lower case.
    #[test]
    fn normalize_32_escape() {
        let escape_sequence = UnicodeEscape::new('U', true).unwrap();
        assert_eq!(
            Some(Cow::Owned("0001f60e".to_string())),
            escape_sequence.normalize("0001F60E")
        );
    }
    // `\x` escapes are normalized in byte strings too: only the upper case digits change.
    #[test]
    fn normalize_hex_in_byte_string() {
        let input = r"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
        let normalized = normalize_string(
            input,
            StringQuotes {
                triple: false,
                quote_char: QuoteChar::Double,
            },
            StringPrefix::BYTE,
            true,
        );
        assert_eq!(r"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a", &normalized);
    }
 }