mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-17 00:50:33 +00:00
formatter: Introduce QuoteMetadata
(#13858)
This commit is contained in:
parent
9e3cf14dde
commit
e9dd92107c
1 changed files with 252 additions and 181 deletions
|
@ -14,7 +14,7 @@ use crate::QuoteStyle;
|
||||||
|
|
||||||
pub(crate) struct StringNormalizer<'a, 'src> {
|
pub(crate) struct StringNormalizer<'a, 'src> {
|
||||||
quoting: Quoting,
|
quoting: Quoting,
|
||||||
preferred_quote_style: QuoteStyle,
|
preferred_quote_style: Option<QuoteStyle>,
|
||||||
context: &'a PyFormatContext<'src>,
|
context: &'a PyFormatContext<'src>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,13 +22,13 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {
|
||||||
pub(crate) fn from_context(context: &'a PyFormatContext<'src>) -> Self {
|
pub(crate) fn from_context(context: &'a PyFormatContext<'src>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
quoting: Quoting::default(),
|
quoting: Quoting::default(),
|
||||||
preferred_quote_style: context.options().quote_style(),
|
preferred_quote_style: None,
|
||||||
context,
|
context,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn with_preferred_quote_style(mut self, quote_style: QuoteStyle) -> Self {
|
pub(crate) fn with_preferred_quote_style(mut self, quote_style: QuoteStyle) -> Self {
|
||||||
self.preferred_quote_style = quote_style;
|
self.preferred_quote_style = Some(quote_style);
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,7 +38,9 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn quoting(&self, string: StringLikePart) -> Quoting {
|
fn quoting(&self, string: StringLikePart) -> Quoting {
|
||||||
if let FStringState::InsideExpressionElement(context) = self.context.f_string_state() {
|
match (self.quoting, self.context.f_string_state()) {
|
||||||
|
(Quoting::Preserve, _) => Quoting::Preserve,
|
||||||
|
|
||||||
// If we're inside an f-string, we need to make sure to preserve the
|
// If we're inside an f-string, we need to make sure to preserve the
|
||||||
// existing quotes unless we're inside a triple-quoted f-string and
|
// existing quotes unless we're inside a triple-quoted f-string and
|
||||||
// the inner string itself isn't triple-quoted. For example:
|
// the inner string itself isn't triple-quoted. For example:
|
||||||
|
@ -53,32 +55,36 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {
|
||||||
// The reason to preserve the quotes is based on the assumption that
|
// The reason to preserve the quotes is based on the assumption that
|
||||||
// the original f-string is valid in terms of quoting, and we don't
|
// the original f-string is valid in terms of quoting, and we don't
|
||||||
// want to change that to make it invalid.
|
// want to change that to make it invalid.
|
||||||
if (context.f_string().flags().is_triple_quoted() && !string.flags().is_triple_quoted())
|
(Quoting::CanChange, FStringState::InsideExpressionElement(context)) => {
|
||||||
|| self.context.options().target_version().supports_pep_701()
|
if (context.f_string().flags().is_triple_quoted()
|
||||||
{
|
&& !string.flags().is_triple_quoted())
|
||||||
self.quoting
|
|| self.context.options().target_version().supports_pep_701()
|
||||||
} else {
|
{
|
||||||
Quoting::Preserve
|
Quoting::CanChange
|
||||||
|
} else {
|
||||||
|
Quoting::Preserve
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
self.quoting
|
(Quoting::CanChange, _) => Quoting::CanChange,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Computes the strings preferred quotes.
|
/// Determines the preferred quote style for `string`.
|
||||||
pub(crate) fn choose_quotes(&self, string: StringLikePart) -> QuoteSelection {
|
/// The formatter should use the preferred quote style unless
|
||||||
let raw_content = self.context.locator().slice(string.content_range());
|
/// it can't because the string contains the preferred quotes OR
|
||||||
let first_quote_or_normalized_char_offset = raw_content
|
/// it leads to more escaping.
|
||||||
.bytes()
|
pub(super) fn preferred_quote_style(&self, string: StringLikePart) -> QuoteStyle {
|
||||||
.position(|b| matches!(b, b'\\' | b'"' | b'\'' | b'\r' | b'{'));
|
match self.quoting(string) {
|
||||||
let string_flags = string.flags();
|
Quoting::Preserve => QuoteStyle::Preserve,
|
||||||
|
|
||||||
let new_kind = match self.quoting(string) {
|
|
||||||
Quoting::Preserve => string_flags,
|
|
||||||
Quoting::CanChange => {
|
Quoting::CanChange => {
|
||||||
|
let preferred_quote_style = self
|
||||||
|
.preferred_quote_style
|
||||||
|
.unwrap_or(self.context.options().quote_style());
|
||||||
|
|
||||||
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
||||||
// Except when using quote-style-preserve.
|
// Except when using quote-style-preserve.
|
||||||
let preferred_style = if string_flags.is_triple_quoted() {
|
if string.flags().is_triple_quoted() {
|
||||||
// ... unless we're formatting a code snippet inside a docstring,
|
// ... unless we're formatting a code snippet inside a docstring,
|
||||||
// then we specifically want to invert our quote style to avoid
|
// then we specifically want to invert our quote style to avoid
|
||||||
// writing out invalid Python.
|
// writing out invalid Python.
|
||||||
|
@ -126,39 +132,48 @@ impl<'a, 'src> StringNormalizer<'a, 'src> {
|
||||||
// if it doesn't have perfect alignment with PEP8.
|
// if it doesn't have perfect alignment with PEP8.
|
||||||
if let Some(quote) = self.context.docstring() {
|
if let Some(quote) = self.context.docstring() {
|
||||||
QuoteStyle::from(quote.opposite())
|
QuoteStyle::from(quote.opposite())
|
||||||
} else if self.preferred_quote_style.is_preserve() {
|
} else if preferred_quote_style.is_preserve() {
|
||||||
QuoteStyle::Preserve
|
QuoteStyle::Preserve
|
||||||
} else {
|
} else {
|
||||||
QuoteStyle::Double
|
QuoteStyle::Double
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
self.preferred_quote_style
|
preferred_quote_style
|
||||||
};
|
|
||||||
|
|
||||||
if let Ok(preferred_quote) = Quote::try_from(preferred_style) {
|
|
||||||
if let Some(first_quote_or_normalized_char_offset) =
|
|
||||||
first_quote_or_normalized_char_offset
|
|
||||||
{
|
|
||||||
if string_flags.is_raw_string() {
|
|
||||||
choose_quotes_for_raw_string(
|
|
||||||
&raw_content[first_quote_or_normalized_char_offset..],
|
|
||||||
string_flags,
|
|
||||||
preferred_quote,
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
choose_quotes_impl(
|
|
||||||
&raw_content[first_quote_or_normalized_char_offset..],
|
|
||||||
string_flags,
|
|
||||||
preferred_quote,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
string_flags.with_quote_style(preferred_quote)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
string_flags
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes the string's preferred quotes.
|
||||||
|
pub(crate) fn choose_quotes(&self, string: StringLikePart) -> QuoteSelection {
|
||||||
|
let raw_content = self.context.locator().slice(string.content_range());
|
||||||
|
let first_quote_or_normalized_char_offset = raw_content
|
||||||
|
.bytes()
|
||||||
|
.position(|b| matches!(b, b'\\' | b'"' | b'\'' | b'\r' | b'{'));
|
||||||
|
let string_flags = string.flags();
|
||||||
|
let preferred_style = self.preferred_quote_style(string);
|
||||||
|
|
||||||
|
let new_kind = match (
|
||||||
|
Quote::try_from(preferred_style),
|
||||||
|
first_quote_or_normalized_char_offset,
|
||||||
|
) {
|
||||||
|
// The string contains no quotes so it's safe to use the preferred quote style
|
||||||
|
(Ok(preferred_quote), None) => string_flags.with_quote_style(preferred_quote),
|
||||||
|
|
||||||
|
// The preferred quote style is single or double quotes, and the string contains a quote or
|
||||||
|
// another character that may require escaping
|
||||||
|
(Ok(preferred_quote), Some(first_quote_or_normalized_char_offset)) => {
|
||||||
|
let quote = QuoteMetadata::from_str(
|
||||||
|
&raw_content[first_quote_or_normalized_char_offset..],
|
||||||
|
string.flags(),
|
||||||
|
preferred_quote,
|
||||||
|
)
|
||||||
|
.choose(preferred_quote);
|
||||||
|
string_flags.with_quote_style(quote)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The preferred quote style is to preserve the quotes, so let's do that.
|
||||||
|
(Err(()), _) => string_flags,
|
||||||
};
|
};
|
||||||
|
|
||||||
QuoteSelection {
|
QuoteSelection {
|
||||||
|
@ -209,119 +224,93 @@ impl QuoteSelection {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub(crate) struct NormalizedString<'a> {
|
pub(crate) struct QuoteMetadata {
|
||||||
/// Holds data about the quotes and prefix of the string
|
kind: QuoteMetadataKind,
|
||||||
flags: AnyStringFlags,
|
|
||||||
|
|
||||||
/// The range of the string's content in the source (minus prefix and quotes).
|
/// The quote style in the source.
|
||||||
content_range: TextRange,
|
source_style: Quote,
|
||||||
|
|
||||||
/// The normalized text
|
|
||||||
text: Cow<'a, str>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> NormalizedString<'a> {
|
/// Tracks information about the used quotes in a string which is used
|
||||||
pub(crate) fn text(&self) -> &Cow<'a, str> {
|
/// to choose the quotes for a part.
|
||||||
&self.text
|
impl QuoteMetadata {
|
||||||
}
|
pub(crate) fn from_str(text: &str, flags: AnyStringFlags, preferred_quote: Quote) -> Self {
|
||||||
|
let kind = if flags.is_raw_string() {
|
||||||
|
QuoteMetadataKind::raw(text, preferred_quote, flags.is_triple_quoted())
|
||||||
|
} else if flags.is_triple_quoted() {
|
||||||
|
QuoteMetadataKind::triple_quoted(text, preferred_quote)
|
||||||
|
} else {
|
||||||
|
QuoteMetadataKind::regular(text)
|
||||||
|
};
|
||||||
|
|
||||||
pub(crate) fn flags(&self) -> AnyStringFlags {
|
Self {
|
||||||
self.flags
|
kind,
|
||||||
}
|
source_style: flags.quote_style(),
|
||||||
}
|
|
||||||
|
|
||||||
impl Ranged for NormalizedString<'_> {
|
|
||||||
fn range(&self) -> TextRange {
|
|
||||||
self.content_range
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
|
||||||
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
|
|
||||||
let quotes = StringQuotes::from(self.flags);
|
|
||||||
ruff_formatter::write!(f, [self.flags.prefix(), quotes])?;
|
|
||||||
match &self.text {
|
|
||||||
Cow::Borrowed(_) => {
|
|
||||||
source_text_slice(self.range()).fmt(f)?;
|
|
||||||
}
|
|
||||||
Cow::Owned(normalized) => {
|
|
||||||
text(normalized).fmt(f)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
quotes.fmt(f)
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/// Choose the appropriate quote style for a raw string.
|
pub(super) fn choose(&self, preferred_quote: Quote) -> Quote {
|
||||||
///
|
match self.kind {
|
||||||
/// The preferred quote style is chosen unless the string contains unescaped quotes of the
|
QuoteMetadataKind::Raw { contains_preferred } => {
|
||||||
/// preferred style. For example, `r"foo"` is chosen over `r'foo'` if the preferred quote
|
if contains_preferred {
|
||||||
/// style is double quotes.
|
self.source_style
|
||||||
fn choose_quotes_for_raw_string(
|
} else {
|
||||||
input: &str,
|
preferred_quote
|
||||||
flags: AnyStringFlags,
|
|
||||||
preferred_quote: Quote,
|
|
||||||
) -> AnyStringFlags {
|
|
||||||
let preferred_quote_char = preferred_quote.as_char();
|
|
||||||
let mut chars = input.chars().peekable();
|
|
||||||
let contains_unescaped_configured_quotes = loop {
|
|
||||||
match chars.next() {
|
|
||||||
Some('\\') => {
|
|
||||||
// Ignore escaped characters
|
|
||||||
chars.next();
|
|
||||||
}
|
|
||||||
// `"` or `'`
|
|
||||||
Some(c) if c == preferred_quote_char => {
|
|
||||||
if !flags.is_triple_quoted() {
|
|
||||||
break true;
|
|
||||||
}
|
|
||||||
|
|
||||||
match chars.peek() {
|
|
||||||
// We can't turn `r'''\""'''` into `r"""\"""""`, this would confuse the parser
|
|
||||||
// about where the closing triple quotes start
|
|
||||||
None => break true,
|
|
||||||
Some(next) if *next == preferred_quote_char => {
|
|
||||||
// `""` or `''`
|
|
||||||
chars.next();
|
|
||||||
|
|
||||||
// We can't turn `r'''""'''` into `r""""""""`, nor can we have
|
|
||||||
// `"""` or `'''` respectively inside the string
|
|
||||||
if chars.peek().is_none() || chars.peek() == Some(&preferred_quote_char) {
|
|
||||||
break true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(_) => continue,
|
QuoteMetadataKind::Triple { contains_preferred } => {
|
||||||
None => break false,
|
if contains_preferred {
|
||||||
|
self.source_style
|
||||||
|
} else {
|
||||||
|
preferred_quote
|
||||||
|
}
|
||||||
|
}
|
||||||
|
QuoteMetadataKind::Regular {
|
||||||
|
single_quotes,
|
||||||
|
double_quotes,
|
||||||
|
} => match single_quotes.cmp(&double_quotes) {
|
||||||
|
Ordering::Less => Quote::Single,
|
||||||
|
Ordering::Equal => preferred_quote,
|
||||||
|
Ordering::Greater => Quote::Double,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
};
|
|
||||||
if contains_unescaped_configured_quotes {
|
|
||||||
flags
|
|
||||||
} else {
|
|
||||||
flags.with_quote_style(preferred_quote)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Choose the appropriate quote style for a string.
|
#[derive(Copy, Clone, Debug)]
|
||||||
///
|
enum QuoteMetadataKind {
|
||||||
/// For single quoted strings, the preferred quote style is used, unless the alternative quote style
|
/// A raw string.
|
||||||
/// would require fewer escapes.
|
///
|
||||||
///
|
/// For raw strings it's only possible to change the quotes if the preferred quote style
|
||||||
/// For triple quoted strings, the preferred quote style is always used, unless the string contains
|
/// isn't used inside the string.
|
||||||
/// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
|
Raw { contains_preferred: bool },
|
||||||
/// used unless the string contains `"""`).
|
|
||||||
fn choose_quotes_impl(
|
/// Regular (non raw) triple quoted string.
|
||||||
input: &str,
|
///
|
||||||
flags: AnyStringFlags,
|
/// For triple quoted strings it's only possible to change the quotes if no
|
||||||
preferred_quote: Quote,
|
/// triple of the preferred quotes is used inside the string.
|
||||||
) -> AnyStringFlags {
|
Triple { contains_preferred: bool },
|
||||||
let quote = if flags.is_triple_quoted() {
|
|
||||||
|
/// A single quoted string that uses either double or single quotes.
|
||||||
|
///
|
||||||
|
/// For regular strings it's desired to pick the quote style that requires the least escaping.
|
||||||
|
/// E.g. pick single quotes for `'A "dog"'` because using double quotes would require escaping
|
||||||
|
/// the two `"`.
|
||||||
|
Regular {
|
||||||
|
single_quotes: u32,
|
||||||
|
double_quotes: u32,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QuoteMetadataKind {
|
||||||
|
/// For triple quoted strings, the preferred quote style can't be used if the string contains
|
||||||
|
/// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
|
||||||
|
/// used unless the string contains `"""`).
|
||||||
|
fn triple_quoted(content: &str, preferred_quote: Quote) -> Self {
|
||||||
// True if the string contains a triple quote sequence of the configured quote style.
|
// True if the string contains a triple quote sequence of the configured quote style.
|
||||||
let mut uses_triple_quotes = false;
|
let mut uses_triple_quotes = false;
|
||||||
let mut chars = input.chars().peekable();
|
let mut chars = content.chars().peekable();
|
||||||
|
|
||||||
while let Some(c) = chars.next() {
|
while let Some(c) = chars.next() {
|
||||||
let preferred_quote_char = preferred_quote.as_char();
|
let preferred_quote_char = preferred_quote.as_char();
|
||||||
|
@ -369,18 +358,18 @@ fn choose_quotes_impl(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if uses_triple_quotes {
|
Self::Triple {
|
||||||
// String contains a triple quote sequence of the configured quote style.
|
contains_preferred: uses_triple_quotes,
|
||||||
// Keep the existing quote style.
|
|
||||||
flags.quote_style()
|
|
||||||
} else {
|
|
||||||
preferred_quote
|
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
|
||||||
|
/// For single quoted strings, the preferred quote style is used, unless the alternative quote style
|
||||||
|
/// would require fewer escapes.
|
||||||
|
fn regular(text: &str) -> Self {
|
||||||
let mut single_quotes = 0u32;
|
let mut single_quotes = 0u32;
|
||||||
let mut double_quotes = 0u32;
|
let mut double_quotes = 0u32;
|
||||||
|
|
||||||
for c in input.chars() {
|
for c in text.chars() {
|
||||||
match c {
|
match c {
|
||||||
'\'' => {
|
'\'' => {
|
||||||
single_quotes += 1;
|
single_quotes += 1;
|
||||||
|
@ -394,25 +383,105 @@ fn choose_quotes_impl(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
match single_quotes.cmp(&double_quotes) {
|
Self::Regular {
|
||||||
Ordering::Less => Quote::Single,
|
single_quotes,
|
||||||
Ordering::Equal => preferred_quote,
|
double_quotes,
|
||||||
Ordering::Greater => Quote::Double,
|
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
|
|
||||||
flags.with_quote_style(quote)
|
/// Computes if a raw string uses the preferred quote. If it does, then it's not possible
|
||||||
|
/// to change the quote style because it would require escaping which isn't possible in raw strings.
|
||||||
|
fn raw(text: &str, preferred: Quote, triple_quoted: bool) -> Self {
|
||||||
|
let mut chars = text.chars().peekable();
|
||||||
|
let preferred_quote_char = preferred.as_char();
|
||||||
|
|
||||||
|
let contains_unescaped_configured_quotes = loop {
|
||||||
|
match chars.next() {
|
||||||
|
Some('\\') => {
|
||||||
|
// Ignore escaped characters
|
||||||
|
chars.next();
|
||||||
|
}
|
||||||
|
// `"` or `'`
|
||||||
|
Some(c) if c == preferred_quote_char => {
|
||||||
|
if !triple_quoted {
|
||||||
|
break true;
|
||||||
|
}
|
||||||
|
|
||||||
|
match chars.peek() {
|
||||||
|
// We can't turn `r'''\""'''` into `r"""\"""""`, this would confuse the parser
|
||||||
|
// about where the closing triple quotes start
|
||||||
|
None => break true,
|
||||||
|
Some(next) if *next == preferred_quote_char => {
|
||||||
|
// `""` or `''`
|
||||||
|
chars.next();
|
||||||
|
|
||||||
|
// We can't turn `r'''""'''` into `r""""""""`, nor can we have
|
||||||
|
// `"""` or `'''` respectively inside the string
|
||||||
|
if chars.peek().is_none() || chars.peek() == Some(&preferred_quote_char)
|
||||||
|
{
|
||||||
|
break true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some(_) => continue,
|
||||||
|
None => break false,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Self::Raw {
|
||||||
|
contains_preferred: contains_unescaped_configured_quotes,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct NormalizedString<'a> {
|
||||||
|
/// Holds data about the quotes and prefix of the string
|
||||||
|
flags: AnyStringFlags,
|
||||||
|
|
||||||
|
/// The range of the string's content in the source (minus prefix and quotes).
|
||||||
|
content_range: TextRange,
|
||||||
|
|
||||||
|
/// The normalized text
|
||||||
|
text: Cow<'a, str>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> NormalizedString<'a> {
|
||||||
|
pub(crate) fn text(&self) -> &Cow<'a, str> {
|
||||||
|
&self.text
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn flags(&self) -> AnyStringFlags {
|
||||||
|
self.flags
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Ranged for NormalizedString<'_> {
|
||||||
|
fn range(&self) -> TextRange {
|
||||||
|
self.content_range
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
||||||
|
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
|
||||||
|
let quotes = StringQuotes::from(self.flags);
|
||||||
|
ruff_formatter::write!(f, [self.flags.prefix(), quotes])?;
|
||||||
|
match &self.text {
|
||||||
|
Cow::Borrowed(_) => source_text_slice(self.range()).fmt(f)?,
|
||||||
|
Cow::Owned(normalized) => text(normalized).fmt(f)?,
|
||||||
|
}
|
||||||
|
|
||||||
|
quotes.fmt(f)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
|
|
||||||
/// with the provided [`StringQuotes`] style.
|
|
||||||
///
|
|
||||||
/// Returns the normalized string and whether it contains new lines.
|
|
||||||
pub(crate) fn normalize_string(
|
pub(crate) fn normalize_string(
|
||||||
input: &str,
|
input: &str,
|
||||||
start_offset: usize,
|
start_offset: usize,
|
||||||
flags: AnyStringFlags,
|
new_flags: AnyStringFlags,
|
||||||
format_fstring: bool,
|
format_f_string: bool,
|
||||||
) -> Cow<str> {
|
) -> Cow<str> {
|
||||||
// The normalized string if `input` is not yet normalized.
|
// The normalized string if `input` is not yet normalized.
|
||||||
// `output` must remain empty if `input` is already normalized.
|
// `output` must remain empty if `input` is already normalized.
|
||||||
|
@ -421,18 +490,19 @@ pub(crate) fn normalize_string(
|
||||||
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
|
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
|
||||||
let mut last_index = 0;
|
let mut last_index = 0;
|
||||||
|
|
||||||
let quote = flags.quote_style();
|
let quote = new_flags.quote_style();
|
||||||
let preferred_quote = quote.as_char();
|
let preferred_quote = quote.as_char();
|
||||||
let opposite_quote = quote.opposite().as_char();
|
let opposite_quote = quote.opposite().as_char();
|
||||||
|
|
||||||
let mut chars = CharIndicesWithOffset::new(input, start_offset).peekable();
|
let mut chars = CharIndicesWithOffset::new(input, start_offset).peekable();
|
||||||
|
|
||||||
let is_raw = flags.is_raw_string();
|
let is_raw = new_flags.is_raw_string();
|
||||||
let is_fstring = !format_fstring && flags.is_f_string();
|
|
||||||
|
let is_fstring = !format_f_string && new_flags.is_f_string();
|
||||||
let mut formatted_value_nesting = 0u32;
|
let mut formatted_value_nesting = 0u32;
|
||||||
|
|
||||||
while let Some((index, c)) = chars.next() {
|
while let Some((index, c)) = chars.next() {
|
||||||
if is_fstring && matches!(c, '{' | '}') {
|
if matches!(c, '{' | '}') && is_fstring {
|
||||||
if chars.peek().copied().is_some_and(|(_, next)| next == c) {
|
if chars.peek().copied().is_some_and(|(_, next)| next == c) {
|
||||||
// Skip over the second character of the double braces
|
// Skip over the second character of the double braces
|
||||||
chars.next();
|
chars.next();
|
||||||
|
@ -444,6 +514,7 @@ pub(crate) fn normalize_string(
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if c == '\r' {
|
if c == '\r' {
|
||||||
output.push_str(&input[last_index..index]);
|
output.push_str(&input[last_index..index]);
|
||||||
|
|
||||||
|
@ -466,8 +537,10 @@ pub(crate) fn normalize_string(
|
||||||
} else {
|
} else {
|
||||||
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
|
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
|
||||||
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
|
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
|
||||||
if let Some(normalised) = UnicodeEscape::new(next, !flags.is_byte_string())
|
if let Some(normalised) =
|
||||||
.and_then(|escape| escape.normalize(&input[index + escape_start_len..]))
|
UnicodeEscape::new(next, !new_flags.is_byte_string()).and_then(
|
||||||
|
|escape| escape.normalize(&input[index + escape_start_len..]),
|
||||||
|
)
|
||||||
{
|
{
|
||||||
let escape_start_offset = index + escape_start_len;
|
let escape_start_offset = index + escape_start_len;
|
||||||
if let Cow::Owned(normalised) = &normalised {
|
if let Cow::Owned(normalised) = &normalised {
|
||||||
|
@ -485,7 +558,7 @@ pub(crate) fn normalize_string(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !flags.is_triple_quoted() {
|
if !new_flags.is_triple_quoted() {
|
||||||
#[allow(clippy::if_same_then_else)]
|
#[allow(clippy::if_same_then_else)]
|
||||||
if next == opposite_quote && formatted_value_nesting == 0 {
|
if next == opposite_quote && formatted_value_nesting == 0 {
|
||||||
// Remove the escape by ending before the backslash and starting again with the quote
|
// Remove the escape by ending before the backslash and starting again with the quote
|
||||||
|
@ -498,7 +571,7 @@ pub(crate) fn normalize_string(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if !flags.is_triple_quoted()
|
} else if !new_flags.is_triple_quoted()
|
||||||
&& c == preferred_quote
|
&& c == preferred_quote
|
||||||
&& formatted_value_nesting == 0
|
&& formatted_value_nesting == 0
|
||||||
{
|
{
|
||||||
|
@ -511,14 +584,12 @@ pub(crate) fn normalize_string(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let normalized = if last_index == 0 {
|
if last_index == 0 {
|
||||||
Cow::Borrowed(input)
|
Cow::Borrowed(input)
|
||||||
} else {
|
} else {
|
||||||
output.push_str(&input[last_index..]);
|
output.push_str(&input[last_index..]);
|
||||||
Cow::Owned(output)
|
Cow::Owned(output)
|
||||||
};
|
}
|
||||||
|
|
||||||
normalized
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
|
@ -671,14 +742,14 @@ impl UnicodeEscape {
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use super::UnicodeEscape;
|
||||||
|
use crate::string::normalize_string;
|
||||||
use ruff_python_ast::{
|
use ruff_python_ast::{
|
||||||
str::Quote,
|
str::Quote,
|
||||||
str_prefix::{AnyStringPrefix, ByteStringPrefix},
|
str_prefix::{AnyStringPrefix, ByteStringPrefix},
|
||||||
AnyStringFlags,
|
AnyStringFlags,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::{normalize_string, UnicodeEscape};
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn normalize_32_escape() {
|
fn normalize_32_escape() {
|
||||||
let escape_sequence = UnicodeEscape::new('U', true).unwrap();
|
let escape_sequence = UnicodeEscape::new('U', true).unwrap();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue