mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-22 11:25:26 +00:00
Add support for basic Constant::Str
formatting (#3173)
This PR enables us to apply the proper quotation marks, including support for escapes. There are some significant TODOs, especially around implicit concatenations like:

```py
(
    "abc"
    "def"
)
```

These are represented as a single AST node, which requires us to tokenize _within_ the formatter to identify all the individual string parts.
This commit is contained in:
parent
095f005bf4
commit
f967f344fc
24 changed files with 426 additions and 706 deletions
240
crates/ruff_python_formatter/src/format/strings.rs
Normal file
240
crates/ruff_python_formatter/src/format/strings.rs
Normal file
|
@ -0,0 +1,240 @@
|
|||
use rustpython_parser::{Mode, Tok};
|
||||
|
||||
use ruff_formatter::prelude::*;
|
||||
use ruff_formatter::{write, Format};
|
||||
use ruff_text_size::TextSize;
|
||||
|
||||
use crate::context::ASTFormatContext;
|
||||
use crate::core::helpers::{leading_quote, trailing_quote};
|
||||
use crate::core::types::Range;
|
||||
use crate::cst::Expr;
|
||||
use crate::trivia::Parenthesize;
|
||||
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
pub struct StringLiteralPart {
|
||||
range: Range,
|
||||
}
|
||||
|
||||
impl Format<ASTFormatContext<'_>> for StringLiteralPart {
|
||||
fn fmt(&self, f: &mut Formatter<ASTFormatContext<'_>>) -> FormatResult<()> {
|
||||
let (source, start, end) = f.context().locator().slice(self.range);
|
||||
|
||||
// Extract leading and trailing quotes.
|
||||
let content = &source[start..end];
|
||||
let leading_quote = leading_quote(content).unwrap();
|
||||
let trailing_quote = trailing_quote(content).unwrap();
|
||||
let body = &content[leading_quote.len()..content.len() - trailing_quote.len()];
|
||||
|
||||
// Determine the correct quote style.
|
||||
// TODO(charlie): Make this parameterizable.
|
||||
let mut squotes: usize = 0;
|
||||
let mut dquotes: usize = 0;
|
||||
for char in body.chars() {
|
||||
if char == '\'' {
|
||||
squotes += 1;
|
||||
} else if char == '"' {
|
||||
dquotes += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let mut is_raw = false;
|
||||
if leading_quote.contains('r') {
|
||||
is_raw = true;
|
||||
f.write_element(FormatElement::StaticText { text: "r" })?;
|
||||
} else if leading_quote.contains('R') {
|
||||
is_raw = true;
|
||||
f.write_element(FormatElement::StaticText { text: "R" })?;
|
||||
}
|
||||
|
||||
if trailing_quote.len() == 1 {
|
||||
// Single-quoted string.
|
||||
if dquotes == 0 || squotes > 0 {
|
||||
// If the body doesn't contain any double quotes, or it contains both single and
|
||||
// double quotes, use double quotes.
|
||||
f.write_element(FormatElement::StaticText { text: "\"" })?;
|
||||
f.write_element(FormatElement::DynamicText {
|
||||
text: if is_raw {
|
||||
body.into()
|
||||
} else {
|
||||
double_escape(body).into()
|
||||
},
|
||||
source_position: TextSize::default(),
|
||||
})?;
|
||||
f.write_element(FormatElement::StaticText { text: "\"" })?;
|
||||
Ok(())
|
||||
} else {
|
||||
f.write_element(FormatElement::StaticText { text: "'" })?;
|
||||
f.write_element(FormatElement::DynamicText {
|
||||
text: if is_raw {
|
||||
body.into()
|
||||
} else {
|
||||
single_escape(body).into()
|
||||
},
|
||||
source_position: TextSize::default(),
|
||||
})?;
|
||||
f.write_element(FormatElement::StaticText { text: "'" })?;
|
||||
Ok(())
|
||||
}
|
||||
} else if trailing_quote.len() == 3 {
|
||||
// Triple-quoted string.
|
||||
if body.starts_with("\"\"\"") || body.ends_with('"') {
|
||||
// We only need to use single quotes if the string body starts with three or more
|
||||
// double quotes, or ends with a double quote. Converting to double quotes in those
|
||||
// cases would cause a syntax error.
|
||||
f.write_element(FormatElement::StaticText { text: "'''" })?;
|
||||
f.write_element(FormatElement::DynamicText {
|
||||
text: body.to_string().into_boxed_str(),
|
||||
source_position: TextSize::default(),
|
||||
})?;
|
||||
f.write_element(FormatElement::StaticText { text: "'''" })?;
|
||||
Ok(())
|
||||
} else {
|
||||
f.write_element(FormatElement::StaticText { text: "\"\"\"" })?;
|
||||
f.write_element(FormatElement::DynamicText {
|
||||
text: body.to_string().into_boxed_str(),
|
||||
source_position: TextSize::default(),
|
||||
})?;
|
||||
f.write_element(FormatElement::StaticText { text: "\"\"\"" })?;
|
||||
Ok(())
|
||||
}
|
||||
} else {
|
||||
unreachable!("Invalid quote length: {}", trailing_quote.len());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn string_literal_part(range: Range) -> StringLiteralPart {
|
||||
StringLiteralPart { range }
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct StringLiteral<'a> {
|
||||
expr: &'a Expr,
|
||||
}
|
||||
|
||||
impl Format<ASTFormatContext<'_>> for StringLiteral<'_> {
|
||||
fn fmt(&self, f: &mut Formatter<ASTFormatContext<'_>>) -> FormatResult<()> {
|
||||
let expr = self.expr;
|
||||
|
||||
// TODO(charlie): This tokenization needs to happen earlier, so that we can attach
|
||||
// comments to individual string literals.
|
||||
let (source, start, end) = f.context().locator().slice(Range::from_located(expr));
|
||||
let elts =
|
||||
rustpython_parser::lexer::lex_located(&source[start..end], Mode::Module, expr.location)
|
||||
.flatten()
|
||||
.filter_map(|(start, tok, end)| {
|
||||
if matches!(tok, Tok::String { .. }) {
|
||||
Some(Range::new(start, end))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
write!(
|
||||
f,
|
||||
[group(&format_with(|f| {
|
||||
if matches!(expr.parentheses, Parenthesize::IfExpanded) {
|
||||
write!(f, [if_group_breaks(&text("("))])?;
|
||||
}
|
||||
for (i, elt) in elts.iter().enumerate() {
|
||||
write!(f, [string_literal_part(*elt)])?;
|
||||
if i < elts.len() - 1 {
|
||||
write!(f, [soft_line_break_or_space()])?;
|
||||
}
|
||||
}
|
||||
if matches!(expr.parentheses, Parenthesize::IfExpanded) {
|
||||
write!(f, [if_group_breaks(&text(")"))])?;
|
||||
}
|
||||
Ok(())
|
||||
}))]
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn string_literal(expr: &Expr) -> StringLiteral {
|
||||
StringLiteral { expr }
|
||||
}
|
||||
|
||||
/// Escape a string body to be used in a string literal with double quotes.
///
/// * `\'` is unescaped to `'`, since single quotes need no escape here.
/// * A bare `"` is escaped to `\"`.
/// * An already-escaped `\"` is kept as `\"`; unescaping it would terminate the
///   surrounding double-quoted literal early.
/// * `\\` is kept as a pair, so the character after it is never mistaken for
///   the target of the escape.
/// * Every other character, including other escape sequences, is copied as-is.
///
/// A trailing backslash with nothing after it is dropped.
fn double_escape(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                let Some(&next) = chars.peek() else {
                    break;
                };
                match next {
                    // The escape is redundant inside double quotes.
                    '\'' => {
                        chars.next();
                        escaped.push('\'');
                    }
                    // Consume both characters and keep the escape intact.
                    '"' | '\\' => {
                        chars.next();
                        escaped.push('\\');
                        escaped.push(next);
                    }
                    // Some other escape sequence: keep the backslash and let the
                    // next iteration copy the character that follows it.
                    _ => escaped.push('\\'),
                }
            }
            '"' => escaped.push_str("\\\""),
            _ => escaped.push(ch),
        }
    }
    escaped
}
|
||||
|
||||
/// Escape a string body to be used in a string literal with single quotes.
///
/// * `\"` is unescaped to `"`, since double quotes need no escape here.
/// * A bare `'` is escaped to `\'`.
/// * An already-escaped `\'` is kept as `\'`; unescaping it would terminate the
///   surrounding single-quoted literal early.
/// * `\\` is kept as a pair, so the character after it is never mistaken for
///   the target of the escape.
/// * Every other character, including other escape sequences, is copied as-is.
///
/// A trailing backslash with nothing after it is dropped.
fn single_escape(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                let Some(&next) = chars.peek() else {
                    break;
                };
                match next {
                    // The escape is redundant inside single quotes.
                    '"' => {
                        chars.next();
                        escaped.push('"');
                    }
                    // Consume both characters and keep the escape intact.
                    '\'' | '\\' => {
                        chars.next();
                        escaped.push('\\');
                        escaped.push(next);
                    }
                    // Some other escape sequence: keep the backslash and let the
                    // next iteration copy the character that follows it.
                    _ => escaped.push('\\'),
                }
            }
            '\'' => escaped.push_str("\\'"),
            _ => escaped.push(ch),
        }
    }
    escaped
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{double_escape, single_escape};

    /// Bodies re-escaped for a double-quoted literal.
    #[test]
    fn test_double_escape() {
        let cases = [
            (r#"It\'s mine"#, r#"It's mine"#),
            (r#"It\'s "mine""#, r#"It's \"mine\""#),
            (r#"It\\'s mine"#, r#"It\\'s mine"#),
        ];
        for (input, expected) in cases {
            assert_eq!(double_escape(input), expected);
        }
    }

    /// Bodies re-escaped for a single-quoted literal.
    #[test]
    fn test_single_escape() {
        let cases = [(r#"It's \"mine\""#, r#"It\'s "mine""#)];
        for (input, expected) in cases {
            assert_eq!(single_escape(input), expected);
        }
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue