mirror of
https://github.com/astral-sh/ruff.git
synced 2025-10-02 22:55:08 +00:00
Add support for basic Constant::Str
formatting (#3173)
This PR enables us to apply the proper quotation marks, including support for escapes. There are some significant TODOs, especially around implicit concatenations like:
```py
(
    "abc"
    "def"
)
```
These are represented as a single AST node, which requires us to tokenize _within_ the formatter to identify all the individual string parts.
This commit is contained in:
parent
095f005bf4
commit
f967f344fc
24 changed files with 426 additions and 706 deletions
|
@ -13,6 +13,7 @@ use crate::cst::{
|
|||
Arguments, Boolop, Cmpop, Comprehension, Expr, ExprKind, Keyword, Operator, Unaryop,
|
||||
};
|
||||
use crate::format::helpers::{is_self_closing, is_simple_power, is_simple_slice};
|
||||
use crate::format::strings::string_literal;
|
||||
use crate::shared_traits::AsFormat;
|
||||
use crate::trivia::{Parenthesize, Relationship, TriviaKind};
|
||||
|
||||
|
@ -128,8 +129,6 @@ fn format_tuple(
|
|||
write!(
|
||||
f,
|
||||
[soft_block_indent(&format_with(|f| {
|
||||
// TODO(charlie): If the magic trailing comma isn't present, and the
|
||||
// tuple is _already_ expanded, we're not supposed to add this.
|
||||
let magic_trailing_comma = expr
|
||||
.trivia
|
||||
.iter()
|
||||
|
@ -641,10 +640,21 @@ fn format_joined_str(
|
|||
fn format_constant(
|
||||
f: &mut Formatter<ASTFormatContext<'_>>,
|
||||
expr: &Expr,
|
||||
_constant: &Constant,
|
||||
constant: &Constant,
|
||||
_kind: Option<&str>,
|
||||
) -> FormatResult<()> {
|
||||
write!(f, [literal(Range::from_located(expr))])?;
|
||||
match constant {
|
||||
Constant::None => write!(f, [text("None")])?,
|
||||
Constant::Bool(value) => {
|
||||
if *value {
|
||||
write!(f, [text("True")])?;
|
||||
} else {
|
||||
write!(f, [text("False")])?;
|
||||
}
|
||||
}
|
||||
Constant::Str(_) => write!(f, [string_literal(expr)])?,
|
||||
_ => write!(f, [literal(Range::from_located(expr))])?,
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
@ -10,5 +10,6 @@ mod expr;
|
|||
mod helpers;
|
||||
mod operator;
|
||||
mod stmt;
|
||||
mod strings;
|
||||
mod unaryop;
|
||||
mod withitem;
|
||||
|
|
240
crates/ruff_python_formatter/src/format/strings.rs
Normal file
240
crates/ruff_python_formatter/src/format/strings.rs
Normal file
|
@ -0,0 +1,240 @@
|
|||
use rustpython_parser::{Mode, Tok};
|
||||
|
||||
use ruff_formatter::prelude::*;
|
||||
use ruff_formatter::{write, Format};
|
||||
use ruff_text_size::TextSize;
|
||||
|
||||
use crate::context::ASTFormatContext;
|
||||
use crate::core::helpers::{leading_quote, trailing_quote};
|
||||
use crate::core::types::Range;
|
||||
use crate::cst::Expr;
|
||||
use crate::trivia::Parenthesize;
|
||||
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
pub struct StringLiteralPart {
|
||||
range: Range,
|
||||
}
|
||||
|
||||
impl Format<ASTFormatContext<'_>> for StringLiteralPart {
|
||||
fn fmt(&self, f: &mut Formatter<ASTFormatContext<'_>>) -> FormatResult<()> {
|
||||
let (source, start, end) = f.context().locator().slice(self.range);
|
||||
|
||||
// Extract leading and trailing quotes.
|
||||
let content = &source[start..end];
|
||||
let leading_quote = leading_quote(content).unwrap();
|
||||
let trailing_quote = trailing_quote(content).unwrap();
|
||||
let body = &content[leading_quote.len()..content.len() - trailing_quote.len()];
|
||||
|
||||
// Determine the correct quote style.
|
||||
// TODO(charlie): Make this parameterizable.
|
||||
let mut squotes: usize = 0;
|
||||
let mut dquotes: usize = 0;
|
||||
for char in body.chars() {
|
||||
if char == '\'' {
|
||||
squotes += 1;
|
||||
} else if char == '"' {
|
||||
dquotes += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let mut is_raw = false;
|
||||
if leading_quote.contains('r') {
|
||||
is_raw = true;
|
||||
f.write_element(FormatElement::StaticText { text: "r" })?;
|
||||
} else if leading_quote.contains('R') {
|
||||
is_raw = true;
|
||||
f.write_element(FormatElement::StaticText { text: "R" })?;
|
||||
}
|
||||
|
||||
if trailing_quote.len() == 1 {
|
||||
// Single-quoted string.
|
||||
if dquotes == 0 || squotes > 0 {
|
||||
// If the body doesn't contain any double quotes, or it contains both single and
|
||||
// double quotes, use double quotes.
|
||||
f.write_element(FormatElement::StaticText { text: "\"" })?;
|
||||
f.write_element(FormatElement::DynamicText {
|
||||
text: if is_raw {
|
||||
body.into()
|
||||
} else {
|
||||
double_escape(body).into()
|
||||
},
|
||||
source_position: TextSize::default(),
|
||||
})?;
|
||||
f.write_element(FormatElement::StaticText { text: "\"" })?;
|
||||
Ok(())
|
||||
} else {
|
||||
f.write_element(FormatElement::StaticText { text: "'" })?;
|
||||
f.write_element(FormatElement::DynamicText {
|
||||
text: if is_raw {
|
||||
body.into()
|
||||
} else {
|
||||
single_escape(body).into()
|
||||
},
|
||||
source_position: TextSize::default(),
|
||||
})?;
|
||||
f.write_element(FormatElement::StaticText { text: "'" })?;
|
||||
Ok(())
|
||||
}
|
||||
} else if trailing_quote.len() == 3 {
|
||||
// Triple-quoted string.
|
||||
if body.starts_with("\"\"\"") || body.ends_with('"') {
|
||||
// We only need to use single quotes if the string body starts with three or more
|
||||
// double quotes, or ends with a double quote. Converting to double quotes in those
|
||||
// cases would cause a syntax error.
|
||||
f.write_element(FormatElement::StaticText { text: "'''" })?;
|
||||
f.write_element(FormatElement::DynamicText {
|
||||
text: body.to_string().into_boxed_str(),
|
||||
source_position: TextSize::default(),
|
||||
})?;
|
||||
f.write_element(FormatElement::StaticText { text: "'''" })?;
|
||||
Ok(())
|
||||
} else {
|
||||
f.write_element(FormatElement::StaticText { text: "\"\"\"" })?;
|
||||
f.write_element(FormatElement::DynamicText {
|
||||
text: body.to_string().into_boxed_str(),
|
||||
source_position: TextSize::default(),
|
||||
})?;
|
||||
f.write_element(FormatElement::StaticText { text: "\"\"\"" })?;
|
||||
Ok(())
|
||||
}
|
||||
} else {
|
||||
unreachable!("Invalid quote length: {}", trailing_quote.len());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn string_literal_part(range: Range) -> StringLiteralPart {
|
||||
StringLiteralPart { range }
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct StringLiteral<'a> {
|
||||
expr: &'a Expr,
|
||||
}
|
||||
|
||||
impl Format<ASTFormatContext<'_>> for StringLiteral<'_> {
|
||||
fn fmt(&self, f: &mut Formatter<ASTFormatContext<'_>>) -> FormatResult<()> {
|
||||
let expr = self.expr;
|
||||
|
||||
// TODO(charlie): This tokenization needs to happen earlier, so that we can attach
|
||||
// comments to individual string literals.
|
||||
let (source, start, end) = f.context().locator().slice(Range::from_located(expr));
|
||||
let elts =
|
||||
rustpython_parser::lexer::lex_located(&source[start..end], Mode::Module, expr.location)
|
||||
.flatten()
|
||||
.filter_map(|(start, tok, end)| {
|
||||
if matches!(tok, Tok::String { .. }) {
|
||||
Some(Range::new(start, end))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
write!(
|
||||
f,
|
||||
[group(&format_with(|f| {
|
||||
if matches!(expr.parentheses, Parenthesize::IfExpanded) {
|
||||
write!(f, [if_group_breaks(&text("("))])?;
|
||||
}
|
||||
for (i, elt) in elts.iter().enumerate() {
|
||||
write!(f, [string_literal_part(*elt)])?;
|
||||
if i < elts.len() - 1 {
|
||||
write!(f, [soft_line_break_or_space()])?;
|
||||
}
|
||||
}
|
||||
if matches!(expr.parentheses, Parenthesize::IfExpanded) {
|
||||
write!(f, [if_group_breaks(&text(")"))])?;
|
||||
}
|
||||
Ok(())
|
||||
}))]
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn string_literal(expr: &Expr) -> StringLiteral {
|
||||
StringLiteral { expr }
|
||||
}
|
||||
|
||||
/// Escape a string body to be used in a string literal with double quotes.
///
/// * `\'` is unescaped to `'`, since single quotes need no escape inside
///   double quotes.
/// * A bare `"` is escaped to `\"`.
/// * Every other escape sequence (including `\"` and `\\`) is kept verbatim,
///   so an already-escaped double quote stays escaped and cannot terminate
///   the literal.
/// * A trailing lone backslash is preserved rather than dropped.
fn double_escape(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    let mut chars = text.chars();
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => match chars.next() {
                // The escape is redundant inside double quotes.
                Some('\'') => escaped.push('\''),
                // Keep the escape pair intact. Consuming the second character
                // here also ensures `\\` is never mistaken for the start of a
                // new escape.
                Some(next) => {
                    escaped.push('\\');
                    escaped.push(next);
                }
                None => escaped.push('\\'),
            },
            '"' => escaped.push_str("\\\""),
            _ => escaped.push(ch),
        }
    }
    escaped
}
|
||||
|
||||
/// Escape a string body to be used in a string literal with single quotes.
///
/// * `\"` is unescaped to `"`, since double quotes need no escape inside
///   single quotes.
/// * A bare `'` is escaped to `\'`.
/// * Every other escape sequence (including `\'` and `\\`) is kept verbatim,
///   so an already-escaped single quote stays escaped and cannot terminate
///   the literal.
/// * A trailing lone backslash is preserved rather than dropped.
fn single_escape(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    let mut chars = text.chars();
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => match chars.next() {
                // The escape is redundant inside single quotes.
                Some('"') => escaped.push('"'),
                // Keep the escape pair intact. Consuming the second character
                // here also ensures `\\` is never mistaken for the start of a
                // new escape.
                Some(next) => {
                    escaped.push('\\');
                    escaped.push(next);
                }
                None => escaped.push('\\'),
            },
            '\'' => escaped.push_str("\\'"),
            _ => escaped.push(ch),
        }
    }
    escaped
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{double_escape, single_escape};

    /// Bodies destined for a double-quoted literal.
    #[test]
    fn test_double_escape() {
        let cases = [
            (r#"It\'s mine"#, r#"It's mine"#),
            (r#"It\'s "mine""#, r#"It's \"mine\""#),
            (r#"It\\'s mine"#, r#"It\\'s mine"#),
        ];
        for (input, expected) in cases {
            assert_eq!(double_escape(input), expected);
        }
    }

    /// Bodies destined for a single-quoted literal.
    #[test]
    fn test_single_escape() {
        let cases = [(r#"It's \"mine\""#, r#"It\'s "mine""#)];
        for (input, expected) in cases {
            assert_eq!(single_escape(input), expected);
        }
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue