mirror of
https://github.com/astral-sh/ruff.git
synced 2025-10-06 00:20:37 +00:00

This PR splits the string formatting code in the formatter so that it is handled by the respective nodes. Previously, string formatting was done through a single `FormatString` interface. Now, the nodes themselves are responsible for formatting.

The following changes were made:

1. Remove `StringLayout::ImplicitStringConcatenationInBinaryLike` and inline the call to `FormatStringContinuation`. After the refactor, the binary-like formatting would delegate to `FormatString`, which would then delegate to `FormatStringContinuation`. This removes the intermediary steps.
2. Add a formatter implementation for `FStringPart` which delegates to the respective string literal or f-string node.
3. Add `ExprStringLiteralKind`, which is either `String` or `Docstring`. If it's the docstring variant, then the string expression is not implicitly concatenated. This is guaranteed by the `DocstringStmt::try_from_expression` constructor.
4. Add `StringLiteralKind`, which is either `String`, `Docstring` or `InImplicitlyConcatenatedFString`. The last variant is for when the string literal is implicitly concatenated with an f-string (`"foo" f"bar {x}"`).
5. Remove `FormatString`.
6. Extract the f-string quote detection as a standalone function which is public to the crate. This is used to detect the quote to be used for an f-string at the expression level (`ExprFString` or `FormatStringContinuation`).
### Formatter ecosystem result

**This PR**

| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |

**main**

| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |
334 lines
9.6 KiB
Rust
334 lines
9.6 KiB
Rust
use crate::comments::Comments;
|
|
use crate::string::QuoteChar;
|
|
use crate::PyFormatOptions;
|
|
use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode};
|
|
use ruff_source_file::Locator;
|
|
use std::fmt::{Debug, Formatter};
|
|
use std::ops::{Deref, DerefMut};
|
|
|
|
#[derive(Clone)]
|
|
pub struct PyFormatContext<'a> {
|
|
options: PyFormatOptions,
|
|
contents: &'a str,
|
|
comments: Comments<'a>,
|
|
node_level: NodeLevel,
|
|
indent_level: IndentLevel,
|
|
/// Set to a non-None value when the formatter is running on a code
|
|
/// snippet within a docstring. The value should be the quote character of the
|
|
/// docstring containing the code snippet.
|
|
///
|
|
/// Various parts of the formatter may inspect this state to change how it
|
|
/// works. For example, multi-line strings will always be written with a
|
|
/// quote style that is inverted from the one here in order to ensure that
|
|
/// the formatted Python code will be valid.
|
|
docstring: Option<QuoteChar>,
|
|
}
|
|
|
|
impl<'a> PyFormatContext<'a> {
|
|
pub(crate) fn new(options: PyFormatOptions, contents: &'a str, comments: Comments<'a>) -> Self {
|
|
Self {
|
|
options,
|
|
contents,
|
|
comments,
|
|
node_level: NodeLevel::TopLevel(TopLevelStatementPosition::Other),
|
|
indent_level: IndentLevel::new(0),
|
|
docstring: None,
|
|
}
|
|
}
|
|
|
|
pub(crate) fn source(&self) -> &'a str {
|
|
self.contents
|
|
}
|
|
|
|
#[allow(unused)]
|
|
pub(crate) fn locator(&self) -> Locator<'a> {
|
|
Locator::new(self.contents)
|
|
}
|
|
|
|
pub(crate) fn set_node_level(&mut self, level: NodeLevel) {
|
|
self.node_level = level;
|
|
}
|
|
|
|
pub(crate) fn node_level(&self) -> NodeLevel {
|
|
self.node_level
|
|
}
|
|
|
|
pub(crate) fn set_indent_level(&mut self, level: IndentLevel) {
|
|
self.indent_level = level;
|
|
}
|
|
|
|
pub(crate) fn indent_level(&self) -> IndentLevel {
|
|
self.indent_level
|
|
}
|
|
|
|
pub(crate) fn comments(&self) -> &Comments<'a> {
|
|
&self.comments
|
|
}
|
|
|
|
/// Returns a non-None value only if the formatter is running on a code
|
|
/// snippet within a docstring.
|
|
///
|
|
/// The quote character returned corresponds to the quoting used for the
|
|
/// docstring containing the code snippet currently being formatted.
|
|
pub(crate) fn docstring(&self) -> Option<QuoteChar> {
|
|
self.docstring
|
|
}
|
|
|
|
/// Return a new context suitable for formatting code snippets within a
|
|
/// docstring.
|
|
///
|
|
/// The quote character given should correspond to the quote character used
|
|
/// for the docstring containing the code snippets.
|
|
pub(crate) fn in_docstring(self, quote: QuoteChar) -> PyFormatContext<'a> {
|
|
PyFormatContext {
|
|
docstring: Some(quote),
|
|
..self
|
|
}
|
|
}
|
|
|
|
/// Returns `true` if preview mode is enabled.
|
|
pub(crate) const fn is_preview(&self) -> bool {
|
|
self.options.preview().is_enabled()
|
|
}
|
|
}
|
|
|
|
impl FormatContext for PyFormatContext<'_> {
|
|
type Options = PyFormatOptions;
|
|
|
|
fn options(&self) -> &Self::Options {
|
|
&self.options
|
|
}
|
|
|
|
fn source_code(&self) -> SourceCode {
|
|
SourceCode::new(self.contents)
|
|
}
|
|
}
|
|
|
|
impl Debug for PyFormatContext<'_> {
|
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
f.debug_struct("PyFormatContext")
|
|
.field("options", &self.options)
|
|
.field("comments", &self.comments.debug(self.source_code()))
|
|
.field("node_level", &self.node_level)
|
|
.field("source", &self.contents)
|
|
.finish()
|
|
}
|
|
}
|
|
|
|
/// Where a top-level statement sits within the module.
///
/// Defaults to [`TopLevelStatementPosition::Other`].
#[derive(Copy, Clone, Debug, Eq, PartialEq, Default)]
pub(crate) enum TopLevelStatementPosition {
    /// The final top-level statement of the module.
    Last,
    /// Every top-level statement that is not the final one.
    #[default]
    Other,
}
|
|
|
|
/// What's the enclosing level of the outer node.
|
|
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
|
pub(crate) enum NodeLevel {
|
|
/// Formatting statements on the module level.
|
|
TopLevel(TopLevelStatementPosition),
|
|
|
|
/// Formatting the body statements of a [compound statement](https://docs.python.org/3/reference/compound_stmts.html#compound-statements)
|
|
/// (`if`, `while`, `match`, etc.).
|
|
CompoundStatement,
|
|
|
|
/// The root or any sub-expression.
|
|
Expression(Option<GroupId>),
|
|
|
|
/// Formatting nodes that are enclosed by a parenthesized (any `[]`, `{}` or `()`) expression.
|
|
ParenthesizedExpression,
|
|
}
|
|
|
|
impl Default for NodeLevel {
|
|
fn default() -> Self {
|
|
Self::TopLevel(TopLevelStatementPosition::Other)
|
|
}
|
|
}
|
|
|
|
impl NodeLevel {
|
|
/// Returns `true` if the expression is in a parenthesized context.
|
|
pub(crate) const fn is_parenthesized(self) -> bool {
|
|
matches!(
|
|
self,
|
|
NodeLevel::Expression(Some(_)) | NodeLevel::ParenthesizedExpression
|
|
)
|
|
}
|
|
|
|
/// Returns `true` if this is the last top-level statement in the module.
|
|
pub(crate) const fn is_last_top_level_statement(self) -> bool {
|
|
matches!(self, NodeLevel::TopLevel(TopLevelStatementPosition::Last))
|
|
}
|
|
}
|
|
|
|
/// Change the [`NodeLevel`] of the formatter for the lifetime of this struct
|
|
pub(crate) struct WithNodeLevel<'ast, 'buf, B>
|
|
where
|
|
B: Buffer<Context = PyFormatContext<'ast>>,
|
|
{
|
|
buffer: &'buf mut B,
|
|
saved_level: NodeLevel,
|
|
}
|
|
|
|
impl<'ast, 'buf, B> WithNodeLevel<'ast, 'buf, B>
|
|
where
|
|
B: Buffer<Context = PyFormatContext<'ast>>,
|
|
{
|
|
pub(crate) fn new(level: NodeLevel, buffer: &'buf mut B) -> Self {
|
|
let context = buffer.state_mut().context_mut();
|
|
let saved_level = context.node_level();
|
|
|
|
context.set_node_level(level);
|
|
|
|
Self {
|
|
buffer,
|
|
saved_level,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'ast, 'buf, B> Deref for WithNodeLevel<'ast, 'buf, B>
|
|
where
|
|
B: Buffer<Context = PyFormatContext<'ast>>,
|
|
{
|
|
type Target = B;
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
self.buffer
|
|
}
|
|
}
|
|
|
|
impl<'ast, 'buf, B> DerefMut for WithNodeLevel<'ast, 'buf, B>
|
|
where
|
|
B: Buffer<Context = PyFormatContext<'ast>>,
|
|
{
|
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
|
self.buffer
|
|
}
|
|
}
|
|
|
|
impl<'ast, B> Drop for WithNodeLevel<'ast, '_, B>
|
|
where
|
|
B: Buffer<Context = PyFormatContext<'ast>>,
|
|
{
|
|
fn drop(&mut self) {
|
|
self.buffer
|
|
.state_mut()
|
|
.context_mut()
|
|
.set_node_level(self.saved_level);
|
|
}
|
|
}
|
|
|
|
/// The current indent level of the formatter.
///
/// One can determine the width of the indent itself (in number of ASCII
/// space characters) by multiplying the indent level by the configured indent
/// width.
///
/// This is specifically used inside the docstring code formatter for
/// implementing its "dynamic" line width mode. Namely, in the nested call to
/// the formatter, when "dynamic" mode is enabled, the line width is set to
/// `max(1, line_width - indent_level * indent_width)`, where `line_width` in
/// this context is the global line width setting.
///
/// NOTE(review): this formula previously read `min(1, ...)`, which would cap
/// the line width at 1; `max(1, ...)` appears to be what is intended. Confirm
/// against the docstring code formatter.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) struct IndentLevel {
    /// The numeric level. It is incremented for every whole indent in Python
    /// source code.
    ///
    /// Note that the first indentation level is actually 1, since this starts
    /// at 0 and is incremented when the first top-level statement is seen. So
    /// even though the first top-level statement in Python source will have no
    /// indentation, its indentation level is 1.
    level: u16,
}
|
|
|
|
impl IndentLevel {
|
|
/// Returns a new indent level for the given value.
|
|
pub(crate) fn new(level: u16) -> IndentLevel {
|
|
IndentLevel { level }
|
|
}
|
|
|
|
/// Returns the next indent level.
|
|
pub(crate) fn increment(self) -> IndentLevel {
|
|
IndentLevel {
|
|
level: self.level.saturating_add(1),
|
|
}
|
|
}
|
|
|
|
/// Convert this indent level into a specific number of ASCII whitespace
|
|
/// characters based on the given indent width.
|
|
pub(crate) fn to_ascii_spaces(self, width: IndentWidth) -> u16 {
|
|
let width = u16::try_from(width.value()).unwrap_or(u16::MAX);
|
|
// Why the subtraction? IndentLevel starts at 0 and asks for the "next"
|
|
// indent level before seeing the first top-level statement. So it's
|
|
// always 1 more than what we expect it to be.
|
|
let level = self.level.saturating_sub(1);
|
|
width.saturating_mul(level)
|
|
}
|
|
}
|
|
|
|
/// Change the [`IndentLevel`] of the formatter for the lifetime of this
|
|
/// struct.
|
|
pub(crate) struct WithIndentLevel<'a, B, D>
|
|
where
|
|
D: DerefMut<Target = B>,
|
|
B: Buffer<Context = PyFormatContext<'a>>,
|
|
{
|
|
buffer: D,
|
|
saved_level: IndentLevel,
|
|
}
|
|
|
|
impl<'a, B, D> WithIndentLevel<'a, B, D>
|
|
where
|
|
D: DerefMut<Target = B>,
|
|
B: Buffer<Context = PyFormatContext<'a>>,
|
|
{
|
|
pub(crate) fn new(level: IndentLevel, mut buffer: D) -> Self {
|
|
let context = buffer.state_mut().context_mut();
|
|
let saved_level = context.indent_level();
|
|
|
|
context.set_indent_level(level);
|
|
|
|
Self {
|
|
buffer,
|
|
saved_level,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a, B, D> Deref for WithIndentLevel<'a, B, D>
|
|
where
|
|
D: DerefMut<Target = B>,
|
|
B: Buffer<Context = PyFormatContext<'a>>,
|
|
{
|
|
type Target = B;
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
&self.buffer
|
|
}
|
|
}
|
|
|
|
impl<'a, B, D> DerefMut for WithIndentLevel<'a, B, D>
|
|
where
|
|
D: DerefMut<Target = B>,
|
|
B: Buffer<Context = PyFormatContext<'a>>,
|
|
{
|
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
|
&mut self.buffer
|
|
}
|
|
}
|
|
|
|
impl<'a, B, D> Drop for WithIndentLevel<'a, B, D>
|
|
where
|
|
D: DerefMut<Target = B>,
|
|
B: Buffer<Context = PyFormatContext<'a>>,
|
|
{
|
|
fn drop(&mut self) {
|
|
self.buffer
|
|
.state_mut()
|
|
.context_mut()
|
|
.set_indent_level(self.saved_level);
|
|
}
|
|
}
|