ruff/crates/ruff_python_formatter/src/context.rs
Dhruv Manilawala 189e947808
Split string formatting to individual nodes (#9058)
This PR splits the string formatting code in the formatter to be handled
by the respective nodes.

Previously, the string formatting was done through a single
`FormatString` interface. Now, the nodes themselves are responsible for
formatting.

The following changes were made:
1. Remove `StringLayout::ImplicitStringConcatenationInBinaryLike` and
inline the call to `FormatStringContinuation`. After the refactor, the
binary like formatting would delegate to `FormatString` which would then
delegate to `FormatStringContinuation`. This removes the intermediary
steps.
2. Add formatter implementation for `FStringPart` which delegates it to
the respective string literal or f-string node.
3. Add `ExprStringLiteralKind` which is either `String` or `Docstring`.
If it's a docstring variant, then the string expression would not be
implicitly concatenated. This is guaranteed by the
`DocstringStmt::try_from_expression` constructor.
4. Add `StringLiteralKind` which is either a `String`, `Docstring` or
`InImplicitlyConcatenatedFString`. The last variant is for when the
string literal is implicitly concatenated with an f-string (`"foo" f"bar
{x}"`).
5. Remove `FormatString`.
6. Extract the f-string quote detection as a standalone function which
is public to the crate. This is used to detect the quote to be used for
an f-string at the expression level (`ExprFString` or
`FormatStringContinuation`).


### Formatter ecosystem result

**This PR**

| project | similarity index | total files | changed files |

|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |

**main**

| project | similarity index | total files | changed files |

|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75804 | 1799 | 1648 |
| django | 0.99984 | 2772 | 34 |
| home-assistant | 0.99955 | 10596 | 214 |
| poetry | 0.99905 | 321 | 15 |
| transformers | 0.99967 | 2657 | 324 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99980 | 3669 | 18 |
| warehouse | 0.99976 | 654 | 14 |
| zulip | 0.99958 | 1459 | 36 |
2023-12-14 12:55:10 -06:00

334 lines
9.6 KiB
Rust

use crate::comments::Comments;
use crate::string::QuoteChar;
use crate::PyFormatOptions;
use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode};
use ruff_source_file::Locator;
use std::fmt::{Debug, Formatter};
use std::ops::{Deref, DerefMut};
#[derive(Clone)]
pub struct PyFormatContext<'a> {
options: PyFormatOptions,
contents: &'a str,
comments: Comments<'a>,
node_level: NodeLevel,
indent_level: IndentLevel,
/// Set to a non-None value when the formatter is running on a code
/// snippet within a docstring. The value should be the quote character of the
/// docstring containing the code snippet.
///
/// Various parts of the formatter may inspect this state to change how it
/// works. For example, multi-line strings will always be written with a
/// quote style that is inverted from the one here in order to ensure that
/// the formatted Python code will be valid.
docstring: Option<QuoteChar>,
}
impl<'a> PyFormatContext<'a> {
pub(crate) fn new(options: PyFormatOptions, contents: &'a str, comments: Comments<'a>) -> Self {
Self {
options,
contents,
comments,
node_level: NodeLevel::TopLevel(TopLevelStatementPosition::Other),
indent_level: IndentLevel::new(0),
docstring: None,
}
}
pub(crate) fn source(&self) -> &'a str {
self.contents
}
#[allow(unused)]
pub(crate) fn locator(&self) -> Locator<'a> {
Locator::new(self.contents)
}
pub(crate) fn set_node_level(&mut self, level: NodeLevel) {
self.node_level = level;
}
pub(crate) fn node_level(&self) -> NodeLevel {
self.node_level
}
pub(crate) fn set_indent_level(&mut self, level: IndentLevel) {
self.indent_level = level;
}
pub(crate) fn indent_level(&self) -> IndentLevel {
self.indent_level
}
pub(crate) fn comments(&self) -> &Comments<'a> {
&self.comments
}
/// Returns a non-None value only if the formatter is running on a code
/// snippet within a docstring.
///
/// The quote character returned corresponds to the quoting used for the
/// docstring containing the code snippet currently being formatted.
pub(crate) fn docstring(&self) -> Option<QuoteChar> {
self.docstring
}
/// Return a new context suitable for formatting code snippets within a
/// docstring.
///
/// The quote character given should correspond to the quote character used
/// for the docstring containing the code snippets.
pub(crate) fn in_docstring(self, quote: QuoteChar) -> PyFormatContext<'a> {
PyFormatContext {
docstring: Some(quote),
..self
}
}
/// Returns `true` if preview mode is enabled.
pub(crate) const fn is_preview(&self) -> bool {
self.options.preview().is_enabled()
}
}
impl FormatContext for PyFormatContext<'_> {
type Options = PyFormatOptions;
fn options(&self) -> &Self::Options {
&self.options
}
fn source_code(&self) -> SourceCode {
SourceCode::new(self.contents)
}
}
impl Debug for PyFormatContext<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("PyFormatContext")
.field("options", &self.options)
.field("comments", &self.comments.debug(self.source_code()))
.field("node_level", &self.node_level)
.field("source", &self.contents)
.finish()
}
}
/// The position of a top-level statement in the module.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Default)]
pub(crate) enum TopLevelStatementPosition {
/// This is the last top-level statement in the module.
Last,
/// Any other top-level statement.
#[default]
Other,
}
/// What's the enclosing level of the outer node.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum NodeLevel {
/// Formatting statements on the module level.
TopLevel(TopLevelStatementPosition),
/// Formatting the body statements of a [compound statement](https://docs.python.org/3/reference/compound_stmts.html#compound-statements)
/// (`if`, `while`, `match`, etc.).
CompoundStatement,
/// The root or any sub-expression.
Expression(Option<GroupId>),
/// Formatting nodes that are enclosed by a parenthesized (any `[]`, `{}` or `()`) expression.
ParenthesizedExpression,
}
impl Default for NodeLevel {
fn default() -> Self {
Self::TopLevel(TopLevelStatementPosition::Other)
}
}
impl NodeLevel {
/// Returns `true` if the expression is in a parenthesized context.
pub(crate) const fn is_parenthesized(self) -> bool {
matches!(
self,
NodeLevel::Expression(Some(_)) | NodeLevel::ParenthesizedExpression
)
}
/// Returns `true` if this is the last top-level statement in the module.
pub(crate) const fn is_last_top_level_statement(self) -> bool {
matches!(self, NodeLevel::TopLevel(TopLevelStatementPosition::Last))
}
}
/// Change the [`NodeLevel`] of the formatter for the lifetime of this struct
pub(crate) struct WithNodeLevel<'ast, 'buf, B>
where
B: Buffer<Context = PyFormatContext<'ast>>,
{
buffer: &'buf mut B,
saved_level: NodeLevel,
}
impl<'ast, 'buf, B> WithNodeLevel<'ast, 'buf, B>
where
B: Buffer<Context = PyFormatContext<'ast>>,
{
pub(crate) fn new(level: NodeLevel, buffer: &'buf mut B) -> Self {
let context = buffer.state_mut().context_mut();
let saved_level = context.node_level();
context.set_node_level(level);
Self {
buffer,
saved_level,
}
}
}
impl<'ast, 'buf, B> Deref for WithNodeLevel<'ast, 'buf, B>
where
B: Buffer<Context = PyFormatContext<'ast>>,
{
type Target = B;
fn deref(&self) -> &Self::Target {
self.buffer
}
}
impl<'ast, 'buf, B> DerefMut for WithNodeLevel<'ast, 'buf, B>
where
B: Buffer<Context = PyFormatContext<'ast>>,
{
fn deref_mut(&mut self) -> &mut Self::Target {
self.buffer
}
}
impl<'ast, B> Drop for WithNodeLevel<'ast, '_, B>
where
B: Buffer<Context = PyFormatContext<'ast>>,
{
fn drop(&mut self) {
self.buffer
.state_mut()
.context_mut()
.set_node_level(self.saved_level);
}
}
/// The current indent level of the formatter.
///
/// One can determine the the width of the indent itself (in number of ASCII
/// space characters) by multiplying the indent level by the configured indent
/// width.
///
/// This is specifically used inside the docstring code formatter for
/// implementing its "dynamic" line width mode. Namely, in the nested call to
/// the formatter, when "dynamic" mode is enabled, the line width is set to
/// `min(1, line_width - indent_level * indent_width)`, where `line_width` in
/// this context is the global line width setting.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) struct IndentLevel {
/// The numeric level. It is incremented for every whole indent in Python
/// source code.
///
/// Note that the first indentation level is actually 1, since this starts
/// at 0 and is incremented when the first top-level statement is seen. So
/// even though the first top-level statement in Python source will have no
/// indentation, its indentation level is 1.
level: u16,
}
impl IndentLevel {
/// Returns a new indent level for the given value.
pub(crate) fn new(level: u16) -> IndentLevel {
IndentLevel { level }
}
/// Returns the next indent level.
pub(crate) fn increment(self) -> IndentLevel {
IndentLevel {
level: self.level.saturating_add(1),
}
}
/// Convert this indent level into a specific number of ASCII whitespace
/// characters based on the given indent width.
pub(crate) fn to_ascii_spaces(self, width: IndentWidth) -> u16 {
let width = u16::try_from(width.value()).unwrap_or(u16::MAX);
// Why the subtraction? IndentLevel starts at 0 and asks for the "next"
// indent level before seeing the first top-level statement. So it's
// always 1 more than what we expect it to be.
let level = self.level.saturating_sub(1);
width.saturating_mul(level)
}
}
/// Change the [`IndentLevel`] of the formatter for the lifetime of this
/// struct.
pub(crate) struct WithIndentLevel<'a, B, D>
where
D: DerefMut<Target = B>,
B: Buffer<Context = PyFormatContext<'a>>,
{
buffer: D,
saved_level: IndentLevel,
}
impl<'a, B, D> WithIndentLevel<'a, B, D>
where
D: DerefMut<Target = B>,
B: Buffer<Context = PyFormatContext<'a>>,
{
pub(crate) fn new(level: IndentLevel, mut buffer: D) -> Self {
let context = buffer.state_mut().context_mut();
let saved_level = context.indent_level();
context.set_indent_level(level);
Self {
buffer,
saved_level,
}
}
}
impl<'a, B, D> Deref for WithIndentLevel<'a, B, D>
where
D: DerefMut<Target = B>,
B: Buffer<Context = PyFormatContext<'a>>,
{
type Target = B;
fn deref(&self) -> &Self::Target {
&self.buffer
}
}
impl<'a, B, D> DerefMut for WithIndentLevel<'a, B, D>
where
D: DerefMut<Target = B>,
B: Buffer<Context = PyFormatContext<'a>>,
{
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.buffer
}
}
impl<'a, B, D> Drop for WithIndentLevel<'a, B, D>
where
D: DerefMut<Target = B>,
B: Buffer<Context = PyFormatContext<'a>>,
{
fn drop(&mut self) {
self.buffer
.state_mut()
.context_mut()
.set_indent_level(self.saved_level);
}
}