From 615337a54d26b76e450707ed378431e93ce7557d Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 31 Jul 2023 16:58:15 -0400 Subject: [PATCH] Remove newline-insertion logic from `JoinNodesBuilder` (#6205) ## Summary This PR moves the "insert empty lines" behavior out of `JoinNodesBuilder` and into the `Suite` formatter. I find it a little confusing that the logic is split between those two formatters right now, and since this is _only_ used in that one place, IMO it is a bit simpler to just inline it and use a single approach to tracking state (right now, both are stateful). The only other place this was used was for decorators. As a side effect, we now remove blank lines in both of these cases, which is a known but intentional deviation from Black (which preserves the empty line before the comment in the first case): ```python @foo # Hello @bar def baz(): pass @foo @bar def baz(): pass ``` --- crates/ruff_python_formatter/src/builders.rs | 238 +----------------- .../src/statement/stmt_function_def.rs | 6 +- .../src/statement/suite.rs | 157 +++++++----- ...patibility@simple_cases__comments9.py.snap | 13 +- 4 files changed, 111 insertions(+), 303 deletions(-) diff --git a/crates/ruff_python_formatter/src/builders.rs b/crates/ruff_python_formatter/src/builders.rs index 441390118d..84bdcba390 100644 --- a/crates/ruff_python_formatter/src/builders.rs +++ b/crates/ruff_python_formatter/src/builders.rs @@ -1,12 +1,9 @@ +use ruff_formatter::{format_args, write, Argument, Arguments}; use ruff_python_ast::Ranged; +use ruff_python_trivia::{SimpleToken, SimpleTokenKind, SimpleTokenizer}; use ruff_text_size::{TextRange, TextSize}; use crate::comments::{dangling_comments, SourceComment}; -use ruff_formatter::{format_args, write, Argument, Arguments}; -use ruff_python_trivia::{ - lines_after, skip_trailing_trivia, SimpleToken, SimpleTokenKind, SimpleTokenizer, -}; - use crate::context::NodeLevel; use crate::prelude::*; use crate::MagicTrailingComma; @@ -47,15 +44,6 @@ impl<'ast> Format> for ParenthesizeIfExpands<'_, 'ast> { /// Provides Python specific extensions to [`Formatter`]. pub(crate) trait PyFormatterExtensions<'ast, 'buf> { - /// Creates a joiner that inserts the appropriate number of empty lines between two nodes, depending on the - /// line breaks that separate the two nodes in the source document. The `level` customizes the maximum allowed - /// empty lines between any two nodes. Separates any two nodes by at least a hard line break. - /// - /// * [`NodeLevel::Module`]: Up to two empty lines - /// * [`NodeLevel::CompoundStatement`]: Up to one empty line - /// * [`NodeLevel::Expression`]: No empty lines - fn join_nodes<'fmt>(&'fmt mut self, level: NodeLevel) -> JoinNodesBuilder<'fmt, 'ast, 'buf>; - /// A builder that separates each element by a `,` and a [`soft_line_break_or_space`]. /// It emits a trailing `,` that is only shown if the enclosing group expands. It forces the enclosing /// group to expand if the last item has a trailing `comma` and the magical comma option is enabled. @@ -66,10 +54,6 @@ pub(crate) trait PyFormatterExtensions<'ast, 'buf> { } impl<'buf, 'ast> PyFormatterExtensions<'ast, 'buf> for PyFormatter<'ast, 'buf> { - fn join_nodes<'fmt>(&'fmt mut self, level: NodeLevel) -> JoinNodesBuilder<'fmt, 'ast, 'buf> { - JoinNodesBuilder::new(self, level) - } - fn join_comma_separated<'fmt>( &'fmt mut self, sequence_end: TextSize, @@ -78,130 +62,6 @@ impl<'buf, 'ast> PyFormatterExtensions<'ast, 'buf> for PyFormatter<'ast, 'buf> { } } -#[must_use = "must eventually call `finish()` on the builder."] -pub(crate) struct JoinNodesBuilder<'fmt, 'ast, 'buf> { - fmt: &'fmt mut PyFormatter<'ast, 'buf>, - result: FormatResult<()>, - last_end: Option, - node_level: NodeLevel, -} - -impl<'fmt, 'ast, 'buf> JoinNodesBuilder<'fmt, 'ast, 'buf> { - fn new(fmt: &'fmt mut PyFormatter<'ast, 'buf>, level: NodeLevel) -> Self { - Self { - fmt, - result: Ok(()), - last_end: None, - node_level: level, - } - } - - /// Writes a `node`, inserting the appropriate number of line breaks depending on the number of - /// line breaks that were present in the source document. Uses `content` to format the `node`. - pub(crate) fn entry(&mut self, node: &T, content: &dyn Format>) - where - T: Ranged, - { - let node_level = self.node_level; - - self.result = self.result.and_then(|_| { - if let Some(last_end) = self.last_end.replace(node.end()) { - let source = self.fmt.context().source(); - let count_lines = |offset| { - // It's necessary to skip any trailing line comment because RustPython doesn't include trailing comments - // in the node's range - // ```python - // a # The range of `a` ends right before this comment - // - // b - // ``` - // - // Simply using `lines_after` doesn't work if a statement has a trailing comment because - // it then counts the lines between the statement and the trailing comment, which is - // always 0. This is why it skips any trailing trivia (trivia that's on the same line) - // and counts the lines after. - let after_trailing_trivia = skip_trailing_trivia(offset, source); - lines_after(after_trailing_trivia, source) - }; - - match node_level { - NodeLevel::TopLevel => match count_lines(last_end) { - 0 | 1 => hard_line_break().fmt(self.fmt), - 2 => empty_line().fmt(self.fmt), - _ => write!(self.fmt, [empty_line(), empty_line()]), - }, - NodeLevel::CompoundStatement => match count_lines(last_end) { - 0 | 1 => hard_line_break().fmt(self.fmt), - _ => empty_line().fmt(self.fmt), - }, - NodeLevel::Expression(_) | NodeLevel::ParenthesizedExpression => { - hard_line_break().fmt(self.fmt) - } - }?; - } - - content.fmt(self.fmt) - }); - } - - /// Writes a sequence of node with their content tuples, inserting the appropriate number of line breaks between any two of them - /// depending on the number of line breaks that exist in the source document. - #[allow(unused)] - pub(crate) fn entries(&mut self, entries: I) -> &mut Self - where - T: Ranged, - F: Format>, - I: IntoIterator, - { - for (node, content) in entries { - self.entry(&node, &content); - } - - self - } - - /// Writes a sequence of nodes, using their [`AsFormat`] implementation to format the content. - /// Inserts the appropriate number of line breaks between any two nodes, depending on the number of - /// line breaks in the source document. - #[allow(unused)] - pub(crate) fn nodes<'a, T, I>(&mut self, nodes: I) -> &mut Self - where - T: Ranged + AsFormat> + 'a, - I: IntoIterator, - { - for node in nodes { - self.entry(node, &node.format()); - } - - self - } - - /// Writes a single entry using the specified separator to separate the entry from a previous entry. - pub(crate) fn entry_with_separator( - &mut self, - separator: &dyn Format>, - content: &dyn Format>, - node: &T, - ) where - T: Ranged, - { - self.result = self.result.and_then(|_| { - if self.last_end.is_some() { - separator.fmt(self.fmt)?; - } - - self.last_end = Some(node.end()); - - content.fmt(self.fmt) - }); - } - - /// Finishes the joiner and gets the format result. - pub(crate) fn finish(&mut self) -> FormatResult<()> { - self.result - } -} - #[derive(Copy, Clone, Debug)] enum Entries { /// No previous entry @@ -403,97 +263,3 @@ impl<'ast> Format> for EmptyWithDanglingComments<'_> { ) } } - -#[cfg(test)] -mod tests { - use ruff_python_ast::ModModule; - use ruff_python_parser::Parse; - - use ruff_formatter::format; - - use crate::comments::Comments; - use crate::context::{NodeLevel, PyFormatContext}; - use crate::prelude::*; - use crate::PyFormatOptions; - - fn format_ranged(level: NodeLevel) -> String { - let source = r#" -a = 10 - - - -three_leading_newlines = 80 - - -two_leading_newlines = 20 - -one_leading_newline = 10 -no_leading_newline = 30 -"#; - - let module = ModModule::parse(source, "test.py").unwrap(); - - let context = PyFormatContext::new(PyFormatOptions::default(), source, Comments::default()); - - let test_formatter = - format_with(|f: &mut PyFormatter| f.join_nodes(level).nodes(&module.body).finish()); - - let formatted = format!(context, [test_formatter]).unwrap(); - let printed = formatted.print().unwrap(); - - printed.as_code().to_string() - } - - // Keeps up to two empty lines - #[test] - fn ranged_builder_top_level() { - let printed = format_ranged(NodeLevel::TopLevel); - - assert_eq!( - &printed, - r#"a = 10 - - -three_leading_newlines = 80 - - -two_leading_newlines = 20 - -one_leading_newline = 10 -no_leading_newline = 30"# - ); - } - - // Should keep at most one empty level - #[test] - fn ranged_builder_statement_level() { - let printed = format_ranged(NodeLevel::CompoundStatement); - - assert_eq!( - &printed, - r#"a = 10 - -three_leading_newlines = 80 - -two_leading_newlines = 20 - -one_leading_newline = 10 -no_leading_newline = 30"# - ); - } - - // Removes all empty lines - #[test] - fn ranged_builder_parenthesized_level() { - let printed = format_ranged(NodeLevel::Expression(None)); - - assert_eq!( - &printed, - r#"a = 10 -three_leading_newlines = 80 -two_leading_newlines = 20 -one_leading_newline = 10 -no_leading_newline = 30"# - ); - } -} diff --git a/crates/ruff_python_formatter/src/statement/stmt_function_def.rs b/crates/ruff_python_formatter/src/statement/stmt_function_def.rs index 91b20899ca..27d569b5b3 100644 --- a/crates/ruff_python_formatter/src/statement/stmt_function_def.rs +++ b/crates/ruff_python_formatter/src/statement/stmt_function_def.rs @@ -5,7 +5,7 @@ use ruff_python_ast::function::AnyFunctionDefinition; use ruff_python_trivia::{lines_after, skip_trailing_trivia}; use crate::comments::{leading_comments, trailing_comments}; -use crate::context::NodeLevel; + use crate::expression::parentheses::{optional_parentheses, Parentheses}; use crate::prelude::*; use crate::FormatNodeRule; @@ -47,8 +47,8 @@ impl FormatRule, PyFormatContext<'_>> for FormatAnyFun dangling_comments.split_at(trailing_definition_comments_start); if let Some(last_decorator) = item.decorators().last() { - f.join_nodes(NodeLevel::CompoundStatement) - .nodes(item.decorators()) + f.join_with(hard_line_break()) + .entries(item.decorators().iter().formatted()) .finish()?; if leading_function_definition_comments.is_empty() { diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs index 5bfd8ed4c0..acce13d79c 100644 --- a/crates/ruff_python_formatter/src/statement/suite.rs +++ b/crates/ruff_python_formatter/src/statement/suite.rs @@ -1,10 +1,7 @@ -use ruff_python_ast::{Ranged, Stmt, Suite}; - -use ruff_formatter::{ - format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions, -}; +use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions}; use ruff_python_ast::helpers::is_compound_statement; -use ruff_python_trivia::lines_before; +use ruff_python_ast::{Ranged, Stmt, Suite}; +use ruff_python_trivia::{lines_after, lines_before, skip_trailing_trivia}; use crate::context::NodeLevel; use crate::prelude::*; @@ -51,52 +48,50 @@ impl FormatRule> for FormatSuite { let saved_level = f.context().node_level(); f.context_mut().set_node_level(node_level); - let mut joiner = f.join_nodes(node_level); + // Wrap the entire formatting operation in a `format_with` to ensure that we restore + // context regardless of whether an error occurs. + let formatted = format_with(|f| { + let mut iter = statements.iter(); + let Some(first) = iter.next() else { + return Ok(()); + }; - let mut iter = statements.iter(); - let Some(first) = iter.next() else { - return Ok(()); - }; + // First entry has never any separator, doesn't matter which one we take. + write!(f, [first.format()])?; - // First entry has never any separator, doesn't matter which one we take. - joiner.entry(first, &first.format()); + let mut last = first; - let mut last = first; - - for statement in iter { - if is_class_or_function_definition(last) || is_class_or_function_definition(statement) { - match self.level { - SuiteLevel::TopLevel => { - joiner.entry_with_separator( - &format_args![empty_line(), empty_line()], - &statement.format(), - statement, - ); + for statement in iter { + if is_class_or_function_definition(last) + || is_class_or_function_definition(statement) + { + match self.level { + SuiteLevel::TopLevel => { + write!(f, [empty_line(), empty_line(), statement.format()])?; + } + SuiteLevel::Nested => { + write!(f, [empty_line(), statement.format()])?; + } } - SuiteLevel::Nested => { - joiner.entry_with_separator(&empty_line(), &statement.format(), statement); - } - } - } else if is_import_definition(last) && !is_import_definition(statement) { - joiner.entry_with_separator(&empty_line(), &statement.format(), statement); - } else if is_compound_statement(last) { - // Handles the case where a body has trailing comments. The issue is that RustPython does not include - // the comments in the range of the suite. This means, the body ends right after the last statement in the body. - // ```python - // def test(): - // ... - // # The body of `test` ends right after `...` and before this comment - // - // # leading comment - // - // - // a = 10 - // ``` - // Using `lines_after` for the node doesn't work because it would count the lines after the `...` - // which is 0 instead of 1, the number of lines between the trailing comment and - // the leading comment. This is why the suite handling counts the lines before the - // start of the next statement or before the first leading comments for compound statements. - let separator = format_with(|f| { + } else if is_import_definition(last) && !is_import_definition(statement) { + write!(f, [empty_line(), statement.format()])?; + } else if is_compound_statement(last) { + // Handles the case where a body has trailing comments. The issue is that RustPython does not include + // the comments in the range of the suite. This means, the body ends right after the last statement in the body. + // ```python + // def test(): + // ... + // # The body of `test` ends right after `...` and before this comment + // + // # leading comment + // + // + // a = 10 + // ``` + // Using `lines_after` for the node doesn't work because it would count the lines after the `...` + // which is 0 instead of 1, the number of lines between the trailing comment and + // the leading comment. This is why the suite handling counts the lines before the + // start of the next statement or before the first leading comments for compound statements. let start = if let Some(first_leading) = comments.leading_comments(statement).first() { first_leading.slice().start() @@ -105,27 +100,66 @@ impl FormatRule> for FormatSuite { }; match lines_before(start, source) { - 0 | 1 => hard_line_break().fmt(f), - 2 => empty_line().fmt(f), + 0 | 1 => write!(f, [hard_line_break()])?, + 2 => write!(f, [empty_line()])?, 3.. => { if self.level.is_nested() { - empty_line().fmt(f) + write!(f, [empty_line()])?; } else { - write!(f, [empty_line(), empty_line()]) + write!(f, [empty_line(), empty_line()])?; } } } - }); - joiner.entry_with_separator(&separator, &statement.format(), statement); - } else { - joiner.entry(statement, &statement.format()); + write!(f, [statement.format()])?; + } else { + // Insert the appropriate number of empty lines based on the node level, e.g.: + // * [`NodeLevel::Module`]: Up to two empty lines + // * [`NodeLevel::CompoundStatement`]: Up to one empty line + // * [`NodeLevel::Expression`]: No empty lines + + let count_lines = |offset| { + // It's necessary to skip any trailing line comment because RustPython doesn't include trailing comments + // in the node's range + // ```python + // a # The range of `a` ends right before this comment + // + // b + // ``` + // + // Simply using `lines_after` doesn't work if a statement has a trailing comment because + // it then counts the lines between the statement and the trailing comment, which is + // always 0. This is why it skips any trailing trivia (trivia that's on the same line) + // and counts the lines after. + let after_trailing_trivia = skip_trailing_trivia(offset, source); + lines_after(after_trailing_trivia, source) + }; + + match node_level { + NodeLevel::TopLevel => match count_lines(last.end()) { + 0 | 1 => write!(f, [hard_line_break()])?, + 2 => write!(f, [empty_line()])?, + _ => write!(f, [empty_line(), empty_line()])?, + }, + NodeLevel::CompoundStatement => match count_lines(last.end()) { + 0 | 1 => write!(f, [hard_line_break()])?, + _ => write!(f, [empty_line()])?, + }, + NodeLevel::Expression(_) | NodeLevel::ParenthesizedExpression => { + write!(f, [hard_line_break()])?; + } + } + + write!(f, [statement.format()])?; + } + + last = statement; } - last = statement; - } + Ok(()) + }); - let result = joiner.finish(); + let result = formatted.fmt(f); f.context_mut().set_node_level(saved_level); @@ -133,6 +167,7 @@ impl FormatRule> for FormatSuite { } } +/// Returns `true` if a [`Stmt`] is a class or function definition. const fn is_class_or_function_definition(stmt: &Stmt) -> bool { matches!( stmt, @@ -140,6 +175,7 @@ const fn is_class_or_function_definition(stmt: &Stmt) -> bool { ) } +/// Returns `true` if a [`Stmt`] is an import. const fn is_import_definition(stmt: &Stmt) -> bool { matches!(stmt, Stmt::Import(_) | Stmt::ImportFrom(_)) } @@ -171,11 +207,10 @@ impl<'ast> IntoFormat> for Suite { #[cfg(test)] mod tests { + use ruff_formatter::format; use ruff_python_ast::Suite; use ruff_python_parser::Parse; - use ruff_formatter::format; - use crate::comments::Comments; use crate::prelude::*; use crate::statement::suite::SuiteLevel; diff --git a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__comments9.py.snap b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__comments9.py.snap index 2399d84c1b..ede65b6951 100644 --- a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__comments9.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__comments9.py.snap @@ -151,7 +151,15 @@ def bar(): ```diff --- Black +++ Ruff -@@ -106,7 +106,6 @@ +@@ -74,7 +74,6 @@ + @deco1 + # leading 2 + @deco2(with_args=True) +- + # leading 3 that already has an empty line + @deco3 + # leading 4 +@@ -106,7 +105,6 @@ # Another leading comment def another_inline(): pass @@ -159,7 +167,7 @@ def bar(): else: # More leading comments def inline_after_else(): -@@ -121,7 +120,6 @@ +@@ -121,7 +119,6 @@ # Another leading comment def another_top_level_quote_inline_inline(): pass @@ -248,7 +256,6 @@ some = statement @deco1 # leading 2 @deco2(with_args=True) - # leading 3 that already has an empty line @deco3 # leading 4