Correctly handle newlines after/before comments (#4895)

## Summary

This change fixes the removal of empty lines between a leading comment and the previous statement:

```python
a = 20

# leading comment
b = 10
```

Ruff removed the empty line between `a` and `b` because:

* The leading comments formatting does not preserve leading newlines (to avoid adding new lines at the top of a body)
* The `JoinNodesBuilder` counted the lines before `b`, which is 1 -> Doesn't insert a new line

This is fixed by changing the `JoinNodesBuilder` to instead count the lines *after* the last node. This correctly gives 1, and the `# leading comment` will insert the empty lines between any other leading comment or the node.

## Test Plan

I added a new test for empty lines.
2025-10-05 16:10:36 +00:00 · 2023-06-07 14:49:43 +02:00 · 2023-06-07 14:49:43 +02:00 · 6ab3fc60f4
commit 6ab3fc60f4
parent 222ca98a41
19 changed files with 332 additions and 124 deletions
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__collections_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__collections_py.snap
@ -84,24 +84,7 @@ if True:
 ```diff
 --- Black
 +++ Ruff
-@@ -1,75 +1,49 @@
- import core, time, a
- 
- from . import A, B, C
-
- # keeps existing trailing comma
- from foo import (
-     bar,
- )
-
- # also keeps existing structure
- from foo import (
-     baz,
-     qux,
- )
-
- # `as` works as well
- from foo import (
+@@ -18,44 +18,26 @@
     xyzzy as magic,
 )
 
@ -154,12 +137,12 @@ if True:
 -    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa wraps %s"
 -    % bar
 -)
-
 +y = {"oneple": (1,),}
 +assert False, ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa wraps %s" % bar)
+ 
 # looping over a 1-tuple should also not get wrapped
 for x in (1,):
-     pass
+@@ -63,13 +45,9 @@
 for (x,) in (1,), (2,), (3,):
     pass
 
@ -175,7 +158,7 @@ if True:
 print("foo %r", (foo.bar,))
 
 if True:
-@@ -79,21 +53,15 @@
+@@ -79,21 +57,15 @@
     )
 
 if True:
@ -210,15 +193,18 @@ if True:
 import core, time, a

 from . import A, B, C
+
 # keeps existing trailing comma
 from foo import (
    bar,
 )
+
 # also keeps existing structure
 from foo import (
    baz,
    qux,
 )
+
 # `as` works as well
 from foo import (
    xyzzy as magic,
@ -244,6 +230,7 @@ nested_long_lines = ["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "bbbbbbbbbbbbbb
 x = {"oneple": (1,)}
 y = {"oneple": (1,),}
 assert False, ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa wraps %s" % bar)
+
 # looping over a 1-tuple should also not get wrapped
 for x in (1,):
    pass
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__fmtonoff3_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__fmtonoff3_py.snap
@ -30,11 +30,8 @@ x = [
 ```diff
 --- Black
 +++ Ruff
-@@ -10,6 +10,9 @@
-     1, 2,
-     3, 4,
+@@ -12,4 +12,6 @@
 ]
-+
 # fmt: on
 
 -x = [1, 2, 3, 4]
@ -58,7 +55,6 @@ x = [
    1, 2,
    3, 4,
 ]
-
 # fmt: on

 x = [
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__fmtonoff5_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__fmtonoff5_py.snap
@ -97,16 +97,7 @@ elif   unformatted:
 ```diff
 --- Black
 +++ Ruff
-@@ -9,8 +9,6 @@
-             ]  # Includes an formatted indentation.
-     },
- )
-
-
- # Regression test for https://github.com/psf/black/issues/2015.
- run(
-     # fmt: off
-@@ -44,7 +42,7 @@
+@@ -44,7 +44,7 @@
         print ( "This won't be formatted" )
     print ( "This won't be formatted either" )
 else:
@ -115,7 +106,7 @@ elif   unformatted:
 
 
 # Regression test for https://github.com/psf/black/issues/3184.
-@@ -61,7 +59,7 @@
+@@ -61,7 +61,7 @@
             elif param[0:4] in ("ZZZZ",):
                 print ( "This won't be formatted either" )
 
@ -124,7 +115,7 @@ elif   unformatted:
 
 
 # Regression test for https://github.com/psf/black/issues/2985.
-@@ -72,10 +70,7 @@
+@@ -72,10 +72,7 @@
 
 
 class Factory(t.Protocol):
@ -136,7 +127,7 @@ elif   unformatted:
 
 
 # Regression test for https://github.com/psf/black/issues/3436.
-@@ -83,5 +78,5 @@
+@@ -83,5 +80,5 @@
     return x
 # fmt: off
 elif   unformatted:
@ -160,6 +151,8 @@ setup(
            ]  # Includes an formatted indentation.
    },
 )
+
+
 # Regression test for https://github.com/psf/black/issues/2015.
 run(
    # fmt: off
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__function_trailing_comma_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__function_trailing_comma_py.snap
@ -188,11 +188,8 @@ some_module.some_function(
 ):
     pass
 
-@@ -100,15 +56,7 @@
- some_module.some_function(
-     argument1, (one_element_tuple,), argument4, argument5, argument6
- )
-
+@@ -103,12 +59,5 @@
+ 
 # Inner trailing comma causes outer to explode
 some_module.some_function(
 -    argument1,
@ -268,6 +265,7 @@ def func() -> ((also_super_long_type_annotation_that_may_cause_an_AST_related_cr
 some_module.some_function(
    argument1, (one_element_tuple,), argument4, argument5, argument6
 )
+
 # Inner trailing comma causes outer to explode
 some_module.some_function(
    argument1, (one, two,), argument4, argument5, argument6
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__import_spacing_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__import_spacing_py.snap
@ -62,7 +62,7 @@ __all__ = (
 ```diff
 --- Black
 +++ Ruff
-@@ -2,12 +2,13 @@
+@@ -2,8 +2,10 @@
 
 # flake8: noqa
 
@ -74,11 +74,7 @@ __all__ = (
     ERROR,
 )
 import sys
-
- # This relies on each of the submodules having an __all__ variable.
- from .base_events import *
- from .coroutines import *
-@@ -22,33 +23,16 @@
+@@ -22,33 +24,16 @@
 from ..streams import *
 
 from some_library import (
@ -134,6 +130,7 @@ from logging import (
    ERROR,
 )
 import sys
+
 # This relies on each of the submodules having an __all__ variable.
 from .base_events import *
 from .coroutines import *
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__one_element_subscript_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__one_element_subscript_py.snap
@ -25,11 +25,9 @@ list_of_types = [tuple[int,],]
 ```diff
 --- Black
 +++ Ruff
-@@ -2,21 +2,9 @@
- # in a single-element subscript.
- a: tuple[int,]
+@@ -4,19 +4,9 @@
 b = tuple[int,]
-
+ 
 # The magic comma still applies to multi-element subscripts.
 -c: tuple[
 -    int,
@ -39,9 +37,9 @@ list_of_types = [tuple[int,],]
 -    int,
 -    int,
 -]
-
 +c: tuple[int, int,]
 +d = tuple[int, int,]
+ 
 # Magic commas still work as expected for non-subscripts.
 -small_list = [
 -    1,
@ -60,9 +58,11 @@ list_of_types = [tuple[int,],]
 # in a single-element subscript.
 a: tuple[int,]
 b = tuple[int,]
+
 # The magic comma still applies to multi-element subscripts.
 c: tuple[int, int,]
 d = tuple[int, int,]
+
 # Magic commas still work as expected for non-subscripts.
 small_list = [1,]
 list_of_types = [tuple[int,],]
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__power_op_spacing_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__power_op_spacing_py.snap
@ -89,15 +89,6 @@ return np.divide(
 
 
 def function_dont_replace_spaces():
-@@ -47,8 +47,6 @@
- o = settings(max_examples=10**6.0)
- p = {(k, k**2): v**2.0 for k, v in pairs}
- q = [10.5**i for i in range(6)]
-
-
- # WE SHOULD DEFINITELY NOT EAT THESE COMMENTS (https://github.com/psf/black/issues/2873)
- if hasattr(view, "sum_of_weights"):
-     return np.divide(  # type: ignore[no-any-return]
 ```

 ## Ruff Output
@ -152,6 +143,8 @@ n = count <= 10**5.0
 o = settings(max_examples=10**6.0)
 p = {(k, k**2): v**2.0 for k, v in pairs}
 q = [10.5**i for i in range(6)]
+
+
 # WE SHOULD DEFINITELY NOT EAT THESE COMMENTS (https://github.com/psf/black/issues/2873)
 if hasattr(view, "sum_of_weights"):
    return np.divide(  # type: ignore[no-any-return]
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__prefer_rhs_split_reformatted_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__prefer_rhs_split_reformatted_py.snap
@ -25,7 +25,7 @@ xxxxxxxxx_yyy_zzzzzzzz[xx.xxxxxx(x_yyy_zzzzzz.xxxxx[0]), x_yyy_zzzzzz.xxxxxx(xxx
 ```diff
 --- Black
 +++ Ruff
-@@ -2,20 +2,10 @@
+@@ -2,20 +2,11 @@
 
 # Left hand side fits in a single line but will still be exploded by the
 # magic trailing comma.
@ -41,7 +41,7 @@ xxxxxxxxx_yyy_zzzzzzzz[xx.xxxxxx(x_yyy_zzzzzz.xxxxx[0]), x_yyy_zzzzzz.xxxxxx(xxx
     arg1,
     arg2,
 )
-
+ 
 # Make when when the left side of assignment plus the opening paren "... = (" is
 # exactly line length limit + 1, it won't be split like that.
 -xxxxxxxxx_yyy_zzzzzzzz[
@ -61,6 +61,7 @@ first_value, (m1, m2,), third_value = xxxxxx_yyyyyy_zzzzzz_wwwwww_uuuuuuu_vvvvvv
    arg1,
    arg2,
 )
+
 # Make when when the left side of assignment plus the opening paren "... = (" is
 # exactly line length limit + 1, it won't be split like that.
 xxxxxxxxx_yyy_zzzzzzzz[xx.xxxxxx(x_yyy_zzzzzz.xxxxx[0]), x_yyy_zzzzzz.xxxxxx(xxxx=1)] = 1
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__remove_for_brackets_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__remove_for_brackets_py.snap
@ -32,17 +32,16 @@ for (((((k, v))))) in d.items():
 ```diff
 --- Black
 +++ Ruff
-@@ -1,27 +1,16 @@
+@@ -1,5 +1,5 @@
 # Only remove tuple brackets after `for`
 -for k, v in d.items():
 +for (k, v) in d.items():
     print(k, v)
-
+ 
 # Don't touch tuple brackets after `in`
- for module in (core, _unicodefun):
-     if hasattr(module, "_verify_python3_env"):
+@@ -8,20 +8,12 @@
         module._verify_python3_env = lambda: None
-
+ 
 # Brackets remain for long for loop lines
 -for (
 -    why_would_anyone_choose_to_name_a_loop_variable_with_a_name_this_long,
@ -59,7 +58,7 @@ for (((((k, v))))) in d.items():
 -):
 +for (k, v) in dfkasdjfldsjflkdsjflkdsjfdslkfjldsjfgkjdshgkljjdsfldgkhsdofudsfudsofajdslkfjdslkfjldisfjdffjsdlkfjdlkjjkdflskadjldkfjsalkfjdasj.items():
     print(k, v)
-
+ 
 # Test deeply nested brackets
 -for k, v in d.items():
 +for (((((k, v))))) in d.items():
@ -72,16 +71,19 @@ for (((((k, v))))) in d.items():
 # Only remove tuple brackets after `for`
 for (k, v) in d.items():
    print(k, v)
+
 # Don't touch tuple brackets after `in`
 for module in (core, _unicodefun):
    if hasattr(module, "_verify_python3_env"):
        module._verify_python3_env = lambda: None
+
 # Brackets remain for long for loop lines
 for (why_would_anyone_choose_to_name_a_loop_variable_with_a_name_this_long, i_dont_know_but_we_should_still_check_the_behaviour_if_they_do) in d.items():
    print(k, v)

 for (k, v) in dfkasdjfldsjflkdsjflkdsjfdslkfjldsjfgkjdshgkljjdsfldgkhsdofudsfudsofajdslkfjdslkfjldisfjdffjsdlkfjdlkjjkdflskadjldkfjsalkfjdasj.items():
    print(k, v)
+
 # Test deeply nested brackets
 for (((((k, v))))) in d.items():
    print(k, v)
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__skip_magic_trailing_comma_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__skip_magic_trailing_comma_py.snap
@ -60,30 +60,28 @@ func(
 ```diff
 --- Black
 +++ Ruff
-@@ -1,25 +1,43 @@
- # We should not remove the trailing comma in a single-element subscript.
- a: tuple[int,]
+@@ -3,23 +3,45 @@
 b = tuple[int,]
-
+ 
 # But commas in multiple element subscripts should be removed.
 -c: tuple[int, int]
 -d = tuple[int, int]
-
 +c: tuple[int, int,]
 +d = tuple[int, int,]
+ 
 # Remove commas for non-subscripts.
 -small_list = [1]
 -list_of_types = [tuple[int,]]
 -small_set = {1}
 -set_of_types = {tuple[int,]}
-
 +small_list = [1,]
 +list_of_types = [tuple[int,],]
 +small_set = {1,}
 +set_of_types = {tuple[int,],}
+ 
 # Except single element tuples
 small_tuple = (1,)
-
+ 
 # Trailing commas in multiple chained non-nested parens.
 -zero(one).two(three).four(five)
 +zero(
@ -126,16 +124,20 @@ func(
 # We should not remove the trailing comma in a single-element subscript.
 a: tuple[int,]
 b = tuple[int,]
+
 # But commas in multiple element subscripts should be removed.
 c: tuple[int, int,]
 d = tuple[int, int,]
+
 # Remove commas for non-subscripts.
 small_list = [1,]
 list_of_types = [tuple[int,],]
 small_set = {1,}
 set_of_types = {tuple[int,],}
+
 # Except single element tuples
 small_tuple = (1,)
+
 # Trailing commas in multiple chained non-nested parens.
 zero(
    one,
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__trailing_commas_in_leading_parts_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__trailing_commas_in_leading_parts_py.snap
@ -46,7 +46,7 @@ assert xxxxxxxxx.xxxxxxxxx.xxxxxxxxx(
 ```diff
 --- Black
 +++ Ruff
-@@ -1,28 +1,10 @@
+@@ -1,28 +1,11 @@
 -zero(
 -    one,
 -).two(
@ -54,15 +54,15 @@ assert xxxxxxxxx.xxxxxxxxx.xxxxxxxxx(
 -).four(
 -    five,
 -)
-
+zero(one,).two(three,).four(five,)
+ 
 -func1(arg1).func2(
 -    arg2,
 -).func3(arg3).func4(
 -    arg4,
 -).func5(arg5)
-+zero(one,).two(three,).four(five,)
- 
 +func1(arg1).func2(arg2,).func3(arg3).func4(arg4,).func5(arg5)
+ 
 # Inner one-element tuple shouldn't explode
 func1(arg1).func2(arg1, (one_tuple,)).func3(arg3)
 
@ -78,14 +78,6 @@ assert xxxxxxxxx.xxxxxxxxx.xxxxxxxxx(
 
 
 # Example from https://github.com/psf/black/issues/3229
-@@ -41,7 +23,6 @@
-     long_module.long_class.long_func().another_func()
-     == long_module.long_class.long_func()["some_key"].another_func(arg1)
- )
-
- # Regression test for https://github.com/psf/black/issues/3414.
- assert xxxxxxxxx.xxxxxxxxx.xxxxxxxxx(
-     xxxxxxxxx
 ```

 ## Ruff Output
@ -94,6 +86,7 @@ assert xxxxxxxxx.xxxxxxxxx.xxxxxxxxx(
 zero(one,).two(three,).four(five,)

 func1(arg1).func2(arg2,).func3(arg3).func4(arg4,).func5(arg5)
+
 # Inner one-element tuple shouldn't explode
 func1(arg1).func2(arg1, (one_tuple,)).func3(arg3)

@ -116,6 +109,7 @@ assert (
    long_module.long_class.long_func().another_func()
    == long_module.long_class.long_func()["some_key"].another_func(arg1)
 )
+
 # Regression test for https://github.com/psf/black/issues/3414.
 assert xxxxxxxxx.xxxxxxxxx.xxxxxxxxx(
    xxxxxxxxx
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__tupleassign_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__tupleassign_py.snap
@ -20,7 +20,7 @@ this_will_be_wrapped_in_parens, = struct.unpack(b"12345678901234567890")
 ```diff
 --- Black
 +++ Ruff
-@@ -1,12 +1,6 @@
+@@ -1,12 +1,7 @@
 # This is a standalone comment.
 -(
 -    sdfjklsdfsjldkflkjsf,
@ -28,8 +28,8 @@ this_will_be_wrapped_in_parens, = struct.unpack(b"12345678901234567890")
 -    sdfsdjfklsdfjlksdljkf,
 -    sdsfsdfjskdflsfsdf,
 -) = (1, 2, 3)
-
 +sdfjklsdfsjldkflkjsf, sdfjsdfjlksdljkfsdlkf, sdfsdjfklsdfjlksdljkf, sdsfsdfjskdflsfsdf = 1, 2, 3
+ 
 # This is as well.
 -(this_will_be_wrapped_in_parens,) = struct.unpack(b"12345678901234567890")
 +this_will_be_wrapped_in_parens, = struct.unpack(b"12345678901234567890")
@ -42,6 +42,7 @@ this_will_be_wrapped_in_parens, = struct.unpack(b"12345678901234567890")
 ```py
 # This is a standalone comment.
 sdfjklsdfsjldkflkjsf, sdfjsdfjlksdljkfsdlkf, sdfsdjfklsdfjlksdljkf, sdsfsdfjskdflsfsdf = 1, 2, 3
+
 # This is as well.
 this_will_be_wrapped_in_parens, = struct.unpack(b"12345678901234567890")

--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsruff_testexpressionbinary_expression_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsruff_testexpressionbinary_expression_py.snap
@ -65,6 +65,8 @@ not (aaaaaaaaaaaaaa + {a for x in bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
    # leading right comment
    b
 )
+
+
 # Black breaks the right side first for the following expressions:
 (
    aaaaaaaaaaaaaa
@ -100,11 +102,14 @@ aaaaaaaaaaaaaa + [
    aaaaaaaaaaaaaa
    + {a for x in bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb}
 )
+
 # Wraps it in parentheses if it needs to break both left and right
 (
    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
    + [bbbbbbbbbbbbbbbbbbbbbb, ccccccccccccccccccccc, dddddddddddddddd, eee]
 )  # comment
+
+
 # But only for expressions that have a statement parent.
 (
    not (aaaaaaaaaaaaaa + {a for x in bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb})
@ -112,6 +117,8 @@ aaaaaaaaaaaaaa + [
 [
    a + [bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb] in c,
 ]
+
+
 # leading comment
 (
    # comment
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsruff_test__trivia_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsruff_test__trivia_py.snap
@ -0,0 +1,80 @@
+---
+source: crates/ruff_python_formatter/src/lib.rs
+expression: snapshot
+---
+## Input
+```py
+
+# Removes the line above
+
+a = 10  # Keeps the line above
+
+# Separated by one line from `a` and `b`
+
+b = 20
+# Adds two lines after `b`
+class Test:
+    def a(self):
+        pass
+        # trailing comment
+
+# two lines before, one line after
+
+c = 30
+
+while a == 10:
+    ...
+
+    # trailing comment with one line before
+
+# one line before this leading comment
+
+d = 40
+
+while b == 20:
+    ...
+    # no empty line before
+
+e = 50  # one empty line before
+```
+
+
+
+## Output
+```py
+# Removes the line above
+
+a = 10  # Keeps the line above
+
+# Separated by one line from `a` and `b`
+
+b = 20
+
+
+# Adds two lines after `b`
+class Test:
+    def a(self):
+        pass
+
+
+# two lines before, one line after
+
+c = 30
+
+while a == 10:
+    ...
+
+    # trailing comment with one line before
+
+# one line before this leading comment
+
+d = 40
+
+while b == 20:
+    ...
+    # no empty line before
+
+e = 50  # one empty line before
+```
+
+