Emit non-logical newlines for "empty" lines (#4444)

2025-09-12 21:36:47 +00:00 · 2023-05-16 10:58:56 -04:00 · 2023-05-16 10:58:56 -04:00 · f0465bf106
commit f0465bf106
parent 8134ec25f0
11 changed files with 54 additions and 161 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2001,7 +2001,7 @@ dependencies = [
 [[package]]
 name = "ruff_text_size"
 version = "0.0.0"
-source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
+source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
 dependencies = [
 "schemars",
 "serde",
@@ -2072,7 +2072,7 @@ dependencies = [
 [[package]]
 name = "rustpython-ast"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
+source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
 dependencies = [
 "is-macro",
 "num-bigint",
@@ -2083,7 +2083,7 @@ dependencies = [
 [[package]]
 name = "rustpython-format"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
+source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
 dependencies = [
 "bitflags 2.2.1",
 "itertools",
@@ -2095,7 +2095,7 @@ dependencies = [
 [[package]]
 name = "rustpython-literal"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
+source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
 dependencies = [
 "hexf-parse",
 "lexical-parse-float",
@@ -2106,7 +2106,7 @@ dependencies = [
 [[package]]
 name = "rustpython-parser"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
+source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
 dependencies = [
 "anyhow",
 "itertools",
@@ -2128,7 +2128,7 @@ dependencies = [
 [[package]]
 name = "rustpython-parser-core"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
+source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
 dependencies = [
 "ruff_text_size",
 ]
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,10 +31,10 @@ proc-macro2 = { version = "1.0.51" }
 quote = { version = "1.0.23" }
 regex = { version = "1.7.1" }
 rustc-hash = { version = "1.1.0" }
-ruff_text_size = { git = "https://github.com/RustPython/Parser.git", rev = "27e3873dc2a3b0d652cc106bc9ddcede4b01806d" }
-rustpython-format = { git = "https://github.com/RustPython/Parser.git", rev = "27e3873dc2a3b0d652cc106bc9ddcede4b01806d" }
-rustpython-literal = { git = "https://github.com/RustPython/Parser.git", rev = "27e3873dc2a3b0d652cc106bc9ddcede4b01806d" }
-rustpython-parser = { git = "https://github.com/RustPython/Parser.git", rev = "27e3873dc2a3b0d652cc106bc9ddcede4b01806d", default-features = false, features = ["full-lexer", "all-nodes-with-ranges"] }
+ruff_text_size = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee" }
+rustpython-format = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee" }
+rustpython-literal = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee" }
+rustpython-parser = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee", default-features = false, features = ["full-lexer", "all-nodes-with-ranges"] }
 schemars = { version = "0.8.12" }
 serde = { version = "1.0.152", features = ["derive"] }
 serde_json = { version = "1.0.93", features = ["preserve_order"] }
--- a/crates/ruff/src/checkers/logical_lines.rs
+++ b/crates/ruff/src/checkers/logical_lines.rs
@@ -168,7 +168,8 @@ mod tests {
        let contents = r#"
 x = 1
 y = 2
-z = x + 1"#;
+z = x + 1"#
+            .trim();
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let locator = Locator::new(contents);
        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
@@ -189,7 +190,8 @@ x = [
  3,
 ]
 y = 2
-z = x + 1"#;
+z = x + 1"#
+            .trim();
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let locator = Locator::new(contents);
        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
@@ -216,7 +218,8 @@ z = x + 1"#;
        let contents = r#"
 def f():
  x = 1
-f()"#;
+f()"#
+            .trim();
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let locator = Locator::new(contents);
        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
@@ -231,7 +234,8 @@ def f():
  """Docstring goes here."""
  # Comment goes here.
  x = 1
-f()"#;
+f()"#
+            .trim();
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let locator = Locator::new(contents);
        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
--- a/crates/ruff/src/doc_lines.rs
+++ b/crates/ruff/src/doc_lines.rs
@@ -3,7 +3,7 @@

 use std::iter::FusedIterator;

-use ruff_text_size::{TextRange, TextSize};
+use ruff_text_size::TextSize;
 use rustpython_parser::ast::{self, Constant, Expr, Ranged, Stmt, Suite};
 use rustpython_parser::lexer::LexResult;
 use rustpython_parser::Tok;
@@ -13,24 +13,19 @@ use ruff_python_ast::source_code::Locator;
 use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor};

 /// Extract doc lines (standalone comments) from a token sequence.
-pub(crate) fn doc_lines_from_tokens<'a>(
-    lxr: &'a [LexResult],
-    locator: &'a Locator<'a>,
-) -> DocLines<'a> {
-    DocLines::new(lxr, locator)
+pub(crate) fn doc_lines_from_tokens(lxr: &[LexResult]) -> DocLines {
+    DocLines::new(lxr)
 }

 pub(crate) struct DocLines<'a> {
    inner: std::iter::Flatten<core::slice::Iter<'a, LexResult>>,
-    locator: &'a Locator<'a>,
    prev: TextSize,
 }

 impl<'a> DocLines<'a> {
-    fn new(lxr: &'a [LexResult], locator: &'a Locator) -> Self {
+    fn new(lxr: &'a [LexResult]) -> Self {
        Self {
            inner: lxr.iter().flatten(),
-            locator,
            prev: TextSize::default(),
        }
    }
@@ -46,15 +41,11 @@ impl Iterator for DocLines<'_> {

            match tok {
                Tok::Comment(..) => {
-                    if at_start_of_line
-                        || self
-                            .locator
-                            .contains_line_break(TextRange::new(self.prev, range.start()))
-                    {
+                    if at_start_of_line {
                        break Some(range.start());
                    }
                }
-                Tok::Newline => {
+                Tok::Newline | Tok::NonLogicalNewline => {
                    at_start_of_line = true;
                }
                Tok::Indent | Tok::Dedent => {
--- a/crates/ruff/src/linter.rs
+++ b/crates/ruff/src/linter.rs
@@ -88,7 +88,7 @@ pub fn check_path(
    let use_doc_lines = settings.rules.enabled(Rule::DocLineTooLong);
    let mut doc_lines = vec![];
    if use_doc_lines {
-        doc_lines.extend(doc_lines_from_tokens(&tokens, locator));
+        doc_lines.extend(doc_lines_from_tokens(&tokens));
    }

    // Run the token-based rules.
--- a/crates/ruff/src/rules/flake8_todos/rules.rs
+++ b/crates/ruff/src/rules/flake8_todos/rules.rs
@@ -309,7 +309,8 @@ pub(crate) fn todos(tokens: &[LexResult], settings: &Settings) -> Vec<Diagnostic
        // TD003
        let mut has_issue_link = false;
        while let Some((token, token_range)) = iter.peek() {
-            if let Tok::Comment(comment) = token {
+            match token {
+                Tok::Comment(comment) => {
                    if detect_tag(comment, token_range.start()).is_some() {
                        break;
                    }
@@ -317,10 +318,15 @@ pub(crate) fn todos(tokens: &[LexResult], settings: &Settings) -> Vec<Diagnostic
                        has_issue_link = true;
                        break;
                    }
-            } else {
+                }
+                Tok::Newline | Tok::NonLogicalNewline => {
+                    continue;
+                }
+                _ => {
                    break;
                }
            }
+        }
        if !has_issue_link {
            diagnostics.push(Diagnostic::new(MissingTodoLink, tag.range));
        }
--- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs
@@ -89,8 +89,7 @@ impl<'a> LogicalLines<'a> {
        let mut builder = LogicalLinesBuilder::with_capacity(tokens.len());
        let mut parens: u32 = 0;

-        let mut iter = tokens.iter().flatten().peekable();
-        while let Some((token, range)) = iter.next() {
+        for (token, range) in tokens.iter().flatten() {
            let token_kind = TokenKind::from_token(token);
            builder.push_token(token_kind, *range);

@@ -101,24 +100,6 @@ impl<'a> LogicalLines<'a> {
                TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => {
                    parens -= 1;
                }
-                TokenKind::Comment if parens == 0 => {
-                    // If a comment is followed by a newline, ignore it, and we'll build the line
-                    // when we process the newline. Otherwise, we'll end up creating one logical
-                    // line here, and then another, empty logical line when we process the newline.
-                    //
-                    // The lexer will always emit a newline after a comment _unless_ the comment
-                    // appears at the start of a logical line.
-                    if let Some((token, ..)) = iter.peek() {
-                        let token_kind = TokenKind::from_token(token);
-                        if matches!(
-                            token_kind,
-                            TokenKind::Newline | TokenKind::NonLogicalNewline
-                        ) {
-                            continue;
-                        }
-                    }
-                    builder.finish_line();
-                }
                TokenKind::Newline | TokenKind::NonLogicalNewline if parens == 0 => {
                    builder.finish_line();
                }
--- a/crates/ruff_python_ast/src/source_code/indexer.rs
+++ b/crates/ruff_python_ast/src/source_code/indexer.rs
@@ -35,17 +35,17 @@ impl Indexer {

            // Get the trivia between the previous and the current token and detect any newlines.
            // This is necessary because `RustPython` doesn't emit `[Tok::Newline]` tokens
-            // between any two tokens that form a continuation nor multiple newlines in a row.
-            // That's why we have to extract the newlines "manually".
+            // between any two tokens that form a continuation. That's why we have to extract the
+            // newlines "manually".
            for (index, text) in trivia.match_indices(['\n', '\r']) {
                if text == "\r" && trivia.as_bytes().get(index + 1) == Some(&b'\n') {
                    continue;
                }

-                // Newlines after a comment or new-line never form a continuation.
+                // Newlines after a newline never form a continuation.
                if !matches!(
                    prev_token,
-                    Some(Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)) | None
+                    Some(Tok::Newline | Tok::NonLogicalNewline) | None
                ) {
                    continuation_lines.push(line_start);
                }
--- a/crates/ruff_python_formatter/src/lib.rs
+++ b/crates/ruff_python_formatter/src/lib.rs
@@ -28,7 +28,7 @@ pub fn fmt(contents: &str) -> Result<Formatted<ASTFormatContext>> {
    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);

    // Extract trivia.
-    let trivia = trivia::extract_trivia_tokens(&tokens, contents);
+    let trivia = trivia::extract_trivia_tokens(&tokens);

    // Parse the AST.
    let python_ast = ruff_rustpython::parse_program_tokens(tokens, "<filename>")?;
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__comment_after_escaped_newline_py.snap
+++ b/crates/ruff_python_formatter/src/snapshots/ruff_python_formattertestsblack_test__comment_after_escaped_newline_py.snap
@@ -1,62 +0,0 @@
----
-source: crates/ruff_python_formatter/src/lib.rs
-expression: snapshot
-input_file: crates/ruff_python_formatter/resources/test/fixtures/black/simple_cases/comment_after_escaped_newline.py
----
-## Input
-
-```py
-def bob(): \
-         # pylint: disable=W9016
-    pass
-
-
-def bobtwo(): \
-    \
-  # some comment here
-    pass
-```
-
-## Black Differences
-
-```diff
---- Black
-+++ Ruff
-@@ -1,6 +1,8 @@
-def bob():  # pylint: disable=W9016
-+def bob():
-+    # pylint: disable=W9016
-     pass
- 
- 
-def bobtwo():  # some comment here
-+def bobtwo():
-+    # some comment here
-     pass
-```
-
-## Ruff Output
-
-```py
-def bob():
-    # pylint: disable=W9016
-    pass
-
-
-def bobtwo():
-    # some comment here
-    pass
-```
-
-## Black Output
-
-```py
-def bob():  # pylint: disable=W9016
-    pass
-
-
-def bobtwo():  # some comment here
-    pass
-```
-
-
--- a/crates/ruff_python_formatter/src/trivia.rs
+++ b/crates/ruff_python_formatter/src/trivia.rs
@@ -2,7 +2,6 @@ use ruff_text_size::{TextRange, TextSize};
 use rustc_hash::FxHashMap;
 use rustpython_parser::lexer::LexResult;
 use rustpython_parser::Tok;
-use std::ops::Add;

 use crate::cst::{
    Alias, Arg, Body, BoolOp, CmpOp, Excepthandler, ExcepthandlerKind, Expr, ExprKind, Keyword,
@@ -190,49 +189,25 @@ impl Trivia {
    }
 }

-pub fn extract_trivia_tokens(lxr: &[LexResult], text: &str) -> Vec<TriviaToken> {
+pub fn extract_trivia_tokens(lxr: &[LexResult]) -> Vec<TriviaToken> {
    let mut tokens = vec![];
-    let mut prev_end = TextSize::default();
    let mut prev_tok: Option<(&Tok, TextRange)> = None;
    let mut prev_semantic_tok: Option<(&Tok, TextRange)> = None;
    let mut parens = vec![];

    for (tok, range) in lxr.iter().flatten() {
+        let after_new_line = matches!(prev_tok, Some((Tok::Newline | Tok::NonLogicalNewline, _)));
+
        // Add empty lines.
-        let trivia = &text[TextRange::new(prev_end, range.start())];
-        let bytes = trivia.as_bytes();
-
-        let mut bytes_iter = bytes.iter().enumerate();
-
-        let mut after_new_line =
-            matches!(prev_tok, Some((Tok::Newline | Tok::NonLogicalNewline, _)));
-
-        while let Some((index, byte)) = bytes_iter.next() {
-            let len = match byte {
-                b'\r' if bytes.get(index + 1) == Some(&b'\n') => {
-                    bytes_iter.next();
-                    TextSize::from(2)
-                }
-                b'\n' | b'\r' => TextSize::from(1),
-                _ => {
-                    // Must be whitespace or the parser would generate a token
-                    continue;
-                }
-            };
-
-            if after_new_line {
-                let new_line_start = prev_end.add(TextSize::try_from(index).unwrap());
+        if after_new_line && matches!(tok, Tok::NonLogicalNewline) {
            tokens.push(TriviaToken {
-                    range: TextRange::new(new_line_start, new_line_start.add(len)),
+                range: *range,
                kind: TriviaTokenKind::EmptyLine,
            });
-            } else {
-                after_new_line = true;
-            }
        }

        // Add comments.
-        if let Tok::Comment(_) = tok {
+        if matches!(tok, Tok::Comment(..)) {
            tokens.push(TriviaToken {
                range: *range,
                // Used to use prev_non-newline_tok
@@ -293,8 +268,6 @@ pub fn extract_trivia_tokens(lxr: &[LexResult], text: &str) -> Vec<TriviaToken>
        ) {
            prev_semantic_tok = Some((tok, *range));
        }
-
-        prev_end = range.end();
    }
    tokens
 }