Emit non-logical newlines for "empty" lines (#4444)

This commit is contained in:
Charlie Marsh 2023-05-16 10:58:56 -04:00 committed by GitHub
parent 8134ec25f0
commit f0465bf106
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 54 additions and 161 deletions

12
Cargo.lock generated
View file

@ -2001,7 +2001,7 @@ dependencies = [
[[package]]
name = "ruff_text_size"
version = "0.0.0"
source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
dependencies = [
"schemars",
"serde",
@ -2072,7 +2072,7 @@ dependencies = [
[[package]]
name = "rustpython-ast"
version = "0.2.0"
source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
dependencies = [
"is-macro",
"num-bigint",
@ -2083,7 +2083,7 @@ dependencies = [
[[package]]
name = "rustpython-format"
version = "0.2.0"
source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
dependencies = [
"bitflags 2.2.1",
"itertools",
@ -2095,7 +2095,7 @@ dependencies = [
[[package]]
name = "rustpython-literal"
version = "0.2.0"
source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
dependencies = [
"hexf-parse",
"lexical-parse-float",
@ -2106,7 +2106,7 @@ dependencies = [
[[package]]
name = "rustpython-parser"
version = "0.2.0"
source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
dependencies = [
"anyhow",
"itertools",
@ -2128,7 +2128,7 @@ dependencies = [
[[package]]
name = "rustpython-parser-core"
version = "0.2.0"
source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
dependencies = [
"ruff_text_size",
]

View file

@ -31,10 +31,10 @@ proc-macro2 = { version = "1.0.51" }
quote = { version = "1.0.23" }
regex = { version = "1.7.1" }
rustc-hash = { version = "1.1.0" }
ruff_text_size = { git = "https://github.com/RustPython/Parser.git", rev = "27e3873dc2a3b0d652cc106bc9ddcede4b01806d" }
rustpython-format = { git = "https://github.com/RustPython/Parser.git", rev = "27e3873dc2a3b0d652cc106bc9ddcede4b01806d" }
rustpython-literal = { git = "https://github.com/RustPython/Parser.git", rev = "27e3873dc2a3b0d652cc106bc9ddcede4b01806d" }
rustpython-parser = { git = "https://github.com/RustPython/Parser.git", rev = "27e3873dc2a3b0d652cc106bc9ddcede4b01806d", default-features = false, features = ["full-lexer", "all-nodes-with-ranges"] }
ruff_text_size = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee" }
rustpython-format = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee" }
rustpython-literal = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee" }
rustpython-parser = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee", default-features = false, features = ["full-lexer", "all-nodes-with-ranges"] }
schemars = { version = "0.8.12" }
serde = { version = "1.0.152", features = ["derive"] }
serde_json = { version = "1.0.93", features = ["preserve_order"] }

View file

@ -168,7 +168,8 @@ mod tests {
let contents = r#"
x = 1
y = 2
z = x + 1"#;
z = x + 1"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
@ -189,7 +190,8 @@ x = [
3,
]
y = 2
z = x + 1"#;
z = x + 1"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
@ -216,7 +218,8 @@ z = x + 1"#;
let contents = r#"
def f():
x = 1
f()"#;
f()"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
@ -231,7 +234,8 @@ def f():
"""Docstring goes here."""
# Comment goes here.
x = 1
f()"#;
f()"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)

View file

@ -3,7 +3,7 @@
use std::iter::FusedIterator;
use ruff_text_size::{TextRange, TextSize};
use ruff_text_size::TextSize;
use rustpython_parser::ast::{self, Constant, Expr, Ranged, Stmt, Suite};
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
@ -13,24 +13,19 @@ use ruff_python_ast::source_code::Locator;
use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor};
/// Extract doc lines (standalone comments) from a token sequence.
pub(crate) fn doc_lines_from_tokens<'a>(
lxr: &'a [LexResult],
locator: &'a Locator<'a>,
) -> DocLines<'a> {
DocLines::new(lxr, locator)
pub(crate) fn doc_lines_from_tokens(lxr: &[LexResult]) -> DocLines {
DocLines::new(lxr)
}
pub(crate) struct DocLines<'a> {
inner: std::iter::Flatten<core::slice::Iter<'a, LexResult>>,
locator: &'a Locator<'a>,
prev: TextSize,
}
impl<'a> DocLines<'a> {
fn new(lxr: &'a [LexResult], locator: &'a Locator) -> Self {
fn new(lxr: &'a [LexResult]) -> Self {
Self {
inner: lxr.iter().flatten(),
locator,
prev: TextSize::default(),
}
}
@ -46,15 +41,11 @@ impl Iterator for DocLines<'_> {
match tok {
Tok::Comment(..) => {
if at_start_of_line
|| self
.locator
.contains_line_break(TextRange::new(self.prev, range.start()))
{
if at_start_of_line {
break Some(range.start());
}
}
Tok::Newline => {
Tok::Newline | Tok::NonLogicalNewline => {
at_start_of_line = true;
}
Tok::Indent | Tok::Dedent => {

View file

@ -88,7 +88,7 @@ pub fn check_path(
let use_doc_lines = settings.rules.enabled(Rule::DocLineTooLong);
let mut doc_lines = vec![];
if use_doc_lines {
doc_lines.extend(doc_lines_from_tokens(&tokens, locator));
doc_lines.extend(doc_lines_from_tokens(&tokens));
}
// Run the token-based rules.

View file

@ -309,7 +309,8 @@ pub(crate) fn todos(tokens: &[LexResult], settings: &Settings) -> Vec<Diagnostic
// TD003
let mut has_issue_link = false;
while let Some((token, token_range)) = iter.peek() {
if let Tok::Comment(comment) = token {
match token {
Tok::Comment(comment) => {
if detect_tag(comment, token_range.start()).is_some() {
break;
}
@ -317,10 +318,15 @@ pub(crate) fn todos(tokens: &[LexResult], settings: &Settings) -> Vec<Diagnostic
has_issue_link = true;
break;
}
} else {
}
Tok::Newline | Tok::NonLogicalNewline => {
continue;
}
_ => {
break;
}
}
}
if !has_issue_link {
diagnostics.push(Diagnostic::new(MissingTodoLink, tag.range));
}

View file

@ -89,8 +89,7 @@ impl<'a> LogicalLines<'a> {
let mut builder = LogicalLinesBuilder::with_capacity(tokens.len());
let mut parens: u32 = 0;
let mut iter = tokens.iter().flatten().peekable();
while let Some((token, range)) = iter.next() {
for (token, range) in tokens.iter().flatten() {
let token_kind = TokenKind::from_token(token);
builder.push_token(token_kind, *range);
@ -101,24 +100,6 @@ impl<'a> LogicalLines<'a> {
TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => {
parens -= 1;
}
TokenKind::Comment if parens == 0 => {
// If a comment is followed by a newline, ignore it, and we'll build the line
// when we process the newline. Otherwise, we'll end up creating one logical
// line here, and then another, empty logical line when we process the newline.
//
// The lexer will always emit a newline after a comment _unless_ the comment
// appears at the start of a logical line.
if let Some((token, ..)) = iter.peek() {
let token_kind = TokenKind::from_token(token);
if matches!(
token_kind,
TokenKind::Newline | TokenKind::NonLogicalNewline
) {
continue;
}
}
builder.finish_line();
}
TokenKind::Newline | TokenKind::NonLogicalNewline if parens == 0 => {
builder.finish_line();
}

View file

@ -35,17 +35,17 @@ impl Indexer {
// Get the trivia between the previous and the current token and detect any newlines.
// This is necessary because `RustPython` doesn't emit `[Tok::Newline]` tokens
// between any two tokens that form a continuation nor multiple newlines in a row.
// That's why we have to extract the newlines "manually".
// between any two tokens that form a continuation. That's why we have to extract the
// newlines "manually".
for (index, text) in trivia.match_indices(['\n', '\r']) {
if text == "\r" && trivia.as_bytes().get(index + 1) == Some(&b'\n') {
continue;
}
// Newlines after a comment or new-line never form a continuation.
// Newlines after a newline never form a continuation.
if !matches!(
prev_token,
Some(Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)) | None
Some(Tok::Newline | Tok::NonLogicalNewline) | None
) {
continuation_lines.push(line_start);
}

View file

@ -28,7 +28,7 @@ pub fn fmt(contents: &str) -> Result<Formatted<ASTFormatContext>> {
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
// Extract trivia.
let trivia = trivia::extract_trivia_tokens(&tokens, contents);
let trivia = trivia::extract_trivia_tokens(&tokens);
// Parse the AST.
let python_ast = ruff_rustpython::parse_program_tokens(tokens, "<filename>")?;

View file

@ -1,62 +0,0 @@
---
source: crates/ruff_python_formatter/src/lib.rs
expression: snapshot
input_file: crates/ruff_python_formatter/resources/test/fixtures/black/simple_cases/comment_after_escaped_newline.py
---
## Input
```py
def bob(): \
# pylint: disable=W9016
pass
def bobtwo(): \
\
# some comment here
pass
```
## Black Differences
```diff
--- Black
+++ Ruff
@@ -1,6 +1,8 @@
-def bob(): # pylint: disable=W9016
+def bob():
+ # pylint: disable=W9016
pass
-def bobtwo(): # some comment here
+def bobtwo():
+ # some comment here
pass
```
## Ruff Output
```py
def bob():
# pylint: disable=W9016
pass
def bobtwo():
# some comment here
pass
```
## Black Output
```py
def bob(): # pylint: disable=W9016
pass
def bobtwo(): # some comment here
pass
```

View file

@ -2,7 +2,6 @@ use ruff_text_size::{TextRange, TextSize};
use rustc_hash::FxHashMap;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use std::ops::Add;
use crate::cst::{
Alias, Arg, Body, BoolOp, CmpOp, Excepthandler, ExcepthandlerKind, Expr, ExprKind, Keyword,
@ -190,49 +189,25 @@ impl Trivia {
}
}
pub fn extract_trivia_tokens(lxr: &[LexResult], text: &str) -> Vec<TriviaToken> {
pub fn extract_trivia_tokens(lxr: &[LexResult]) -> Vec<TriviaToken> {
let mut tokens = vec![];
let mut prev_end = TextSize::default();
let mut prev_tok: Option<(&Tok, TextRange)> = None;
let mut prev_semantic_tok: Option<(&Tok, TextRange)> = None;
let mut parens = vec![];
for (tok, range) in lxr.iter().flatten() {
let after_new_line = matches!(prev_tok, Some((Tok::Newline | Tok::NonLogicalNewline, _)));
// Add empty lines.
let trivia = &text[TextRange::new(prev_end, range.start())];
let bytes = trivia.as_bytes();
let mut bytes_iter = bytes.iter().enumerate();
let mut after_new_line =
matches!(prev_tok, Some((Tok::Newline | Tok::NonLogicalNewline, _)));
while let Some((index, byte)) = bytes_iter.next() {
let len = match byte {
b'\r' if bytes.get(index + 1) == Some(&b'\n') => {
bytes_iter.next();
TextSize::from(2)
}
b'\n' | b'\r' => TextSize::from(1),
_ => {
// Must be whitespace or the parser would generate a token
continue;
}
};
if after_new_line {
let new_line_start = prev_end.add(TextSize::try_from(index).unwrap());
if after_new_line && matches!(tok, Tok::NonLogicalNewline) {
tokens.push(TriviaToken {
range: TextRange::new(new_line_start, new_line_start.add(len)),
range: *range,
kind: TriviaTokenKind::EmptyLine,
});
} else {
after_new_line = true;
}
}
// Add comments.
if let Tok::Comment(_) = tok {
if matches!(tok, Tok::Comment(..)) {
tokens.push(TriviaToken {
range: *range,
// Used to use prev_non-newline_tok
@ -293,8 +268,6 @@ pub fn extract_trivia_tokens(lxr: &[LexResult], text: &str) -> Vec<TriviaToken>
) {
prev_semantic_tok = Some((tok, *range));
}
prev_end = range.end();
}
tokens
}