Refactor StmtIf: Formatter and Linter (#5459)

## Summary

Previously, `StmtIf` was defined recursively as
```rust
pub struct StmtIf {
    pub range: TextRange,
    pub test: Box<Expr>,
    pub body: Vec<Stmt>,
    pub orelse: Vec<Stmt>,
}
```
Every `elif` was represented as an `orelse` with a single `StmtIf`. This
means that this representation couldn't differentiate between
```python
if cond1:
    x = 1
else:
    if cond2:
        x = 2
```
and 
```python
if cond1:
    x = 1
elif cond2:
    x = 2
```
It also makes many checks harder than they need to be because we have to
recurse just to iterate over an entire if-elif-else and because we're
lacking nodes and ranges on the `elif` and `else` branches.

We change the representation to a flat

```rust
pub struct StmtIf {
    pub range: TextRange,
    pub test: Box<Expr>,
    pub body: Vec<Stmt>,
    pub elif_else_clauses: Vec<ElifElseClause>,
}

pub struct ElifElseClause {
    pub range: TextRange,
    pub test: Option<Expr>,
    pub body: Vec<Stmt>,
}
```
where `test: Some(_)` represents an `elif` and `test: None` an `else`.

This representation is a different tradeoff, e.g. we need to allocate the
`Vec<ElifElseClause>`, the `elif`s are now different than the `if`s
(which matters in rules where we want to check both `if`s and `elif`s) and
the type system doesn't guarantee that the `test: None` else is actually
last. We're also now a bit more inconsistent since all other `else`,
those from `for`, `while` and `try`, still don't have nodes. With the
new representation some things became easier, e.g. finding the `elif`
token (we can use the start of the `ElifElseClause`) and formatting
comments for if-elif-else (no more dangling comments splitting, we only
have to insert the dangling comment after the colon manually and set
`leading_alternate_branch_comments`, everything else is taken care of by
having nodes for each branch and the usual placement.rs fixups).

## Merge Plan

This PR requires coordination between the parser repo and the main ruff
repo. I've split the ruff part into two stacked PRs which have to be
merged together (only the second one fixes all tests), the first for the
formatter to be reviewed by @michareiser and the second for the linter
to be reviewed by @charliermarsh.

* MH: Review and merge
https://github.com/astral-sh/RustPython-Parser/pull/20
* MH: Review and merge or move later in stack
https://github.com/astral-sh/RustPython-Parser/pull/21
* MH: Review and approve
https://github.com/astral-sh/RustPython-Parser/pull/22
* MH: Review and approve formatter PR
https://github.com/astral-sh/ruff/pull/5459
* CM: Review and approve linter PR
https://github.com/astral-sh/ruff/pull/5460
* Merge linter PR into formatter PR, fix ecosystem checks (ecosystem
checks can't run on the formatter PR and won't run on the linter PR, so
we need to merge them first)
 * Merge https://github.com/astral-sh/RustPython-Parser/pull/22
 * Create tag in the parser, update linter+formatter PR
 * Merge linter+formatter PR https://github.com/astral-sh/ruff/pull/5459

---------

Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
konsti 2023-07-18 13:40:15 +02:00 committed by GitHub
parent 167b9356fa
commit 730e6b2b4c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
82 changed files with 2333 additions and 2009 deletions

View file

@ -437,9 +437,19 @@ where
Stmt::If(ast::StmtIf {
test,
body,
orelse,
elif_else_clauses,
range: _range,
}) => any_over_expr(test, func) || any_over_body(body, func) || any_over_body(orelse, func),
}) => {
any_over_expr(test, func)
|| any_over_body(body, func)
|| elif_else_clauses.iter().any(|clause| {
clause
.test
.as_ref()
.map_or(false, |test| any_over_expr(test, func))
|| any_over_body(&clause.body, func)
})
}
Stmt::With(ast::StmtWith { items, body, .. })
| Stmt::AsyncWith(ast::StmtAsyncWith { items, body, .. }) => {
items.iter().any(|with_item| {
@ -529,6 +539,7 @@ where
range: _range,
}) => any_over_expr(value, func),
Stmt::Pass(_) | Stmt::Break(_) | Stmt::Continue(_) => false,
Stmt::TypeAlias(_) => todo!(),
}
}
@ -944,9 +955,15 @@ where
| Stmt::AsyncFunctionDef(_)
| Stmt::Try(_)
| Stmt::TryStar(_) => {}
Stmt::If(ast::StmtIf { body, orelse, .. }) => {
Stmt::If(ast::StmtIf {
body,
elif_else_clauses,
..
}) => {
walk_body(self, body);
walk_body(self, orelse);
for clause in elif_else_clauses {
self.visit_elif_else_clause(clause);
}
}
Stmt::While(ast::StmtWhile { body, .. })
| Stmt::With(ast::StmtWith { body, .. })
@ -1063,25 +1080,6 @@ pub fn first_colon_range(range: TextRange, locator: &Locator) -> Option<TextRang
range
}
/// Return the `Range` of the first `Elif` or `Else` token in an `If` statement.
pub fn elif_else_range(stmt: &ast::StmtIf, locator: &Locator) -> Option<TextRange> {
let ast::StmtIf { body, orelse, .. } = stmt;
let start = body.last().expect("Expected body to be non-empty").end();
let end = match &orelse[..] {
[Stmt::If(ast::StmtIf { test, .. })] => test.start(),
[stmt, ..] => stmt.start(),
_ => return None,
};
let contents = &locator.contents()[TextRange::new(start, end)];
lexer::lex_starts_at(contents, Mode::Module, start)
.flatten()
.find(|(kind, _)| matches!(kind, Tok::Elif | Tok::Else))
.map(|(_, range)| range)
}
/// Given an offset at the end of a line (including newlines), return the offset of the
/// continuation at the end of that line.
fn find_continuation(offset: TextSize, locator: &Locator, indexer: &Indexer) -> Option<TextSize> {
@ -1568,13 +1566,13 @@ mod tests {
use anyhow::Result;
use ruff_text_size::{TextLen, TextRange, TextSize};
use rustpython_ast::{CmpOp, Expr, Ranged, Stmt};
use rustpython_ast::{CmpOp, Expr, Ranged};
use rustpython_parser::ast::Suite;
use rustpython_parser::Parse;
use crate::helpers::{
elif_else_range, first_colon_range, has_trailing_content, locate_cmp_ops,
resolve_imported_module_path, LocatedCmpOp,
first_colon_range, has_trailing_content, locate_cmp_ops, resolve_imported_module_path,
LocatedCmpOp,
};
use crate::source_code::Locator;
@ -1667,35 +1665,6 @@ y = 2
assert_eq!(range, TextRange::new(TextSize::from(6), TextSize::from(7)));
}
#[test]
fn extract_elif_else_range() -> Result<()> {
let contents = "if a:
...
elif b:
...
";
let stmt = Stmt::parse(contents, "<filename>")?;
let stmt = Stmt::as_if_stmt(&stmt).unwrap();
let locator = Locator::new(contents);
let range = elif_else_range(stmt, &locator).unwrap();
assert_eq!(range.start(), TextSize::from(14));
assert_eq!(range.end(), TextSize::from(18));
let contents = "if a:
...
else:
...
";
let stmt = Stmt::parse(contents, "<filename>")?;
let stmt = Stmt::as_if_stmt(&stmt).unwrap();
let locator = Locator::new(contents);
let range = elif_else_range(stmt, &locator).unwrap();
assert_eq!(range.start(), TextSize::from(14));
assert_eq!(range.end(), TextSize::from(18));
Ok(())
}
#[test]
fn extract_cmp_op_location() -> Result<()> {
let contents = "x == 1";