Format Slice Expressions (#5047)

This formats slice expressions and subscript expressions.

Spaces around the colons follows the same rules as black
(https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#slices):
```python
e00 = "e"[:]
e01 = "e"[:1]
e02 = "e"[: a()]
e10 = "e"[1:]
e11 = "e"[1:1]
e12 = "e"[1 : a()]
e20 = "e"[a() :]
e21 = "e"[a() : 1]
e22 = "e"[a() : a()]
e200 = "e"[a() : :]
e201 = "e"[a() :: 1]
e202 = "e"[a() :: a()]
e210 = "e"[a() : 1 :]
```

Comment placement is different due to our very different infrastructure.
If we have explicit bounds (e.g. `x[1:2]`) all comments get assigned as
leading or trailing to the bound expression. If a bound is missing
`[:]`, comments get marked as dangling and placed in the same section as
they were originally in:
```python
x = "x"[ # a
      # b
    :  # c
      # d
]
```
to
```python
x = "x"[
    # a
    # b
    :
    # c
    # d
]
```
Except for the potential trailing end-of-line comments, all comments get
formatted on their own line. This can be improved by keeping end-of-line
comments after the opening bracket or after a colon as such but the
changes were already complex enough.

I added tests for comment placement and spaces.
This commit is contained in:
konstin 2023-06-21 17:09:39 +02:00 committed by GitHub
parent 4634560c80
commit 6155fd647d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 1065 additions and 430 deletions

View file

@ -1,26 +1,264 @@
use crate::comments::{dangling_comments, Comments, SourceComment};
use crate::expression::parentheses::{
default_expression_needs_parentheses, NeedsParentheses, Parentheses, Parenthesize,
};
use crate::{not_yet_implemented_custom_text, FormatNodeRule, PyFormatter};
use crate::comments::Comments;
use ruff_formatter::{write, Buffer, FormatResult};
use crate::trivia::Token;
use crate::trivia::{first_non_trivia_token, TokenKind};
use crate::{AsFormat, FormatNodeRule, PyFormatter};
use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
use ruff_python_ast::node::AstNode;
use ruff_python_ast::prelude::{Expr, Ranged};
use ruff_text_size::TextRange;
use rustpython_parser::ast::ExprSlice;
#[derive(Default)]
pub struct FormatExprSlice;
impl FormatNodeRule<ExprSlice> for FormatExprSlice {
fn fmt_fields(&self, _item: &ExprSlice, f: &mut PyFormatter) -> FormatResult<()> {
write!(
f,
[not_yet_implemented_custom_text(
"NOT_IMPLEMENTED_start:NOT_IMPLEMENTED_end"
)]
)
/// This implementation deviates from black in that comments are attached to the section of the
/// slice they originate in
fn fmt_fields(&self, item: &ExprSlice, f: &mut PyFormatter) -> FormatResult<()> {
// `[lower:upper:step]`
let ExprSlice {
range,
lower,
upper,
step,
} = item;
let (first_colon, second_colon) =
find_colons(f.context().contents(), *range, lower, upper)?;
// Handle comment placement
// In placements.rs, we marked comment for None nodes a dangling and associated all others
// as leading or dangling wrt to a node. That means we either format a node and only have
// to handle newlines and spacing, or the node is None and we insert the corresponding
// slice of dangling comments
let comments = f.context().comments().clone();
let slice_dangling_comments = comments.dangling_comments(item.as_any_node_ref());
// Put the dangling comments (where the nodes are missing) into buckets
let first_colon_partition_index = slice_dangling_comments
.partition_point(|x| x.slice().start() < first_colon.range.start());
let (dangling_lower_comments, dangling_upper_step_comments) =
slice_dangling_comments.split_at(first_colon_partition_index);
let (dangling_upper_comments, dangling_step_comments) =
if let Some(second_colon) = &second_colon {
let second_colon_partition_index = dangling_upper_step_comments
.partition_point(|x| x.slice().start() < second_colon.range.start());
dangling_upper_step_comments.split_at(second_colon_partition_index)
} else {
// Without a second colon they remaining dangling comments belong between the first
// colon and the closing parentheses
(dangling_upper_step_comments, [].as_slice())
};
// Ensure there a no dangling comments for a node if the node is present
debug_assert!(lower.is_none() || dangling_lower_comments.is_empty());
debug_assert!(upper.is_none() || dangling_upper_comments.is_empty());
debug_assert!(step.is_none() || dangling_step_comments.is_empty());
// Handle spacing around the colon(s)
// https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#slices
let lower_simple = lower.as_ref().map_or(true, |expr| is_simple_expr(expr));
let upper_simple = upper.as_ref().map_or(true, |expr| is_simple_expr(expr));
let step_simple = step.as_ref().map_or(true, |expr| is_simple_expr(expr));
let all_simple = lower_simple && upper_simple && step_simple;
// lower
if let Some(lower) = lower {
write!(f, [lower.format(), line_suffix_boundary()])?;
} else {
dangling_comments(dangling_lower_comments).fmt(f)?;
}
// First colon
// The spacing after the colon depends on both the lhs and the rhs:
// ```
// e00 = x[:]
// e01 = x[:1]
// e02 = x[: a()]
// e10 = x[1:]
// e11 = x[1:1]
// e12 = x[1 : a()]
// e20 = x[a() :]
// e21 = x[a() : 1]
// e22 = x[a() : a()]
// e200 = "e"[a() : :]
// e201 = "e"[a() :: 1]
// e202 = "e"[a() :: a()]
// ```
if !all_simple {
space().fmt(f)?;
}
text(":").fmt(f)?;
// No upper node, no need for a space, e.g. `x[a() :]`
if !all_simple && upper.is_some() {
space().fmt(f)?;
}
// Upper
if let Some(upper) = upper {
let upper_leading_comments = comments.leading_comments(upper.as_ref());
leading_comments_spacing(f, upper_leading_comments)?;
write!(f, [upper.format(), line_suffix_boundary()])?;
} else {
if let Some(first) = dangling_upper_comments.first() {
// Here the spacing for end-of-line comments works but own line comments need
// explicit spacing
if first.line_position().is_own_line() {
hard_line_break().fmt(f)?;
}
}
dangling_comments(dangling_upper_comments).fmt(f)?;
}
// (optionally) step
if second_colon.is_some() {
// Same spacing rules as for the first colon, except for the strange case when the
// second colon exists, but neither upper nor step
// ```
// e200 = "e"[a() : :]
// e201 = "e"[a() :: 1]
// e202 = "e"[a() :: a()]
// ```
if !all_simple && (upper.is_some() || step.is_none()) {
space().fmt(f)?;
}
text(":").fmt(f)?;
// No step node, no need for a space
if !all_simple && step.is_some() {
space().fmt(f)?;
}
if let Some(step) = step {
let step_leading_comments = comments.leading_comments(step.as_ref());
leading_comments_spacing(f, step_leading_comments)?;
step.format().fmt(f)?;
} else {
if !dangling_step_comments.is_empty() {
// Put the colon and comments on their own lines
write!(
f,
[hard_line_break(), dangling_comments(dangling_step_comments)]
)?;
}
}
} else {
debug_assert!(step.is_none(), "step can't exist without a second colon");
}
Ok(())
}
}
/// We're in a slice, so we know there's a first colon, but with have to look into the source
/// to find out whether there is a second one, too, e.g. `[1:2]` and `[1:10:2]`.
///
/// Returns the first and optionally the second colon.
pub(crate) fn find_colons(
contents: &str,
range: TextRange,
lower: &Option<Box<Expr>>,
upper: &Option<Box<Expr>>,
) -> FormatResult<(Token, Option<Token>)> {
let after_lower = lower
.as_ref()
.map_or(range.start(), |lower| lower.range().end());
let first_colon =
first_non_trivia_token(after_lower, contents).ok_or(FormatError::SyntaxError)?;
if first_colon.kind != TokenKind::Colon {
return Err(FormatError::SyntaxError);
}
let after_upper = upper
.as_ref()
.map_or(first_colon.end(), |upper| upper.range().end());
// At least the closing bracket must exist, so there must be a token there
let next_token =
first_non_trivia_token(after_upper, contents).ok_or(FormatError::SyntaxError)?;
let second_colon = if next_token.kind == TokenKind::Colon {
debug_assert!(
next_token.range.start() < range.end(),
"The next token in a slice must either be a colon or the closing bracket"
);
Some(next_token)
} else {
None
};
Ok((first_colon, second_colon))
}
/// Determines whether this expression needs a space around the colon
/// <https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#slices>
fn is_simple_expr(expr: &Expr) -> bool {
matches!(expr, Expr::Constant(_) | Expr::Name(_))
}
pub(crate) enum ExprSliceCommentSection {
Lower,
Upper,
Step,
}
/// Assigns a comment to lower/upper/step in `[lower:upper:step]`.
///
/// ```python
/// "sliceable"[
/// # lower comment
/// :
/// # upper comment
/// :
/// # step comment
/// ]
/// ```
pub(crate) fn assign_comment_in_slice(
comment: TextRange,
contents: &str,
expr_slice: &ExprSlice,
) -> ExprSliceCommentSection {
let ExprSlice {
range,
lower,
upper,
step: _,
} = expr_slice;
let (first_colon, second_colon) = find_colons(contents, *range, lower, upper)
.expect("SyntaxError when trying to parse slice");
if comment.start() < first_colon.range.start() {
ExprSliceCommentSection::Lower
} else {
// We are to the right of the first colon
if let Some(second_colon) = second_colon {
if comment.start() < second_colon.range.start() {
ExprSliceCommentSection::Upper
} else {
ExprSliceCommentSection::Step
}
} else {
// No second colon means there is no step
ExprSliceCommentSection::Upper
}
}
}
/// Manual spacing for the leading comments of upper and step
fn leading_comments_spacing(
f: &mut PyFormatter,
leading_comments: &[SourceComment],
) -> FormatResult<()> {
if let Some(first) = leading_comments.first() {
if first.line_position().is_own_line() {
// Insert a newline after the colon so the comment ends up on its own line
hard_line_break().fmt(f)?;
} else {
// Insert the two spaces between the colon and the end-of-line comment after the colon
write!(f, [space(), space()])?;
}
}
Ok(())
}
impl NeedsParentheses for ExprSlice {
fn needs_parentheses(
&self,

View file

@ -1,24 +1,52 @@
use crate::comments::{trailing_comments, Comments};
use crate::expression::parentheses::{
default_expression_needs_parentheses, NeedsParentheses, Parentheses, Parenthesize,
};
use crate::{not_yet_implemented_custom_text, FormatNodeRule, PyFormatter};
use crate::comments::Comments;
use ruff_formatter::{write, Buffer, FormatResult};
use crate::{AsFormat, FormatNodeRule, PyFormatter};
use ruff_formatter::prelude::{group, soft_block_indent, text};
use ruff_formatter::{format_args, write, Buffer, FormatResult};
use ruff_python_ast::node::AstNode;
use rustpython_parser::ast::ExprSubscript;
#[derive(Default)]
pub struct FormatExprSubscript;
impl FormatNodeRule<ExprSubscript> for FormatExprSubscript {
fn fmt_fields(&self, _item: &ExprSubscript, f: &mut PyFormatter) -> FormatResult<()> {
fn fmt_fields(&self, item: &ExprSubscript, f: &mut PyFormatter) -> FormatResult<()> {
let ExprSubscript {
range: _,
value,
slice,
ctx: _,
} = item;
let comments = f.context().comments().clone();
let dangling_comments = comments.dangling_comments(item.as_any_node_ref());
debug_assert!(
dangling_comments.len() <= 1,
"The subscript expression must have at most a single comment, the one after the bracket"
);
write!(
f,
[not_yet_implemented_custom_text(
"NOT_IMPLEMENTED_value[NOT_IMPLEMENTED_key]"
)]
[group(&format_args![
value.format(),
text("["),
trailing_comments(dangling_comments),
soft_block_indent(&slice.format()),
text("]")
])]
)
}
fn fmt_dangling_comments(
&self,
_node: &ExprSubscript,
_f: &mut PyFormatter,
) -> FormatResult<()> {
// Handled inside of `fmt_fields`
Ok(())
}
}
impl NeedsParentheses for ExprSubscript {
@ -28,6 +56,9 @@ impl NeedsParentheses for ExprSubscript {
source: &str,
comments: &Comments,
) -> Parentheses {
default_expression_needs_parentheses(self.into(), parenthesize, source, comments)
match default_expression_needs_parentheses(self.into(), parenthesize, source, comments) {
Parentheses::Optional => Parentheses::Never,
parentheses => parentheses,
}
}
}

View file

@ -2,10 +2,10 @@ use crate::comments::{trailing_comments, Comments};
use crate::expression::parentheses::{
default_expression_needs_parentheses, NeedsParentheses, Parentheses, Parenthesize,
};
use crate::prelude::*;
use crate::trivia::{SimpleTokenizer, TokenKind};
use crate::FormatNodeRule;
use ruff_formatter::FormatContext;
use crate::{AsFormat, FormatNodeRule, PyFormatter};
use ruff_formatter::prelude::{hard_line_break, space, text};
use ruff_formatter::{Format, FormatContext, FormatResult};
use ruff_python_ast::prelude::UnaryOp;
use ruff_text_size::{TextLen, TextRange};
use rustpython_parser::ast::{ExprUnaryOp, Ranged};