Introduce AST nodes for PatternMatchClass arguments (#6881)

## Summary

This PR introduces two new AST nodes to improve the representation of
`PatternMatchClass`. As a reminder, `PatternMatchClass` looks like this:

```python
case Point2D(0, 0, x=1, y=2):
  ...
```

Historically, this was represented as a vector of patterns (for the `0,
0` portion) and parallel vectors of keyword names (for `x` and `y`) and
values (for `1` and `2`). This introduces a bunch of challenges for the
formatter, but importantly, it's also really different from how we
represent similar nodes, like arguments (`func(0, 0, x=1, y=2)`) or
parameters (`def func(x, y)`).

So, firstly, we now use a single node (`PatternArguments`) for the
entire parenthesized region, making it much more consistent with our
other nodes. So, above, `PatternArguments` would be `(0, 0, x=1, y=2)`.

Secondly, we now have a `PatternKeyword` node for `x=1` and `y=2`. This
is much more similar to the how `Keyword` is represented within
`Arguments` for call expressions.

Closes https://github.com/astral-sh/ruff/issues/6866.

Closes https://github.com/astral-sh/ruff/issues/6880.
This commit is contained in:
Charlie Marsh 2023-08-26 10:45:44 -04:00 committed by GitHub
parent ed1b4122d0
commit 15b73bdb8a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 25299 additions and 25824 deletions

View file

@ -8,6 +8,8 @@ use crate::expression::parentheses::{
};
use crate::prelude::*;
pub(crate) mod pattern_arguments;
pub(crate) mod pattern_keyword;
pub(crate) mod pattern_match_as;
pub(crate) mod pattern_match_class;
pub(crate) mod pattern_match_mapping;

View file

@ -0,0 +1,110 @@
use ruff_formatter::write;
use ruff_python_ast::node::AstNode;
use ruff_python_ast::{Pattern, PatternArguments, Ranged};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::{TextRange, TextSize};
use crate::comments::SourceComment;
use crate::expression::parentheses::{empty_parenthesized, parenthesized, Parentheses};
use crate::prelude::*;
#[derive(Default)]
pub struct FormatPatternArguments;
impl FormatNodeRule<PatternArguments> for FormatPatternArguments {
fn fmt_fields(&self, item: &PatternArguments, f: &mut PyFormatter) -> FormatResult<()> {
// If there are no arguments, all comments are dangling:
// ```python
// case Point2D( # dangling
// # dangling
// )
// ```
if item.patterns.is_empty() && item.keywords.is_empty() {
let comments = f.context().comments().clone();
let dangling = comments.dangling(item);
return write!(f, [empty_parenthesized("(", dangling, ")")]);
}
let all_arguments = format_with(|f: &mut PyFormatter| {
let source = f.context().source();
let mut joiner = f.join_comma_separated(item.end());
match item.patterns.as_slice() {
[pattern] if item.keywords.is_empty() => {
let parentheses =
if is_single_argument_parenthesized(pattern, item.end(), source) {
Parentheses::Always
} else {
// Note: no need to handle opening-parenthesis comments, since
// an opening-parenthesis comment implies that the argument is
// parenthesized.
Parentheses::Never
};
joiner.entry(pattern, &pattern.format().with_options(parentheses));
}
patterns => {
joiner
.entries(patterns.iter().map(|pattern| {
(
pattern,
pattern.format().with_options(Parentheses::Preserve),
)
}))
.nodes(item.keywords.iter());
}
}
joiner.finish()
});
// If the arguments are non-empty, then a dangling comment indicates a comment on the
// same line as the opening parenthesis, e.g.:
// ```python
// case Point2D( # dangling
// ...
// )
// ```
let comments = f.context().comments().clone();
let dangling_comments = comments.dangling(item.as_any_node_ref());
write!(
f,
[parenthesized("(", &group(&all_arguments), ")")
.with_dangling_comments(dangling_comments)]
)
}
fn fmt_dangling_comments(
&self,
_dangling_comments: &[SourceComment],
_f: &mut PyFormatter,
) -> FormatResult<()> {
Ok(())
}
}
/// Returns `true` if the pattern (which is the only argument to a [`PatternMatchClass`]) is
/// parenthesized. Used to avoid falsely assuming that `x` is parenthesized in cases like:
/// ```python
/// case Point2D(x): ...
/// ```
fn is_single_argument_parenthesized(pattern: &Pattern, call_end: TextSize, source: &str) -> bool {
let mut has_seen_r_paren = false;
for token in SimpleTokenizer::new(source, TextRange::new(pattern.end(), call_end)).skip_trivia()
{
match token.kind() {
SimpleTokenKind::RParen => {
if has_seen_r_paren {
return true;
}
has_seen_r_paren = true;
}
// Skip over any trailing comma
SimpleTokenKind::Comma => continue,
_ => {
// Passed the arguments
break;
}
}
}
false
}

View file

@ -0,0 +1,17 @@
use crate::prelude::*;
use ruff_formatter::write;
use ruff_python_ast::PatternKeyword;
#[derive(Default)]
pub struct FormatPatternKeyword;
impl FormatNodeRule<PatternKeyword> for FormatPatternKeyword {
fn fmt_fields(&self, item: &PatternKeyword, f: &mut PyFormatter) -> FormatResult<()> {
let PatternKeyword {
range: _,
attr,
pattern,
} = item;
write!(f, [attr.format(), text("="), pattern.format()])
}
}

View file

@ -1,13 +1,9 @@
use ruff_formatter::write;
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::{Pattern, PatternMatchClass, Ranged};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::{TextRange, TextSize};
use ruff_python_ast::PatternMatchClass;
use crate::comments::{dangling_comments, SourceComment};
use crate::expression::parentheses::{
empty_parenthesized, parenthesized, NeedsParentheses, OptionalParentheses, Parentheses,
};
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::prelude::*;
#[derive(Default)]
@ -16,74 +12,22 @@ pub struct FormatPatternMatchClass;
impl FormatNodeRule<PatternMatchClass> for FormatPatternMatchClass {
fn fmt_fields(&self, item: &PatternMatchClass, f: &mut PyFormatter) -> FormatResult<()> {
let PatternMatchClass {
range,
range: _,
cls,
patterns,
kwd_attrs,
kwd_patterns,
arguments,
} = item;
let comments = f.context().comments().clone();
let dangling = comments.dangling(item);
// Identify the dangling comments before and after the open parenthesis.
let (before_parenthesis, after_parenthesis) = if let Some(left_paren) =
SimpleTokenizer::starts_at(cls.end(), f.context().source())
.find(|token| token.kind() == SimpleTokenKind::LParen)
{
dangling
.split_at(dangling.partition_point(|comment| comment.start() < left_paren.start()))
} else {
(dangling, [].as_slice())
};
write!(f, [cls.format(), dangling_comments(before_parenthesis)])?;
match (patterns.as_slice(), kwd_attrs.as_slice()) {
([], []) => {
// No patterns; render parentheses with any dangling comments.
write!(f, [empty_parenthesized("(", after_parenthesis, ")")])
}
([pattern], []) => {
// A single pattern. We need to take care not to re-parenthesize it, since our standard
// parenthesis detection will false-positive here.
let parentheses = if is_single_argument_parenthesized(
pattern,
item.end(),
f.context().source(),
) {
Parentheses::Always
} else {
Parentheses::Never
};
write!(
f,
[
parenthesized("(", &pattern.format().with_options(parentheses), ")")
.with_dangling_comments(after_parenthesis)
]
)
}
_ => {
// Multiple patterns: standard logic.
let items = format_with(|f| {
let mut join = f.join_comma_separated(range.end());
join.nodes(patterns.iter());
for (key, value) in kwd_attrs.iter().zip(kwd_patterns.iter()) {
join.entry(
key,
&format_with(|f| write!(f, [key.format(), text("="), value.format()])),
);
}
join.finish()
});
write!(
f,
[parenthesized("(", &group(&items), ")")
.with_dangling_comments(after_parenthesis)]
)
}
}
write!(
f,
[
cls.format(),
dangling_comments(dangling),
arguments.format()
]
)
}
fn fmt_dangling_comments(
@ -109,46 +53,10 @@ impl NeedsParentheses for PatternMatchClass {
// (...)
// ): ...
// ```
let dangling = context.comments().dangling(self);
if !dangling.is_empty() {
if let Some(left_paren) = SimpleTokenizer::starts_at(self.cls.end(), context.source())
.find(|token| token.kind() == SimpleTokenKind::LParen)
{
if dangling
.iter()
.any(|comment| comment.start() < left_paren.start())
{
return OptionalParentheses::Multiline;
};
}
}
OptionalParentheses::Never
}
}
/// Returns `true` if the pattern (which is the only argument to a [`PatternMatchClass`]) is
/// parenthesized. Used to avoid falsely assuming that `x` is parenthesized in cases like:
/// ```python
/// case Point2D(x): ...
/// ```
fn is_single_argument_parenthesized(pattern: &Pattern, call_end: TextSize, source: &str) -> bool {
let mut has_seen_r_paren = false;
for token in SimpleTokenizer::new(source, TextRange::new(pattern.end(), call_end)).skip_trivia()
{
match token.kind() {
SimpleTokenKind::RParen => {
if has_seen_r_paren {
return true;
}
has_seen_r_paren = true;
}
// Skip over any trailing comma
SimpleTokenKind::Comma => continue,
_ => {
// Passed the arguments
break;
}
if context.comments().has_dangling(self) {
OptionalParentheses::Multiline
} else {
OptionalParentheses::Never
}
}
false
}