Introduce AST nodes for PatternMatchClass arguments (#6881)

## Summary

This PR introduces two new AST nodes to improve the representation of
`PatternMatchClass`. As a reminder, `PatternMatchClass` looks like this:

```python
case Point2D(0, 0, x=1, y=2):
  ...
```

Historically, this was represented as a vector of patterns (for the `0,
0` portion) and parallel vectors of keyword names (for `x` and `y`) and
values (for `1` and `2`). This introduces a bunch of challenges for the
formatter, but importantly, it's also really different from how we
represent similar nodes, like arguments (`func(0, 0, x=1, y=2)`) or
parameters (`def func(x, y)`).

So, firstly, we now use a single node (`PatternArguments`) for the
entire parenthesized region, making it much more consistent with our
other nodes. So, above, `PatternArguments` would be `(0, 0, x=1, y=2)`.

Secondly, we now have a `PatternKeyword` node for `x=1` and `y=2`. This
is much more similar to the how `Keyword` is represented within
`Arguments` for call expressions.

Closes https://github.com/astral-sh/ruff/issues/6866.

Closes https://github.com/astral-sh/ruff/issues/6880.
This commit is contained in:
Charlie Marsh 2023-08-26 10:45:44 -04:00 committed by GitHub
parent ed1b4122d0
commit 15b73bdb8a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 25299 additions and 25824 deletions

View file

@ -174,7 +174,7 @@ fn handle_enclosed_comment<'a>(
}
})
}
AnyNodeRef::Arguments(_) | AnyNodeRef::TypeParams(_) => {
AnyNodeRef::Arguments(_) | AnyNodeRef::TypeParams(_) | AnyNodeRef::PatternArguments(_) => {
handle_bracketed_end_of_line_comment(comment, locator)
}
AnyNodeRef::Comprehension(comprehension) => {
@ -220,9 +220,7 @@ fn handle_enclosed_comment<'a>(
CommentPlacement::Default(comment)
}
}
AnyNodeRef::PatternMatchClass(class) => {
handle_pattern_match_class_comment(comment, class, locator)
}
AnyNodeRef::PatternMatchClass(class) => handle_pattern_match_class_comment(comment, class),
AnyNodeRef::PatternMatchAs(_) => handle_pattern_match_as_comment(comment, locator),
AnyNodeRef::PatternMatchStar(_) => handle_pattern_match_star_comment(comment),
AnyNodeRef::PatternMatchMapping(pattern) => {
@ -1233,75 +1231,23 @@ fn handle_with_item_comment<'a>(
}
}
/// Handles trailing comments after the `as` keyword of a pattern match item:
///
/// Handles trailing comments between the class name and its arguments in:
/// ```python
/// case (
/// Pattern
/// # dangling
/// ( # dangling
/// # dangling
/// )
/// (...)
/// ): ...
/// ```
fn handle_pattern_match_class_comment<'a>(
comment: DecoratedComment<'a>,
class: &'a ast::PatternMatchClass,
locator: &Locator,
) -> CommentPlacement<'a> {
// Find the open parentheses on the arguments.
let Some(left_paren) = SimpleTokenizer::starts_at(class.cls.end(), locator.contents())
.skip_trivia()
.find(|token| token.kind == SimpleTokenKind::LParen)
else {
return CommentPlacement::Default(comment);
};
// If the comment appears before the open parenthesis, it's dangling:
// ```python
// case (
// Pattern
// # dangling
// (...)
// ): ...
// ```
if comment.end() < left_paren.start() {
return CommentPlacement::dangling(comment.enclosing_node(), comment);
if class.cls.end() < comment.start() && comment.end() < class.arguments.start() {
CommentPlacement::dangling(comment.enclosing_node(), comment)
} else {
CommentPlacement::Default(comment)
}
let Some(first_item) = class
.patterns
.first()
.map(Ranged::start)
.or_else(|| class.kwd_attrs.first().map(Ranged::start))
else {
// If there are no items, then the comment must be dangling:
// ```python
// case (
// Pattern(
// # dangling
// )
// ): ...
// ```
return CommentPlacement::dangling(comment.enclosing_node(), comment);
};
// If the comment appears before the first item or its parentheses, then it's dangling:
// ```python
// case (
// Pattern( # dangling
// 0,
// 0,
// )
// ): ...
// ```
if comment.line_position().is_end_of_line() {
if comment.end() < first_item {
return CommentPlacement::dangling(comment.enclosing_node(), comment);
}
}
CommentPlacement::Default(comment)
}
/// Handles trailing comments after the `as` keyword of a pattern match item:

View file

@ -2256,6 +2256,78 @@ impl<'ast> IntoFormat<PyFormatContext<'ast>> for ast::PatternMatchOr {
}
}
impl FormatRule<ast::PatternArguments, PyFormatContext<'_>>
for crate::pattern::pattern_arguments::FormatPatternArguments
{
#[inline]
fn fmt(&self, node: &ast::PatternArguments, f: &mut PyFormatter) -> FormatResult<()> {
FormatNodeRule::<ast::PatternArguments>::fmt(self, node, f)
}
}
impl<'ast> AsFormat<PyFormatContext<'ast>> for ast::PatternArguments {
type Format<'a> = FormatRefWithRule<
'a,
ast::PatternArguments,
crate::pattern::pattern_arguments::FormatPatternArguments,
PyFormatContext<'ast>,
>;
fn format(&self) -> Self::Format<'_> {
FormatRefWithRule::new(
self,
crate::pattern::pattern_arguments::FormatPatternArguments::default(),
)
}
}
impl<'ast> IntoFormat<PyFormatContext<'ast>> for ast::PatternArguments {
type Format = FormatOwnedWithRule<
ast::PatternArguments,
crate::pattern::pattern_arguments::FormatPatternArguments,
PyFormatContext<'ast>,
>;
fn into_format(self) -> Self::Format {
FormatOwnedWithRule::new(
self,
crate::pattern::pattern_arguments::FormatPatternArguments::default(),
)
}
}
impl FormatRule<ast::PatternKeyword, PyFormatContext<'_>>
for crate::pattern::pattern_keyword::FormatPatternKeyword
{
#[inline]
fn fmt(&self, node: &ast::PatternKeyword, f: &mut PyFormatter) -> FormatResult<()> {
FormatNodeRule::<ast::PatternKeyword>::fmt(self, node, f)
}
}
impl<'ast> AsFormat<PyFormatContext<'ast>> for ast::PatternKeyword {
type Format<'a> = FormatRefWithRule<
'a,
ast::PatternKeyword,
crate::pattern::pattern_keyword::FormatPatternKeyword,
PyFormatContext<'ast>,
>;
fn format(&self) -> Self::Format<'_> {
FormatRefWithRule::new(
self,
crate::pattern::pattern_keyword::FormatPatternKeyword::default(),
)
}
}
impl<'ast> IntoFormat<PyFormatContext<'ast>> for ast::PatternKeyword {
type Format = FormatOwnedWithRule<
ast::PatternKeyword,
crate::pattern::pattern_keyword::FormatPatternKeyword,
PyFormatContext<'ast>,
>;
fn into_format(self) -> Self::Format {
FormatOwnedWithRule::new(
self,
crate::pattern::pattern_keyword::FormatPatternKeyword::default(),
)
}
}
impl FormatRule<ast::Comprehension, PyFormatContext<'_>>
for crate::other::comprehension::FormatComprehension
{

View file

@ -8,6 +8,8 @@ use crate::expression::parentheses::{
};
use crate::prelude::*;
pub(crate) mod pattern_arguments;
pub(crate) mod pattern_keyword;
pub(crate) mod pattern_match_as;
pub(crate) mod pattern_match_class;
pub(crate) mod pattern_match_mapping;

View file

@ -0,0 +1,110 @@
use ruff_formatter::write;
use ruff_python_ast::node::AstNode;
use ruff_python_ast::{Pattern, PatternArguments, Ranged};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::{TextRange, TextSize};
use crate::comments::SourceComment;
use crate::expression::parentheses::{empty_parenthesized, parenthesized, Parentheses};
use crate::prelude::*;
#[derive(Default)]
pub struct FormatPatternArguments;
impl FormatNodeRule<PatternArguments> for FormatPatternArguments {
fn fmt_fields(&self, item: &PatternArguments, f: &mut PyFormatter) -> FormatResult<()> {
// If there are no arguments, all comments are dangling:
// ```python
// case Point2D( # dangling
// # dangling
// )
// ```
if item.patterns.is_empty() && item.keywords.is_empty() {
let comments = f.context().comments().clone();
let dangling = comments.dangling(item);
return write!(f, [empty_parenthesized("(", dangling, ")")]);
}
let all_arguments = format_with(|f: &mut PyFormatter| {
let source = f.context().source();
let mut joiner = f.join_comma_separated(item.end());
match item.patterns.as_slice() {
[pattern] if item.keywords.is_empty() => {
let parentheses =
if is_single_argument_parenthesized(pattern, item.end(), source) {
Parentheses::Always
} else {
// Note: no need to handle opening-parenthesis comments, since
// an opening-parenthesis comment implies that the argument is
// parenthesized.
Parentheses::Never
};
joiner.entry(pattern, &pattern.format().with_options(parentheses));
}
patterns => {
joiner
.entries(patterns.iter().map(|pattern| {
(
pattern,
pattern.format().with_options(Parentheses::Preserve),
)
}))
.nodes(item.keywords.iter());
}
}
joiner.finish()
});
// If the arguments are non-empty, then a dangling comment indicates a comment on the
// same line as the opening parenthesis, e.g.:
// ```python
// case Point2D( # dangling
// ...
// )
// ```
let comments = f.context().comments().clone();
let dangling_comments = comments.dangling(item.as_any_node_ref());
write!(
f,
[parenthesized("(", &group(&all_arguments), ")")
.with_dangling_comments(dangling_comments)]
)
}
fn fmt_dangling_comments(
&self,
_dangling_comments: &[SourceComment],
_f: &mut PyFormatter,
) -> FormatResult<()> {
Ok(())
}
}
/// Returns `true` if the pattern (which is the only argument to a [`PatternMatchClass`]) is
/// parenthesized. Used to avoid falsely assuming that `x` is parenthesized in cases like:
/// ```python
/// case Point2D(x): ...
/// ```
fn is_single_argument_parenthesized(pattern: &Pattern, call_end: TextSize, source: &str) -> bool {
let mut has_seen_r_paren = false;
for token in SimpleTokenizer::new(source, TextRange::new(pattern.end(), call_end)).skip_trivia()
{
match token.kind() {
SimpleTokenKind::RParen => {
if has_seen_r_paren {
return true;
}
has_seen_r_paren = true;
}
// Skip over any trailing comma
SimpleTokenKind::Comma => continue,
_ => {
// Passed the arguments
break;
}
}
}
false
}

View file

@ -0,0 +1,17 @@
use crate::prelude::*;
use ruff_formatter::write;
use ruff_python_ast::PatternKeyword;
#[derive(Default)]
pub struct FormatPatternKeyword;
impl FormatNodeRule<PatternKeyword> for FormatPatternKeyword {
fn fmt_fields(&self, item: &PatternKeyword, f: &mut PyFormatter) -> FormatResult<()> {
let PatternKeyword {
range: _,
attr,
pattern,
} = item;
write!(f, [attr.format(), text("="), pattern.format()])
}
}

View file

@ -1,13 +1,9 @@
use ruff_formatter::write;
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::{Pattern, PatternMatchClass, Ranged};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::{TextRange, TextSize};
use ruff_python_ast::PatternMatchClass;
use crate::comments::{dangling_comments, SourceComment};
use crate::expression::parentheses::{
empty_parenthesized, parenthesized, NeedsParentheses, OptionalParentheses, Parentheses,
};
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
use crate::prelude::*;
#[derive(Default)]
@ -16,74 +12,22 @@ pub struct FormatPatternMatchClass;
impl FormatNodeRule<PatternMatchClass> for FormatPatternMatchClass {
fn fmt_fields(&self, item: &PatternMatchClass, f: &mut PyFormatter) -> FormatResult<()> {
let PatternMatchClass {
range,
range: _,
cls,
patterns,
kwd_attrs,
kwd_patterns,
arguments,
} = item;
let comments = f.context().comments().clone();
let dangling = comments.dangling(item);
// Identify the dangling comments before and after the open parenthesis.
let (before_parenthesis, after_parenthesis) = if let Some(left_paren) =
SimpleTokenizer::starts_at(cls.end(), f.context().source())
.find(|token| token.kind() == SimpleTokenKind::LParen)
{
dangling
.split_at(dangling.partition_point(|comment| comment.start() < left_paren.start()))
} else {
(dangling, [].as_slice())
};
write!(f, [cls.format(), dangling_comments(before_parenthesis)])?;
match (patterns.as_slice(), kwd_attrs.as_slice()) {
([], []) => {
// No patterns; render parentheses with any dangling comments.
write!(f, [empty_parenthesized("(", after_parenthesis, ")")])
}
([pattern], []) => {
// A single pattern. We need to take care not to re-parenthesize it, since our standard
// parenthesis detection will false-positive here.
let parentheses = if is_single_argument_parenthesized(
pattern,
item.end(),
f.context().source(),
) {
Parentheses::Always
} else {
Parentheses::Never
};
write!(
f,
[
parenthesized("(", &pattern.format().with_options(parentheses), ")")
.with_dangling_comments(after_parenthesis)
]
)
}
_ => {
// Multiple patterns: standard logic.
let items = format_with(|f| {
let mut join = f.join_comma_separated(range.end());
join.nodes(patterns.iter());
for (key, value) in kwd_attrs.iter().zip(kwd_patterns.iter()) {
join.entry(
key,
&format_with(|f| write!(f, [key.format(), text("="), value.format()])),
);
}
join.finish()
});
write!(
f,
[parenthesized("(", &group(&items), ")")
.with_dangling_comments(after_parenthesis)]
)
}
}
write!(
f,
[
cls.format(),
dangling_comments(dangling),
arguments.format()
]
)
}
fn fmt_dangling_comments(
@ -109,46 +53,10 @@ impl NeedsParentheses for PatternMatchClass {
// (...)
// ): ...
// ```
let dangling = context.comments().dangling(self);
if !dangling.is_empty() {
if let Some(left_paren) = SimpleTokenizer::starts_at(self.cls.end(), context.source())
.find(|token| token.kind() == SimpleTokenKind::LParen)
{
if dangling
.iter()
.any(|comment| comment.start() < left_paren.start())
{
return OptionalParentheses::Multiline;
};
}
}
OptionalParentheses::Never
}
}
/// Returns `true` if the pattern (which is the only argument to a [`PatternMatchClass`]) is
/// parenthesized. Used to avoid falsely assuming that `x` is parenthesized in cases like:
/// ```python
/// case Point2D(x): ...
/// ```
fn is_single_argument_parenthesized(pattern: &Pattern, call_end: TextSize, source: &str) -> bool {
let mut has_seen_r_paren = false;
for token in SimpleTokenizer::new(source, TextRange::new(pattern.end(), call_end)).skip_trivia()
{
match token.kind() {
SimpleTokenKind::RParen => {
if has_seen_r_paren {
return true;
}
has_seen_r_paren = true;
}
// Skip over any trailing comma
SimpleTokenKind::Comma => continue,
_ => {
// Passed the arguments
break;
}
if context.comments().has_dangling(self) {
OptionalParentheses::Multiline
} else {
OptionalParentheses::Never
}
}
false
}