From f754ad58988953a06f04a4341e347fe899e7f323 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 25 Aug 2023 00:03:27 -0400 Subject: [PATCH] Handle bracketed comments on sequence patterns (#6801) ## Summary This PR ensures that we handle bracketed comments on sequences, like `# comment` here: ```python match x: case [ # comment 1, 2 ]: pass ``` The handling is very similar to other, similar nodes, except that we do need some special logic to determine whether the sequence is parenthesized, similar to our logic for tuples. ## Test Plan `cargo test` --- .../src/comments/placement.rs | 10 +++ .../src/pattern/pattern_match_sequence.rs | 89 +++++++++++++++---- .../snapshots/format@statement__match.py.snap | 26 +++--- 3 files changed, 93 insertions(+), 32 deletions(-) diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs index 02cb2cc72e..c0983ec19d 100644 --- a/crates/ruff_python_formatter/src/comments/placement.rs +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -15,6 +15,7 @@ use crate::expression::expr_tuple::is_tuple_parenthesized; use crate::other::parameters::{ assign_argument_separator_comment_placement, find_parameter_separators, }; +use crate::pattern::pattern_match_sequence::SequenceType; /// Manually attach comments to nodes that the default placement gets wrong. pub(super) fn place_comment<'a>( @@ -179,6 +180,15 @@ fn handle_enclosed_comment<'a>( AnyNodeRef::Comprehension(comprehension) => { handle_comprehension_comment(comment, comprehension, locator) } + AnyNodeRef::PatternMatchSequence(pattern_match_sequence) => { + if SequenceType::from_pattern(pattern_match_sequence, locator.contents()) + .is_parenthesized() + { + handle_bracketed_end_of_line_comment(comment, locator) + } else { + CommentPlacement::Default(comment) + } + } AnyNodeRef::ExprAttribute(attribute) => { handle_attribute_comment(comment, attribute, locator) } diff --git a/crates/ruff_python_formatter/src/pattern/pattern_match_sequence.rs b/crates/ruff_python_formatter/src/pattern/pattern_match_sequence.rs index 4a6e39b663..c9346c4e46 100644 --- a/crates/ruff_python_formatter/src/pattern/pattern_match_sequence.rs +++ b/crates/ruff_python_formatter/src/pattern/pattern_match_sequence.rs @@ -1,7 +1,9 @@ use ruff_formatter::prelude::format_with; use ruff_formatter::{Format, FormatResult}; use ruff_python_ast::node::AnyNodeRef; -use ruff_python_ast::PatternMatchSequence; +use ruff_python_ast::{PatternMatchSequence, Ranged}; +use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer}; +use ruff_text_size::TextRange; use crate::builders::PyFormatterExtensions; use crate::context::PyFormatContext; @@ -13,29 +15,19 @@ use crate::{FormatNodeRule, PyFormatter}; #[derive(Default)] pub struct FormatPatternMatchSequence; -#[derive(Debug)] -enum SequenceType { - Tuple, - TupleNoParens, - List, -} - impl FormatNodeRule for FormatPatternMatchSequence { fn fmt_fields(&self, item: &PatternMatchSequence, f: &mut PyFormatter) -> FormatResult<()> { let PatternMatchSequence { patterns, range } = item; - let sequence_type = match &f.context().source()[*range].chars().next() { - Some('(') => SequenceType::Tuple, - Some('[') => SequenceType::List, - _ => SequenceType::TupleNoParens, - }; + let comments = f.context().comments().clone(); let dangling = comments.dangling(item); + + let sequence_type = SequenceType::from_pattern(item, f.context().source()); if patterns.is_empty() { return match sequence_type { - SequenceType::Tuple => empty_parenthesized("(", dangling, ")").fmt(f), SequenceType::List => empty_parenthesized("[", dangling, "]").fmt(f), - SequenceType::TupleNoParens => { - unreachable!("If empty, it should be either tuple or list") + SequenceType::Tuple | SequenceType::TupleNoParens => { + empty_parenthesized("(", dangling, ")").fmt(f) } }; } @@ -65,3 +57,68 @@ impl NeedsParentheses for PatternMatchSequence { OptionalParentheses::Never } } + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub(crate) enum SequenceType { + /// A list literal, e.g., `[1, 2, 3]`. + List, + /// A parenthesized tuple literal, e.g., `(1, 2, 3)`. + Tuple, + /// A tuple literal without parentheses, e.g., `1, 2, 3`. + TupleNoParens, +} + +impl SequenceType { + pub(crate) fn from_pattern(pattern: &PatternMatchSequence, source: &str) -> SequenceType { + if source[pattern.range()].starts_with('[') { + SequenceType::List + } else if source[pattern.range()].starts_with('(') { + // If the pattern is empty, it must be a parenthesized tuple with no members. (This + // branch exists to differentiate between a tuple with and without its own parentheses, + // but a tuple without its own parentheses must have at least one member.) + let Some(elt) = pattern.patterns.first() else { + return SequenceType::Tuple; + }; + + // Count the number of open parentheses between the start of the pattern and the first + // element, and the number of close parentheses between the first element and its + // trailing comma. If the number of open parentheses is greater than the number of close + // parentheses, + // the pattern is parenthesized. For example, here, we have two parentheses before the + // first element, and one after it: + // ```python + // ((a), b, c) + // ``` + // + // This algorithm successfully avoids false positives for cases like: + // ```python + // (a), b, c + // ``` + let open_parentheses_count = + SimpleTokenizer::new(source, TextRange::new(pattern.start(), elt.start())) + .skip_trivia() + .filter(|token| token.kind() == SimpleTokenKind::LParen) + .count(); + + // Count the number of close parentheses. + let close_parentheses_count = + SimpleTokenizer::new(source, TextRange::new(elt.end(), elt.end())) + .skip_trivia() + .take_while(|token| token.kind() != SimpleTokenKind::Comma) + .filter(|token| token.kind() == SimpleTokenKind::RParen) + .count(); + + if open_parentheses_count > close_parentheses_count { + SequenceType::Tuple + } else { + SequenceType::TupleNoParens + } + } else { + SequenceType::TupleNoParens + } + } + + pub(crate) fn is_parenthesized(self) -> bool { + matches!(self, SequenceType::List | SequenceType::Tuple) + } +} diff --git a/crates/ruff_python_formatter/tests/snapshots/format@statement__match.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@statement__match.py.snap index 032a971380..d01a764d25 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@statement__match.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@statement__match.py.snap @@ -572,8 +572,7 @@ match foo: match foo: - case [ - # leading + case [ # leading # leading # leading # leading @@ -685,9 +684,8 @@ match foo: 2, ]: pass - case [ - ( # outer - # inner + case [ # outer + ( # inner 1 ), 2, @@ -695,29 +693,25 @@ match foo: pass case [ ( # outer - [ - # inner + [ # inner 1, ] ) ]: pass - case [ - ( # outer - # inner outer - [ - # inner + case [ # outer + ( # inner outer + [ # inner 1, ] ) ]: pass - case [ - ( # outer + case [ # outer + ( # own line # inner outer - [ - # inner + [ # inner 1, ] )