Use CommentRanges in backwards lexing (#7360)

## Summary

The tokenizer was split into a forwards and a backwards tokenizer. The backwards tokenizer uses the same names as the forwards one (e.g. `next_token`). The backwards tokenizer is given the comment ranges that we already built, so that it can skip comments.

---------

Co-authored-by: Micha Reiser <micha@reiser.io>
2025-09-28 12:55:05 +00:00 · 2023-09-16 05:21:45 +02:00 · 2023-09-16 05:21:45 +02:00 · 2cbe1733c8
commit 2cbe1733c8
parent 1f6e1485f9
41 changed files with 744 additions and 628 deletions
--- a/crates/ruff_python_formatter/src/comments/debug.rs
+++ b/crates/ruff_python_formatter/src/comments/debug.rs
@ -182,6 +182,7 @@ mod tests {
    use ruff_formatter::SourceCode;
    use ruff_python_ast::node::AnyNode;
    use ruff_python_ast::{StmtBreak, StmtContinue};
+    use ruff_python_trivia::CommentRanges;
    use ruff_text_size::{TextRange, TextSize};

    use crate::comments::map::MultiMap;
@ -231,7 +232,8 @@ break;
            ),
        );

-        let comments = Comments::new(comments_map);
+        let comment_ranges = CommentRanges::default();
+        let comments = Comments::new(comments_map, &comment_ranges);

        assert_debug_snapshot!(comments.debug(source_code));
    }
--- a/crates/ruff_python_formatter/src/comments/mod.rs
+++ b/crates/ruff_python_formatter/src/comments/mod.rs
@ -99,8 +99,7 @@ use ruff_formatter::{SourceCode, SourceCodeSlice};
 use ruff_python_ast::node::AnyNodeRef;
 use ruff_python_ast::visitor::preorder::{PreorderVisitor, TraversalSignal};
 use ruff_python_ast::Mod;
-use ruff_python_index::CommentRanges;
-use ruff_python_trivia::PythonWhitespace;
+use ruff_python_trivia::{CommentRanges, PythonWhitespace};
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextRange};

@ -281,7 +280,7 @@ type CommentsMap<'a> = MultiMap<NodeRefEqualityKey<'a>, SourceComment>;
 /// The comments of a syntax tree stored by node.
 ///
 /// Cloning `comments` is cheap as it only involves bumping a reference counter.
-#[derive(Debug, Clone, Default)]
+#[derive(Debug, Clone)]
 pub(crate) struct Comments<'a> {
    /// The implementation uses an [Rc] so that [Comments] has a lifetime independent from the [crate::Formatter].
    /// Independent lifetimes are necessary to support the use case where a (formattable object)[crate::Format]
@ -306,15 +305,31 @@ pub(crate) struct Comments<'a> {
    /// }
    /// ```
    data: Rc<CommentsData<'a>>,
+    /// We need those for backwards lexing
+    comment_ranges: &'a CommentRanges,
 }

 impl<'a> Comments<'a> {
-    fn new(comments: CommentsMap<'a>) -> Self {
+    fn new(comments: CommentsMap<'a>, comment_ranges: &'a CommentRanges) -> Self {
        Self {
            data: Rc::new(CommentsData { comments }),
+            comment_ranges,
        }
    }

+    /// Effectively a [`Default`] implementation that works around the lifetimes for tests
+    #[cfg(test)]
+    pub(crate) fn from_ranges(comment_ranges: &'a CommentRanges) -> Self {
+        Self {
+            data: Rc::new(CommentsData::default()),
+            comment_ranges,
+        }
+    }
+
+    pub(crate) fn ranges(&self) -> &'a CommentRanges {
+        self.comment_ranges
+    }
+
    /// Extracts the comments from the AST.
    pub(crate) fn from_ast(
        root: &'a Mod,
@ -324,12 +339,13 @@ impl<'a> Comments<'a> {
        let map = if comment_ranges.is_empty() {
            CommentsMap::new()
        } else {
-            let mut builder = CommentsMapBuilder::new(Locator::new(source_code.as_str()));
+            let mut builder =
+                CommentsMapBuilder::new(Locator::new(source_code.as_str()), comment_ranges);
            CommentsVisitor::new(source_code, comment_ranges, &mut builder).visit(root);
            builder.finish()
        };

-        Self::new(map)
+        Self::new(map, comment_ranges)
    }

    /// Returns `true` if the given `node` has any comments.
@ -528,9 +544,10 @@ mod tests {

    use ruff_formatter::SourceCode;
    use ruff_python_ast::Mod;
-    use ruff_python_index::{CommentRanges, CommentRangesBuilder};
+    use ruff_python_index::CommentRangesBuilder;
    use ruff_python_parser::lexer::lex;
    use ruff_python_parser::{parse_tokens, Mode};
+    use ruff_python_trivia::CommentRanges;

    use crate::comments::Comments;

--- a/crates/ruff_python_formatter/src/comments/placement.rs
+++ b/crates/ruff_python_formatter/src/comments/placement.rs
@ -4,7 +4,8 @@ use ruff_python_ast::node::AnyNodeRef;
 use ruff_python_ast::whitespace::indentation;
 use ruff_python_ast::{self as ast, Comprehension, Expr, MatchCase, Parameters};
 use ruff_python_trivia::{
-    find_only_token_in_range, indentation_at_offset, SimpleToken, SimpleTokenKind, SimpleTokenizer,
+    find_only_token_in_range, indentation_at_offset, BackwardsTokenizer, CommentRanges,
+    SimpleToken, SimpleTokenKind, SimpleTokenizer,
 };
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextLen, TextRange};
@ -20,12 +21,13 @@ use crate::pattern::pattern_match_sequence::SequenceType;
 /// Manually attach comments to nodes that the default placement gets wrong.
 pub(super) fn place_comment<'a>(
    comment: DecoratedComment<'a>,
+    comment_ranges: &CommentRanges,
    locator: &Locator,
 ) -> CommentPlacement<'a> {
    handle_parenthesized_comment(comment, locator)
        .or_else(|comment| handle_end_of_line_comment_around_body(comment, locator))
        .or_else(|comment| handle_own_line_comment_around_body(comment, locator))
-        .or_else(|comment| handle_enclosed_comment(comment, locator))
+        .or_else(|comment| handle_enclosed_comment(comment, comment_ranges, locator))
 }

 /// Handle parenthesized comments. A parenthesized comment is a comment that appears within a
@ -172,6 +174,7 @@ fn handle_parenthesized_comment<'a>(
 /// Handle a comment that is enclosed by a node.
 fn handle_enclosed_comment<'a>(
    comment: DecoratedComment<'a>,
+    comment_ranges: &CommentRanges,
    locator: &Locator,
 ) -> CommentPlacement<'a> {
    match comment.enclosing_node() {
@ -213,13 +216,15 @@ fn handle_enclosed_comment<'a>(
        AnyNodeRef::ExprDict(_) => handle_dict_unpacking_comment(comment, locator)
            .or_else(|comment| handle_bracketed_end_of_line_comment(comment, locator)),
        AnyNodeRef::ExprIfExp(expr_if) => handle_expr_if_comment(comment, expr_if, locator),
-        AnyNodeRef::ExprSlice(expr_slice) => handle_slice_comments(comment, expr_slice, locator),
+        AnyNodeRef::ExprSlice(expr_slice) => {
+            handle_slice_comments(comment, expr_slice, comment_ranges, locator)
+        }
        AnyNodeRef::ExprStarred(starred) => {
            handle_trailing_expression_starred_star_end_of_line_comment(comment, starred, locator)
        }
        AnyNodeRef::ExprSubscript(expr_subscript) => {
            if let Expr::Slice(expr_slice) = expr_subscript.slice.as_ref() {
-                handle_slice_comments(comment, expr_slice, locator)
+                handle_slice_comments(comment, expr_slice, comment_ranges, locator)
            } else {
                CommentPlacement::Default(comment)
            }
@ -958,6 +963,7 @@ fn handle_module_level_own_line_comment_before_class_or_function_comment<'a>(
 fn handle_slice_comments<'a>(
    comment: DecoratedComment<'a>,
    expr_slice: &'a ast::ExprSlice,
+    comment_ranges: &CommentRanges,
    locator: &Locator,
 ) -> CommentPlacement<'a> {
    let ast::ExprSlice {
@ -969,9 +975,9 @@ fn handle_slice_comments<'a>(

    // Check for `foo[ # comment`, but only if they are on the same line
    let after_lbracket = matches!(
-        SimpleTokenizer::up_to_without_back_comment(comment.start(), locator.contents())
+        BackwardsTokenizer::up_to(comment.start(), locator.contents(), comment_ranges)
            .skip_trivia()
-            .next_back(),
+            .next(),
        Some(SimpleToken {
            kind: SimpleTokenKind::LBracket,
            ..
--- a/crates/ruff_python_formatter/src/comments/visitor.rs
+++ b/crates/ruff_python_formatter/src/comments/visitor.rs
@ -8,8 +8,7 @@ use ruff_python_ast::{Mod, Stmt};
 // pre-order.
 #[allow(clippy::wildcard_imports)]
 use ruff_python_ast::visitor::preorder::*;
-use ruff_python_index::CommentRanges;
-use ruff_python_trivia::is_python_whitespace;
+use ruff_python_trivia::{is_python_whitespace, CommentRanges};
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextRange, TextSize};

@ -536,12 +535,14 @@ impl<'a> PushComment<'a> for CommentsVecBuilder<'a> {
 /// [`CommentsMap`].
 pub(super) struct CommentsMapBuilder<'a> {
    comments: CommentsMap<'a>,
+    /// We need those for backwards lexing
+    comment_ranges: &'a CommentRanges,
    locator: Locator<'a>,
 }

 impl<'a> PushComment<'a> for CommentsMapBuilder<'a> {
    fn push_comment(&mut self, placement: DecoratedComment<'a>) {
-        let placement = place_comment(placement, &self.locator);
+        let placement = place_comment(placement, self.comment_ranges, &self.locator);
        match placement {
            CommentPlacement::Leading { node, comment } => {
                self.push_leading_comment(node, comment);
@ -603,9 +604,10 @@ impl<'a> PushComment<'a> for CommentsMapBuilder<'a> {
 }

 impl<'a> CommentsMapBuilder<'a> {
-    pub(crate) fn new(locator: Locator<'a>) -> Self {
+    pub(crate) fn new(locator: Locator<'a>, comment_ranges: &'a CommentRanges) -> Self {
        Self {
            comments: CommentsMap::default(),
+            comment_ranges,
            locator,
        }
    }