Build CommentRanges outside the parser (#11792)

## Summary

This PR removes the construction of `CommentRanges` from the parser; instead, the linter and the formatter build the ranges themselves when they are required.

For the linter, the comment ranges are built and owned by the `Indexer`, while for the formatter they are built from the `Tokens` struct and passed in as an argument.
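
As a minimal sketch of the new shape, assuming the crate's `parse_module` entry point, a consumer now derives the ranges from the token stream only when it needs them:

```rust
use ruff_python_parser::parse_module;
use ruff_python_trivia::CommentRanges;

fn comment_ranges(source: &str) -> CommentRanges {
    let parsed = parse_module(source).expect("source should parse");
    // `Parsed` no longer stores comment ranges; they are derived from the
    // token stream on demand. The linter's `Indexer` does the equivalent
    // internally and keeps ownership of the result.
    CommentRanges::from(parsed.tokens())
}
```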

## Test Plan

`cargo insta test`
Dhruv Manilawala 2024-06-09 15:25:17 +05:30 committed by GitHub
parent 7509a48eab
commit 549cc1e437
28 changed files with 151 additions and 102 deletions

View file

@@ -239,7 +239,6 @@ pub struct Parsed<T> {
syntax: T,
tokens: Tokens,
errors: Vec<ParseError>,
comment_ranges: CommentRanges,
}
impl<T> Parsed<T> {
@@ -258,11 +257,6 @@ impl<T> Parsed<T> {
&self.errors
}
/// Returns the comment ranges for the parsed output.
pub fn comment_ranges(&self) -> &CommentRanges {
&self.comment_ranges
}
/// Consumes the [`Parsed`] output and returns the contained syntax node.
pub fn into_syntax(self) -> T {
self.syntax
@@ -313,7 +307,6 @@ impl Parsed<Mod> {
syntax: module,
tokens: self.tokens,
errors: self.errors,
comment_ranges: self.comment_ranges,
}),
Mod::Expression(_) => None,
}
@@ -333,7 +326,6 @@ impl Parsed<Mod> {
syntax: expression,
tokens: self.tokens,
errors: self.errors,
comment_ranges: self.comment_ranges,
}),
}
}
@@ -518,6 +510,18 @@ impl Deref for Tokens {
}
}
impl From<&Tokens> for CommentRanges {
fn from(tokens: &Tokens) -> Self {
let mut ranges = vec![];
for token in tokens {
if token.kind() == TokenKind::Comment {
ranges.push(token.range());
}
}
CommentRanges::new(ranges)
}
}
/// Control in the different modes by which a source file can be parsed.
///
/// The mode argument specifies in what way code must be parsed.
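
With the `From<&Tokens> for CommentRanges` impl above, the formatter can build the ranges from the token stream and receive them as an explicit argument. A hedged sketch of that shape, where `format_with_comments` is a placeholder name rather than the formatter's real entry point:

```rust
use ruff_python_ast::ModModule;
use ruff_python_parser::Parsed;
use ruff_python_trivia::CommentRanges;

// Placeholder standing in for the formatter entry point; the real function
// and its signature are not part of this diff.
fn format_with_comments(_module: &ModModule, _comment_ranges: &CommentRanges) {}

fn format_source(parsed: &Parsed<ModModule>) {
    // Built from the token stream via the new `From<&Tokens>` impl...
    let comment_ranges = CommentRanges::from(parsed.tokens());
    // ...and handed to the formatter explicitly instead of being read
    // off the parse result.
    format_with_comments(parsed.syntax(), &comment_ranges);
}
```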

View file

@@ -147,7 +147,7 @@ impl<'src> Parser<'src> {
// TODO consider re-integrating lexical error handling into the parser?
let parse_errors = self.errors;
let (tokens, comment_ranges, lex_errors) = self.tokens.finish();
let (tokens, lex_errors) = self.tokens.finish();
// Fast path for when there are no lex errors.
// There's no fast path for when there are no parse errors because a lex error
@@ -156,7 +156,6 @@ impl<'src> Parser<'src> {
return Parsed {
syntax,
tokens: Tokens::new(tokens),
comment_ranges,
errors: parse_errors,
};
}
@@ -188,7 +187,6 @@ impl<'src> Parser<'src> {
Parsed {
syntax,
tokens: Tokens::new(tokens),
comment_ranges,
errors: merged,
}
}

View file

@@ -1,4 +1,3 @@
use ruff_python_trivia::CommentRanges;
use ruff_text_size::{TextRange, TextSize};
use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenFlags, TokenValue};
@@ -14,9 +13,6 @@ pub(crate) struct TokenSource<'src> {
/// is finished consuming all the tokens. Note that unlike the emitted tokens, this vector
/// holds both the trivia and non-trivia tokens.
tokens: Vec<Token>,
/// A vector containing the range of all the comment tokens emitted by the lexer.
comments: Vec<TextRange>,
}
impl<'src> TokenSource<'src> {
@@ -26,7 +22,6 @@ impl<'src> TokenSource<'src> {
TokenSource {
lexer,
tokens: vec![],
comments: vec![],
}
}
@@ -103,9 +98,6 @@ impl<'src> TokenSource<'src> {
loop {
let kind = self.lexer.next_token();
if is_trivia(kind) {
if kind == TokenKind::Comment {
self.comments.push(self.current_range());
}
self.tokens
.push(Token::new(kind, self.current_range(), self.current_flags()));
continue;
@@ -130,7 +122,6 @@ impl<'src> TokenSource<'src> {
TokenSourceCheckpoint {
lexer_checkpoint: self.lexer.checkpoint(),
tokens_position: self.tokens.len(),
comments_position: self.comments.len(),
}
}
@@ -139,18 +130,16 @@ impl<'src> TokenSource<'src> {
let TokenSourceCheckpoint {
lexer_checkpoint,
tokens_position,
comments_position,
} = checkpoint;
self.lexer.rewind(lexer_checkpoint);
self.tokens.truncate(tokens_position);
self.comments.truncate(comments_position);
}
/// Consumes the token source, returning the collected tokens, comment ranges, and any errors
/// encountered during lexing. The token collection includes both the trivia and non-trivia
/// tokens.
pub(crate) fn finish(mut self) -> (Vec<Token>, CommentRanges, Vec<LexicalError>) {
pub(crate) fn finish(mut self) -> (Vec<Token>, Vec<LexicalError>) {
assert_eq!(
self.current_kind(),
TokenKind::EndOfFile,
@@ -163,15 +152,13 @@ impl<'src> TokenSource<'src> {
assert_eq!(last.kind(), TokenKind::EndOfFile);
}
let comment_ranges = CommentRanges::new(self.comments);
(self.tokens, comment_ranges, self.lexer.finish())
(self.tokens, self.lexer.finish())
}
}
pub(crate) struct TokenSourceCheckpoint {
lexer_checkpoint: LexerCheckpoint,
tokens_position: usize,
comments_position: usize,
}
/// Allocates a [`Vec`] with an approximated capacity to fit all tokens