Use CommentRanges in backwards lexing (#7360)

## Summary

The tokenizer was split into a forward and a backwards tokenizer. The backwards tokenizer uses the same method names as the forward one (e.g. `next_token`). The backwards tokenizer receives the comment ranges that we already built so that it can skip comments.

---------

Co-authored-by: Micha Reiser <micha@reiser.io>
2025-09-28 04:45:01 +00:00 · 2023-09-16 05:21:45 +02:00 · 2023-09-16 05:21:45 +02:00 · 2cbe1733c8
commit 2cbe1733c8
parent 1f6e1485f9
41 changed files with 744 additions and 628 deletions
--- a/crates/ruff_python_index/src/comment_ranges.rs
+++ b/crates/ruff_python_index/src/comment_ranges.rs
@@ -1,70 +1,8 @@
-use itertools::Itertools;
-use std::fmt::{Debug, Formatter};
-use std::ops::Deref;
+use std::fmt::Debug;

 use ruff_python_parser::Tok;
-use ruff_text_size::{Ranged, TextRange};
-
-/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
-#[derive(Clone)]
-pub struct CommentRanges {
-    raw: Vec<TextRange>,
-}
-
-impl CommentRanges {
-    /// Returns `true` if the given range includes a comment.
-    pub fn intersects(&self, target: TextRange) -> bool {
-        self.raw
-            .binary_search_by(|range| {
-                if target.contains_range(*range) {
-                    std::cmp::Ordering::Equal
-                } else if range.end() < target.start() {
-                    std::cmp::Ordering::Less
-                } else {
-                    std::cmp::Ordering::Greater
-                }
-            })
-            .is_ok()
-    }
-
-    /// Returns the comments who are within the range
-    pub fn comments_in_range(&self, range: TextRange) -> &[TextRange] {
-        let start = self
-            .raw
-            .partition_point(|comment| comment.start() < range.start());
-        // We expect there are few comments, so switching to find should be faster
-        match self.raw[start..]
-            .iter()
-            .find_position(|comment| comment.end() > range.end())
-        {
-            Some((in_range, _element)) => &self.raw[start..start + in_range],
-            None => &self.raw[start..],
-        }
-    }
-}
-
-impl Deref for CommentRanges {
-    type Target = [TextRange];
-
-    fn deref(&self) -> &Self::Target {
-        self.raw.as_slice()
-    }
-}
-
-impl Debug for CommentRanges {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        f.debug_tuple("CommentRanges").field(&self.raw).finish()
-    }
-}
-
-impl<'a> IntoIterator for &'a CommentRanges {
-    type IntoIter = std::slice::Iter<'a, TextRange>;
-    type Item = &'a TextRange;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.raw.iter()
-    }
-}
+use ruff_python_trivia::CommentRanges;
+use ruff_text_size::TextRange;

 #[derive(Debug, Clone, Default)]
 pub struct CommentRangesBuilder {
@@ -79,6 +17,6 @@ impl CommentRangesBuilder {
    }

    pub fn finish(self) -> CommentRanges {
-        CommentRanges { raw: self.ranges }
+        CommentRanges::new(self.ranges)
    }
 }
--- a/crates/ruff_python_index/src/indexer.rs
+++ b/crates/ruff_python_index/src/indexer.rs
@@ -1,15 +1,16 @@
 //! Struct used to index source code, to enable efficient lookup of tokens that
 //! are omitted from the AST (e.g., commented lines).

+use crate::CommentRangesBuilder;
 use ruff_python_ast::Stmt;
 use ruff_python_parser::lexer::LexResult;
 use ruff_python_parser::{StringKind, Tok};
-use ruff_python_trivia::{has_leading_content, has_trailing_content, is_python_whitespace};
+use ruff_python_trivia::{
+    has_leading_content, has_trailing_content, is_python_whitespace, CommentRanges,
+};
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextRange, TextSize};

-use super::comment_ranges::{CommentRanges, CommentRangesBuilder};
-
 pub struct Indexer {
    comment_ranges: CommentRanges,

--- a/crates/ruff_python_index/src/lib.rs
+++ b/crates/ruff_python_index/src/lib.rs
@@ -1,5 +1,5 @@
 mod comment_ranges;
 mod indexer;

-pub use comment_ranges::{CommentRanges, CommentRangesBuilder};
+pub use comment_ranges::CommentRangesBuilder;
 pub use indexer::Indexer;