Use shared Cursor across crates (#5715)

## Summary

We have two `Cursor` implementations. This PR moves the implementation
from the formatter into `ruff_python_whitespace` (kind of a poorly-named
crate now) and uses it for both use-cases.
This commit is contained in:
Charlie Marsh 2023-07-12 17:09:27 -04:00 committed by GitHub
parent 6ce252f0ed
commit 6dbc6d2e59
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 163 additions and 210 deletions

View file

@ -1,9 +1,8 @@
use std::str::Chars;
use ruff_python_whitespace::is_python_whitespace;
use ruff_text_size::{TextLen, TextRange, TextSize};
use unic_ucd_ident::{is_xid_continue, is_xid_start};
use ruff_python_whitespace::{is_python_whitespace, Cursor};
/// Searches for the first non-trivia character in `range`.
///
/// The search skips over any whitespace and comments.
@ -402,9 +401,7 @@ impl<'a> SimpleTokenizer<'a> {
// Skip the test whether there's a preceding comment if it has been performed before.
if !self.back_line_has_no_comment {
let rest = self.cursor.chars.as_str();
for (back_index, c) in rest.chars().rev().enumerate() {
for (back_index, c) in self.cursor.chars().rev().enumerate() {
match c {
'#' => {
// Potentially a comment
@ -515,100 +512,6 @@ impl DoubleEndedIterator for SimpleTokenizer<'_> {
}
}
/// Sentinel character returned when peeking past either end of the input.
const EOF_CHAR: char = '\0';
#[derive(Debug, Clone)]
struct Cursor<'a> {
    /// Iterator over the characters that have not been consumed yet.
    chars: Chars<'a>,
    /// Remaining source length recorded by the last `start_token` call
    /// (initially the full source length); `token_len` is derived from it.
    source_length: TextSize,
}
impl<'a> Cursor<'a> {
    /// Creates a cursor positioned at the start of `source`, with the
    /// token start marker at the beginning.
    fn new(source: &'a str) -> Self {
        Self {
            source_length: source.text_len(),
            chars: source.chars(),
        }
    }

    /// Peeks at the next character without consuming it.
    /// Returns [`EOF_CHAR`] when no characters remain.
    fn first(&self) -> char {
        let mut lookahead = self.chars.clone();
        lookahead.next().unwrap_or(EOF_CHAR)
    }

    /// Peeks at the last character (from the back) without consuming it.
    /// Returns [`EOF_CHAR`] when no characters remain.
    fn last(&self) -> char {
        let mut lookbehind = self.chars.clone();
        lookbehind.next_back().unwrap_or(EOF_CHAR)
    }

    /// Length of the text that has not been consumed yet.
    // SAFETY: The `source.text_len` call in `new` would already have panicked
    // if the string length exceeded `u32`, so this cast cannot truncate.
    #[allow(clippy::cast_possible_truncation)]
    fn text_len(&self) -> TextSize {
        let remaining = self.chars.as_str().len();
        TextSize::new(remaining as u32)
    }

    /// Length of the current token: everything consumed since the last
    /// `start_token` call (or since construction).
    fn token_len(&self) -> TextSize {
        self.source_length - self.text_len()
    }

    /// Marks the current position as the start of a new token.
    fn start_token(&mut self) {
        self.source_length = self.text_len();
    }

    /// Returns `true` once every character has been consumed.
    fn is_eof(&self) -> bool {
        self.chars.as_str().is_empty()
    }

    /// Consumes and returns the next character, if any.
    fn bump(&mut self) -> Option<char> {
        self.chars.next()
    }

    /// Consumes and returns the last character (from the back), if any.
    fn bump_back(&mut self) -> Option<char> {
        self.chars.next_back()
    }

    /// Consumes the next character if it equals `c`; reports whether it did.
    fn eat_char(&mut self, c: char) -> bool {
        let hit = self.first() == c;
        if hit {
            self.bump();
        }
        hit
    }

    /// Consumes the last character if it equals `c`; reports whether it did.
    fn eat_char_back(&mut self, c: char) -> bool {
        let hit = self.last() == c;
        if hit {
            self.bump_back();
        }
        hit
    }

    /// Consumes characters from the front while `predicate` holds, stopping
    /// at the end of the input.
    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
        // An optimized version for e.g. line comments was tried, but LLVM
        // inlines all of this and compiles it down to fast byte iteration.
        loop {
            if !predicate(self.first()) || self.is_eof() {
                break;
            }
            self.bump();
        }
    }

    /// Consumes characters from the back while `predicate` holds, stopping
    /// at the beginning of the input.
    fn eat_back_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
        // An optimized version for e.g. line comments was tried, but LLVM
        // inlines all of this and compiles it down to fast byte iteration.
        loop {
            if !predicate(self.last()) || self.is_eof() {
                break;
            }
            self.bump_back();
        }
    }
}
#[cfg(test)]
mod tests {
use insta::assert_debug_snapshot;