ruff_python_parser: add Tokens::before method

This is analogous to the existing `Tokens::after` method. Its
implementation is almost identical.

We plan to use this for looking at the tokens immediately before the
cursor when fetching completions.
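
For illustration, a minimal sketch of that call pattern (not part of this
commit; the source text and cursor offset below are made up, and error
handling is elided):

    use ruff_python_parser::parse_module;
    use ruff_text_size::TextSize;

    fn main() {
        // Hypothetical source and cursor position, for illustration only.
        let source = "import os\n";
        let parsed = parse_module(source).expect("source should parse");
        // Cursor at offset 9, immediately after `os`. `before` returns every
        // token that ends at or before this offset.
        let before = parsed.tokens().before(TextSize::new(9));
        // A completion provider would start by inspecting the trailing tokens.
        for token in before.iter().rev().take(2) {
            println!("{:?} @ {:?}", token.kind(), token.range());
        }
    }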
Author:    Andrew Gallant
Date:      2025-05-21 12:53:18 -04:00
Committer: Andrew Gallant
parent 47a2ec002e
commit a827b16ebd

@@ -637,6 +637,41 @@ impl Tokens {
         }
     }

+    /// Returns a slice of tokens before the given [`TextSize`] offset.
+    ///
+    /// If the given offset is between two tokens, the returned slice will end just before the
+    /// following token. In other words, the slice contains every token that ends at or before
+    /// the given offset.
+    ///
+    /// # Panics
+    ///
+    /// If the given offset is inside a token range at any point
+    /// other than the start of the range.
+    pub fn before(&self, offset: TextSize) -> &[Token] {
+        match self.binary_search_by(|token| token.start().cmp(&offset)) {
+            Ok(idx) => &self[..idx],
+            Err(idx) => {
+                // We can't use `saturating_sub` here because a file could contain a BOM header, in
+                // which case the token starts at offset 3 for UTF-8 encoded file content.
+                if idx > 0 {
+                    if let Some(prev) = self.get(idx - 1) {
+                        // If it's equal to the end offset, then it's at a token boundary which is
+                        // valid. If it's greater than the end offset, then it's in the gap between
+                        // the tokens which is valid as well.
+                        assert!(
+                            offset >= prev.end(),
+                            "Offset {:?} is inside a token range {:?}",
+                            offset,
+                            prev.range()
+                        );
+                    }
+                }
+                &self[..idx]
+            }
+        }
+    }
+
     /// Returns a slice of tokens after the given [`TextSize`] offset.
     ///
     /// If the given offset is between two tokens, the returned slice will start from the following
@@ -645,7 +680,8 @@ impl Tokens {
     ///
     /// # Panics
     ///
-    /// If the given offset is inside a token range.
+    /// If the given offset is inside a token range at any point
+    /// other than the start of the range.
     pub fn after(&self, offset: TextSize) -> &[Token] {
         match self.binary_search_by(|token| token.start().cmp(&offset)) {
             Ok(idx) => &self[idx..],
@@ -947,6 +983,68 @@ mod tests {
         tokens.after(TextSize::new(5));
     }

+    #[test]
+    fn tokens_before_offset_at_first_token_start() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(0));
+        assert_eq!(before.len(), 0);
+    }
+
+    #[test]
+    fn tokens_before_offset_after_first_token_gap() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(3));
+        assert_eq!(before.len(), 1);
+        assert_eq!(before.last().unwrap().kind(), TokenKind::Def);
+    }
+
+    #[test]
+    fn tokens_before_offset_at_second_token_start() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(4));
+        assert_eq!(before.len(), 1);
+        assert_eq!(before.last().unwrap().kind(), TokenKind::Def);
+    }
+
+    #[test]
+    fn tokens_before_offset_at_token_start() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(8));
+        assert_eq!(before.len(), 3);
+        assert_eq!(before.last().unwrap().kind(), TokenKind::Lpar);
+    }
+
+    #[test]
+    fn tokens_before_offset_at_token_end() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(11));
+        assert_eq!(before.len(), 6);
+        assert_eq!(before.last().unwrap().kind(), TokenKind::Newline);
+    }
+
+    #[test]
+    fn tokens_before_offset_between_tokens() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(13));
+        assert_eq!(before.len(), 6);
+        assert_eq!(before.last().unwrap().kind(), TokenKind::Newline);
+    }
+
+    #[test]
+    fn tokens_before_offset_at_last_token_end() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(33));
+        assert_eq!(before.len(), 10);
+        assert_eq!(before.last().unwrap().kind(), TokenKind::Pass);
+    }
+
+    #[test]
+    #[should_panic(expected = "Offset 5 is inside a token range 4..7")]
+    fn tokens_before_offset_inside_token() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        tokens.before(TextSize::new(5));
+    }
+
     #[test]
     fn tokens_in_range_at_token_offset() {
         let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());