Remove parser dependency from ruff-python-ast (#6096)

2025-09-28 04:45:01 +00:00 · 2023-07-26 17:47:22 +02:00 · 2023-07-26 17:47:22 +02:00 · 2cf00fee96
commit 2cf00fee96
parent 99127243f4
658 changed files with 1714 additions and 1546 deletions
--- a/crates/ruff_python_ast/src/source_code/indexer.rs
+++ b/crates/ruff_python_ast/src/source_code/indexer.rs
@@ -1,296 +0,0 @@
-//! Struct used to index source code, to enable efficient lookup of tokens that
-//! are omitted from the AST (e.g., commented lines).
-
-use ruff_text_size::{TextRange, TextSize};
-use rustpython_parser::lexer::LexResult;
-use rustpython_parser::{StringKind, Tok};
-
-use crate::source_code::comment_ranges::{CommentRanges, CommentRangesBuilder};
-use crate::source_code::Locator;
-
-/// Index over a source document that records where comments, continuation lines,
-/// triple-quoted strings, and f-strings are located.
-///
-/// An index is built from a lexed token stream by [`Indexer::from_tokens`].
-pub struct Indexer {
-    /// The ranges of the comments in the source document, as collected by
-    /// [`CommentRangesBuilder`].
-    comment_ranges: CommentRanges,
-
-    /// Stores the start offset of continuation lines, i.e. of every line that is continued
-    /// onto the following line.
-    continuation_lines: Vec<TextSize>,
-
-    /// The range of all triple quoted strings in the source document. The ranges are sorted by their
-    /// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
-    triple_quoted_string_ranges: Vec<TextRange>,
-
-    /// The ranges of the f-strings in the source document. The ranges are sorted by their
-    /// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
-    f_string_ranges: Vec<TextRange>,
-}
-
-impl Indexer {
-    /// Builds the index from the lexed `tokens` of the source text held by `locator`.
-    ///
-    /// Entries of `tokens` that are lexer errors are skipped. Comment ranges, the start offsets
-    /// of continuation lines, and the ranges of triple-quoted strings and f-strings are
-    /// collected in a single pass. A triple-quoted f-string is recorded in both string lists.
-    ///
-    /// # Panics
-    ///
-    /// Panics if the length of the source text does not fit into a [`TextSize`].
-    pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self {
-        assert!(TextSize::try_from(locator.contents().len()).is_ok());
-
-        let mut comment_ranges_builder = CommentRangesBuilder::default();
-        let mut continuation_lines = Vec::new();
-        let mut triple_quoted_string_ranges = Vec::new();
-        let mut f_string_ranges = Vec::new();
-        // End offset and kind of the previously visited token, plus the start offset of the
-        // line that is currently being scanned.
-        let mut prev_end = TextSize::default();
-        let mut prev_token: Option<&Tok> = None;
-        let mut line_start = TextSize::default();
-
-        for (tok, range) in tokens.iter().flatten() {
-            let trivia = &locator.contents()[TextRange::new(prev_end, range.start())];
-
-            // Get the trivia between the previous and the current token and detect any newlines.
-            // This is necessary because `RustPython` doesn't emit `[Tok::Newline]` tokens
-            // between any two tokens that form a continuation. That's why we have to extract the
-            // newlines "manually".
-            for (index, text) in trivia.match_indices(['\n', '\r']) {
-                // Treat `\r\n` as a single line break: skip the `\r` and handle the `\n`.
-                if text == "\r" && trivia.as_bytes().get(index + 1) == Some(&b'\n') {
-                    continue;
-                }
-
-                // Newlines after a newline never form a continuation.
-                if !matches!(prev_token, Some(Tok::Newline | Tok::NonLogicalNewline)) {
-                    continuation_lines.push(line_start);
-                }
-
-                // The cast cannot truncate: `index` lies within the source text, whose length
-                // fits into a `TextSize` (see the assertion at the top of the function).
-                #[allow(clippy::cast_possible_truncation)]
-                {
-                    line_start = prev_end + TextSize::new((index + 1) as u32);
-                }
-            }
-
-            comment_ranges_builder.visit_token(tok, *range);
-
-            match tok {
-                Tok::Newline | Tok::NonLogicalNewline => {
-                    line_start = range.end();
-                }
-                Tok::String {
-                    kind,
-                    triple_quoted,
-                    ..
-                } => {
-                    // A string can be both triple-quoted and an f-string (e.g. `f"""..."""`),
-                    // so the two properties are checked independently. Matching them as two
-                    // exclusive arms would drop triple-quoted f-strings from `f_string_ranges`.
-                    if *triple_quoted {
-                        triple_quoted_string_ranges.push(*range);
-                    }
-                    if matches!(kind, StringKind::FString | StringKind::RawFString) {
-                        f_string_ranges.push(*range);
-                    }
-                }
-                _ => {}
-            }
-
-            prev_token = Some(tok);
-            prev_end = range.end();
-        }
-        Self {
-            comment_ranges: comment_ranges_builder.finish(),
-            continuation_lines,
-            triple_quoted_string_ranges,
-            f_string_ranges,
-        }
-    }
-
-    /// Returns the byte offset ranges of the comments stored in this index.
-    pub const fn comment_ranges(&self) -> &CommentRanges {
-        &self.comment_ranges
-    }
-
-    /// Yields the source text of each comment in the given `range`.
-    ///
-    /// The comment ranges come from this index; the text itself is sliced out of `locator`.
-    pub fn comments_in_range<'a>(
-        &'a self,
-        range: TextRange,
-        locator: &'a Locator,
-    ) -> impl Iterator<Item = &'a str> {
-        let ranges_in_scope = self.comment_ranges.comments_in_range(range);
-        ranges_in_scope
-            .iter()
-            .map(move |&comment| locator.slice(comment))
-    }
-
-    /// Returns the start offsets of the lines recorded as continuations (backslash), in the
-    /// order in which they were pushed while indexing.
-    pub fn continuation_line_starts(&self) -> &[TextSize] {
-        &self.continuation_lines
-    }
-
-    /// Returns `true` if the line containing `offset` is recorded as a continuation line.
-    pub fn is_continuation(&self, offset: TextSize, locator: &Locator) -> bool {
-        // Continuation lines are stored by their start offset, so `offset` is first mapped to
-        // the start of its line before the lookup.
-        self.continuation_lines
-            .binary_search(&locator.line_start(offset))
-            .is_ok()
-    }
-
-    /// Looks up the triple-quoted string that contains `offset`.
-    ///
-    /// Returns the [`TextRange`] of that string, or `None` if `offset` does not lie inside any
-    /// recorded triple-quoted string.
-    pub fn triple_quoted_string_range(&self, offset: TextSize) -> Option<TextRange> {
-        let ranges = &self.triple_quoted_string_ranges;
-        ranges
-            .binary_search_by(|candidate| {
-                if candidate.contains(offset) {
-                    std::cmp::Ordering::Equal
-                } else if offset < candidate.start() {
-                    // The candidate starts after `offset`: continue in the lower half.
-                    std::cmp::Ordering::Greater
-                } else {
-                    std::cmp::Ordering::Less
-                }
-            })
-            .ok()
-            .map(|position| ranges[position])
-    }
-
-    /// Looks up the f-string that contains `offset`.
-    ///
-    /// Returns the [`TextRange`] of that f-string, or `None` if `offset` does not lie inside
-    /// any recorded f-string.
-    pub fn f_string_range(&self, offset: TextSize) -> Option<TextRange> {
-        let located = self.f_string_ranges.binary_search_by(|candidate| {
-            if offset < candidate.start() {
-                // The candidate starts after `offset`: continue in the lower half.
-                std::cmp::Ordering::Greater
-            } else if offset >= candidate.end() {
-                // The candidate ends at or before `offset`: continue in the upper half.
-                std::cmp::Ordering::Less
-            } else {
-                std::cmp::Ordering::Equal
-            }
-        });
-        match located {
-            Ok(position) => Some(self.f_string_ranges[position]),
-            Err(_) => None,
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use ruff_text_size::{TextRange, TextSize};
-    use rustpython_parser::lexer::LexResult;
-    use rustpython_parser::{lexer, Mode};
-
-    use crate::source_code::{Indexer, Locator};
-
-    /// Checks which line start offsets `Indexer::from_tokens` records as continuation lines.
-    #[test]
-    fn continuation() {
-        // A single statement on one line: nothing is continued.
-        let contents = r#"x = 1"#;
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
-        assert_eq!(indexer.continuation_line_starts(), &[]);
-
-        // A comment, blank lines, and plain statements: still no continuation lines.
-        let contents = r#"
-        # Hello, world!
-
-x = 1
-
-y = 2
-        "#
-        .trim();
-
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
-        assert_eq!(indexer.continuation_line_starts(), &[]);
-
-        // Backslash continuations at the top level, inside an indented block (two in a row),
-        // and inside parentheses. The expected offsets are the starts of the lines that end
-        // with a backslash.
-        let contents = r#"
-x = \
-    1
-
-if True:
-    z = \
-        \
-        2
-
-(
-    "abc" # Foo
-    "def" \
-    "ghi"
-)
-"#
-        .trim();
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
-        assert_eq!(
-            indexer.continuation_line_starts(),
-            [
-                // row 1
-                TextSize::from(0),
-                // row 5
-                TextSize::from(22),
-                // row 6
-                TextSize::from(32),
-                // row 11
-                TextSize::from(71),
-            ]
-        );
-
-        // Statements separated by `;`: only the lines where a backslash follows the `;` are
-        // expected to be recorded.
-        let contents = r"
-x = 1; import sys
-import os
-
-if True:
-    x = 1; import sys
-    import os
-
-if True:
-    x = 1; \
-        import os
-
-x = 1; \
-import os
-"
-        .trim();
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
-        assert_eq!(
-            indexer.continuation_line_starts(),
-            [
-                // row 9
-                TextSize::from(84),
-                // row 12
-                TextSize::from(116)
-            ]
-        );
-    }
-
-    /// Checks which ranges `Indexer::from_tokens` records for triple-quoted strings.
-    ///
-    /// Unlike in `continuation`, the multi-line fixtures below are not trimmed, so the expected
-    /// offsets include the leading newline and indentation of each raw string.
-    #[test]
-    fn string_ranges() {
-        // A regular (non-triple-quoted) string is not recorded.
-        let contents = r#""this is a single-quoted string""#;
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
-        assert_eq!(indexer.triple_quoted_string_ranges, []);
-
-        // One triple-quoted string spanning several lines yields a single range.
-        let contents = r#"
-            """
-            this is a multiline string
-            """
-            "#;
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
-        assert_eq!(
-            indexer.triple_quoted_string_ranges,
-            [TextRange::new(TextSize::from(13), TextSize::from(71))]
-        );
-
-        // `'''` delimiters nested inside a `"""` string do not start a second string.
-        let contents = r#"
-            """
-            '''this is a multiline string with multiple delimiter types'''
-            """
-            "#;
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
-        assert_eq!(
-            indexer.triple_quoted_string_ranges,
-            [TextRange::new(TextSize::from(13), TextSize::from(107))]
-        );
-
-        // Two consecutive triple-quoted strings yield two ranges, in source order.
-        let contents = r#"
-            """
-            this is one
-            multiline string
-            """
-            """
-            and this is
-            another
-            """
-            "#;
-        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
-        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
-        assert_eq!(
-            indexer.triple_quoted_string_ranges,
-            &[
-                TextRange::new(TextSize::from(13), TextSize::from(85)),
-                TextRange::new(TextSize::from(98), TextSize::from(161))
-            ]
-        );
-    }
-}