Generic "comment to node" association logic (#4642)

2025-11-01 20:30:49 +00:00 · 2023-05-30 11:28:01 +02:00 · 2023-05-30 11:28:01 +02:00 · 0cd453bdf0
commit 0cd453bdf0
parent 84a5584888
29 changed files with 1574 additions and 65 deletions
--- a/crates/ruff_python_ast/src/source_code/comment_ranges.rs
+++ b/crates/ruff_python_ast/src/source_code/comment_ranges.rs
@@ -0,0 +1,50 @@
+use ruff_text_size::TextRange;
+use rustpython_parser::Tok;
+use std::fmt::{Debug, Formatter};
+use std::ops::Deref;
+
+/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges overlap.
+#[derive(Clone)]
+pub struct CommentRanges {
+    raw: Vec<TextRange>,
+}
+
+impl Deref for CommentRanges {
+    type Target = [TextRange];
+
+    fn deref(&self) -> &Self::Target {
+        self.raw.as_slice()
+    }
+}
+
+impl Debug for CommentRanges {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_tuple("CommentRanges").field(&self.raw).finish()
+    }
+}
+
+impl<'a> IntoIterator for &'a CommentRanges {
+    type Item = &'a TextRange;
+    type IntoIter = std::slice::Iter<'a, TextRange>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.raw.iter()
+    }
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct CommentRangesBuilder {
+    ranges: Vec<TextRange>,
+}
+
+impl CommentRangesBuilder {
+    pub fn visit_token(&mut self, token: &Tok, range: TextRange) {
+        if token.is_comment() {
+            self.ranges.push(range);
+        }
+    }
+
+    pub fn finish(self) -> CommentRanges {
+        CommentRanges { raw: self.ranges }
+    }
+}
--- a/crates/ruff_python_ast/src/source_code/indexer.rs
+++ b/crates/ruff_python_ast/src/source_code/indexer.rs
@@ -1,6 +1,7 @@
 //! Struct used to index source code, to enable efficient lookup of tokens that
 //! are omitted from the AST (e.g., commented lines).

+use crate::source_code::comment_ranges::{CommentRanges, CommentRangesBuilder};
 use ruff_text_size::{TextRange, TextSize};
 use rustpython_parser::lexer::LexResult;
 use rustpython_parser::{StringKind, Tok};
@@ -8,8 +9,7 @@ use rustpython_parser::{StringKind, Tok};
 use crate::source_code::Locator;

 pub struct Indexer {
-    /// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
-    comment_ranges: Vec<TextRange>,
+    comment_ranges: CommentRanges,

    /// Stores the start offset of continuation lines.
    continuation_lines: Vec<TextSize>,
@@ -27,7 +27,7 @@ impl Indexer {
    pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self {
        assert!(TextSize::try_from(locator.contents().len()).is_ok());

-        let mut comment_ranges = Vec::new();
+        let mut comment_ranges_builder = CommentRangesBuilder::default();
        let mut continuation_lines = Vec::new();
        let mut triple_quoted_string_ranges = Vec::new();
        let mut f_string_ranges = Vec::new();
@@ -63,10 +63,9 @@ impl Indexer {
                }
            }

+            comment_ranges_builder.visit_token(tok, *range);
+
            match tok {
-                Tok::Comment(..) => {
-                    comment_ranges.push(*range);
-                }
                Tok::Newline | Tok::NonLogicalNewline => {
                    line_start = range.end();
                }
@@ -89,7 +88,7 @@ impl Indexer {
            prev_end = range.end();
        }
        Self {
-            comment_ranges,
+            comment_ranges: comment_ranges_builder.finish(),
            continuation_lines,
            triple_quoted_string_ranges,
            f_string_ranges,
@@ -97,7 +96,7 @@ impl Indexer {
    }

    /// Returns the byte offset ranges of comments
-    pub fn comment_ranges(&self) -> &[TextRange] {
+    pub fn comment_ranges(&self) -> &CommentRanges {
        &self.comment_ranges
    }

--- a/crates/ruff_python_ast/src/source_code/mod.rs
+++ b/crates/ruff_python_ast/src/source_code/mod.rs
@@ -14,7 +14,9 @@ pub use locator::Locator;
 pub use stylist::{Quote, Stylist};

 pub use crate::source_code::line_index::{LineIndex, OneIndexed};
+pub use comment_ranges::{CommentRanges, CommentRangesBuilder};

+mod comment_ranges;
 mod generator;
 mod indexer;
 mod line_index;
--- a/crates/ruff_python_ast/src/whitespace.rs
+++ b/crates/ruff_python_ast/src/whitespace.rs
@@ -38,3 +38,14 @@ pub fn clean(indentation: &str) -> String {
        .map(|char| if char.is_whitespace() { char } else { ' ' })
        .collect()
 }
+
+/// Returns `true` if `c` is Python [whitespace](https://docs.python.org/3/reference/lexical_analysis.html#whitespace-between-tokens)
+/// (space, tab, or form feed) or a new-line character (`\n` or `\r`).
+pub const fn is_python_whitespace(c: char) -> bool {
+    matches!(
+        c,
+        ' ' | '\n' | '\t' | '\r' |
+        // Form-feed
+        '\x0C'
+    )
+}