mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-30 07:07:42 +00:00
Generic "comment to node" association logic (#4642)
This commit is contained in:
parent
84a5584888
commit
0cd453bdf0
29 changed files with 1574 additions and 65 deletions
50
crates/ruff_python_ast/src/source_code/comment_ranges.rs
Normal file
50
crates/ruff_python_ast/src/source_code/comment_ranges.rs
Normal file
|
@ -0,0 +1,50 @@
|
|||
use ruff_text_size::TextRange;
|
||||
use rustpython_parser::Tok;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::ops::Deref;
|
||||
|
||||
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
|
||||
#[derive(Clone)]
|
||||
pub struct CommentRanges {
|
||||
raw: Vec<TextRange>,
|
||||
}
|
||||
|
||||
impl Deref for CommentRanges {
|
||||
type Target = [TextRange];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.raw.as_slice()
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for CommentRanges {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_tuple("CommentRanges").field(&self.raw).finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoIterator for &'a CommentRanges {
|
||||
type Item = &'a TextRange;
|
||||
type IntoIter = std::slice::Iter<'a, TextRange>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.raw.iter()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct CommentRangesBuilder {
|
||||
ranges: Vec<TextRange>,
|
||||
}
|
||||
|
||||
impl CommentRangesBuilder {
|
||||
pub fn visit_token(&mut self, token: &Tok, range: TextRange) {
|
||||
if token.is_comment() {
|
||||
self.ranges.push(range);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn finish(self) -> CommentRanges {
|
||||
CommentRanges { raw: self.ranges }
|
||||
}
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
//! Struct used to index source code, to enable efficient lookup of tokens that
|
||||
//! are omitted from the AST (e.g., commented lines).
|
||||
|
||||
use crate::source_code::comment_ranges::{CommentRanges, CommentRangesBuilder};
|
||||
use ruff_text_size::{TextRange, TextSize};
|
||||
use rustpython_parser::lexer::LexResult;
|
||||
use rustpython_parser::{StringKind, Tok};
|
||||
|
@ -8,8 +9,7 @@ use rustpython_parser::{StringKind, Tok};
|
|||
use crate::source_code::Locator;
|
||||
|
||||
pub struct Indexer {
|
||||
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
|
||||
comment_ranges: Vec<TextRange>,
|
||||
comment_ranges: CommentRanges,
|
||||
|
||||
/// Stores the start offset of continuation lines.
|
||||
continuation_lines: Vec<TextSize>,
|
||||
|
@ -27,7 +27,7 @@ impl Indexer {
|
|||
pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self {
|
||||
assert!(TextSize::try_from(locator.contents().len()).is_ok());
|
||||
|
||||
let mut comment_ranges = Vec::new();
|
||||
let mut comment_ranges_builder = CommentRangesBuilder::default();
|
||||
let mut continuation_lines = Vec::new();
|
||||
let mut triple_quoted_string_ranges = Vec::new();
|
||||
let mut f_string_ranges = Vec::new();
|
||||
|
@ -63,10 +63,9 @@ impl Indexer {
|
|||
}
|
||||
}
|
||||
|
||||
comment_ranges_builder.visit_token(tok, *range);
|
||||
|
||||
match tok {
|
||||
Tok::Comment(..) => {
|
||||
comment_ranges.push(*range);
|
||||
}
|
||||
Tok::Newline | Tok::NonLogicalNewline => {
|
||||
line_start = range.end();
|
||||
}
|
||||
|
@ -89,7 +88,7 @@ impl Indexer {
|
|||
prev_end = range.end();
|
||||
}
|
||||
Self {
|
||||
comment_ranges,
|
||||
comment_ranges: comment_ranges_builder.finish(),
|
||||
continuation_lines,
|
||||
triple_quoted_string_ranges,
|
||||
f_string_ranges,
|
||||
|
@ -97,7 +96,7 @@ impl Indexer {
|
|||
}
|
||||
|
||||
/// Returns the byte offset ranges of comments
|
||||
pub fn comment_ranges(&self) -> &[TextRange] {
|
||||
pub fn comment_ranges(&self) -> &CommentRanges {
|
||||
&self.comment_ranges
|
||||
}
|
||||
|
||||
|
|
|
@ -14,7 +14,9 @@ pub use locator::Locator;
|
|||
pub use stylist::{Quote, Stylist};
|
||||
|
||||
pub use crate::source_code::line_index::{LineIndex, OneIndexed};
|
||||
pub use comment_ranges::{CommentRanges, CommentRangesBuilder};
|
||||
|
||||
mod comment_ranges;
|
||||
mod generator;
|
||||
mod indexer;
|
||||
mod line_index;
|
||||
|
|
|
@ -38,3 +38,14 @@ pub fn clean(indentation: &str) -> String {
|
|||
.map(|char| if char.is_whitespace() { char } else { ' ' })
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Returns `true` if `c` is a Python
/// [whitespace](https://docs.python.org/3/reference/lexical_analysis.html#whitespace-between-tokens)
/// character (space, tab, or form feed) or a new-line character (line feed or
/// carriage return).
pub const fn is_python_whitespace(c: char) -> bool {
    match c {
        // Whitespace between tokens: space, tab, form feed.
        ' ' | '\t' | '\x0C' => true,
        // New-line characters.
        '\n' | '\r' => true,
        _ => false,
    }
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue