Generic "comment to node" association logic (#4642)

This commit is contained in:
Micha Reiser 2023-05-30 11:28:01 +02:00 committed by GitHub
parent 84a5584888
commit 0cd453bdf0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
29 changed files with 1574 additions and 65 deletions

View file

@ -0,0 +1,50 @@
use ruff_text_size::TextRange;
use rustpython_parser::Tok;
use std::fmt::{Debug, Formatter};
use std::ops::Deref;
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges
/// overlap.
///
/// The type dereferences to `[TextRange]` and `&CommentRanges` is iterable, so the ranges can be
/// read like a slice. Values are produced by [`CommentRangesBuilder::finish`].
#[derive(Clone)]
pub struct CommentRanges {
/// The comment ranges, in the order in which the builder collected them.
raw: Vec<TextRange>,
}
/// Exposes the stored comment ranges as a plain slice, so slice methods can be called directly
/// on a [`CommentRanges`].
impl Deref for CommentRanges {
    type Target = [TextRange];

    fn deref(&self) -> &Self::Target {
        // `&Vec<TextRange>` coerces to `&[TextRange]` at the return site.
        &self.raw
    }
}
/// Formats the value as a tuple struct named `CommentRanges` wrapping the list of ranges.
impl Debug for CommentRanges {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut tuple = f.debug_tuple("CommentRanges");
        tuple.field(&self.raw);
        tuple.finish()
    }
}
/// Lets a borrowed [`CommentRanges`] be used directly in a `for` loop, yielding each stored
/// range by reference in storage order.
impl<'r> IntoIterator for &'r CommentRanges {
    type Item = &'r TextRange;
    type IntoIter = std::slice::Iter<'r, TextRange>;

    fn into_iter(self) -> Self::IntoIter {
        let ranges: &'r [TextRange] = self.raw.as_slice();
        ranges.iter()
    }
}
/// Collects the ranges of comment tokens and turns them into a [`CommentRanges`].
///
/// Feed tokens through [`CommentRangesBuilder::visit_token`] and call
/// [`CommentRangesBuilder::finish`] once all tokens have been visited.
#[derive(Debug, Clone, Default)]
pub struct CommentRangesBuilder {
/// Ranges of the comment tokens seen so far, in the order they were visited.
ranges: Vec<TextRange>,
}
impl CommentRangesBuilder {
    /// Records `range` if `token` is a comment; any other token is ignored.
    ///
    /// Ranges are appended in call order and are neither sorted nor checked for overlap here.
    /// NOTE(review): the ordering documented on [`CommentRanges`] presumably relies on callers
    /// visiting tokens in source order; confirm against the call sites.
    pub fn visit_token(&mut self, token: &Tok, range: TextRange) {
        if !token.is_comment() {
            return;
        }

        self.ranges.push(range);
    }

    /// Consumes the builder and wraps the collected ranges in a [`CommentRanges`].
    pub fn finish(self) -> CommentRanges {
        let Self { ranges } = self;
        CommentRanges { raw: ranges }
    }
}

View file

@ -1,6 +1,7 @@
//! Struct used to index source code, to enable efficient lookup of tokens that
//! are omitted from the AST (e.g., commented lines).
use crate::source_code::comment_ranges::{CommentRanges, CommentRangesBuilder};
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{StringKind, Tok};
@ -8,8 +9,7 @@ use rustpython_parser::{StringKind, Tok};
use crate::source_code::Locator;
pub struct Indexer {
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
comment_ranges: Vec<TextRange>,
comment_ranges: CommentRanges,
/// Stores the start offset of continuation lines.
continuation_lines: Vec<TextSize>,
@ -27,7 +27,7 @@ impl Indexer {
pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self {
assert!(TextSize::try_from(locator.contents().len()).is_ok());
let mut comment_ranges = Vec::new();
let mut comment_ranges_builder = CommentRangesBuilder::default();
let mut continuation_lines = Vec::new();
let mut triple_quoted_string_ranges = Vec::new();
let mut f_string_ranges = Vec::new();
@ -63,10 +63,9 @@ impl Indexer {
}
}
comment_ranges_builder.visit_token(tok, *range);
match tok {
Tok::Comment(..) => {
comment_ranges.push(*range);
}
Tok::Newline | Tok::NonLogicalNewline => {
line_start = range.end();
}
@ -89,7 +88,7 @@ impl Indexer {
prev_end = range.end();
}
Self {
comment_ranges,
comment_ranges: comment_ranges_builder.finish(),
continuation_lines,
triple_quoted_string_ranges,
f_string_ranges,
@ -97,7 +96,7 @@ impl Indexer {
}
/// Returns the byte offset ranges of comments
pub fn comment_ranges(&self) -> &[TextRange] {
pub fn comment_ranges(&self) -> &CommentRanges {
&self.comment_ranges
}

View file

@ -14,7 +14,9 @@ pub use locator::Locator;
pub use stylist::{Quote, Stylist};
pub use crate::source_code::line_index::{LineIndex, OneIndexed};
pub use comment_ranges::{CommentRanges, CommentRangesBuilder};
mod comment_ranges;
mod generator;
mod indexer;
mod line_index;

View file

@ -38,3 +38,14 @@ pub fn clean(indentation: &str) -> String {
.map(|char| if char.is_whitespace() { char } else { ' ' })
.collect()
}
/// Returns `true` if `c` is a character Python treats as
/// [whitespace between tokens](https://docs.python.org/3/reference/lexical_analysis.html#whitespace-between-tokens)
/// (space, tab, form feed) or a new-line character (`\n`, `\r`).
pub const fn is_python_whitespace(c: char) -> bool {
    // Space, tab and form feed (`\x0C`).
    let separates_tokens = matches!(c, ' ' | '\t' | '\x0C');
    // Line feed and carriage return.
    let ends_line = matches!(c, '\n' | '\r');

    separates_tokens || ends_line
}