mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-03 18:28:24 +00:00
Remove parser dependency from ruff-python-ast (#6096)
This commit is contained in:
parent
99127243f4
commit
2cf00fee96
658 changed files with 1714 additions and 1546 deletions
84
crates/ruff_python_index/src/comment_ranges.rs
Normal file
84
crates/ruff_python_index/src/comment_ranges.rs
Normal file
|
@@ -0,0 +1,84 @@
|
|||
use itertools::Itertools;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::ops::Deref;
|
||||
|
||||
use ruff_text_size::TextRange;
|
||||
use rustpython_parser::Tok;
|
||||
|
||||
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
#[derive(Clone)]
pub struct CommentRanges {
    // Sorted, non-overlapping comment ranges; ordering is maintained by the
    // builder, which visits tokens in source order.
    raw: Vec<TextRange>,
}
|
||||
|
||||
impl CommentRanges {
|
||||
/// Returns `true` if the given range includes a comment.
|
||||
pub fn intersects(&self, target: TextRange) -> bool {
|
||||
self.raw
|
||||
.binary_search_by(|range| {
|
||||
if target.contains_range(*range) {
|
||||
std::cmp::Ordering::Equal
|
||||
} else if range.end() < target.start() {
|
||||
std::cmp::Ordering::Less
|
||||
} else {
|
||||
std::cmp::Ordering::Greater
|
||||
}
|
||||
})
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
/// Returns the comments who are within the range
|
||||
pub fn comments_in_range(&self, range: TextRange) -> &[TextRange] {
|
||||
let start = self
|
||||
.raw
|
||||
.partition_point(|comment| comment.start() < range.start());
|
||||
// We expect there are few comments, so switching to find should be faster
|
||||
match self.raw[start..]
|
||||
.iter()
|
||||
.find_position(|comment| comment.end() > range.end())
|
||||
{
|
||||
Some((in_range, _element)) => &self.raw[start..start + in_range],
|
||||
None => &self.raw[start..],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for CommentRanges {
|
||||
type Target = [TextRange];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.raw.as_slice()
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for CommentRanges {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_tuple("CommentRanges").field(&self.raw).finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoIterator for &'a CommentRanges {
|
||||
type IntoIter = std::slice::Iter<'a, TextRange>;
|
||||
type Item = &'a TextRange;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.raw.iter()
|
||||
}
|
||||
}
|
||||
|
||||
/// Incrementally collects comment ranges while a token stream is visited.
#[derive(Debug, Clone, Default)]
pub struct CommentRangesBuilder {
    // Ranges in visit order; tokens arrive in source order, so this stays
    // sorted by start offset.
    ranges: Vec<TextRange>,
}
|
||||
|
||||
impl CommentRangesBuilder {
|
||||
pub fn visit_token(&mut self, token: &Tok, range: TextRange) {
|
||||
if token.is_comment() {
|
||||
self.ranges.push(range);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn finish(self) -> CommentRanges {
|
||||
CommentRanges { raw: self.ranges }
|
||||
}
|
||||
}
|
406
crates/ruff_python_index/src/indexer.rs
Normal file
406
crates/ruff_python_index/src/indexer.rs
Normal file
|
@@ -0,0 +1,406 @@
|
|||
//! Struct used to index source code, to enable efficient lookup of tokens that
|
||||
//! are omitted from the AST (e.g., commented lines).
|
||||
|
||||
use ruff_python_trivia::{has_leading_content, has_trailing_content, is_python_whitespace};
|
||||
use ruff_text_size::{TextRange, TextSize};
|
||||
use rustpython_ast::{Ranged, Stmt};
|
||||
use rustpython_parser::lexer::LexResult;
|
||||
use rustpython_parser::{StringKind, Tok};
|
||||
|
||||
use super::comment_ranges::{CommentRanges, CommentRangesBuilder};
|
||||
use ruff_source_file::Locator;
|
||||
|
||||
/// Token-derived index over a Python source document, recording positions
/// (comments, continuation lines, string ranges) that are omitted from the AST.
pub struct Indexer {
    /// The ranges of all comments in the source document, sorted by start offset.
    comment_ranges: CommentRanges,

    /// Stores the start offset of continuation lines.
    continuation_lines: Vec<TextSize>,

    /// The range of all triple quoted strings in the source document. The ranges are sorted by their
    /// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
    triple_quoted_string_ranges: Vec<TextRange>,

    /// The range of all f-string in the source document. The ranges are sorted by their
    /// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
    f_string_ranges: Vec<TextRange>,
}
|
||||
|
||||
impl Indexer {
|
||||
/// Builds an [`Indexer`] by walking the lexer token stream once, collecting
/// comment ranges, continuation-line start offsets, and the ranges of
/// triple-quoted strings and f-strings.
pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self {
    // Guarantees that every byte offset below fits into a `TextSize` (u32).
    assert!(TextSize::try_from(locator.contents().len()).is_ok());

    let mut comment_ranges_builder = CommentRangesBuilder::default();
    let mut continuation_lines = Vec::new();
    let mut triple_quoted_string_ranges = Vec::new();
    let mut f_string_ranges = Vec::new();
    // End offset of the previously visited token (start of the gap before `tok`).
    let mut prev_end = TextSize::default();
    let mut prev_token: Option<&Tok> = None;
    // Start offset of the line currently being scanned.
    let mut line_start = TextSize::default();

    for (tok, range) in tokens.iter().flatten() {
        let trivia = &locator.contents()[TextRange::new(prev_end, range.start())];

        // Get the trivia between the previous and the current token and detect any newlines.
        // This is necessary because `RustPython` doesn't emit `[Tok::Newline]` tokens
        // between any two tokens that form a continuation. That's why we have to extract the
        // newlines "manually".
        for (index, text) in trivia.match_indices(['\n', '\r']) {
            // Treat `\r\n` as a single newline: skip the `\r` and handle the `\n`.
            if text == "\r" && trivia.as_bytes().get(index + 1) == Some(&b'\n') {
                continue;
            }

            // Newlines after a newline never form a continuation.
            if !matches!(prev_token, Some(Tok::Newline | Tok::NonLogicalNewline)) {
                continuation_lines.push(line_start);
            }

            // SAFETY: Safe because of the len assertion at the top of the function.
            #[allow(clippy::cast_possible_truncation)]
            {
                line_start = prev_end + TextSize::new((index + 1) as u32);
            }
        }

        comment_ranges_builder.visit_token(tok, *range);

        match tok {
            Tok::Newline | Tok::NonLogicalNewline => {
                line_start = range.end();
            }
            // Note: this arm is tried first, so a triple-quoted f-string is
            // recorded as a triple-quoted string, not as an f-string.
            Tok::String {
                triple_quoted: true,
                ..
            } => {
                triple_quoted_string_ranges.push(*range);
            }
            Tok::String {
                kind: StringKind::FString | StringKind::RawFString,
                ..
            } => {
                f_string_ranges.push(*range);
            }
            _ => {}
        }

        prev_token = Some(tok);
        prev_end = range.end();
    }
    Self {
        comment_ranges: comment_ranges_builder.finish(),
        continuation_lines,
        triple_quoted_string_ranges,
        f_string_ranges,
    }
}
|
||||
|
||||
/// Returns the byte offset ranges of comments.
pub const fn comment_ranges(&self) -> &CommentRanges {
    &self.comment_ranges
}
|
||||
|
||||
/// Returns the comments in the given range as source code slices
|
||||
pub fn comments_in_range<'a>(
|
||||
&'a self,
|
||||
range: TextRange,
|
||||
locator: &'a Locator,
|
||||
) -> impl Iterator<Item = &'a str> {
|
||||
self.comment_ranges
|
||||
.comments_in_range(range)
|
||||
.iter()
|
||||
.map(move |comment_range| locator.slice(*comment_range))
|
||||
}
|
||||
|
||||
/// Returns the line start positions of continuations (backslash).
pub fn continuation_line_starts(&self) -> &[TextSize] {
    &self.continuation_lines
}
|
||||
|
||||
/// Returns `true` if the given offset is part of a continuation line.
|
||||
pub fn is_continuation(&self, offset: TextSize, locator: &Locator) -> bool {
|
||||
let line_start = locator.line_start(offset);
|
||||
self.continuation_lines.binary_search(&line_start).is_ok()
|
||||
}
|
||||
|
||||
/// Return the [`TextRange`] of the triple-quoted-string containing a given offset.
|
||||
pub fn triple_quoted_string_range(&self, offset: TextSize) -> Option<TextRange> {
|
||||
let Ok(string_range_index) = self.triple_quoted_string_ranges.binary_search_by(|range| {
|
||||
if offset < range.start() {
|
||||
std::cmp::Ordering::Greater
|
||||
} else if range.contains(offset) {
|
||||
std::cmp::Ordering::Equal
|
||||
} else {
|
||||
std::cmp::Ordering::Less
|
||||
}
|
||||
}) else {
|
||||
return None;
|
||||
};
|
||||
Some(self.triple_quoted_string_ranges[string_range_index])
|
||||
}
|
||||
|
||||
/// Return the [`TextRange`] of the f-string containing a given offset.
|
||||
pub fn f_string_range(&self, offset: TextSize) -> Option<TextRange> {
|
||||
let Ok(string_range_index) = self.f_string_ranges.binary_search_by(|range| {
|
||||
if offset < range.start() {
|
||||
std::cmp::Ordering::Greater
|
||||
} else if range.contains(offset) {
|
||||
std::cmp::Ordering::Equal
|
||||
} else {
|
||||
std::cmp::Ordering::Less
|
||||
}
|
||||
}) else {
|
||||
return None;
|
||||
};
|
||||
Some(self.f_string_ranges[string_range_index])
|
||||
}
|
||||
|
||||
/// Returns `true` if a statement or expression includes at least one comment.
|
||||
pub fn has_comments<T>(&self, node: &T, locator: &Locator) -> bool
|
||||
where
|
||||
T: Ranged,
|
||||
{
|
||||
let start = if has_leading_content(node.start(), locator) {
|
||||
node.start()
|
||||
} else {
|
||||
locator.line_start(node.start())
|
||||
};
|
||||
let end = if has_trailing_content(node.end(), locator) {
|
||||
node.end()
|
||||
} else {
|
||||
locator.line_end(node.end())
|
||||
};
|
||||
|
||||
self.comment_ranges().intersects(TextRange::new(start, end))
|
||||
}
|
||||
|
||||
/// Given an offset at the end of a line (including newlines), return the offset of the
|
||||
/// continuation at the end of that line.
|
||||
fn find_continuation(&self, offset: TextSize, locator: &Locator) -> Option<TextSize> {
|
||||
let newline_pos = usize::from(offset).saturating_sub(1);
|
||||
|
||||
// Skip the newline.
|
||||
let newline_len = match locator.contents().as_bytes()[newline_pos] {
|
||||
b'\n' => {
|
||||
if locator
|
||||
.contents()
|
||||
.as_bytes()
|
||||
.get(newline_pos.saturating_sub(1))
|
||||
== Some(&b'\r')
|
||||
{
|
||||
2
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
b'\r' => 1,
|
||||
// No preceding line.
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
self.is_continuation(offset - TextSize::from(newline_len), locator)
|
||||
.then(|| offset - TextSize::from(newline_len) - TextSize::from(1))
|
||||
}
|
||||
|
||||
/// If the node starting at the given [`TextSize`] is preceded by at least one continuation line
|
||||
/// (i.e., a line ending in a backslash), return the starting offset of the first such continuation
|
||||
/// character.
|
||||
///
|
||||
/// For example, given:
|
||||
/// ```python
|
||||
/// x = 1; \
|
||||
/// y = 2
|
||||
/// ```
|
||||
///
|
||||
/// When passed the offset of `y`, this function will return the offset of the backslash at the end
|
||||
/// of the first line.
|
||||
///
|
||||
/// Similarly, given:
|
||||
/// ```python
|
||||
/// x = 1; \
|
||||
/// \
|
||||
/// y = 2;
|
||||
/// ```
|
||||
///
|
||||
/// When passed the offset of `y`, this function will again return the offset of the backslash at
|
||||
/// the end of the first line.
|
||||
pub fn preceded_by_continuations(
|
||||
&self,
|
||||
offset: TextSize,
|
||||
locator: &Locator,
|
||||
) -> Option<TextSize> {
|
||||
// Find the first preceding continuation.
|
||||
let mut continuation = self.find_continuation(locator.line_start(offset), locator)?;
|
||||
|
||||
// Continue searching for continuations, in the unlikely event that we have multiple
|
||||
// continuations in a row.
|
||||
loop {
|
||||
let previous_line_end = locator.line_start(continuation);
|
||||
if locator
|
||||
.slice(TextRange::new(previous_line_end, continuation))
|
||||
.chars()
|
||||
.all(is_python_whitespace)
|
||||
{
|
||||
if let Some(next_continuation) = self.find_continuation(previous_line_end, locator)
|
||||
{
|
||||
continuation = next_continuation;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
Some(continuation)
|
||||
}
|
||||
|
||||
/// Return `true` if a `Stmt` appears to be part of a multi-statement line, with
|
||||
/// other statements preceding it.
|
||||
pub fn preceded_by_multi_statement_line(&self, stmt: &Stmt, locator: &Locator) -> bool {
|
||||
has_leading_content(stmt.start(), locator)
|
||||
|| self
|
||||
.preceded_by_continuations(stmt.start(), locator)
|
||||
.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use ruff_text_size::{TextRange, TextSize};
    use rustpython_parser::lexer::LexResult;
    use rustpython_parser::{lexer, Mode};

    use crate::Indexer;
    use ruff_source_file::Locator;

    #[test]
    fn continuation() {
        // A single logical line has no continuations.
        let contents = r#"x = 1"#;
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
        assert_eq!(indexer.continuation_line_starts(), &[]);

        // Comments and blank lines do not introduce continuations.
        let contents = r#"
# Hello, world!

x = 1

y = 2
"#
        .trim();

        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
        assert_eq!(indexer.continuation_line_starts(), &[]);

        // Backslash continuations, including a continuation of a continuation
        // and a continuation inside a parenthesized expression.
        let contents = r#"
x = \
    1

if True:
    z = \
        \
        2

(
    "abc" # Foo
    "def" \
    "ghi"
)
"#
        .trim();
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
        assert_eq!(
            indexer.continuation_line_starts(),
            [
                // row 1
                TextSize::from(0),
                // row 5
                TextSize::from(22),
                // row 6
                TextSize::from(32),
                // row 11
                TextSize::from(71),
            ]
        );

        // Multi-statement lines only count as continuations when a backslash
        // actually joins them.
        let contents = r"
x = 1; import sys
import os

if True:
    x = 1; import sys
    import os

if True:
    x = 1; \
        import os

x = 1; \
import os
"
        .trim();
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
        assert_eq!(
            indexer.continuation_line_starts(),
            [
                // row 9
                TextSize::from(84),
                // row 12
                TextSize::from(116)
            ]
        );
    }

    #[test]
    fn string_ranges() {
        // Single-quoted strings are not recorded as triple-quoted ranges.
        let contents = r#""this is a single-quoted string""#;
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
        assert_eq!(indexer.triple_quoted_string_ranges, []);

        let contents = r#"
            """
            this is a multiline string
            """
            "#;
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
        assert_eq!(
            indexer.triple_quoted_string_ranges,
            [TextRange::new(TextSize::from(13), TextSize::from(71))]
        );

        // Nested delimiters of the other kind are part of the outer string.
        let contents = r#"
            """
            '''this is a multiline string with multiple delimiter types'''
            """
            "#;
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
        assert_eq!(
            indexer.triple_quoted_string_ranges,
            [TextRange::new(TextSize::from(13), TextSize::from(107))]
        );

        // Multiple triple-quoted strings each get their own range.
        let contents = r#"
            """
            this is one
            multiline string
            """
            """
            and this is
            another
            """
            "#;
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
        assert_eq!(
            indexer.triple_quoted_string_ranges,
            &[
                TextRange::new(TextSize::from(13), TextSize::from(85)),
                TextRange::new(TextSize::from(98), TextSize::from(161))
            ]
        );
    }
}
|
5
crates/ruff_python_index/src/lib.rs
Normal file
5
crates/ruff_python_index/src/lib.rs
Normal file
|
@@ -0,0 +1,5 @@
|
|||
//! Source-code indexing for Python: comment ranges and the token-derived
//! [`Indexer`] re-exported below.

mod comment_ranges;
mod indexer;

pub use comment_ranges::{CommentRanges, CommentRangesBuilder};
pub use indexer::Indexer;
|
Loading…
Add table
Add a link
Reference in a new issue