Use CommentRanges in backwards lexing (#7360)

## Summary

The tokenizer was split into a forward and a backwards tokenizer. The
backwards tokenizer uses the same method names as the forward one (e.g.
`next_token`). The backwards tokenizer receives the comment ranges that we
have already built, so that it can skip over comments.

---------

Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
konsti 2023-09-16 05:21:45 +02:00 committed by GitHub
parent 1f6e1485f9
commit 2cbe1733c8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
41 changed files with 744 additions and 628 deletions

View file

@ -1,70 +1,8 @@
use itertools::Itertools;
use std::fmt::{Debug, Formatter};
use std::ops::Deref;
use std::fmt::Debug;
use ruff_python_parser::Tok;
use ruff_text_size::{Ranged, TextRange};
/// Stores the ranges of comments, sorted by [`TextRange::start`] in increasing order.
///
/// No two ranges overlap.
#[derive(Clone)]
pub struct CommentRanges {
// The comment ranges, kept in the order described on the struct.
raw: Vec<TextRange>,
}
impl CommentRanges {
    /// Returns `true` if `target` contains at least one of the stored comment ranges.
    ///
    /// The lookup is a binary search over the stored ranges. Only a comment for which
    /// `target.contains_range(..)` holds counts as a match.
    pub fn intersects(&self, target: TextRange) -> bool {
        use std::cmp::Ordering;

        let lookup = self.raw.binary_search_by(|comment| {
            if target.contains_range(*comment) {
                return Ordering::Equal;
            }
            // Not contained: tell the search on which side of `target` this comment lies.
            if comment.end() < target.start() {
                Ordering::Less
            } else {
                Ordering::Greater
            }
        });
        lookup.is_ok()
    }

    /// Returns the comments that lie within `range`.
    ///
    /// The returned slice begins at the first comment that does not start before
    /// `range.start()` and stops right before the first following comment that ends
    /// after `range.end()`.
    pub fn comments_in_range(&self, range: TextRange) -> &[TextRange] {
        let first = self
            .raw
            .partition_point(|comment| comment.start() < range.start());
        let candidates = &self.raw[first..];

        // We expect there are few comments, so a linear scan should be faster than a
        // second binary search.
        let count = candidates
            .iter()
            .find_position(|comment| comment.end() > range.end())
            .map_or(candidates.len(), |(index, _comment)| index);

        &candidates[..count]
    }
}
/// Lets a [`CommentRanges`] be used wherever a slice of [`TextRange`] is expected.
impl Deref for CommentRanges {
    type Target = [TextRange];

    /// Borrows the stored ranges as a slice.
    fn deref(&self) -> &Self::Target {
        &self.raw
    }
}
/// Formats the value as a tuple struct named `CommentRanges` wrapping the stored ranges.
impl Debug for CommentRanges {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut tuple = f.debug_tuple("CommentRanges");
        tuple.field(&self.raw);
        tuple.finish()
    }
}
/// Allows iterating over a borrowed [`CommentRanges`], e.g. `for range in &comment_ranges`.
impl<'a> IntoIterator for &'a CommentRanges {
    type Item = &'a TextRange;
    type IntoIter = std::slice::Iter<'a, TextRange>;

    /// Yields the stored ranges by reference, in their stored order.
    fn into_iter(self) -> Self::IntoIter {
        let ranges: &'a [TextRange] = self.raw.as_slice();
        ranges.iter()
    }
}
use ruff_python_trivia::CommentRanges;
use ruff_text_size::TextRange;
#[derive(Debug, Clone, Default)]
pub struct CommentRangesBuilder {
@ -79,6 +17,6 @@ impl CommentRangesBuilder {
}
pub fn finish(self) -> CommentRanges {
CommentRanges { raw: self.ranges }
CommentRanges::new(self.ranges)
}
}

View file

@ -1,15 +1,16 @@
//! Struct used to index source code, to enable efficient lookup of tokens that
//! are omitted from the AST (e.g., commented lines).
use crate::CommentRangesBuilder;
use ruff_python_ast::Stmt;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{StringKind, Tok};
use ruff_python_trivia::{has_leading_content, has_trailing_content, is_python_whitespace};
use ruff_python_trivia::{
has_leading_content, has_trailing_content, is_python_whitespace, CommentRanges,
};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange, TextSize};
use super::comment_ranges::{CommentRanges, CommentRangesBuilder};
pub struct Indexer {
comment_ranges: CommentRanges,

View file

@ -1,5 +1,5 @@
mod comment_ranges;
mod indexer;
pub use comment_ranges::{CommentRanges, CommentRangesBuilder};
pub use comment_ranges::CommentRangesBuilder;
pub use indexer::Indexer;