ruff/crates/ruff_python_ast/src/source_code/comment_ranges.rs
Charlie Marsh 4782675bf9
Remove lexer-based comment range detection (#5785)
## Summary

I'm doing some unrelated profiling, and I noticed that this method is
actually measurable on the CPython benchmark -- it's > 1% of execution
time. We don't need to lex here, we already know the ranges of all
comments, so we can just do a simple binary search for overlap, which
brings the method down to 0%.

## Test Plan

`cargo test`
2023-07-16 01:03:27 +00:00

68 lines
1.7 KiB
Rust

use std::fmt::{Debug, Formatter};
use std::ops::Deref;
use ruff_text_size::TextRange;
use rustpython_parser::Tok;
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
///
/// The ordering is an invariant the lookup methods rely on (they binary-search
/// the ranges); it is not re-established by this type itself.
#[derive(Clone)]
pub struct CommentRanges {
// Comment ranges exactly as handed over by `CommentRangesBuilder::finish`,
// which does not sort them. NOTE(review): this presumes tokens were visited
// in source order - confirm against the caller driving the builder.
raw: Vec<TextRange>,
}
impl CommentRanges {
    /// Returns `true` if the given range includes a comment, i.e. if at least
    /// one stored comment range lies entirely within `target`.
    ///
    /// The lookup is a binary search and therefore relies on the stored ranges
    /// being sorted by start offset and non-overlapping.
    pub fn intersects(&self, target: TextRange) -> bool {
        self.raw
            .binary_search_by(|range| {
                if target.contains_range(*range) {
                    std::cmp::Ordering::Equal
                } else if range.start() < target.start() {
                    // A comment that starts before `target` can never be
                    // contained in it, so any match must be further right.
                    // Keying this branch on the comment's *start* (rather than
                    // its end) also covers comments that touch or straddle
                    // `target.start()`; treating those as `Greater` would steer
                    // the search left and skip contained comments to the right.
                    std::cmp::Ordering::Less
                } else {
                    // Starts inside or after `target` but ends past its end.
                    // Every later comment starts even further right, so any
                    // match must be to the left.
                    std::cmp::Ordering::Greater
                }
            })
            .is_ok()
    }
}
/// Lets a `CommentRanges` be used wherever a `&[TextRange]` is expected.
impl Deref for CommentRanges {
    type Target = [TextRange];

    /// Borrows the stored comment ranges as a slice.
    fn deref(&self) -> &[TextRange] {
        &self.raw
    }
}
/// Formats as a tuple struct named `CommentRanges` wrapping the list of ranges.
impl Debug for CommentRanges {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut tuple = f.debug_tuple("CommentRanges");
        tuple.field(&self.raw);
        tuple.finish()
    }
}
/// Allows `for range in &comment_ranges`, yielding each stored range by
/// reference in storage order.
impl<'ranges> IntoIterator for &'ranges CommentRanges {
    type Item = &'ranges TextRange;
    type IntoIter = std::slice::Iter<'ranges, TextRange>;

    fn into_iter(self) -> Self::IntoIter {
        let ranges: &'ranges [TextRange] = &self.raw;
        ranges.iter()
    }
}
/// Accumulates the ranges of comment tokens and turns them into a
/// [`CommentRanges`] via [`CommentRangesBuilder::finish`].
///
/// Starts out empty (`Default`); feed it tokens with
/// [`CommentRangesBuilder::visit_token`].
#[derive(Debug, Clone, Default)]
pub struct CommentRangesBuilder {
// Ranges of the comment tokens seen so far, in the order they were visited.
ranges: Vec<TextRange>,
}
impl CommentRangesBuilder {
    /// Records `range` if `token` is a comment; every other token is ignored.
    ///
    /// Ranges are appended in call order without sorting.
    /// NOTE(review): the sortedness `CommentRanges` documents presumably comes
    /// from visiting tokens in source order - confirm against the caller.
    pub fn visit_token(&mut self, token: &Tok, range: TextRange) {
        if !token.is_comment() {
            return;
        }
        self.ranges.push(range);
    }

    /// Consumes the builder and wraps the collected ranges, unchanged, in a
    /// [`CommentRanges`].
    pub fn finish(self) -> CommentRanges {
        let Self { ranges } = self;
        CommentRanges { raw: ranges }
    }
}