ruff/crates/ruff_python_trivia/src/comment_ranges.rs
Dhruv Manilawala f0046ab28e
Move has_comments to CommentRanges (#11495)
## Summary

This PR moves the `has_comments` function from `Indexer` to
`CommentRanges`. The main motivation is that the `CommentRanges` will
now be built by the parser which is shared between the linter and the
formatter. Thus, the `CommentRanges` will be removed from the `Indexer`.

## Test Plan

`cargo test`
2024-05-22 13:35:16 +00:00

224 lines
7.8 KiB
Rust

use std::fmt::{Debug, Formatter};
use std::ops::Deref;
use itertools::Itertools;
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::{has_leading_content, has_trailing_content, is_python_whitespace};
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
///
/// The ordering and non-overlap properties are relied upon by the lookup
/// methods on this type (they use binary search / partitioning over the
/// stored ranges).
#[derive(Clone, Default)]
pub struct CommentRanges {
// The comment ranges, expected to be in ascending start order and non-overlapping.
raw: Vec<TextRange>,
}
impl CommentRanges {
/// Creates a [`CommentRanges`] from the given comment ranges.
///
/// The vector is stored as-is: it is neither sorted nor checked for
/// overlaps here, so the caller is responsible for passing ranges that are
/// already in increasing start order and do not overlap.
pub fn new(ranges: Vec<TextRange>) -> Self {
    let raw = ranges;
    Self { raw }
}
/// Returns `true` if `target` intersects with at least one stored comment range.
///
/// The lookup is a binary search over the stored ranges, so it depends on
/// them being sorted by start offset and non-overlapping.
pub fn intersects(&self, target: TextRange) -> bool {
    use std::cmp::Ordering;

    // Classifies a comment relative to `target` for the binary search:
    // a comment that intersects `target` is a hit, a comment that ends
    // before `target` starts sorts before it, and anything else sorts after.
    let locate = |comment: &TextRange| -> Ordering {
        if target.intersect(*comment).is_some() {
            return Ordering::Equal;
        }
        if comment.end() < target.start() {
            Ordering::Less
        } else {
            Ordering::Greater
        }
    };

    self.raw.binary_search_by(locate).is_ok()
}
/// Returns the comments that lie within `range`.
///
/// The result is the contiguous run of stored comments that begins with the
/// first comment starting at or after `range.start()` and stops just before
/// the first following comment that ends after `range.end()`.
pub fn comments_in_range(&self, range: TextRange) -> &[TextRange] {
    // Skip every comment that starts before the range does.
    let first = self
        .raw
        .partition_point(|comment| comment.start() < range.start());
    let candidates = &self.raw[first..];

    // Few comments are expected from here on, so a linear scan for the
    // first comment that runs past the end of the range should be faster
    // than a second binary search.
    let in_range = candidates
        .iter()
        .find_position(|comment| comment.end() > range.end())
        .map_or(candidates.len(), |(index, _comment)| index);

    &candidates[..in_range]
}
/// Returns `true` if a statement or expression includes at least one comment.
///
/// The range that is checked is the node's own range, widened on either side
/// to the line boundary when the node is alone on that side of its line:
///
/// * the start moves back to the start of the line unless
///   `has_leading_content` reports content before the node;
/// * the end moves forward to the end of the line unless
///   `has_trailing_content` reports content after the node.
///
/// The widened range is then tested with [`CommentRanges::intersects`].
pub fn has_comments<T>(&self, node: &T, locator: &Locator) -> bool
where
    T: Ranged,
{
    let node_start = node.start();
    let search_start = if has_leading_content(node_start, locator) {
        node_start
    } else {
        locator.line_start(node_start)
    };

    let node_end = node.end();
    let search_end = if has_trailing_content(node_end, locator) {
        node_end
    } else {
        locator.line_end(node_end)
    };

    let search_range = TextRange::new(search_start, search_end);
    self.intersects(search_range)
}
/// Given a [`CommentRanges`], determine which comments are grouped together
/// in "comment blocks". A "comment block" is a sequence of consecutive
/// own-line comments in which the comment hash (`#`) appears in the same
/// column in each line, and at least one comment is non-empty.
///
/// Returns a sorted vector containing the offset of the leading hash (`#`)
/// for each comment in any block comment.
///
/// ## Examples
/// ```python
/// # This is a block comment
/// # because it spans multiple lines
///
///     # This is also a block comment
///     # even though it is indented
///
/// # this is not a block comment
///
/// x = 1  # this is not a block comment because
/// y = 2  # the lines do not *only* contain comments
///
/// # This is not a block comment because
///     # not all consecutive lines have the
/// # first `#` character in the same column
///
/// """
/// # This is not a block comment because it is
/// # contained within a multi-line string/comment
/// """
/// ```
pub fn block_comments(&self, locator: &Locator) -> Vec<TextSize> {
    /// Moves `open` into `finished` if it qualifies as a block (more than
    /// one comment, at least one of them non-empty); otherwise discards it.
    /// Either way, `open` is left empty.
    fn flush(open: &mut Vec<TextSize>, has_text: bool, finished: &mut Vec<TextSize>) {
        if open.len() > 1 && has_text {
            finished.append(open);
        }
        open.clear();
    }

    let mut finished: Vec<TextSize> = Vec::new();

    // State of the block that is currently being assembled.
    let mut open_block: Vec<TextSize> = Vec::new();
    let mut open_column: Option<TextSize> = None;
    let mut open_has_text = false;
    let mut previous_line_end: Option<TextSize> = None;

    for comment in &self.raw {
        let hash_offset = comment.start();
        let line_start = locator.line_start(hash_offset);
        let line_end = locator.full_line_end(hash_offset);
        let column = hash_offset - line_start;

        // An end-of-line comment is never part of a block: it terminates
        // whatever block was open and leaves no block open afterwards.
        if !Self::is_own_line(hash_offset, locator) {
            flush(&mut open_block, open_has_text, &mut finished);
            open_column = None;
            open_has_text = false;
            previous_line_end = Some(line_end);
            continue;
        }

        // A line break between the end of the previous comment's line and
        // the start of this comment's line means the two comments are not
        // on consecutive lines.
        let separated_from_previous = previous_line_end.is_some_and(|previous| {
            locator.contains_line_break(TextRange::new(previous, line_start))
        });
        let comment_has_text = !Self::is_empty(*comment, locator);

        if !separated_from_previous && open_column == Some(column) {
            // Same column, directly below the previous comment: extend.
            open_block.push(hash_offset);
            open_has_text |= comment_has_text;
        } else {
            // Different column, a gap, or no open block: close the old
            // block and start a new one with this comment.
            flush(&mut open_block, open_has_text, &mut finished);
            open_block.push(hash_offset);
            open_column = Some(column);
            open_has_text = comment_has_text;
        }
        previous_line_end = Some(line_end);
    }

    // Emit the block that was still open when the comments ran out.
    flush(&mut open_block, open_has_text, &mut finished);
    finished
}
/// Returns `true` if the comment at `range` is empty, i.e. every character
/// after its first one is Python whitespace.
///
/// The first character is skipped without being inspected; for a comment
/// range it is presumably the leading `#`.
fn is_empty(range: TextRange, locator: &Locator) -> bool {
    let text = locator.slice(range);
    let mut rest = text.chars();
    // Drop the first character so only the comment body is examined.
    let _first = rest.next();
    rest.all(is_python_whitespace)
}
/// Returns `true` if the comment starting at `offset` is an own-line comment
/// (as opposed to an end-of-line comment), i.e. everything between the start
/// of its line and `offset` is Python whitespace.
fn is_own_line(offset: TextSize, locator: &Locator) -> bool {
    let line_start = locator.line_start(offset);
    let before_comment = locator.slice(TextRange::new(line_start, offset));
    before_comment.chars().all(is_python_whitespace)
}
}
/// Lets a [`CommentRanges`] be used wherever a slice of [`TextRange`] is
/// expected, exposing the stored ranges read-only.
impl Deref for CommentRanges {
    type Target = [TextRange];

    fn deref(&self) -> &Self::Target {
        &self.raw[..]
    }
}
/// Formats as a tuple struct named `CommentRanges` wrapping the stored ranges.
impl Debug for CommentRanges {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut tuple = f.debug_tuple("CommentRanges");
        tuple.field(&self.raw);
        tuple.finish()
    }
}
/// Iterates over the stored comment ranges by reference, in stored order.
impl<'a> IntoIterator for &'a CommentRanges {
    type Item = &'a TextRange;
    type IntoIter = std::slice::Iter<'a, TextRange>;

    fn into_iter(self) -> Self::IntoIter {
        let ranges: &'a [TextRange] = self.raw.as_slice();
        ranges.iter()
    }
}