Build CommentRanges outside the parser (#11792)

## Summary

This PR removes the construction of `CommentRanges` from the parser; instead, the linter and the formatter build the ranges themselves when they are required.

For the linter, the comment ranges are built and owned by the `Indexer`, while for the formatter they are built from the `Tokens` struct and passed in as an argument.
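
As a minimal sketch of the new shape, assuming the crate's `parse_module` entry point, a consumer now derives the ranges from the token stream only when it needs them:

```rust
use ruff_python_parser::parse_module;
use ruff_python_trivia::CommentRanges;

fn comment_ranges(source: &str) -> CommentRanges {
    let parsed = parse_module(source).expect("source should parse");
    // `Parsed` no longer stores comment ranges; they are derived from the
    // token stream on demand. The linter's `Indexer` does the equivalent
    // internally and keeps ownership of the result.
    CommentRanges::from(parsed.tokens())
}
```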

## Test Plan

`cargo insta test`
Dhruv Manilawala 2024-06-09 15:25:17 +05:30 committed by GitHub
parent 7509a48eab
commit 549cc1e437
28 changed files with 151 additions and 102 deletions

View file

@@ -239,7 +239,6 @@ pub struct Parsed<T> {
syntax: T,
tokens: Tokens,
errors: Vec<ParseError>,
comment_ranges: CommentRanges,
}
impl<T> Parsed<T> {
@@ -258,11 +257,6 @@ impl<T> Parsed<T> {
&self.errors
}
/// Returns the comment ranges for the parsed output.
pub fn comment_ranges(&self) -> &CommentRanges {
&self.comment_ranges
}
/// Consumes the [`Parsed`] output and returns the contained syntax node.
pub fn into_syntax(self) -> T {
self.syntax
@@ -313,7 +307,6 @@ impl Parsed<Mod> {
syntax: module,
tokens: self.tokens,
errors: self.errors,
comment_ranges: self.comment_ranges,
}),
Mod::Expression(_) => None,
}
@@ -333,7 +326,6 @@ impl Parsed<Mod> {
syntax: expression,
tokens: self.tokens,
errors: self.errors,
comment_ranges: self.comment_ranges,
}),
}
}
@@ -518,6 +510,18 @@ impl Deref for Tokens {
}
}
impl From<&Tokens> for CommentRanges {
fn from(tokens: &Tokens) -> Self {
let mut ranges = vec![];
for token in tokens {
if token.kind() == TokenKind::Comment {
ranges.push(token.range());
}
}
CommentRanges::new(ranges)
}
}
/// Control in the different modes by which a source file can be parsed.
///
/// The mode argument specifies in what way code must be parsed.
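
With the `From<&Tokens> for CommentRanges` impl above, the formatter can build the ranges from the token stream and receive them as an explicit argument. A hedged sketch of that shape, where `format_with_comments` is a placeholder name rather than the formatter's real entry point:

```rust
use ruff_python_ast::ModModule;
use ruff_python_parser::Parsed;
use ruff_python_trivia::CommentRanges;

// Placeholder standing in for the formatter entry point; the real function
// and its signature are not part of this diff.
fn format_with_comments(_module: &ModModule, _comment_ranges: &CommentRanges) {}

fn format_source(parsed: &Parsed<ModModule>) {
    // Built from the token stream via the new `From<&Tokens>` impl...
    let comment_ranges = CommentRanges::from(parsed.tokens());
    // ...and handed to the formatter explicitly instead of being read
    // off the parse result.
    format_with_comments(parsed.syntax(), &comment_ranges);
}
```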

View file

@@ -147,7 +147,7 @@ impl<'src> Parser<'src> {
// TODO consider re-integrating lexical error handling into the parser?
let parse_errors = self.errors;
let (tokens, comment_ranges, lex_errors) = self.tokens.finish();
let (tokens, lex_errors) = self.tokens.finish();
// Fast path for when there are no lex errors.
// There's no fast path for when there are no parse errors because a lex error
@@ -156,7 +156,6 @@ impl<'src> Parser<'src> {
return Parsed {
syntax,
tokens: Tokens::new(tokens),
comment_ranges,
errors: parse_errors,
};
}
@@ -188,7 +187,6 @@ impl<'src> Parser<'src> {
Parsed {
syntax,
tokens: Tokens::new(tokens),
comment_ranges,
errors: merged,
}
}

View file

@@ -1,4 +1,3 @@
use ruff_python_trivia::CommentRanges;
use ruff_text_size::{TextRange, TextSize};
use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenFlags, TokenValue};
@@ -14,9 +13,6 @@ pub(crate) struct TokenSource<'src> {
/// is finished consuming all the tokens. Note that unlike the emitted tokens, this vector
/// holds both the trivia and non-trivia tokens.
tokens: Vec<Token>,
/// A vector containing the range of all the comment tokens emitted by the lexer.
comments: Vec<TextRange>,
}
impl<'src> TokenSource<'src> {
@@ -26,7 +22,6 @@ impl<'src> TokenSource<'src> {
TokenSource {
lexer,
tokens: vec![],
comments: vec![],
}
}
@@ -103,9 +98,6 @@ impl<'src> TokenSource<'src> {
loop {
let kind = self.lexer.next_token();
if is_trivia(kind) {
if kind == TokenKind::Comment {
self.comments.push(self.current_range());
}
self.tokens
.push(Token::new(kind, self.current_range(), self.current_flags()));
continue;
@@ -130,7 +122,6 @@ impl<'src> TokenSource<'src> {
TokenSourceCheckpoint {
lexer_checkpoint: self.lexer.checkpoint(),
tokens_position: self.tokens.len(),
comments_position: self.comments.len(),
}
}
@@ -139,18 +130,16 @@ impl<'src> TokenSource<'src> {
let TokenSourceCheckpoint {
lexer_checkpoint,
tokens_position,
comments_position,
} = checkpoint;
self.lexer.rewind(lexer_checkpoint);
self.tokens.truncate(tokens_position);
self.comments.truncate(comments_position);
}
/// Consumes the token source, returning the collected tokens, comment ranges, and any errors
/// encountered during lexing. The token collection includes both the trivia and non-trivia
/// tokens.
pub(crate) fn finish(mut self) -> (Vec<Token>, CommentRanges, Vec<LexicalError>) {
pub(crate) fn finish(mut self) -> (Vec<Token>, Vec<LexicalError>) {
assert_eq!(
self.current_kind(),
TokenKind::EndOfFile,
@@ -163,15 +152,13 @@ impl<'src> TokenSource<'src> {
assert_eq!(last.kind(), TokenKind::EndOfFile);
}
let comment_ranges = CommentRanges::new(self.comments);
(self.tokens, comment_ranges, self.lexer.finish())
(self.tokens, self.lexer.finish())
}
}
pub(crate) struct TokenSourceCheckpoint {
lexer_checkpoint: LexerCheckpoint,
tokens_position: usize,
comments_position: usize,
}
/// Allocates a [`Vec`] with an approximated capacity to fit all tokens