Build CommentRanges outside the parser (#11792)
## Summary

This PR updates the parser so that it no longer builds the `CommentRanges`; instead, they are built by the linter and the formatter when required. For the linter, the `CommentRanges` are built and owned by the `Indexer`, while for the formatter they are built from the `Tokens` struct and passed as an argument.

## Test Plan

`cargo insta test`
parent 7509a48eab
commit 549cc1e437

28 changed files with 151 additions and 102 deletions
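Before the diffs, here is a minimal sketch (not part of the commit) of the formatter-side pattern the summary describes. It assumes the `parse_module` entry point, the `Parsed::tokens` accessor, and that `CommentRanges` dereferences to `[TextRange]`:

```rust
use ruff_python_parser::parse_module;
use ruff_python_trivia::CommentRanges;

fn main() {
    let source = "x = 1  # trailing comment\n";
    let parsed = parse_module(source).expect("valid source");
    // The parser no longer stores comment ranges; a consumer that needs them
    // builds them from the token stream via the new `From<&Tokens>` impl.
    let comment_ranges = CommentRanges::from(parsed.tokens());
    assert_eq!(comment_ranges.len(), 1);
}
```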
```diff
@@ -239,7 +239,6 @@ pub struct Parsed<T> {
     syntax: T,
     tokens: Tokens,
     errors: Vec<ParseError>,
-    comment_ranges: CommentRanges,
 }
 
 impl<T> Parsed<T> {
@@ -258,11 +257,6 @@ impl<T> Parsed<T> {
         &self.errors
     }
 
-    /// Returns the comment ranges for the parsed output.
-    pub fn comment_ranges(&self) -> &CommentRanges {
-        &self.comment_ranges
-    }
-
     /// Consumes the [`Parsed`] output and returns the contained syntax node.
     pub fn into_syntax(self) -> T {
         self.syntax
@@ -313,7 +307,6 @@ impl Parsed<Mod> {
                 syntax: module,
                 tokens: self.tokens,
                 errors: self.errors,
-                comment_ranges: self.comment_ranges,
             }),
             Mod::Expression(_) => None,
         }
@@ -333,7 +326,6 @@ impl Parsed<Mod> {
                 syntax: expression,
                 tokens: self.tokens,
                 errors: self.errors,
-                comment_ranges: self.comment_ranges,
             }),
         }
     }
@@ -518,6 +510,18 @@ impl Deref for Tokens {
     }
 }
 
+impl From<&Tokens> for CommentRanges {
+    fn from(tokens: &Tokens) -> Self {
+        let mut ranges = vec![];
+        for token in tokens {
+            if token.kind() == TokenKind::Comment {
+                ranges.push(token.range());
+            }
+        }
+        CommentRanges::new(ranges)
+    }
+}
+
 /// Control in the different modes by which a source file can be parsed.
 ///
 /// The mode argument specifies in what way code must be parsed.
```
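The hunks above add the `From<&Tokens>` conversion the formatter uses. On the linter side, the summary states that the `Indexer` builds and owns the comment ranges instead. A hedged sketch of that flow follows; `Indexer::from_tokens` and `comment_ranges` are assumed names from the surrounding codebase, not shown in this diff:

```rust
use ruff_python_index::Indexer;
use ruff_python_parser::parse_module;
use ruff_source_file::Locator;

fn main() {
    let source = "x = 1  # a comment\n";
    let parsed = parse_module(source).expect("valid source");
    let locator = Locator::new(source);
    // The indexer computes and owns the comment ranges while indexing the
    // tokens; `Parsed` no longer carries them.
    let indexer = Indexer::from_tokens(parsed.tokens(), &locator);
    let _comment_ranges = indexer.comment_ranges();
}
```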
@ -147,7 +147,7 @@ impl<'src> Parser<'src> {
|
|||
|
||||
// TODO consider re-integrating lexical error handling into the parser?
|
||||
let parse_errors = self.errors;
|
||||
let (tokens, comment_ranges, lex_errors) = self.tokens.finish();
|
||||
let (tokens, lex_errors) = self.tokens.finish();
|
||||
|
||||
// Fast path for when there are no lex errors.
|
||||
// There's no fast path for when there are no parse errors because a lex error
|
||||
|
@ -156,7 +156,6 @@ impl<'src> Parser<'src> {
|
|||
return Parsed {
|
||||
syntax,
|
||||
tokens: Tokens::new(tokens),
|
||||
comment_ranges,
|
||||
errors: parse_errors,
|
||||
};
|
||||
}
|
||||
|
@ -188,7 +187,6 @@ impl<'src> Parser<'src> {
|
|||
Parsed {
|
||||
syntax,
|
||||
tokens: Tokens::new(tokens),
|
||||
comment_ranges,
|
||||
errors: merged,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
use ruff_python_trivia::CommentRanges;
|
||||
use ruff_text_size::{TextRange, TextSize};
|
||||
|
||||
use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenFlags, TokenValue};
|
||||
|
@ -14,9 +13,6 @@ pub(crate) struct TokenSource<'src> {
|
|||
/// is finished consuming all the tokens. Note that unlike the emitted tokens, this vector
|
||||
/// holds both the trivia and non-trivia tokens.
|
||||
tokens: Vec<Token>,
|
||||
|
||||
/// A vector containing the range of all the comment tokens emitted by the lexer.
|
||||
comments: Vec<TextRange>,
|
||||
}
|
||||
|
||||
impl<'src> TokenSource<'src> {
|
||||
|
@ -26,7 +22,6 @@ impl<'src> TokenSource<'src> {
|
|||
TokenSource {
|
||||
lexer,
|
||||
tokens: vec![],
|
||||
comments: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -103,9 +98,6 @@ impl<'src> TokenSource<'src> {
|
|||
loop {
|
||||
let kind = self.lexer.next_token();
|
||||
if is_trivia(kind) {
|
||||
if kind == TokenKind::Comment {
|
||||
self.comments.push(self.current_range());
|
||||
}
|
||||
self.tokens
|
||||
.push(Token::new(kind, self.current_range(), self.current_flags()));
|
||||
continue;
|
||||
|
@ -130,7 +122,6 @@ impl<'src> TokenSource<'src> {
|
|||
TokenSourceCheckpoint {
|
||||
lexer_checkpoint: self.lexer.checkpoint(),
|
||||
tokens_position: self.tokens.len(),
|
||||
comments_position: self.comments.len(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -139,18 +130,16 @@ impl<'src> TokenSource<'src> {
|
|||
let TokenSourceCheckpoint {
|
||||
lexer_checkpoint,
|
||||
tokens_position,
|
||||
comments_position,
|
||||
} = checkpoint;
|
||||
|
||||
self.lexer.rewind(lexer_checkpoint);
|
||||
self.tokens.truncate(tokens_position);
|
||||
self.comments.truncate(comments_position);
|
||||
}
|
||||
|
||||
/// Consumes the token source, returning the collected tokens, comment ranges, and any errors
|
||||
/// encountered during lexing. The token collection includes both the trivia and non-trivia
|
||||
/// tokens.
|
||||
pub(crate) fn finish(mut self) -> (Vec<Token>, CommentRanges, Vec<LexicalError>) {
|
||||
pub(crate) fn finish(mut self) -> (Vec<Token>, Vec<LexicalError>) {
|
||||
assert_eq!(
|
||||
self.current_kind(),
|
||||
TokenKind::EndOfFile,
|
||||
|
@ -163,15 +152,13 @@ impl<'src> TokenSource<'src> {
|
|||
assert_eq!(last.kind(), TokenKind::EndOfFile);
|
||||
}
|
||||
|
||||
let comment_ranges = CommentRanges::new(self.comments);
|
||||
(self.tokens, comment_ranges, self.lexer.finish())
|
||||
(self.tokens, self.lexer.finish())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct TokenSourceCheckpoint {
|
||||
lexer_checkpoint: LexerCheckpoint,
|
||||
tokens_position: usize,
|
||||
comments_position: usize,
|
||||
}
|
||||
|
||||
/// Allocates a [`Vec`] with an approximated capacity to fit all tokens
|
||||
|
|
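Because the finished token collection still includes the trivia tokens, dropping the `comments` vector from `TokenSource` loses no information: the same ranges can be recovered with one pass over the tokens, which is exactly what the new `From<&Tokens>` impl does. A sketch of that equivalence, assuming `Token` and `TokenKind` are exported from the parser crate and that `Token` implements `Ranged`:

```rust
use ruff_python_parser::{Token, TokenKind};
use ruff_text_size::{Ranged, TextRange};

// Recovers, after the fact, what `TokenSource` previously collected during
// lexing: the range of every comment token, in source order.
fn comment_ranges(tokens: &[Token]) -> Vec<TextRange> {
    tokens
        .iter()
        .filter(|token| token.kind() == TokenKind::Comment)
        .map(|token| token.range())
        .collect()
}
```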