Add Tokens newtype wrapper, TokenKind iterator (#11361)

## Summary

Alternative to #11237 

This PR adds a new `Tokens` struct which is a newtype wrapper around a
vector of lexer output. This allows us to add a `kinds` method which
returns an iterator over the corresponding `TokenKind`. This iterator is
implemented as a separate `TokenKindIter` struct so that the type can be
named and can provide additional methods like `peek` directly on the iterator.

This lets the linter access the stream of `TokenKind` instead of
`Tok`.

Edit: I've made the necessary downstream changes and plan to merge the
entire stack at once.
This commit is contained in:
Dhruv Manilawala 2024-05-14 22:15:04 +05:30 committed by GitHub
parent 50f14d017e
commit 025768d303
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 142 additions and 25 deletions

View file

@ -14,7 +14,7 @@ use ruff_python_ast::{PySourceType, Suite};
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{AsMode, ParseError};
use ruff_python_parser::{AsMode, ParseError, TokenKindIter, Tokens};
use ruff_source_file::{Locator, SourceFileBuilder};
use ruff_text_size::Ranged;
@ -353,7 +353,7 @@ pub fn add_noqa_to_path(
let contents = source_kind.source_code();
// Tokenize once.
let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, source_type.as_mode());
let tokens = ruff_python_parser::tokenize(contents, source_type.as_mode());
// Map row and column locations to byte slices (lazily).
let locator = Locator::new(contents);
@ -518,8 +518,7 @@ pub fn lint_fix<'a>(
// Continuously fix until the source code stabilizes.
loop {
// Tokenize once.
let tokens: Vec<LexResult> =
ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode());
let tokens = ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode());
// Map row and column locations to byte slices (lazily).
let locator = Locator::new(transformed.source_code());
@ -715,7 +714,7 @@ impl<'a> ParseSource<'a> {
#[derive(Debug, Clone)]
pub enum TokenSource<'a> {
/// Use the precomputed tokens to generate the AST.
Tokens(Vec<LexResult>),
Tokens(Tokens),
/// Use the precomputed tokens and AST.
Precomputed {
tokens: &'a [LexResult],
@ -723,6 +722,18 @@ pub enum TokenSource<'a> {
},
}
impl TokenSource<'_> {
    /// Builds a [`TokenKindIter`] for this token source, yielding each
    /// [`TokenKind`] together with its corresponding range.
    ///
    /// The `Precomputed` variant wraps its borrowed token slice in a new
    /// iterator, while the `Tokens` variant delegates to the `kinds` method
    /// of the tokens it holds.
    ///
    /// [`TokenKind`]: ruff_python_parser::TokenKind
    pub fn kinds(&self) -> TokenKindIter {
        match self {
            Self::Precomputed { tokens, .. } => TokenKindIter::new(tokens),
            Self::Tokens(owned) => owned.kinds(),
        }
    }
}
impl Deref for TokenSource<'_> {
type Target = [LexResult];