Add Tokens newtype wrapper, TokenKind iterator (#11361)

## Summary

Alternative to #11237

This PR adds a new `Tokens` struct, which is a newtype wrapper around a vector of lexer output. This allows us to add a `kinds` method which returns an iterator over the corresponding `TokenKind`. This iterator is implemented as a separate `TokenKindIter` struct so that the type can be used directly and additional methods like `peek` can be provided on the iterator itself.

This gives the linter access to the stream of `TokenKind` instead of `Tok`.

Edit: I've made the necessary downstream changes and plan to merge the entire stack at once.
2025-10-01 06:11:43 +00:00 · 2024-05-14 22:15:04 +05:30 · 2024-05-14 22:15:04 +05:30 · 025768d303
commit 025768d303
parent 50f14d017e
9 changed files with 142 additions and 25 deletions
--- a/crates/ruff_linter/src/linter.rs
+++ b/crates/ruff_linter/src/linter.rs
@ -14,7 +14,7 @@ use ruff_python_ast::{PySourceType, Suite};
 use ruff_python_codegen::Stylist;
 use ruff_python_index::Indexer;
 use ruff_python_parser::lexer::LexResult;
-use ruff_python_parser::{AsMode, ParseError};
+use ruff_python_parser::{AsMode, ParseError, TokenKindIter, Tokens};
 use ruff_source_file::{Locator, SourceFileBuilder};
 use ruff_text_size::Ranged;

@ -353,7 +353,7 @@ pub fn add_noqa_to_path(
    let contents = source_kind.source_code();

    // Tokenize once.
-    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, source_type.as_mode());
+    let tokens = ruff_python_parser::tokenize(contents, source_type.as_mode());

    // Map row and column locations to byte slices (lazily).
    let locator = Locator::new(contents);
@ -518,8 +518,7 @@ pub fn lint_fix<'a>(
    // Continuously fix until the source code stabilizes.
    loop {
        // Tokenize once.
-        let tokens: Vec<LexResult> =
-            ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode());
+        let tokens = ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode());

        // Map row and column locations to byte slices (lazily).
        let locator = Locator::new(transformed.source_code());
@ -715,7 +714,7 @@ impl<'a> ParseSource<'a> {
 #[derive(Debug, Clone)]
 pub enum TokenSource<'a> {
    /// Use the precomputed tokens to generate the AST.
-    Tokens(Vec<LexResult>),
+    Tokens(Tokens),
    /// Use the precomputed tokens and AST.
    Precomputed {
        tokens: &'a [LexResult],
@ -723,6 +722,18 @@ pub enum TokenSource<'a> {
    },
 }

+impl TokenSource<'_> {
+    /// Returns an iterator over the [`TokenKind`] and the corresponding range.
+    ///
+    /// [`TokenKind`]: ruff_python_parser::TokenKind
+    pub fn kinds(&self) -> TokenKindIter {
+        match self {
+            TokenSource::Tokens(tokens) => tokens.kinds(),
+            TokenSource::Precomputed { tokens, .. } => TokenKindIter::new(tokens),
+        }
+    }
+}
+
 impl Deref for TokenSource<'_> {
    type Target = [LexResult];