From f1f31324d03204fc74321f433b51a4a246360884 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sun, 19 Feb 2023 22:37:00 -0500 Subject: [PATCH] Use multipeek --- parser/src/parser.rs | 4 +- parser/src/soft_keywords.rs | 175 +++++++++++++++++++----------------- 2 files changed, 94 insertions(+), 85 deletions(-) diff --git a/parser/src/parser.rs b/parser/src/parser.rs index d8f09cd..f4c00f0 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -14,11 +14,11 @@ use crate::lexer::{LexResult, Tok}; pub use crate::mode::Mode; +use crate::soft_keywords::SoftKeywordTransformer; use crate::{ast, error::ParseError, lexer, python}; use ast::Location; use itertools::Itertools; use std::iter; -use crate::soft_keywords::soft_keywords; /// Parse a full Python program usually consisting of multiple lines. /// @@ -190,7 +190,7 @@ pub fn parse_tokens( .chain(lxr) .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline)); python::TopParser::new() - .parse(soft_keywords(tokenizer, mode).into_iter()) + .parse(SoftKeywordTransformer::new(tokenizer, mode).into_iter()) .map_err(|e| crate::error::parse_error_from_lalrpop(e, source_path)) } diff --git a/parser/src/soft_keywords.rs b/parser/src/soft_keywords.rs index 5a96929..b2c16f9 100644 --- a/parser/src/soft_keywords.rs +++ b/parser/src/soft_keywords.rs @@ -1,20 +1,10 @@ +use itertools::{Itertools, MultiPeek}; + use crate::lexer::{LexResult, Tok}; pub use crate::mode::Mode; -/// Collect all tokens from a token stream in a vector. -fn collect_tokens(tokenizer: impl IntoIterator) -> Vec { - let mut tokens: Vec = vec![]; - for tok in tokenizer { - let is_err = tok.is_err(); - tokens.push(tok); - if is_err { - break; - } - } - tokens -} - -/// Modify a token stream to accommodate soft keywords (namely, `match` and `case`). +/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match` +/// and `case`). 
/// /// [PEP 634](https://www.python.org/dev/peps/pep-0634/) introduced the `match` and `case` keywords /// as soft keywords, meaning that they can be used as identifiers (e.g., variable names) in certain @@ -25,93 +15,112 @@ fn collect_tokens(tokenizer: impl IntoIterator) -> Vec, - mode: Mode, -) -> Vec { - let mut tokenizer: Vec = collect_tokens(tokenizer); - let mut start_of_line = matches!(mode, Mode::Module | Mode::Interactive); - for i in 0..tokenizer.len() { - // If the token is a `match` or `case` token, check if it's used as an identifier. - // We assume every `match` or `case` is an identifier unless both of the following - // conditions are met: - // 1. The token is at the start of a logical line. - // 2. The logical line contains a top-level colon (that is, a colon that is not nested - // inside a parenthesized expression, list, or dictionary). - // 3. The top-level colon is not the immediate sibling of a `match` or `case` token. - // (This is to avoid treating `match` and `case` as identifiers when annotated with - // type hints.) - if tokenizer[i] - .as_ref() - .map_or(false, |(_, tok, _)| matches!(tok, Tok::Match | Tok::Case)) - { - let is_identifier = { - if !start_of_line { - // If the `match` or `case` token is not at the start of a line, it's definitely - // an identifier. - true +pub struct SoftKeywordTransformer +where + I: Iterator, +{ + pub underlying: MultiPeek, + pub start_of_line: bool, +} + +impl SoftKeywordTransformer +where + I: Iterator, +{ + pub fn new(tokenizer: I, mode: Mode) -> Self { + Self { + underlying: tokenizer.multipeek(), + start_of_line: matches!(mode, Mode::Interactive | Mode::Module), + } + } +} + +impl Iterator for SoftKeywordTransformer +where + I: Iterator, +{ + type Item = LexResult; + + #[inline] + fn next(&mut self) -> Option { + let mut next = self.underlying.next(); + if let Some(Ok((start, tok, end))) = next.as_ref() { + // If the token is a `match` or `case` token, check if it's used as an identifier. 
+ // We assume every `match` or `case` is an identifier unless both of the following + // conditions are met: + // 1. The token is at the start of a logical line. + // 2. The logical line contains a top-level colon (that is, a colon that is not nested + // inside a parenthesized expression, list, or dictionary). + // 3. The top-level colon is not the immediate sibling of a `match` or `case` token. + // (This is to avoid treating `match` and `case` as identifiers when annotated with + // type hints.) + if matches!(tok, Tok::Match | Tok::Case) { + if !self.start_of_line { + next = Some(Ok(( + *start, + Tok::Name { + name: if matches!(tok, Tok::Match) { + "match".to_string() + } else { + "case".to_string() + }, + }, + *end, + ))); } else { - // - let mut seen_colon = false; - let mut first = true; let mut par_count = 0; let mut sqb_count = 0; let mut brace_count = 0; - for (_, tok, _) in tokenizer.iter().skip(i + 1).flatten() { + let mut first = true; + let mut seen_colon = false; + while let Some(Ok((_, tok, _))) = self.underlying.peek() { match tok { Tok::Newline => break, Tok::Colon if par_count == 0 && sqb_count == 0 && brace_count == 0 => { if !first { seen_colon = true; } - break; - } - Tok::Lpar => { - par_count += 1; - } - Tok::Rpar => { - par_count -= 1; - } - Tok::Lsqb => { - sqb_count += 1; - } - Tok::Rsqb => { - sqb_count -= 1; - } - Tok::Lbrace => { - brace_count += 1; - } - Tok::Rbrace => { - brace_count -= 1; } + Tok::Lpar => par_count += 1, + Tok::Rpar => par_count -= 1, + Tok::Lsqb => sqb_count += 1, + Tok::Rsqb => sqb_count -= 1, + Tok::Lbrace => brace_count += 1, + Tok::Rbrace => brace_count -= 1, _ => {} } first = false; } - !seen_colon - } - }; - if is_identifier { - if let Ok((_, tok, _)) = &mut tokenizer[i] { - if let Tok::Match = tok { - *tok = Tok::Name { - name: "match".to_string(), - }; - } else if let Tok::Case = tok { - *tok = Tok::Name { - name: "case".to_string(), - }; + if !seen_colon { + next = Some(Ok(( + *start, + Tok::Name { + name: 
if matches!(tok, Tok::Match) { + "match".to_string() + } else { + "case".to_string() + }, + }, + *end, + ))); } } } } - start_of_line = tokenizer[i].as_ref().map_or(false, |(_, tok, _)| { - matches!( - tok, - Tok::StartModule | Tok::StartInteractive | Tok::Newline | Tok::Indent | Tok::Dedent - ) - }); - } - tokenizer + self.start_of_line = next.as_ref().map_or(false, |lex_result| { + lex_result.as_ref().map_or(false, |(_, tok, _)| { + matches!( + tok, + Tok::StartModule + | Tok::StartInteractive + | Tok::Newline + | Tok::Indent + | Tok::Dedent + ) + }) + }); + + next + } }