add token stream back and move pre-allocation (#260)

2025-12-23 08:47:53 +00:00 · 2025-09-24 17:00:25 -05:00 · 2025-09-24 17:00:25 -05:00 · 6b07eec7a0
commit 6b07eec7a0
parent f1b6891a3c
2 changed files with 57 additions and 6 deletions
--- a/crates/djls-templates/src/lexer.rs
+++ b/crates/djls-templates/src/lexer.rs
@@ -2,6 +2,7 @@ use djls_source::Span;

 use crate::tokens::TagDelimiter;
 use crate::tokens::Token;
+use crate::tokens::TokenStream;

 pub struct Lexer {
    source: String,
@@ -20,11 +21,7 @@ impl Lexer {
    }

    pub fn tokenize(&mut self) -> Vec<Token> {
-        // Conservative estimate: most templates have 1 token per 15-20 chars
-        // Min 32 to avoid reallocation for tiny templates
-        // Max 1024 to avoid over-allocation for huge templates
-        let estimated_tokens = (self.source.len() / 15).clamp(32, 1024);
-        let mut tokens = Vec::with_capacity(estimated_tokens);
+        let mut tokens = TokenStream::with_estimated_capacity(&self.source);

        while !self.is_at_end() {
            self.start = self.current;
@@ -59,7 +56,7 @@ impl Lexer {

        tokens.push(Token::Eof);

-        tokens
+        tokens.into()
    }

    fn lex_django_tag(
--- a/crates/djls-templates/src/tokens.rs
+++ b/crates/djls-templates/src/tokens.rs
@@ -289,3 +289,57 @@ impl TokenSnapshotVec {
        self.0.iter().map(Token::to_snapshot).collect()
    }
 }
+
+#[derive(Debug, Clone)]
+pub struct TokenStream(Vec<Token>);
+
+impl TokenStream {
+    const CHARS_PER_TOKEN: usize = 6;
+    const MIN_CAPACITY: usize = 32;
+    const MAX_CAPACITY: usize = 1024;
+
+    #[must_use]
+    pub fn with_estimated_capacity(source: &str) -> Self {
+        let capacity =
+            (source.len() / Self::CHARS_PER_TOKEN).clamp(Self::MIN_CAPACITY, Self::MAX_CAPACITY);
+        Self(Vec::with_capacity(capacity))
+    }
+
+    #[inline]
+    pub fn push(&mut self, token: Token) {
+        self.0.push(token);
+    }
+
+    /// Get the number of tokens in the stream.
+    #[must_use]
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    /// Get the number of content tokens (excluding EOF).
+    #[must_use]
+    pub fn content_len(&self) -> usize {
+        self.0.len().saturating_sub(1)
+    }
+
+    /// Check if stream is empty.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+}
+
+impl From<TokenStream> for Vec<Token> {
+    fn from(val: TokenStream) -> Self {
+        val.0
+    }
+}
+
+impl IntoIterator for TokenStream {
+    type Item = Token;
+    type IntoIter = std::vec::IntoIter<Token>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.0.into_iter()
+    }
+}