add token stream back and move pre-allocation (#260)
Some checks are pending
test / generate-matrix (push) Waiting to run
test / Python , Django () (push) Blocked by required conditions
test / tests (push) Blocked by required conditions
zizmor 🌈 / zizmor latest via PyPI (push) Waiting to run
bench / benchmarks (push) Waiting to run
lint / pre-commit (push) Waiting to run
lint / clippy (push) Waiting to run
lint / cargo-check (push) Waiting to run
lint / rustfmt (push) Waiting to run
release / build (push) Waiting to run
release / test (push) Waiting to run
release / release (push) Blocked by required conditions

This commit is contained in:
Josh Thomas 2025-09-24 17:00:25 -05:00 committed by GitHub
parent f1b6891a3c
commit 6b07eec7a0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 57 additions and 6 deletions

View file

@ -2,6 +2,7 @@ use djls_source::Span;
use crate::tokens::TagDelimiter;
use crate::tokens::Token;
use crate::tokens::TokenStream;
pub struct Lexer {
source: String,
@ -20,11 +21,7 @@ impl Lexer {
}
pub fn tokenize(&mut self) -> Vec<Token> {
// Conservative estimate: most templates have 1 token per 15-20 chars
// Min 32 to avoid reallocation for tiny templates
// Max 1024 to avoid over-allocation for huge templates
let estimated_tokens = (self.source.len() / 15).clamp(32, 1024);
let mut tokens = Vec::with_capacity(estimated_tokens);
let mut tokens = TokenStream::with_estimated_capacity(&self.source);
while !self.is_at_end() {
self.start = self.current;
@ -59,7 +56,7 @@ impl Lexer {
tokens.push(Token::Eof);
tokens
tokens.into()
}
fn lex_django_tag(

View file

@ -289,3 +289,57 @@ impl TokenSnapshotVec {
self.0.iter().map(Token::to_snapshot).collect()
}
}
#[derive(Debug, Clone)]
pub struct TokenStream(Vec<Token>);
impl TokenStream {
const CHARS_PER_TOKEN: usize = 6;
const MIN_CAPACITY: usize = 32;
const MAX_CAPACITY: usize = 1024;
#[must_use]
pub fn with_estimated_capacity(source: &str) -> Self {
let capacity =
(source.len() / Self::CHARS_PER_TOKEN).clamp(Self::MIN_CAPACITY, Self::MAX_CAPACITY);
Self(Vec::with_capacity(capacity))
}
#[inline]
pub fn push(&mut self, token: Token) {
self.0.push(token);
}
/// Get the number of tokens in the stream.
#[must_use]
pub fn len(&self) -> usize {
self.0.len()
}
/// Get the number of content tokens (excluding EOF).
#[must_use]
pub fn content_len(&self) -> usize {
self.0.len().saturating_sub(1)
}
/// Check if stream is empty.
#[must_use]
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
}
impl From<TokenStream> for Vec<Token> {
fn from(val: TokenStream) -> Self {
val.0
}
}
impl IntoIterator for TokenStream {
type Item = Token;
type IntoIter = std::vec::IntoIter<Token>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}