create djls-ast crate and implement lexer and parser (#8)

Josh Thomas 2024-12-09 17:48:24 -06:00 committed by GitHub
parent 81199d1699
commit f96b3da951
20 changed files with 2255 additions and 0 deletions

Cargo.toml

@@ -4,6 +4,7 @@ resolver = "2"
[workspace.dependencies]
djls = { path = "crates/djls" }
djls-ast = { path = "crates/djls-ast" }
djls-django = { path = "crates/djls-django" }
djls-python = { path = "crates/djls-python" }

crates/djls-ast/Cargo.toml Normal file

@@ -0,0 +1,16 @@
[package]
name = "djls-ast"
version = "0.0.0"
edition = "2021"
[dependencies]
serde = { workspace = true }
thiserror = "2.0.6"
[dev-dependencies]
insta = { version = "1.41.1", features = ["yaml"] }
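# Build insta (and its diffing engine, similar) with optimizations even in dev
# builds; unoptimized snapshot diffing is noticeably slow. This mirrors the
# profile settings insta's documentation suggests.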
[profile.dev.package]
insta.opt-level = 3
similar.opt-level = 3

crates/djls-ast/src/ast.rs Normal file

@@ -0,0 +1,225 @@
use serde::Serialize;
use std::collections::BTreeMap;
use std::str::FromStr;
use thiserror::Error;
#[derive(Clone, Debug, Default, Serialize)]
pub struct Ast {
nodes: Vec<Node>,
}
impl Ast {
pub fn nodes(&self) -> &Vec<Node> {
&self.nodes
}
pub fn add_node(&mut self, node: Node) {
self.nodes.push(node);
}
pub fn finalize(&mut self) -> Result<Ast, AstError> {
if self.nodes.is_empty() {
return Err(AstError::EmptyAst);
}
Ok(self.clone())
}
}
#[derive(Clone, Debug, Serialize)]
pub enum Node {
Django(DjangoNode),
Html(HtmlNode),
Script(ScriptNode),
Style(StyleNode),
Text(String),
}
#[derive(Clone, Debug, Serialize)]
pub enum DjangoNode {
Comment(String),
Tag {
kind: DjangoTagKind,
bits: Vec<String>,
children: Vec<Node>,
},
Variable {
bits: Vec<String>,
filters: Vec<DjangoFilter>,
},
}
#[derive(Clone, Debug, Serialize)]
pub enum DjangoTagKind {
Autoescape,
Block,
Comment,
CsrfToken,
Cycle,
Debug,
Elif,
Else,
Empty,
Extends,
Filter,
FirstOf,
For,
If,
IfChanged,
Include,
Load,
Lorem,
Now,
Other(String),
Querystring, // added in Django 5.1
Regroup,
ResetCycle,
Spaceless,
TemplateTag,
Url,
Verbatim,
WidthRatio,
With,
}
impl DjangoTagKind {
const AUTOESCAPE: &'static str = "autoescape";
const BLOCK: &'static str = "block";
const COMMENT: &'static str = "comment";
const CSRF_TOKEN: &'static str = "csrf_token";
const CYCLE: &'static str = "cycle";
const DEBUG: &'static str = "debug";
const ELIF: &'static str = "elif";
const ELSE: &'static str = "else";
const EMPTY: &'static str = "empty";
const EXTENDS: &'static str = "extends";
const FILTER: &'static str = "filter";
const FIRST_OF: &'static str = "firstof";
const FOR: &'static str = "for";
const IF: &'static str = "if";
const IF_CHANGED: &'static str = "ifchanged";
const INCLUDE: &'static str = "include";
const LOAD: &'static str = "load";
const LOREM: &'static str = "lorem";
const NOW: &'static str = "now";
const QUERYSTRING: &'static str = "querystring";
const REGROUP: &'static str = "regroup";
const RESET_CYCLE: &'static str = "resetcycle";
const SPACELESS: &'static str = "spaceless";
const TEMPLATE_TAG: &'static str = "templatetag";
const URL: &'static str = "url";
const VERBATIM: &'static str = "verbatim";
const WIDTH_RATIO: &'static str = "widthratio";
const WITH: &'static str = "with";
}
impl FromStr for DjangoTagKind {
type Err = AstError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if s.is_empty() {
return Err(AstError::EmptyTag);
}
match s {
Self::AUTOESCAPE => Ok(Self::Autoescape),
Self::BLOCK => Ok(Self::Block),
Self::COMMENT => Ok(Self::Comment),
Self::CSRF_TOKEN => Ok(Self::CsrfToken),
Self::CYCLE => Ok(Self::Cycle),
Self::DEBUG => Ok(Self::Debug),
Self::ELIF => Ok(Self::Elif),
Self::ELSE => Ok(Self::Else),
Self::EMPTY => Ok(Self::Empty),
Self::EXTENDS => Ok(Self::Extends),
Self::FILTER => Ok(Self::Filter),
Self::FIRST_OF => Ok(Self::FirstOf),
Self::FOR => Ok(Self::For),
Self::IF => Ok(Self::If),
Self::IF_CHANGED => Ok(Self::IfChanged),
Self::INCLUDE => Ok(Self::Include),
Self::LOAD => Ok(Self::Load),
Self::LOREM => Ok(Self::Lorem),
Self::NOW => Ok(Self::Now),
Self::QUERYSTRING => Ok(Self::Querystring),
Self::REGROUP => Ok(Self::Regroup),
Self::RESET_CYCLE => Ok(Self::ResetCycle),
Self::SPACELESS => Ok(Self::Spaceless),
Self::TEMPLATE_TAG => Ok(Self::TemplateTag),
Self::URL => Ok(Self::Url),
Self::VERBATIM => Ok(Self::Verbatim),
Self::WIDTH_RATIO => Ok(Self::WidthRatio),
Self::WITH => Ok(Self::With),
other => Ok(Self::Other(other.to_string())),
}
}
}
#[derive(Clone, Debug, Serialize)]
pub struct DjangoFilter {
name: String,
arguments: Vec<String>,
}
impl DjangoFilter {
pub fn new(name: String, arguments: Vec<String>) -> Self {
Self { name, arguments }
}
}
#[derive(Clone, Debug, Serialize)]
pub enum HtmlNode {
Comment(String),
Doctype(String),
Element {
tag_name: String,
attributes: Attributes,
children: Vec<Node>,
},
Void {
tag_name: String,
attributes: Attributes,
},
}
#[derive(Clone, Debug, Serialize)]
pub enum ScriptNode {
Comment {
content: String,
kind: ScriptCommentKind,
},
Element {
attributes: Attributes,
children: Vec<Node>,
},
}
#[derive(Clone, Debug, Serialize)]
pub enum ScriptCommentKind {
SingleLine, // //
MultiLine, // /* */
}
#[derive(Clone, Debug, Serialize)]
pub enum StyleNode {
Comment(String),
Element {
attributes: Attributes,
children: Vec<Node>,
},
}
#[derive(Clone, Debug, Serialize)]
pub enum AttributeValue {
Value(String),
Boolean,
}
pub type Attributes = BTreeMap<String, AttributeValue>;
#[derive(Error, Debug)]
pub enum AstError {
#[error("error parsing django tag, recieved empty tag name")]
EmptyTag,
#[error("empty ast")]
EmptyAst,
}
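For orientation, a minimal crate-internal sketch of how these types compose (illustrative values; lib.rs below keeps the modules private, so this only compiles inside djls-ast):

use std::str::FromStr;

// "{% if user.is_staff %}" surfaces as bits ["if", "user.is_staff"];
// the first bit picks the tag kind, and unknown names fall back to Other.
let kind = DjangoTagKind::from_str("if").unwrap();
let mut ast = Ast::default();
ast.add_node(Node::Django(DjangoNode::Tag {
    kind,
    bits: vec!["if".to_string(), "user.is_staff".to_string()],
    children: vec![Node::Text("Admin".to_string())],
}));
assert_eq!(ast.nodes().len(), 1);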

crates/djls-ast/src/lexer.rs Normal file

@@ -0,0 +1,414 @@
use crate::tokens::{Token, TokenStream, TokenType};
use thiserror::Error;
pub struct Lexer {
source: String,
chars: Vec<char>,
start: usize,
current: usize,
line: usize,
}
impl Lexer {
pub fn new(source: &str) -> Self {
Lexer {
source: String::from(source),
chars: source.chars().collect(),
start: 0,
current: 0,
line: 1,
}
}
pub fn tokenize(&mut self) -> Result<TokenStream, LexerError> {
let mut tokens = TokenStream::default();
while !self.is_at_end() {
let token = self.next_token()?;
tokens.add_token(token);
}
tokens.finalize(self.line);
Ok(tokens)
}
fn next_token(&mut self) -> Result<Token, LexerError> {
self.start = self.current;
let token_type = match self.peek()? {
'{' => match self.peek_next()? {
'%' => {
self.consume_n(2)?; // {%
let content = self.consume_until("%}")?;
self.consume_n(2)?; // %}
TokenType::DjangoBlock(content)
}
'{' => {
self.consume_n(2)?; // {{
let content = self.consume_until("}}")?;
self.consume_n(2)?; // }}
TokenType::DjangoVariable(content)
}
'#' => {
self.consume_n(2)?; // {#
let content = self.consume_until("#}")?;
self.consume_n(2)?; // #}
TokenType::Comment(content, "{#".to_string(), Some("#}".to_string()))
}
_ => {
self.consume()?; // {
TokenType::Text(String::from("{"))
}
},
'<' => match self.peek_next()? {
'/' => {
self.consume_n(2)?; // </
let tag = self.consume_until(">")?;
self.consume()?; // >
TokenType::HtmlTagClose(tag)
}
'!' if self.matches("<!--")? => {
self.consume_n(4)?; // <!--
let content = self.consume_until("-->")?;
self.consume_n(3)?; // -->
TokenType::Comment(content, "<!--".to_string(), Some("-->".to_string()))
}
_ => {
self.consume()?; // consume <
let tag = self.consume_until(">")?;
self.consume()?; // consume >
if tag.starts_with("script") {
TokenType::ScriptTagOpen(tag)
} else if tag.starts_with("style") {
TokenType::StyleTagOpen(tag)
} else if tag.ends_with("/") {
TokenType::HtmlTagVoid(tag.trim_end_matches("/").to_string())
} else {
TokenType::HtmlTagOpen(tag)
}
}
},
'/' => match self.peek_next()? {
'/' => {
self.consume_n(2)?; // //
let content = self.consume_until("\n")?;
TokenType::Comment(content, "//".to_string(), None)
}
'*' => {
self.consume_n(2)?; // /*
let content = self.consume_until("*/")?;
self.consume_n(2)?; // */
TokenType::Comment(content, "/*".to_string(), Some("*/".to_string()))
}
_ => {
self.consume()?;
TokenType::Text("/".to_string())
}
},
c if c.is_whitespace() => {
if c == '\n' || c == '\r' {
self.consume()?; // \r or \n
if c == '\r' && self.peek()? == '\n' {
self.consume()?; // \n of \r\n
}
TokenType::Newline
} else {
self.consume()?; // Consume the first whitespace
while !self.is_at_end() && self.peek()?.is_whitespace() {
if self.peek()? == '\n' || self.peek()? == '\r' {
break;
}
self.consume()?;
}
let whitespace_count = self.current - self.start;
TokenType::Whitespace(whitespace_count)
}
}
_ => {
let mut text = String::new();
while !self.is_at_end() {
let c = self.peek()?;
if c == '{' || c == '<' || c == '\n' {
break;
}
text.push(c);
self.consume()?;
}
TokenType::Text(text)
}
};
let token = Token::new(token_type, self.line, Some(self.start));
match self.peek_previous()? {
'\n' => self.line += 1,
'\r' => {
self.line += 1;
if self.peek()? == '\n' {
self.current += 1;
}
}
_ => {}
}
Ok(token)
}
fn peek(&self) -> Result<char, LexerError> {
self.peek_at(0)
}
fn peek_next(&self) -> Result<char, LexerError> {
self.peek_at(1)
}
fn peek_previous(&self) -> Result<char, LexerError> {
self.peek_at(-1)
}
fn peek_until(&self, end: &str) -> Result<bool, LexerError> {
let mut index = self.current;
let end_chars: Vec<char> = end.chars().collect();
while index < self.chars.len() {
if self.chars[index..].starts_with(&end_chars) {
return Ok(true);
}
index += 1;
}
Ok(false)
}
fn peek_at(&self, offset: isize) -> Result<char, LexerError> {
let index = self.current as isize + offset;
self.item_at(index as usize)
}
fn item_at(&self, index: usize) -> Result<char, LexerError> {
// Return a null character when out of bounds, a bit of a departure from
// idiomatic Rust code, but makes writing the matching above and testing
// much easier. Indexing the pre-collected chars also keeps this correct
// (and O(1)) for multi-byte characters, unlike byte-based source indexing.
Ok(self.chars.get(index).copied().unwrap_or('\0'))
}
fn matches(&mut self, pattern: &str) -> Result<bool, LexerError> {
let mut i = self.current;
for c in pattern.chars() {
if i >= self.chars.len() || self.chars[i] != c {
return Ok(false);
}
i += 1;
}
Ok(true)
}
fn is_at_end(&self) -> bool {
self.current >= self.chars.len()
}
fn consume(&mut self) -> Result<char, LexerError> {
if self.is_at_end() {
return Err(LexerError::AtEndOfSource);
}
self.current += 1;
self.peek_previous()
}
fn consume_n(&mut self, count: usize) -> Result<String, LexerError> {
let start = self.current;
for _ in 0..count {
self.consume()?;
}
// Slice by char indices, not byte offsets, so multi-byte input stays valid
Ok(self.chars[start..self.current]
.iter()
.collect::<String>()
.trim()
.to_string())
}
fn consume_chars(&mut self, s: &str) -> Result<char, LexerError> {
for c in s.chars() {
if c != self.peek()? {
return Err(LexerError::UnexpectedCharacter(c, self.line));
}
self.consume()?;
}
self.peek_previous()
}
fn consume_until(&mut self, s: &str) -> Result<String, LexerError> {
let start = self.current;
let pattern: Vec<char> = s.chars().collect();
while !self.is_at_end() {
if self.chars[self.current..].starts_with(&pattern) {
return Ok(self.chars[start..self.current]
.iter()
.collect::<String>()
.trim()
.to_string());
}
self.consume()?;
}
Err(LexerError::UnexpectedEndOfInput)
}
}
#[derive(Error, Debug)]
pub enum LexerError {
#[error("empty token at line {0}")]
EmptyToken(usize),
#[error("unexpected character '{0}' at line {1}")]
UnexpectedCharacter(char, usize),
#[error("unexpected end of input")]
UnexpectedEndOfInput,
#[error("source is empty")]
EmptySource,
#[error("at beginning of source")]
AtBeginningOfSource,
#[error("at end of source")]
AtEndOfSource,
#[error("invalid character access")]
InvalidCharacterAccess,
#[error("unexpected token type '{0:?}'")]
UnexpectedTokenType(TokenType),
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_tokenize_html() {
let source = r#"<div class="container" id="main" disabled></div>"#;
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
#[test]
fn test_tokenize_django_variable() {
let source = "{{ user.name|default:\"Anonymous\"|title }}";
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
#[test]
fn test_tokenize_django_block() {
let source = "{% if user.is_staff %}Admin{% else %}User{% endif %}";
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
#[test]
fn test_tokenize_comments() {
let source = r#"<!-- HTML comment -->
{# Django comment #}
<script>
// JS single line comment
/* JS multi-line
comment */
</script>
<style>
/* CSS comment */
</style>"#;
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
#[test]
fn test_tokenize_script() {
let source = r#"<script type="text/javascript">
// Single line comment
const x = 1;
/* Multi-line
comment */
console.log(x);
</script>"#;
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
#[test]
fn test_tokenize_style() {
let source = r#"<style type="text/css">
/* Header styles */
.header {
color: blue;
}
</style>"#;
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
#[test]
fn test_tokenize_error_cases() {
// Unterminated tokens
assert!(Lexer::new("{{ user.name").tokenize().is_err()); // No closing }}
assert!(Lexer::new("{% if").tokenize().is_err()); // No closing %}
assert!(Lexer::new("{#").tokenize().is_err()); // No closing #}
assert!(Lexer::new("<div").tokenize().is_err()); // No closing >
// Invalid characters or syntax within tokens
assert!(Lexer::new("{{}}").tokenize().is_ok()); // Empty but valid
assert!(Lexer::new("{% %}").tokenize().is_ok()); // Empty but valid
assert!(Lexer::new("{##}").tokenize().is_ok()); // Empty but valid
}
#[test]
fn test_tokenize_nested_delimiters() {
let source = r#"{{ user.name }}
{% if true %}
{# comment #}
<!-- html comment -->
<div>text</div>"#;
assert!(Lexer::new(source).tokenize().is_ok());
}
#[test]
fn test_tokenize_everything() {
let source = r#"<!DOCTYPE html>
<html>
<head>
<style type="text/css">
/* Style header */
.header { color: blue; }
</style>
<script type="text/javascript">
// Init app
const app = {
/* Config */
debug: true
};
</script>
</head>
<body>
<!-- Header section -->
<div class="header" id="main" data-value="123" disabled>
{% if user.is_authenticated %}
{# Welcome message #}
<h1>Welcome, {{ user.name|default:"Guest"|title }}!</h1>
{% if user.is_staff %}
<span>Admin</span>
{% else %}
<span>User</span>
{% endif %}
{% endif %}
</div>
</body>
</html>"#;
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
}
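The '\0' sentinel in item_at keeps the lookahead methods total: peeking past either end of the source yields a null character instead of an error, so the big match in next_token needs no separate end-of-input arms. The same idea in isolation (a hypothetical standalone helper, not part of the crate):

// Out-of-range lookahead collapses to '\0' rather than panicking or
// returning an Option that every call site would have to unwrap.
fn peek_at(chars: &[char], index: usize) -> char {
    chars.get(index).copied().unwrap_or('\0')
}

// peek_at(&['a'], 0) == 'a'
// peek_at(&['a'], 5) == '\0'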

crates/djls-ast/src/lib.rs Normal file

@@ -0,0 +1,4 @@
mod ast;
mod lexer;
mod parser;
mod tokens;

crates/djls-ast/src/parser.rs Normal file

@@ -0,0 +1,638 @@
use crate::ast::{
Ast, AstError, AttributeValue, DjangoFilter, DjangoNode, DjangoTagKind, HtmlNode, Node,
ScriptCommentKind, ScriptNode, StyleNode,
};
use crate::tokens::{Token, TokenStream, TokenType};
use std::collections::BTreeMap;
use std::str::FromStr;
use thiserror::Error;
pub struct Parser {
tokens: TokenStream,
current: usize,
}
impl Parser {
pub fn new(tokens: TokenStream) -> Self {
Parser { tokens, current: 0 }
}
pub fn parse(&mut self) -> Result<Ast, ParserError> {
let mut ast = Ast::default();
while !self.is_at_end() {
let node = self.next_node();
match node {
Ok(node) => {
ast.add_node(node);
}
Err(ParserError::AtEndOfStream) => {
if ast.nodes().is_empty() {
return Err(ParserError::UnexpectedEof);
}
break;
}
Err(_) => {
self.synchronize(&[
TokenType::DjangoBlock(String::new()),
TokenType::HtmlTagOpen(String::new()),
TokenType::HtmlTagVoid(String::new()),
TokenType::ScriptTagOpen(String::new()),
TokenType::StyleTagOpen(String::new()),
TokenType::Newline,
TokenType::Eof,
])?;
continue;
}
}
}
Ok(ast.finalize()?)
}
fn next_node(&mut self) -> Result<Node, ParserError> {
let token = self.peek()?;
let node = match token.token_type() {
TokenType::Comment(s, start, end) => self.parse_comment(s, start, end.as_deref()),
TokenType::DjangoBlock(s) => self.parse_django_block(s),
TokenType::DjangoVariable(s) => self.parse_django_variable(s),
TokenType::Eof => self.parse_eof(),
TokenType::HtmlTagClose(tag) => Err(ParserError::ClosingTagFound(tag.to_string())),
TokenType::HtmlTagOpen(s) => self.parse_html_tag_open(s),
TokenType::HtmlTagVoid(s) => self.parse_html_tag_void(s),
TokenType::Newline => self.parse_newline(),
TokenType::ScriptTagOpen(s) => self.parse_script_tag_open(s),
TokenType::ScriptTagClose(_) => Err(ParserError::ClosingTagFound("script".to_string())),
TokenType::StyleTagOpen(s) => self.parse_style_tag_open(s),
TokenType::StyleTagClose(_) => Err(ParserError::ClosingTagFound("style".to_string())),
TokenType::Text(s) => self.parse_text(s),
TokenType::Whitespace(_) => self.parse_whitespace(),
}?;
Ok(node)
}
fn parse_comment(
&mut self,
content: &str,
start: &str,
end: Option<&str>,
) -> Result<Node, ParserError> {
self.consume()?;
match start {
"{#" => Ok(Node::Django(DjangoNode::Comment(content.to_string()))),
"<!--" => Ok(Node::Html(HtmlNode::Comment(content.to_string()))),
"//" => Ok(Node::Script(ScriptNode::Comment {
content: content.to_string(),
kind: ScriptCommentKind::SingleLine,
})),
"/*" => {
// Look back for script/style context
let token_type = self
.peek_back(self.current)?
.iter()
.find_map(|token| match token.token_type() {
TokenType::ScriptTagOpen(_) => {
Some(TokenType::ScriptTagOpen(String::new()))
}
TokenType::StyleTagOpen(_) => Some(TokenType::StyleTagOpen(String::new())),
TokenType::ScriptTagClose(_) | TokenType::StyleTagClose(_) => None,
_ => None,
})
.ok_or(ParserError::InvalidMultiLineComment)?;
match token_type {
TokenType::ScriptTagOpen(_) => Ok(Node::Script(ScriptNode::Comment {
content: content.to_string(),
kind: ScriptCommentKind::MultiLine,
})),
TokenType::StyleTagOpen(_) => {
Ok(Node::Style(StyleNode::Comment(content.to_string())))
}
_ => unreachable!(),
}
}
_ => Err(ParserError::UnexpectedToken(Token::new(
TokenType::Comment(
content.to_string(),
start.to_string(),
end.map(String::from),
),
0,
None,
))),
}
}
fn parse_django_block(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
let bits: Vec<String> = s.split_whitespace().map(String::from).collect();
let first = bits.first().ok_or(ParserError::Node(AstError::EmptyTag))?;
// If this is an end tag, signal it like we do with HTML closing tags
if first.starts_with("end") {
return Err(ParserError::ClosingTagFound(first.clone()));
}
let kind = DjangoTagKind::from_str(first)?;
let mut children = Vec::new();
let end_tag = format!("end{}", first);
while !self.is_at_end() {
match self.next_node() {
Ok(node) => {
children.push(node);
}
Err(ParserError::ClosingTagFound(tag)) => {
if tag == end_tag {
self.consume()?;
break;
}
// If it's not our end tag, keep collecting children
}
Err(e) => return Err(e),
}
}
Ok(Node::Django(DjangoNode::Tag {
kind,
bits,
children,
}))
}
fn parse_django_variable(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
let parts: Vec<&str> = s.split('|').collect();
let bits: Vec<String> = parts[0].trim().split('.').map(String::from).collect();
let filters: Vec<DjangoFilter> = parts[1..]
.iter()
.map(|filter_str| {
let filter_parts: Vec<&str> = filter_str.trim().split(':').collect();
let name = filter_parts[0].to_string();
let arguments = if filter_parts.len() > 1 {
filter_parts[1]
.trim_matches('"')
.split(',')
.map(|arg| arg.trim().to_string())
.collect()
} else {
Vec::new()
};
DjangoFilter::new(name, arguments)
})
.collect();
Ok(Node::Django(DjangoNode::Variable { bits, filters }))
}
fn parse_eof(&mut self) -> Result<Node, ParserError> {
if self.is_at_end() {
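// Consuming the final Eof token yields AtEndOfStream, which the main
// parse loop treats as a normal stop condition rather than an error.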
self.consume()?;
self.next_node()
} else {
Err(ParserError::UnexpectedEof)
}
}
fn parse_html_tag_open(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
let mut parts = s.split_whitespace();
let tag_name = parts
.next()
.ok_or(ParserError::InvalidTokenAccess)?
.to_string();
let mut attributes = BTreeMap::new();
for attr in parts {
if let Some((key, value)) = attr.split_once('=') {
// Key-value attribute (class="container")
attributes.insert(
key.to_string(),
AttributeValue::Value(value.trim_matches('"').to_string()),
);
} else {
// Boolean attribute (disabled)
attributes.insert(attr.to_string(), AttributeValue::Boolean);
}
}
let mut children = Vec::new();
while !self.is_at_end() {
match self.next_node() {
Ok(node) => {
children.push(node);
}
Err(ParserError::ClosingTagFound(tag)) => {
if tag == tag_name {
self.consume()?;
break;
}
}
Err(e) => return Err(e),
}
}
Ok(Node::Html(HtmlNode::Element {
tag_name,
attributes,
children,
}))
}
fn parse_html_tag_void(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
let mut parts = s.split_whitespace();
let tag_name = parts
.next()
.ok_or(ParserError::InvalidTokenAccess)?
.to_string();
let mut attributes = BTreeMap::new();
for attr in parts {
if let Some((key, value)) = attr.split_once('=') {
attributes.insert(
key.to_string(),
AttributeValue::Value(value.trim_matches('"').to_string()),
);
} else {
attributes.insert(attr.to_string(), AttributeValue::Boolean);
}
}
Ok(Node::Html(HtmlNode::Void {
tag_name,
attributes,
}))
}
fn parse_newline(&mut self) -> Result<Node, ParserError> {
self.consume()?;
self.next_node()
}
fn parse_script_tag_open(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
let parts = s.split_whitespace();
let mut attributes = BTreeMap::new();
for attr in parts {
if let Some((key, value)) = attr.split_once('=') {
attributes.insert(
key.to_string(),
AttributeValue::Value(value.trim_matches('"').to_string()),
);
} else {
attributes.insert(attr.to_string(), AttributeValue::Boolean);
}
}
let mut children = Vec::new();
while !self.is_at_end() {
match self.next_node() {
Ok(node) => {
children.push(node);
}
Err(ParserError::ClosingTagFound(tag)) => {
if tag == "script" {
self.consume()?;
break;
}
// If it's not our closing tag, keep collecting children
}
Err(e) => return Err(e),
}
}
Ok(Node::Script(ScriptNode::Element {
attributes,
children,
}))
}
fn parse_style_tag_open(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
let mut parts = s.split_whitespace();
let _tag_name = parts
.next()
.ok_or(ParserError::InvalidTokenAccess)?
.to_string();
let mut attributes = BTreeMap::new();
for attr in parts {
if let Some((key, value)) = attr.split_once('=') {
attributes.insert(
key.to_string(),
AttributeValue::Value(value.trim_matches('"').to_string()),
);
} else {
attributes.insert(attr.to_string(), AttributeValue::Boolean);
}
}
let mut children = Vec::new();
while !self.is_at_end() {
match self.next_node() {
Ok(node) => {
children.push(node);
}
Err(ParserError::ClosingTagFound(tag)) => {
if tag == "style" {
self.consume()?;
break;
}
// If it's not our closing tag, keep collecting children
}
Err(e) => return Err(e),
}
}
Ok(Node::Style(StyleNode::Element {
attributes,
children,
}))
}
fn parse_text(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
Ok(Node::Text(s.to_string()))
}
fn parse_whitespace(&mut self) -> Result<Node, ParserError> {
self.consume()?;
self.next_node()
}
fn peek(&self) -> Result<Token, ParserError> {
self.peek_at(0)
}
fn peek_next(&self) -> Result<Token, ParserError> {
self.peek_at(1)
}
fn peek_previous(&self) -> Result<Token, ParserError> {
self.peek_at(-1)
}
fn peek_forward(&self, steps: usize) -> Result<Vec<Token>, ParserError> {
(0..steps).map(|i| self.peek_at(i as isize)).collect()
}
fn peek_back(&self, steps: usize) -> Result<Vec<Token>, ParserError> {
(1..=steps).map(|i| self.peek_at(-(i as isize))).collect()
}
fn peek_at(&self, offset: isize) -> Result<Token, ParserError> {
let index = self.current as isize + offset;
self.item_at(index as usize)
}
fn item_at(&self, index: usize) -> Result<Token, ParserError> {
if let Some(token) = self.tokens.get(index) {
Ok(token.clone())
} else {
let error = if self.tokens.is_empty() {
ParserError::EmptyTokenStream
} else if index < self.current {
ParserError::AtBeginningOfStream
} else if index >= self.tokens.len() {
ParserError::AtEndOfStream
} else {
ParserError::InvalidTokenAccess
};
Err(error)
}
}
fn is_at_end(&self) -> bool {
self.current + 1 >= self.tokens.len()
}
fn consume(&mut self) -> Result<Token, ParserError> {
if self.is_at_end() {
return Err(ParserError::AtEndOfStream);
}
self.current += 1;
self.peek_previous()
}
fn backtrack(&mut self, steps: usize) -> Result<Token, ParserError> {
if self.current < steps {
return Err(ParserError::AtBeginningOfStream);
}
self.current -= steps;
self.peek_next()
}
fn lookahead(&self, types: &[TokenType]) -> Result<bool, ParserError> {
for (i, t) in types.iter().enumerate() {
if !self.peek_at(i as isize)?.is_token_type(t) {
return Ok(false);
}
}
Ok(true)
}
fn consume_if(&mut self, token_type: TokenType) -> Result<Token, ParserError> {
let token = self.peek()?;
if !token.is_token_type(&token_type) {
return Err(ParserError::ExpectedTokenType(token, token_type));
}
self.consume()?;
Ok(token)
}
fn consume_until(&mut self, end_type: TokenType) -> Result<Vec<Token>, ParserError> {
let mut consumed = Vec::new();
while !self.is_at_end() && !self.peek()?.is_token_type(&end_type) {
let token = self.consume()?;
consumed.push(token);
}
Ok(consumed)
}
fn synchronize(&mut self, sync_types: &[TokenType]) -> Result<(), ParserError> {
while !self.is_at_end() {
if sync_types.contains(self.peek()?.token_type()) {
return Ok(());
}
self.consume()?;
}
Ok(())
}
}
#[derive(Error, Debug)]
pub enum ParserError {
#[error("token stream is empty")]
EmptyTokenStream,
#[error("at beginning of token stream")]
AtBeginningOfStream,
#[error("at end of token stream")]
AtEndOfStream,
#[error("invalid token access")]
InvalidTokenAccess,
#[error("unexpected token '{0:?}', expected type '{1:?}'")]
ExpectedTokenType(Token, TokenType),
#[error("unexpected token '{0:?}'")]
UnexpectedToken(Token),
#[error("unexpected end tag: {0}")]
UnexpectedEndTag(String),
#[error("multi-line comment outside of script or style context")]
InvalidMultLineComment,
#[error("unexpected end of file")]
UnexpectedEof,
#[error("found closing tag: {0}")]
ClosingTagFound(String),
#[error(transparent)]
Node(#[from] AstError),
}
#[cfg(test)]
mod tests {
use super::*;
use crate::lexer::Lexer;
#[test]
fn test_parse_comments() {
let source = r#"<!-- HTML comment -->
{# Django comment #}
<script>
// JS single line
/* JS multi
line */
</script>
<style>
/* CSS comment */
</style>"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_django_block() {
let source = r#"{% if user.is_staff %}Admin{% else %}User{% endif %}"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_django_variable() {
let source = r#"{{ user.name|default:"Anonymous"|title }}"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_html_tag() {
let source = r#"<div class="container" id="main" disabled></div>"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_script() {
let source = r#"<script type="text/javascript">
// Single line comment
const x = 1;
/* Multi-line
comment */
console.log(x);
</script>"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_style() {
let source = r#"<style type="text/css">
/* Header styles */
.header {
color: blue;
}
</style>"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_full() {
let source = r#"<!DOCTYPE html>
<html>
<head>
<style type="text/css">
/* Style header */
.header { color: blue; }
</style>
<script type="text/javascript">
// Init app
const app = {
/* Config */
debug: true
};
</script>
</head>
<body>
<!-- Header section -->
<div class="header" id="main" data-value="123" disabled>
{% if user.is_authenticated %}
{# Welcome message #}
<h1>Welcome, {{ user.name|default:"Guest"|title }}!</h1>
{% if user.is_staff %}
<span>Admin</span>
{% else %}
<span>User</span>
{% endif %}
{% endif %}
</div>
</body>
</html>"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_unexpected_eof() {
let source = "<div>\n";
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse();
assert!(matches!(ast, Err(ParserError::UnexpectedEof)));
}
}
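End to end, the two stages chain exactly as in the tests above; a minimal crate-internal sketch:

// Lexing and parsing have distinct error types (LexerError, ParserError),
// so the stages are unwrapped separately here.
let tokens = Lexer::new("{{ user.name }}").tokenize().unwrap();
let ast = Parser::new(tokens).parse().unwrap();
assert_eq!(ast.nodes().len(), 1); // a single DjangoNode::Variable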

@@ -0,0 +1,94 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
Comment:
- HTML comment
- "<!--"
- "-->"
line: 1
start: 0
- token_type: Newline
line: 1
start: 21
- token_type:
Comment:
- Django comment
- "{#"
- "#}"
line: 2
start: 22
- token_type: Newline
line: 2
start: 42
- token_type:
ScriptTagOpen: script
line: 3
start: 43
- token_type: Newline
line: 3
start: 51
- token_type:
Whitespace: 4
line: 4
start: 52
- token_type:
Comment:
- JS single line comment
- //
- ~
line: 4
start: 56
- token_type: Newline
line: 4
start: 81
- token_type:
Whitespace: 4
line: 5
start: 82
- token_type:
Comment:
- "JS multi-line\n comment"
- /*
- "*/"
line: 5
start: 86
- token_type: Newline
line: 5
start: 120
- token_type:
HtmlTagClose: script
line: 6
start: 121
- token_type: Newline
line: 6
start: 130
- token_type:
StyleTagOpen: style
line: 7
start: 131
- token_type: Newline
line: 7
start: 138
- token_type:
Whitespace: 4
line: 8
start: 139
- token_type:
Comment:
- CSS comment
- /*
- "*/"
line: 8
start: 143
- token_type: Newline
line: 8
start: 160
- token_type:
HtmlTagClose: style
line: 9
start: 161
- token_type: Eof
line: 9
start: ~

@@ -0,0 +1,27 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
DjangoBlock: if user.is_staff
line: 1
start: 0
- token_type:
Text: Admin
line: 1
start: 22
- token_type:
DjangoBlock: else
line: 1
start: 27
- token_type:
Text: User
line: 1
start: 37
- token_type:
DjangoBlock: endif
line: 1
start: 41
- token_type: Eof
line: 1
start: ~

@@ -0,0 +1,11 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
DjangoVariable: "user.name|default:\"Anonymous\"|title"
line: 1
start: 0
- token_type: Eof
line: 1
start: ~

@@ -0,0 +1,369 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
HtmlTagOpen: "!DOCTYPE html"
line: 1
start: 0
- token_type: Newline
line: 1
start: 15
- token_type:
HtmlTagOpen: html
line: 2
start: 16
- token_type: Newline
line: 2
start: 22
- token_type:
HtmlTagOpen: head
line: 3
start: 23
- token_type: Newline
line: 3
start: 29
- token_type:
Whitespace: 4
line: 4
start: 30
- token_type:
StyleTagOpen: "style type=\"text/css\""
line: 4
start: 34
- token_type: Newline
line: 4
start: 57
- token_type:
Whitespace: 8
line: 5
start: 58
- token_type:
Comment:
- Style header
- /*
- "*/"
line: 5
start: 66
- token_type: Newline
line: 5
start: 84
- token_type:
Whitespace: 8
line: 6
start: 85
- token_type:
Text: ".header "
line: 6
start: 93
- token_type:
Text: "{"
line: 6
start: 101
- token_type:
Whitespace: 1
line: 6
start: 102
- token_type:
Text: "color: blue; }"
line: 6
start: 103
- token_type: Newline
line: 6
start: 117
- token_type:
Whitespace: 4
line: 7
start: 118
- token_type:
HtmlTagClose: style
line: 7
start: 122
- token_type: Newline
line: 7
start: 130
- token_type:
Whitespace: 4
line: 8
start: 131
- token_type:
ScriptTagOpen: "script type=\"text/javascript\""
line: 8
start: 135
- token_type: Newline
line: 8
start: 166
- token_type:
Whitespace: 8
line: 9
start: 167
- token_type:
Comment:
- Init app
- //
- ~
line: 9
start: 175
- token_type: Newline
line: 9
start: 186
- token_type:
Whitespace: 8
line: 10
start: 187
- token_type:
Text: "const app = "
line: 10
start: 195
- token_type:
Text: "{"
line: 10
start: 207
- token_type: Newline
line: 10
start: 208
- token_type:
Whitespace: 12
line: 11
start: 209
- token_type:
Comment:
- Config
- /*
- "*/"
line: 11
start: 221
- token_type: Newline
line: 11
start: 233
- token_type:
Whitespace: 12
line: 12
start: 234
- token_type:
Text: "debug: true"
line: 12
start: 246
- token_type: Newline
line: 12
start: 257
- token_type:
Whitespace: 8
line: 13
start: 258
- token_type:
Text: "};"
line: 13
start: 266
- token_type: Newline
line: 13
start: 268
- token_type:
Whitespace: 4
line: 14
start: 269
- token_type:
HtmlTagClose: script
line: 14
start: 273
- token_type: Newline
line: 14
start: 282
- token_type:
HtmlTagClose: head
line: 15
start: 283
- token_type: Newline
line: 15
start: 290
- token_type:
HtmlTagOpen: body
line: 16
start: 291
- token_type: Newline
line: 16
start: 297
- token_type:
Whitespace: 4
line: 17
start: 298
- token_type:
Comment:
- Header section
- "<!--"
- "-->"
line: 17
start: 302
- token_type: Newline
line: 17
start: 325
- token_type:
Whitespace: 4
line: 18
start: 326
- token_type:
HtmlTagOpen: "div class=\"header\" id=\"main\" data-value=\"123\" disabled"
line: 18
start: 330
- token_type: Newline
line: 18
start: 386
- token_type:
Whitespace: 8
line: 19
start: 387
- token_type:
DjangoBlock: if user.is_authenticated
line: 19
start: 395
- token_type: Newline
line: 19
start: 425
- token_type:
Whitespace: 12
line: 20
start: 426
- token_type:
Comment:
- Welcome message
- "{#"
- "#}"
line: 20
start: 438
- token_type: Newline
line: 20
start: 459
- token_type:
Whitespace: 12
line: 21
start: 460
- token_type:
HtmlTagOpen: h1
line: 21
start: 472
- token_type:
Text: "Welcome, "
line: 21
start: 476
- token_type:
DjangoVariable: "user.name|default:\"Guest\"|title"
line: 21
start: 485
- token_type:
Text: "!"
line: 21
start: 522
- token_type:
HtmlTagClose: h1
line: 21
start: 523
- token_type: Newline
line: 21
start: 528
- token_type:
Whitespace: 12
line: 22
start: 529
- token_type:
DjangoBlock: if user.is_staff
line: 22
start: 541
- token_type: Newline
line: 22
start: 563
- token_type:
Whitespace: 16
line: 23
start: 564
- token_type:
HtmlTagOpen: span
line: 23
start: 580
- token_type:
Text: Admin
line: 23
start: 586
- token_type:
HtmlTagClose: span
line: 23
start: 591
- token_type: Newline
line: 23
start: 598
- token_type:
Whitespace: 12
line: 24
start: 599
- token_type:
DjangoBlock: else
line: 24
start: 611
- token_type: Newline
line: 24
start: 621
- token_type:
Whitespace: 16
line: 25
start: 622
- token_type:
HtmlTagOpen: span
line: 25
start: 638
- token_type:
Text: User
line: 25
start: 644
- token_type:
HtmlTagClose: span
line: 25
start: 648
- token_type: Newline
line: 25
start: 655
- token_type:
Whitespace: 12
line: 26
start: 656
- token_type:
DjangoBlock: endif
line: 26
start: 668
- token_type: Newline
line: 26
start: 679
- token_type:
Whitespace: 8
line: 27
start: 680
- token_type:
DjangoBlock: endif
line: 27
start: 688
- token_type: Newline
line: 27
start: 699
- token_type:
Whitespace: 4
line: 28
start: 700
- token_type:
HtmlTagClose: div
line: 28
start: 704
- token_type: Newline
line: 28
start: 710
- token_type:
HtmlTagClose: body
line: 29
start: 711
- token_type: Newline
line: 29
start: 718
- token_type:
HtmlTagClose: html
line: 30
start: 719
- token_type: Eof
line: 30
start: ~

@@ -0,0 +1,15 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
HtmlTagOpen: "div class=\"container\" id=\"main\" disabled"
line: 1
start: 0
- token_type:
HtmlTagClose: div
line: 1
start: 42
- token_type: Eof
line: 1
start: ~

@@ -0,0 +1,68 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
ScriptTagOpen: "script type=\"text/javascript\""
line: 1
start: 0
- token_type: Newline
line: 1
start: 31
- token_type:
Whitespace: 4
line: 2
start: 32
- token_type:
Comment:
- Single line comment
- //
- ~
line: 2
start: 36
- token_type: Newline
line: 2
start: 58
- token_type:
Whitespace: 4
line: 3
start: 59
- token_type:
Text: const x = 1;
line: 3
start: 63
- token_type: Newline
line: 3
start: 75
- token_type:
Whitespace: 4
line: 4
start: 76
- token_type:
Comment:
- "Multi-line\n comment"
- /*
- "*/"
line: 4
start: 80
- token_type: Newline
line: 4
start: 111
- token_type:
Whitespace: 4
line: 5
start: 112
- token_type:
Text: console.log(x);
line: 5
start: 116
- token_type: Newline
line: 5
start: 131
- token_type:
HtmlTagClose: script
line: 6
start: 132
- token_type: Eof
line: 6
start: ~

@@ -0,0 +1,69 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
StyleTagOpen: "style type=\"text/css\""
line: 1
start: 0
- token_type: Newline
line: 1
start: 23
- token_type:
Whitespace: 4
line: 2
start: 24
- token_type:
Comment:
- Header styles
- /*
- "*/"
line: 2
start: 28
- token_type: Newline
line: 2
start: 47
- token_type:
Whitespace: 4
line: 3
start: 48
- token_type:
Text: ".header "
line: 3
start: 52
- token_type:
Text: "{"
line: 3
start: 60
- token_type: Newline
line: 3
start: 61
- token_type:
Whitespace: 8
line: 4
start: 62
- token_type:
Text: "color: blue;"
line: 4
start: 70
- token_type: Newline
line: 4
start: 82
- token_type:
Whitespace: 4
line: 5
start: 83
- token_type:
Text: "}"
line: 5
start: 87
- token_type: Newline
line: 5
start: 88
- token_type:
HtmlTagClose: style
line: 6
start: 89
- token_type: Eof
line: 6
start: ~

@@ -0,0 +1,28 @@
---
source: crates/djls-ast/src/parser.rs
expression: ast
---
nodes:
- Html:
Comment: HTML comment
- Django:
Comment: Django comment
- Script:
Element:
attributes:
script: Boolean
children:
- Script:
Comment:
content: JS single line
kind: SingleLine
- Script:
Comment:
content: "JS multi\n line"
kind: MultiLine
- Style:
Element:
attributes: {}
children:
- Style:
Comment: CSS comment

@@ -0,0 +1,20 @@
---
source: crates/djls-ast/src/parser.rs
expression: ast
---
nodes:
- Django:
Tag:
kind: If
bits:
- if
- user.is_staff
children:
- Text: Admin
- Django:
Tag:
kind: Else
bits:
- else
children:
- Text: User

@@ -0,0 +1,16 @@
---
source: crates/djls-ast/src/parser.rs
expression: ast
---
nodes:
- Django:
Variable:
bits:
- user
- name
filters:
- name: default
arguments:
- Anonymous
- name: title
arguments: []

@@ -0,0 +1,15 @@
---
source: crates/djls-ast/src/parser.rs
expression: ast
---
nodes:
- Html:
Element:
tag_name: div
attributes:
class:
Value: container
disabled: Boolean
id:
Value: main
children: []

@@ -0,0 +1,22 @@
---
source: crates/djls-ast/src/parser.rs
expression: ast
---
nodes:
- Script:
Element:
attributes:
script: Boolean
type:
Value: text/javascript
children:
- Script:
Comment:
content: Single line comment
kind: SingleLine
- Text: const x = 1;
- Script:
Comment:
content: "Multi-line\n comment"
kind: MultiLine
- Text: console.log(x);

@@ -0,0 +1,17 @@
---
source: crates/djls-ast/src/parser.rs
expression: ast
---
nodes:
- Style:
Element:
attributes:
type:
Value: text/css
children:
- Style:
Comment: Header styles
- Text: ".header "
- Text: "{"
- Text: "color: blue;"
- Text: "}"

crates/djls-ast/src/tokens.rs Normal file

@@ -0,0 +1,186 @@
use serde::Serialize;
use std::fmt;
use std::ops::{Deref, DerefMut};
#[derive(Clone, Debug, Serialize, PartialEq)]
pub enum TokenType {
Comment(String, String, Option<String>),
DjangoBlock(String),
DjangoVariable(String),
Eof,
HtmlTagOpen(String),
HtmlTagClose(String),
HtmlTagVoid(String),
Newline,
ScriptTagOpen(String),
ScriptTagClose(String),
StyleTagOpen(String),
StyleTagClose(String),
Text(String),
Whitespace(usize),
}
impl TokenType {
pub fn len(&self) -> Option<usize> {
match self {
TokenType::DjangoBlock(s)
| TokenType::DjangoVariable(s)
| TokenType::HtmlTagOpen(s)
| TokenType::HtmlTagClose(s)
| TokenType::HtmlTagVoid(s)
| TokenType::ScriptTagOpen(s)
| TokenType::ScriptTagClose(s)
| TokenType::StyleTagOpen(s)
| TokenType::StyleTagClose(s)
| TokenType::Text(s) => Some(s.len()),
TokenType::Comment(content, start, end) => {
Some(content.len() + start.len() + end.as_ref().map_or(0, |e| e.len()))
}
TokenType::Whitespace(len) => Some(*len),
TokenType::Newline => Some(1),
TokenType::Eof => None,
}
}
pub fn lexeme(&self) -> &str {
match self {
TokenType::DjangoBlock(s)
| TokenType::DjangoVariable(s)
| TokenType::HtmlTagOpen(s)
| TokenType::HtmlTagClose(s)
| TokenType::HtmlTagVoid(s)
| TokenType::ScriptTagOpen(s)
| TokenType::ScriptTagClose(s)
| TokenType::StyleTagOpen(s)
| TokenType::StyleTagClose(s)
| TokenType::Text(s) => s,
TokenType::Comment(content, _, _) => content, // Just return the content
TokenType::Whitespace(_) => " ",
TokenType::Newline => "\n",
TokenType::Eof => "",
}
}
}
impl fmt::Display for TokenType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use TokenType::*;
match self {
Comment(content, start, end) => match end {
Some(end) => write!(f, "{}{}{}", start, content, end),
None => write!(f, "{}{}", start, content),
},
DjangoBlock(s) => write!(f, "{{% {} %}}", s),
DjangoVariable(s) => write!(f, "{{{{ {} }}}}", s),
Eof => Ok(()),
HtmlTagOpen(s) => write!(f, "<{}>", s),
HtmlTagClose(s) => write!(f, "</{}>", s),
HtmlTagVoid(s) => write!(f, "<{}/>", s),
Newline => f.write_str("\n"),
// The stored lexeme already includes the tag name (e.g. "script type=..."),
// so no "script"/"style" prefix is added here
ScriptTagOpen(s) => write!(f, "<{}>", s),
ScriptTagClose(_) => f.write_str("</script>"),
StyleTagOpen(s) => write!(f, "<{}>", s),
StyleTagClose(_) => f.write_str("</style>"),
Text(s) => f.write_str(s),
Whitespace(len) => f.write_str(&" ".repeat(*len)),
}
}
}
#[derive(Clone, Debug, Serialize, PartialEq)]
pub struct Token {
token_type: TokenType,
line: usize,
start: Option<usize>,
}
impl Token {
pub fn new(token_type: TokenType, line: usize, start: Option<usize>) -> Self {
Self {
token_type,
line,
start,
}
}
pub fn lexeme_from_source<'a>(&self, source: &'a str) -> Option<&'a str> {
match (self.start, self.token_type.len()) {
(Some(start), Some(len)) => Some(&source[start..start + len]),
_ => None,
}
}
pub fn lexeme(&self) -> &str {
self.token_type.lexeme()
}
pub fn token_type(&self) -> &TokenType {
&self.token_type
}
pub fn is_token_type(&self, token_type: &TokenType) -> bool {
&self.token_type == token_type
}
}
#[derive(Clone, Debug, Default, Serialize)]
pub struct TokenStream(Vec<Token>);
impl TokenStream {
pub fn tokens(&self) -> &Vec<Token> {
&self.0
}
pub fn add_token(&mut self, token: Token) {
self.0.push(token);
}
pub fn finalize(&mut self, line: usize) -> TokenStream {
let eof_token = Token {
token_type: TokenType::Eof,
line,
start: None,
};
self.add_token(eof_token);
self.clone()
}
}
impl AsRef<[Token]> for TokenStream {
fn as_ref(&self) -> &[Token] {
&self.0
}
}
impl Deref for TokenStream {
type Target = Vec<Token>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for TokenStream {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl IntoIterator for TokenStream {
type Item = Token;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl<'a> IntoIterator for &'a TokenStream {
type Item = &'a Token;
type IntoIter = std::slice::Iter<'a, Token>;
fn into_iter(self) -> Self::IntoIter {
self.0.iter()
}
}
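A small crate-internal sketch of the two lexeme accessors. Note they can differ for delimited tokens (a DjangoVariable stores its content without the surrounding braces), and lexeme_from_source slices by byte offset, so it lines up with the lexer's char-based starts only for ASCII input:

let source = "hello <b>";
let token = Token::new(TokenType::Text("hello ".to_string()), 1, Some(0));
assert_eq!(token.lexeme(), "hello ");                         // from the token itself
assert_eq!(token.lexeme_from_source(source), Some("hello ")); // sliced from source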