use crate::ast::{Assignment, Ast, AstError, Block, DjangoFilter, LineOffsets, Node, Span, Tag}; use crate::tagspecs::{TagSpec, TagType}; use crate::tokens::{Token, TokenStream, TokenType}; use thiserror::Error; pub struct Parser { tokens: TokenStream, current: usize, errors: Vec, } impl Parser { pub fn new(tokens: TokenStream) -> Self { Self { tokens, current: 0, errors: Vec::new(), } } pub fn parse(&mut self) -> Result<(Ast, Vec), ParserError> { let mut ast = Ast::default(); let mut line_offsets = LineOffsets::new(); // First pass: collect line offsets let mut current_line_start = 0; for token in self.tokens.tokens() { if let TokenType::Newline = token.token_type() { if let Some(start) = token.start() { // Add offset for next line current_line_start = start + 1; line_offsets.add_line(current_line_start); } } } // Reset current position self.current = 0; // Second pass: parse nodes while !self.is_at_end() { match self.next_node() { Ok(node) => { ast.add_node(node); } Err(_) => self.synchronize()?, } } ast.set_line_offsets(line_offsets); Ok((ast, std::mem::take(&mut self.errors))) } fn next_node(&mut self) -> Result { let token = self.peek()?; match token.token_type() { TokenType::Comment(content, start, end) => { self.consume()?; self.parse_comment(content, start, end.as_deref()) } TokenType::Eof => Err(ParserError::Ast(AstError::StreamError("AtEnd".to_string()))), TokenType::DjangoBlock(content) => { self.consume()?; self.parse_django_block(content) } TokenType::DjangoVariable(content) => { self.consume()?; self.parse_django_variable(content) } TokenType::HtmlTagClose(_) | TokenType::HtmlTagOpen(_) | TokenType::HtmlTagVoid(_) | TokenType::Newline | TokenType::ScriptTagClose(_) | TokenType::ScriptTagOpen(_) | TokenType::StyleTagClose(_) | TokenType::StyleTagOpen(_) | TokenType::Text(_) | TokenType::Whitespace(_) => { self.consume()?; self.parse_text() } } } fn parse_comment( &mut self, content: &str, start: &str, end: Option<&str>, ) -> Result { let token = self.peek_previous()?; let start_pos = token.start().unwrap_or(0); // Only treat Django comments as Comment nodes if start == "{#" && end == Some("#}") { Ok(Node::Comment { content: content.to_string(), span: Span::new(start_pos, token.token_type().len().unwrap_or(0) as u32), }) } else { self.parse_text() } } fn parse_django_block(&mut self, content: &str) -> Result { let token = self.peek_previous()?; let start_pos = token.start().unwrap_or(0); let total_length = token.length().unwrap_or(0); let span = Span::new(start_pos, total_length); let bits: Vec = content.split_whitespace().map(String::from).collect(); let tag_name = bits.first().ok_or(ParserError::EmptyTag)?.clone(); let tag = Tag { name: tag_name.clone(), bits: bits.clone(), span, tag_span: span, assignment: None, }; let specs = TagSpec::load_builtin_specs()?; let spec = match specs.get(&tag_name) { Some(spec) => spec, None => return Ok(Node::Block(Block::Tag { tag })), }; let block = match spec.tag_type { TagType::Block => { let mut nodes = Vec::new(); let mut closing = None; while !self.is_at_end() { match self.next_node() { Ok(Node::Block(Block::Tag { tag })) => { if let Some(expected_closing) = &spec.closing { if tag.name == *expected_closing { closing = Some(Box::new(Block::Closing { tag })); break; } } // If we get here, either there was no expected closing tag or it didn't match if let Some(branches) = &spec.branches { if branches.iter().any(|b| b.name == tag.name) { let mut branch_tag = tag.clone(); let mut branch_nodes = Vec::new(); let mut found_closing = false; while let Ok(node) = self.next_node() { match &node { Node::Block(Block::Tag { tag: next_tag }) => { if let Some(expected_closing) = &spec.closing { if next_tag.name == *expected_closing { // Found the closing tag nodes.push(Node::Block(Block::Branch { tag: branch_tag.clone(), nodes: branch_nodes.clone(), })); closing = Some(Box::new(Block::Closing { tag: next_tag.clone(), })); found_closing = true; break; } } // Check if this is another branch tag if branches.iter().any(|b| b.name == next_tag.name) { // Push the current branch and start a new one nodes.push(Node::Block(Block::Branch { tag: branch_tag.clone(), nodes: branch_nodes.clone(), })); branch_nodes = Vec::new(); branch_tag = next_tag.clone(); continue; } branch_nodes.push(node); } _ => branch_nodes.push(node), } } if !found_closing { // Push the last branch if we didn't find a closing tag nodes.push(Node::Block(Block::Branch { tag: branch_tag.clone(), nodes: branch_nodes.clone(), })); // Add error for unclosed tag self.errors.push(ParserError::Ast(AstError::UnclosedTag( tag_name.clone(), ))); } if found_closing { break; } continue; } } nodes.push(Node::Block(Block::Tag { tag })); } Ok(node) => nodes.push(node), Err(e) => { self.errors.push(e); break; } } } Block::Block { tag, nodes, closing, assignments: None, } } TagType::Tag => Block::Tag { tag }, TagType::Variable => Block::Variable { tag }, TagType::Inclusion => { let template_name = bits.get(1).cloned().unwrap_or_default(); Block::Inclusion { tag, template_name } } }; // Add error if we didn't find a closing tag for a block if let Block::Block { closing: None, tag: tag_ref, .. } = &block { if let Some(_expected_closing) = &spec.closing { self.errors.push(ParserError::Ast(AstError::UnclosedTag( tag_ref.name.clone(), ))); } } Ok(Node::Block(block)) } fn parse_django_variable(&mut self, content: &str) -> Result { let token = self.peek_previous()?; let start = token.start().unwrap_or(0); let mut bits = Vec::new(); let mut filters = Vec::new(); let parts: Vec<&str> = content.split('|').map(|s| s.trim()).collect(); if !parts.is_empty() { bits = parts[0].split('.').map(|s| s.trim().to_string()).collect(); for filter_part in parts.iter().skip(1) { let filter_parts: Vec<&str> = filter_part.split(':').collect(); let filter_name = filter_parts[0].trim(); let filter_args = if filter_parts.len() > 1 { filter_parts[1] .split(',') .map(|s| s.trim().to_string()) .collect() } else { Vec::new() }; filters.push(DjangoFilter { name: filter_name.to_string(), args: filter_args, span: Span::new(start + 4, content.len() as u32), }); } } Ok(Node::Variable { bits, filters, span: Span::new(start + 3, content.len() as u32), }) } fn parse_text(&mut self) -> Result { let start_token = self.peek_previous()?; let start_pos = start_token.start().unwrap_or(0); match start_token.token_type() { TokenType::Newline => return self.next_node(), TokenType::Whitespace(_) if self .peek_at(-2) .map_or(false, |t| matches!(t.token_type(), TokenType::Newline)) => { return self.next_node() } _ => {} } let mut text = start_token.token_type().to_string(); let mut total_length: u32 = u32::try_from(text.len()).unwrap(); while let Ok(token) = self.peek() { match token.token_type() { TokenType::DjangoBlock(_) | TokenType::DjangoVariable(_) | TokenType::Comment(_, _, _) | TokenType::Newline | TokenType::Eof => break, _ => { let token_text = token.token_type().to_string(); text.push_str(&token_text); total_length += u32::try_from(token_text.len()).unwrap(); self.consume()?; } } } if text.trim().is_empty() { self.next_node() } else { Ok(Node::Text { content: text, span: Span::new(start_pos, total_length), }) } } fn peek(&self) -> Result { self.peek_at(0) } fn peek_previous(&self) -> Result { self.peek_at(-1) } fn peek_at(&self, offset: isize) -> Result { let index = self.current as isize + offset; self.item_at(index as usize) } fn item_at(&self, index: usize) -> Result { if let Some(token) = self.tokens.get(index) { Ok(token.clone()) } else { let error = if self.tokens.is_empty() { ParserError::stream_error("Empty") } else if index < self.current { ParserError::stream_error("AtBeginning") } else if index >= self.tokens.len() { ParserError::stream_error("AtEnd") } else { ParserError::stream_error("InvalidAccess") }; Err(error) } } fn is_at_end(&self) -> bool { self.current + 1 >= self.tokens.len() } fn consume(&mut self) -> Result { if self.is_at_end() { return Err(ParserError::stream_error("AtEnd")); } self.current += 1; self.peek_previous() } fn synchronize(&mut self) -> Result<(), ParserError> { let sync_types = &[ TokenType::DjangoBlock(String::new()), TokenType::DjangoVariable(String::new()), TokenType::Comment(String::new(), String::from("{#"), Some(String::from("#}"))), TokenType::Eof, ]; while !self.is_at_end() { let current = self.peek()?; for sync_type in sync_types { if *current.token_type() == *sync_type { return Ok(()); } } self.consume()?; } Ok(()) } } #[derive(Debug)] pub enum Signal { ClosingTagFound(String), IntermediateTagFound(String, Vec), IntermediateTag(String), SpecialTag(String), ClosingTag, } #[derive(Debug, Error)] pub enum ParserError { #[error("{0}")] Ast(#[from] AstError), #[error("Signal: {0:?}")] ErrorSignal(Signal), #[error("{0}")] Other(#[from] anyhow::Error), #[error("empty tag")] EmptyTag, } impl ParserError { pub fn stream_error(kind: impl Into) -> Self { Self::Ast(AstError::StreamError(kind.into())) } } #[cfg(test)] mod tests { use super::*; use crate::lexer::Lexer; mod html { use super::*; #[test] fn test_parse_html_doctype() { let source = ""; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } #[test] fn test_parse_html_tag() { let source = "
Hello
"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } #[test] fn test_parse_html_void() { let source = ""; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } } mod django { use super::*; #[test] fn test_parse_django_variable() { let source = "{{ user.name|title }}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } #[test] fn test_parse_filter_chains() { let source = "{{ value|default:'nothing'|title|upper }}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } #[test] fn test_parse_django_if_block() { let source = "{% if user.is_authenticated %}Welcome{% endif %}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } #[test] fn test_parse_django_for_block() { let source = "{% for item in items %}{{ item }}{% empty %}No items{% endfor %}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } #[test] fn test_parse_complex_if_elif() { let source = "{% if x > 0 %}Positive{% elif x < 0 %}Negative{% else %}Zero{% endif %}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } #[test] fn test_parse_nested_for_if() { let source = "{% for item in items %}{% if item.active %}{{ item.name }}{% endif %}{% endfor %}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } #[test] fn test_parse_mixed_content() { let source = "Welcome, {% if user.is_authenticated %} {{ user.name|title|default:'Guest' }} {% for group in user.groups %} {% if forloop.first %}({% endif %} {{ group.name }} {% if not forloop.last %}, {% endif %} {% if forloop.last %}){% endif %} {% empty %} (no groups) {% endfor %} {% else %} Guest {% endif %}!"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } } mod script { use super::*; #[test] fn test_parse_script() { let source = r#""#; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } } mod style { use super::*; #[test] fn test_parse_style() { let source = r#""#; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } } mod comments { use super::*; #[test] fn test_parse_comments() { let source = "{# Django comment #}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } } mod errors { use super::*; #[test] fn test_parse_unclosed_html_tag() { let source = "
"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } #[test] fn test_parse_unclosed_django_if() { let source = "{% if user.is_authenticated %}Welcome"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert_eq!(errors.len(), 1); assert!( matches!(&errors[0], ParserError::Ast(AstError::UnclosedTag(tag)) if tag == "if") ); } #[test] fn test_parse_unclosed_django_for() { let source = "{% for item in items %}{{ item.name }}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert_eq!(errors.len(), 1); assert!( matches!(&errors[0], ParserError::Ast(AstError::UnclosedTag(tag)) if tag == "for") ); } #[test] fn test_parse_unclosed_script() { let source = "
{% if user.is_authenticated %} {# Welcome message #}

Welcome, {{ user.name|title|default:'Guest' }}!

{% if user.is_staff %} Admin {% else %} User {% endif %} {% endif %}
"#; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert!(errors.is_empty()); } } mod line_tracking { use super::*; #[test] fn test_parser_tracks_line_offsets() { let source = "line1\nline2"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let (ast, errors) = parser.parse().unwrap(); let offsets = ast.line_offsets(); eprintln!("{:?}", offsets); assert_eq!(offsets.position_to_line_col(0), (1, 0)); // Start of line 1 assert_eq!(offsets.position_to_line_col(6), (2, 0)); // Start of line 2 assert!(errors.is_empty()); } } }