use crate::ast::{Ast, AstError, DjangoFilter, DjangoNode, Node, TagNode}; use crate::tagspecs::TagSpec; use crate::tokens::{Token, TokenStream, TokenType}; use thiserror::Error; pub struct Parser { tokens: TokenStream, current: usize, } impl Parser { pub fn new(tokens: TokenStream) -> Self { Parser { tokens, current: 0 } } pub fn parse(&mut self) -> Result { let mut ast = Ast::default(); while !self.is_at_end() { match self.next_node() { Ok(node) => { ast.add_node(node); } Err(ParserError::ErrorSignal(Signal::SpecialTag(_))) => { continue; } Err(err) => { match err { ParserError::Ast(err, Some(node)) => { ast.add_node(node); ast.add_error(err); } ParserError::Ast(err, None) => { ast.add_error(err); } _ => return Err(err), } if let Err(e) = self.synchronize() { match e { ParserError::Ast(AstError::StreamError(ref kind), _) if kind == "AtEnd" => { break } _ => return Err(e), } } continue; } } } ast.finalize()?; Ok(ast) } fn next_node(&mut self) -> Result { if self.is_at_end() { return Err(ParserError::Ast( AstError::StreamError("AtEnd".to_string()), None, )); } let token = self.peek()?; let node = match token.token_type() { TokenType::DjangoBlock(content) => { self.consume()?; self.parse_django_block(content) } TokenType::DjangoVariable(content) => { self.consume()?; self.parse_django_variable(content) } TokenType::Comment(content, start, end) => { self.consume()?; self.parse_comment(content, start, end.as_deref()) } TokenType::Text(_) | TokenType::Whitespace(_) | TokenType::Newline | TokenType::HtmlTagOpen(_) | TokenType::HtmlTagClose(_) | TokenType::HtmlTagVoid(_) | TokenType::ScriptTagOpen(_) | TokenType::ScriptTagClose(_) | TokenType::StyleTagOpen(_) | TokenType::StyleTagClose(_) => self.parse_text(), TokenType::Eof => Err(ParserError::Ast( AstError::StreamError("AtEnd".to_string()), None, )), }?; Ok(node) } fn parse_comment( &mut self, content: &str, start: &str, end: Option<&str>, ) -> Result { match start { "{#" => Ok(Node::Django(DjangoNode::Comment(content.to_string()))), _ => Ok(Node::Text(format!( "{}{}{}", start, content, end.unwrap_or("") ))), } } fn parse_django_block(&mut self, s: &str) -> Result { let bits: Vec = s.split_whitespace().map(String::from).collect(); let tag_name = bits.first().ok_or(AstError::EmptyTag)?.clone(); let specs = TagSpec::load_builtin_specs().unwrap_or_default(); // Check if this is a closing or branch tag for (_, spec) in specs.iter() { if Some(&tag_name) == spec.closing.as_ref() || spec .branches .as_ref() .map(|ints| ints.iter().any(|i| i.name == tag_name)) .unwrap_or(false) { return Err(ParserError::ErrorSignal(Signal::SpecialTag(tag_name))); } } let tag_spec = specs.get(tag_name.as_str()).cloned(); let mut children = Vec::new(); let mut current_branch: Option<(String, Vec, Vec)> = None; let mut found_closing_tag = false; while !self.is_at_end() { match self.next_node() { Ok(node) => { if let Some((_, _, branch_children)) = &mut current_branch { branch_children.push(node); } else { children.push(node); } } Err(ParserError::ErrorSignal(Signal::SpecialTag(tag))) => { if let Some(spec) = &tag_spec { // Check if closing tag if spec.closing.as_ref().map(|s| s.as_str()) == Some(&tag) { // If we have a current branch, add it to children if let Some((name, bits, branch_children)) = current_branch { children.push(Node::Django(DjangoNode::Tag(TagNode::Branch { name, bits, children: branch_children, }))); } children.push(Node::Django(DjangoNode::Tag(TagNode::Closing { name: tag, bits: vec![], }))); found_closing_tag = true; break; } // Check if intermediate tag if let Some(branches) = &spec.branches { if let Some(branch) = branches.iter().find(|i| i.name == tag) { // If we have a current branch, add it to children if let Some((name, bits, branch_children)) = current_branch { children.push(Node::Django(DjangoNode::Tag(TagNode::Branch { name, bits, children: branch_children, }))); } // Create new branch node let branch_bits = if branch.args { match &self.tokens[self.current - 1].token_type() { TokenType::DjangoBlock(content) => content .split_whitespace() .skip(1) // Skip the tag name .map(|s| s.to_string()) .collect(), _ => vec![tag.clone()], } } else { vec![] }; current_branch = Some((tag, branch_bits, Vec::new())); continue; } } } // If we get here, it's an unexpected tag let node = Node::Django(DjangoNode::Tag(TagNode::Block { name: tag_name.clone(), bits: bits.clone(), children: children.clone(), })); return Err(ParserError::Ast(AstError::UnexpectedTag(tag), Some(node))); } Err(ParserError::Ast(AstError::StreamError(kind), _)) if kind == "AtEnd" => { break; } Err(e) => return Err(e), } } let node = Node::Django(DjangoNode::Tag(TagNode::Block { name: tag_name.clone(), bits, children, })); if !found_closing_tag { return Err(ParserError::Ast( AstError::UnclosedTag(tag_name), Some(node), )); } Ok(node) } fn parse_django_variable(&mut self, s: &str) -> Result { let parts: Vec<&str> = s.split('|').collect(); let bits: Vec = parts[0].trim().split('.').map(String::from).collect(); let filters: Vec = parts[1..] .iter() .map(|filter_str| { let filter_parts: Vec<&str> = filter_str.trim().split(':').collect(); let name = filter_parts[0].to_string(); let arguments = if filter_parts.len() > 1 { filter_parts[1] .trim_matches('"') .split(',') .map(|arg| arg.trim().to_string()) .collect() } else { Vec::new() }; DjangoFilter::new(name, arguments) }) .collect(); Ok(Node::Django(DjangoNode::Variable { bits, filters })) } fn parse_text(&mut self) -> Result { let mut text = String::new(); while let Ok(token) = self.peek() { match token.token_type() { TokenType::DjangoBlock(_) | TokenType::DjangoVariable(_) | TokenType::Comment(_, _, _) => break, TokenType::Text(s) => { self.consume()?; text.push_str(s); } TokenType::HtmlTagOpen(s) | TokenType::HtmlTagClose(s) | TokenType::HtmlTagVoid(s) | TokenType::ScriptTagOpen(s) | TokenType::ScriptTagClose(s) | TokenType::StyleTagOpen(s) | TokenType::StyleTagClose(s) => { self.consume()?; text.push_str(s); } TokenType::Whitespace(len) => { self.consume()?; text.push_str(&" ".repeat(*len)); } TokenType::Newline => { self.consume()?; text.push('\n'); } TokenType::Eof => break, } } Ok(Node::Text(text)) } fn peek(&self) -> Result { self.peek_at(0) } fn peek_previous(&self) -> Result { self.peek_at(-1) } fn peek_at(&self, offset: isize) -> Result { let index = self.current as isize + offset; self.item_at(index as usize) } fn item_at(&self, index: usize) -> Result { if let Some(token) = self.tokens.get(index) { Ok(token.clone()) } else { let error = if self.tokens.is_empty() { ParserError::stream_error("Empty") } else if index < self.current { ParserError::stream_error("AtBeginning") } else if index >= self.tokens.len() { ParserError::stream_error("AtEnd") } else { ParserError::stream_error("InvalidAccess") }; Err(error) } } fn is_at_end(&self) -> bool { self.current + 1 >= self.tokens.len() } fn consume(&mut self) -> Result { if self.is_at_end() { return Err(ParserError::stream_error("AtEnd")); } self.current += 1; self.peek_previous() } fn synchronize(&mut self) -> Result<(), ParserError> { let sync_types = [ TokenType::DjangoBlock(String::new()), TokenType::DjangoVariable(String::new()), TokenType::Comment(String::new(), String::from("{#"), Some(String::from("#}"))), TokenType::Eof, ]; while !self.is_at_end() { let current = self.peek()?; for sync_type in &sync_types { if current.token_type() == sync_type { return Ok(()); } } self.consume()?; } Err(ParserError::Ast(AstError::StreamError("AtEnd".into()), None)) } } #[derive(Debug)] pub enum Signal { ClosingTagFound(String), IntermediateTagFound(String, Vec), IntermediateTag(String), SpecialTag(String), ClosingTag, } #[derive(Error, Debug)] pub enum ParserError { #[error("ast error: {0}")] Ast(AstError, Option), #[error("internal signal: {0:?}")] ErrorSignal(Signal), } impl From for ParserError { fn from(err: AstError) -> Self { ParserError::Ast(err, None) } } impl ParserError { pub fn unclosed_tag(tag: impl Into) -> Self { Self::Ast(AstError::UnclosedTag(tag.into()), None) } pub fn unexpected_tag(tag: impl Into) -> Self { Self::Ast(AstError::UnexpectedTag(tag.into()), None) } pub fn invalid_tag(kind: impl Into) -> Self { Self::Ast(AstError::InvalidTag(kind.into()), None) } pub fn block_error(kind: impl Into, name: impl Into) -> Self { Self::Ast(AstError::BlockError(kind.into(), name.into()), None) } pub fn stream_error(kind: impl Into) -> Self { Self::Ast(AstError::StreamError(kind.into()), None) } pub fn token_error(expected: impl Into, actual: Token) -> Self { Self::Ast( AstError::TokenError(format!("expected {}, got {:?}", expected.into(), actual)), None, ) } pub fn argument_error(kind: impl Into, details: impl Into) -> Self { Self::Ast(AstError::ArgumentError(kind.into(), details.into()), None) } } #[cfg(test)] mod tests { use super::*; use crate::lexer::Lexer; mod html { use super::*; #[test] fn test_parse_html_doctype() { let source = ""; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } #[test] fn test_parse_html_tag() { let source = "
Hello
"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } #[test] fn test_parse_html_void() { let source = ""; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } } mod django { use super::*; #[test] fn test_parse_django_variable() { let source = "{{ user.name|title }}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } #[test] fn test_parse_filter_chains() { let source = "{{ value|default:'nothing'|title|upper }}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } #[test] fn test_parse_django_if_block() { let source = "{% if user.is_authenticated %}Welcome{% endif %}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } #[test] fn test_parse_django_for_block() { let source = "{% for item in items %}{{ item }}{% empty %}No items{% endfor %}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } #[test] fn test_parse_complex_if_elif() { let source = "{% if x > 0 %}Positive{% elif x < 0 %}Negative{% else %}Zero{% endif %}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } #[test] fn test_parse_nested_for_if() { let source = "{% for item in items %}{% if item.active %}{{ item.name }}{% endif %}{% endfor %}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } #[test] fn test_parse_mixed_content() { let source = "Welcome, {% if user.is_authenticated %} {{ user.name|title|default:'Guest' }} {% for group in user.groups %} {% if forloop.first %}({% endif %} {{ group.name }} {% if not forloop.last %}, {% endif %} {% if forloop.last %}){% endif %} {% empty %} (no groups) {% endfor %} {% else %} Guest {% endif %}!"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } } mod script { use super::*; #[test] fn test_parse_script() { let source = r#""#; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } } mod style { use super::*; #[test] fn test_parse_style() { let source = r#""#; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } } mod comments { use super::*; #[test] fn test_parse_comments() { let source = "{# Django comment #}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } } mod errors { use super::*; #[test] fn test_parse_unclosed_html_tag() { let source = "
"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert_eq!(ast.errors().len(), 0); } #[test] fn test_parse_unclosed_django_if() { let source = "{% if user.is_authenticated %}Welcome"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert_eq!(ast.errors().len(), 1); assert!(matches!(&ast.errors()[0], AstError::UnclosedTag(tag) if tag == "if")); } #[test] fn test_parse_unclosed_django_for() { let source = "{% for item in items %}{{ item.name }}"; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); assert_eq!(ast.errors().len(), 1); assert!(matches!(&ast.errors()[0], AstError::UnclosedTag(tag) if tag == "for")); } #[test] fn test_parse_unclosed_script() { let source = "
{% if user.is_authenticated %} {# Welcome message #}

Welcome, {{ user.name|title|default:'Guest' }}!

{% if user.is_staff %} Admin {% else %} User {% endif %} {% endif %}
"#; let tokens = Lexer::new(source).tokenize().unwrap(); let mut parser = Parser::new(tokens); let ast = parser.parse().unwrap(); insta::assert_yaml_snapshot!(ast); } } }