Allow parsing to continue after errors instead of halting

This commit is contained in:
Josh Thomas 2025-01-04 17:30:17 -06:00
parent a4b6742c6a
commit 50c451ec86
8 changed files with 136 additions and 112 deletions

View file

@ -136,20 +136,20 @@ pub type Attributes = BTreeMap<String, AttributeValue>;
pub enum AstError { pub enum AstError {
#[error("Empty AST")] #[error("Empty AST")]
EmptyAst, EmptyAst,
#[error("Stream error: {0}")] #[error("Empty tag")]
StreamError(String), EmptyTag,
#[error("Unclosed tag: {0}")] #[error("unclosed tag: {0}")]
UnclosedTag(String), UnclosedTag(String),
#[error("Unexpected tag: {0}")] #[error("unexpected tag: {0}")]
UnexpectedTag(String), UnexpectedTag(String),
#[error("Invalid tag: {0}")] #[error("invalid tag: {0}")]
InvalidTag(String), InvalidTag(String),
#[error("Block error: {0} in {1}")] #[error("block error: {0} in {1}")]
BlockError(String, String), BlockError(String, String),
#[error("Argument error: {0} - {1}")] #[error("stream error: {0}")]
StreamError(String),
#[error("token error: {0}")]
TokenError(String),
#[error("argument error: {0} - {1}")]
ArgumentError(String, String), ArgumentError(String, String),
#[error("Unexpected token")]
UnexpectedToken,
#[error("Unexpected end of file")]
UnexpectedEof,
} }

View file

@ -27,33 +27,37 @@ impl Parser {
ast.add_node(node); ast.add_node(node);
had_nodes = true; had_nodes = true;
} }
Err(ParserError::StreamError { kind }) if kind == *"AtEnd" => { Err(ParserError::Ast(AstError::StreamError(kind))) if kind == "AtEnd" => {
if !had_nodes { if !had_nodes {
return Err(ParserError::stream_error("UnexpectedEof")); return Ok(ast.finalize()?);
} }
break; break;
} }
Err(ParserError::ErrorSignal(Signal::SpecialTag(_))) => { Err(ParserError::ErrorSignal(Signal::SpecialTag(_))) => {
continue; continue;
} }
Err(ParserError::UnclosedTag(tag)) => { Err(ParserError::Ast(err @ AstError::UnclosedTag(_))) => {
return Err(ParserError::UnclosedTag(tag)); ast.add_error(err);
}
Err(_) => {
self.synchronize()?; self.synchronize()?;
continue; continue;
} }
Err(ParserError::Ast(err)) => {
ast.add_error(err);
self.synchronize()?;
continue;
}
Err(err) => return Err(err),
} }
} }
if !had_nodes { Ok(ast.finalize()?)
return Err(ParserError::stream_error("UnexpectedEof"));
}
ast.finalize()?;
Ok(ast)
} }
fn next_node(&mut self) -> Result<Node, ParserError> { fn next_node(&mut self) -> Result<Node, ParserError> {
if self.is_at_end() {
return Err(ParserError::Ast(AstError::StreamError("AtEnd".to_string())));
}
let token = self.consume()?; let token = self.consume()?;
let node = match token.token_type() { let node = match token.token_type() {
TokenType::Comment(s, start, end) => self.parse_comment(s, start, end.as_deref()), TokenType::Comment(s, start, end) => self.parse_comment(s, start, end.as_deref()),
@ -61,9 +65,9 @@ impl Parser {
TokenType::DjangoVariable(s) => self.parse_django_variable(s), TokenType::DjangoVariable(s) => self.parse_django_variable(s),
TokenType::Eof => { TokenType::Eof => {
if self.is_at_end() { if self.is_at_end() {
self.next_node() Err(ParserError::Ast(AstError::StreamError("AtEnd".to_string())))
} else { } else {
Err(ParserError::stream_error("UnexpectedEof")) self.next_node()
} }
} }
TokenType::HtmlTagClose(tag) => { TokenType::HtmlTagClose(tag) => {
@ -232,14 +236,14 @@ impl Parser {
} }
} }
} }
return Err(ParserError::UnexpectedTag(tag)); return Err(ParserError::unexpected_tag(tag));
} }
Err(e) => return Err(e), Err(e) => return Err(e),
} }
} }
// never found the closing tag // never found the closing tag
Err(ParserError::UnclosedTag(tag_name)) Err(ParserError::Ast(AstError::UnclosedTag(tag_name)))
} }
fn parse_django_variable(&mut self, s: &str) -> Result<Node, ParserError> { fn parse_django_variable(&mut self, s: &str) -> Result<Node, ParserError> {
@ -275,29 +279,23 @@ impl Parser {
let tag_name = parts let tag_name = parts
.next() .next()
.ok_or(ParserError::stream_error("InvalidAccess"))? .ok_or(ParserError::Ast(AstError::EmptyTag))?
.to_string(); .to_string();
if tag_name.to_lowercase() == "!doctype" { if tag_name.to_lowercase() == "!doctype" {
return Ok(Node::Html(HtmlNode::Doctype(tag_name))); return Ok(Node::Html(HtmlNode::Doctype("!DOCTYPE html".to_string())));
} }
let mut attributes = BTreeMap::new(); let mut attributes = BTreeMap::new();
for attr in parts { for attr in parts {
if let Some((key, value)) = attr.split_once('=') { if let Some((key, value)) = parse_attribute(attr)? {
// Key-value attribute (class="container") attributes.insert(key, value);
attributes.insert(
key.to_string(),
AttributeValue::Value(value.trim_matches('"').to_string()),
);
} else {
// Boolean attribute (disabled)
attributes.insert(attr.to_string(), AttributeValue::Boolean);
} }
} }
let mut children = Vec::new(); let mut children = Vec::new();
let mut found_closing_tag = false;
while !self.is_at_end() { while !self.is_at_end() {
match self.next_node() { match self.next_node() {
@ -306,6 +304,7 @@ impl Parser {
} }
Err(ParserError::ErrorSignal(Signal::ClosingTagFound(tag))) => { Err(ParserError::ErrorSignal(Signal::ClosingTagFound(tag))) => {
if tag == tag_name { if tag == tag_name {
found_closing_tag = true;
self.consume()?; self.consume()?;
break; break;
} }
@ -314,6 +313,12 @@ impl Parser {
} }
} }
if !found_closing_tag {
return Err(ParserError::Ast(AstError::UnclosedTag(
tag_name.to_string(),
)));
}
Ok(Node::Html(HtmlNode::Element { Ok(Node::Html(HtmlNode::Element {
tag_name, tag_name,
attributes, attributes,
@ -326,19 +331,14 @@ impl Parser {
let tag_name = parts let tag_name = parts
.next() .next()
.ok_or(ParserError::stream_error("InvalidAccess"))? .ok_or(ParserError::Ast(AstError::EmptyTag))?
.to_string(); .to_string();
let mut attributes = BTreeMap::new(); let mut attributes = BTreeMap::new();
for attr in parts { for attr in parts {
if let Some((key, value)) = attr.split_once('=') { if let Some((key, value)) = parse_attribute(attr)? {
attributes.insert( attributes.insert(key, value);
key.to_string(),
AttributeValue::Value(value.trim_matches('"').to_string()),
);
} else {
attributes.insert(attr.to_string(), AttributeValue::Boolean);
} }
} }
@ -353,19 +353,14 @@ impl Parser {
let _tag_name = parts let _tag_name = parts
.next() .next()
.ok_or(ParserError::stream_error("InvalidAccess"))? .ok_or(ParserError::Ast(AstError::EmptyTag))?
.to_string(); .to_string();
let mut attributes = BTreeMap::new(); let mut attributes = BTreeMap::new();
for attr in parts { for attr in parts {
if let Some((key, value)) = attr.split_once('=') { if let Some((key, value)) = parse_attribute(attr)? {
attributes.insert( attributes.insert(key, value);
key.to_string(),
AttributeValue::Value(value.trim_matches('"').to_string()),
);
} else {
attributes.insert(attr.to_string(), AttributeValue::Boolean);
} }
} }
@ -389,7 +384,9 @@ impl Parser {
} }
if !found_closing_tag { if !found_closing_tag {
return Err(ParserError::unclosed_tag("script")); return Err(ParserError::Ast(AstError::UnclosedTag(
"script".to_string(),
)));
} }
Ok(Node::Script(ScriptNode::Element { Ok(Node::Script(ScriptNode::Element {
@ -404,13 +401,8 @@ impl Parser {
let mut attributes = BTreeMap::new(); let mut attributes = BTreeMap::new();
for attr in parts { for attr in parts {
if let Some((key, value)) = attr.split_once('=') { if let Some((key, value)) = parse_attribute(attr)? {
attributes.insert( attributes.insert(key, value);
key.to_string(),
AttributeValue::Value(value.trim_matches('"').to_string()),
);
} else {
attributes.insert(attr.to_string(), AttributeValue::Boolean);
} }
} }
@ -434,7 +426,7 @@ impl Parser {
} }
if !found_closing_tag { if !found_closing_tag {
return Err(ParserError::unclosed_tag("style")); return Err(ParserError::Ast(AstError::UnclosedTag("style".to_string())));
} }
Ok(Node::Style(StyleNode::Element { Ok(Node::Style(StyleNode::Element {
@ -536,66 +528,58 @@ pub enum Signal {
ClosingTag, ClosingTag,
} }
/// Splits one raw attribute token into an attribute name and its value.
///
/// A token containing `=` is split at the first `=`: the text before it is the name, and the
/// text after it, with any leading and trailing `"` characters stripped, is stored as an
/// `AttributeValue::Value`. A token without `=` is kept whole as the name of an
/// `AttributeValue::Boolean`.
///
/// Every path through this function currently yields `Ok(Some(..))`.
fn parse_attribute(attr: &str) -> Result<Option<(String, AttributeValue)>, ParserError> {
    let parsed = match attr.split_once('=') {
        // Key-value form, e.g. `class="container"`.
        Some((name, raw)) => {
            let unquoted = raw.trim_matches('"');
            (name.to_string(), AttributeValue::Value(unquoted.to_string()))
        }
        // Bare form, e.g. `disabled`.
        None => (attr.to_string(), AttributeValue::Boolean),
    };
    Ok(Some(parsed))
}
#[derive(Error, Debug)] #[derive(Error, Debug)]
pub enum ParserError { pub enum ParserError {
#[error("unclosed tag: {0}")] #[error(transparent)]
UnclosedTag(String), Ast(#[from] AstError),
#[error("unexpected tag: {0}")]
UnexpectedTag(String),
#[error("invalid tag: {kind}")]
InvalidTag { kind: String },
#[error("block error: {kind} {name}")]
BlockError { kind: String, name: String },
#[error("stream error: {kind}")]
StreamError { kind: String },
#[error("token error: expected {expected}, got {actual:?}")]
TokenError { expected: String, actual: Token },
#[error("argument error: {kind} {details}")]
ArgumentError { kind: String, details: String },
#[error("multi-line comment outside of script or style context")] #[error("multi-line comment outside of script or style context")]
InvalidMultiLineComment, InvalidMultiLineComment,
#[error(transparent)]
AstError(#[from] AstError),
#[error("internal signal: {0:?}")] #[error("internal signal: {0:?}")]
ErrorSignal(Signal), ErrorSignal(Signal),
} }
impl ParserError { impl ParserError {
pub fn unclosed_tag(tag: impl Into<String>) -> Self { pub fn unclosed_tag(tag: impl Into<String>) -> Self {
Self::UnclosedTag(tag.into()) Self::Ast(AstError::UnclosedTag(tag.into()))
} }
pub fn unexpected_tag(tag: impl Into<String>) -> Self { pub fn unexpected_tag(tag: impl Into<String>) -> Self {
Self::UnexpectedTag(tag.into()) Self::Ast(AstError::UnexpectedTag(tag.into()))
} }
pub fn invalid_tag(kind: impl Into<String>) -> Self { pub fn invalid_tag(kind: impl Into<String>) -> Self {
Self::InvalidTag { kind: kind.into() } Self::Ast(AstError::InvalidTag(kind.into()))
} }
pub fn block_error(kind: impl Into<String>, name: impl Into<String>) -> Self { pub fn block_error(kind: impl Into<String>, name: impl Into<String>) -> Self {
Self::BlockError { Self::Ast(AstError::BlockError(kind.into(), name.into()))
kind: kind.into(),
name: name.into(),
}
} }
pub fn stream_error(kind: impl Into<String>) -> Self { pub fn stream_error(kind: impl Into<String>) -> Self {
Self::StreamError { kind: kind.into() } Self::Ast(AstError::StreamError(kind.into()))
} }
pub fn token_error(expected: impl Into<String>, actual: Token) -> Self { pub fn token_error(expected: impl Into<String>, actual: Token) -> Self {
Self::TokenError { Self::Ast(AstError::TokenError(format!(
expected: expected.into(), "expected {}, got {:?}",
actual, expected.into(),
} actual
)))
} }
pub fn argument_error(kind: impl Into<String>, details: impl Into<String>) -> Self { pub fn argument_error(kind: impl Into<String>, details: impl Into<String>) -> Self {
Self::ArgumentError { Self::Ast(AstError::ArgumentError(kind.into(), details.into()))
kind: kind.into(),
details: details.into(),
}
} }
} }
@ -769,15 +753,14 @@ mod tests {
use super::*; use super::*;
#[test] #[test]
fn test_parse_unexpected_eof() { fn test_parse_unclosed_html_tag() {
let source = "<div>\n"; let source = "<div>";
let tokens = Lexer::new(source).tokenize().unwrap(); let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens); let mut parser = Parser::new(tokens);
let ast = parser.parse(); let ast = parser.parse().unwrap();
assert!(matches!( insta::assert_yaml_snapshot!(ast);
ast, assert_eq!(ast.errors().len(), 1);
Err(ParserError::StreamError { kind }) if kind == "UnexpectedEof" assert!(matches!(&ast.errors()[0], AstError::UnclosedTag(tag) if tag == "div"));
));
} }
#[test] #[test]
@ -785,9 +768,10 @@ mod tests {
let source = "{% if user.is_authenticated %}Welcome"; let source = "{% if user.is_authenticated %}Welcome";
let tokens = Lexer::new(source).tokenize().unwrap(); let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens); let mut parser = Parser::new(tokens);
let result = parser.parse(); let ast = parser.parse().unwrap();
println!("Error: {:?}", result); insta::assert_yaml_snapshot!(ast);
assert!(matches!(result, Err(ParserError::UnclosedTag(tag)) if tag == "if")); assert_eq!(ast.errors().len(), 1);
assert!(matches!(&ast.errors()[0], AstError::UnclosedTag(tag) if tag == "if"));
} }
#[test] #[test]
@ -795,9 +779,21 @@ mod tests {
let source = "{% for item in items %}{{ item.name }}"; let source = "{% for item in items %}{{ item.name }}";
let tokens = Lexer::new(source).tokenize().unwrap(); let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens); let mut parser = Parser::new(tokens);
let result = parser.parse(); let ast = parser.parse().unwrap();
println!("Error: {:?}", result); insta::assert_yaml_snapshot!(ast);
assert!(matches!(result, Err(ParserError::UnclosedTag(tag)) if tag == "for")); assert_eq!(ast.errors().len(), 1);
assert!(matches!(&ast.errors()[0], AstError::UnclosedTag(tag) if tag == "for"));
}
#[test]
fn test_parse_unclosed_script() {
let source = "<script>console.log('test');";
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
assert_eq!(ast.errors().len(), 1);
assert!(matches!(&ast.errors()[0], AstError::UnclosedTag(tag) if tag == "script"));
} }
#[test] #[test]
@ -805,9 +801,9 @@ mod tests {
let source = "<style>body { color: blue; "; let source = "<style>body { color: blue; ";
let tokens = Lexer::new(source).tokenize().unwrap(); let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens); let mut parser = Parser::new(tokens);
let result = parser.parse(); let result = parser.parse().unwrap();
println!("Error: {:?}", result); assert_eq!(result.errors().len(), 1);
assert!(matches!(result, Err(ParserError::UnclosedTag(tag)) if tag == "style")); assert!(matches!(&result.errors()[0], AstError::UnclosedTag(tag) if tag == "style"));
} }
} }

View file

@ -0,0 +1,7 @@
---
source: crates/djls-template-ast/src/parser.rs
expression: ast
---
nodes: []
errors:
- UnclosedTag: for

View file

@ -0,0 +1,7 @@
---
source: crates/djls-template-ast/src/parser.rs
expression: ast
---
nodes: []
errors:
- UnclosedTag: if

View file

@ -0,0 +1,7 @@
---
source: crates/djls-template-ast/src/parser.rs
expression: ast
---
nodes: []
errors:
- UnclosedTag: div

View file

@ -0,0 +1,7 @@
---
source: crates/djls-template-ast/src/parser.rs
expression: ast
---
nodes: []
errors:
- UnclosedTag: script

View file

@ -4,7 +4,7 @@ expression: ast
--- ---
nodes: nodes:
- Html: - Html:
Doctype: "!DOCTYPE" Doctype: "!DOCTYPE html"
- Html: - Html:
Element: Element:
tag_name: html tag_name: html

View file

@ -4,5 +4,5 @@ expression: ast
--- ---
nodes: nodes:
- Html: - Html:
Doctype: "!DOCTYPE" Doctype: "!DOCTYPE html"
errors: [] errors: []