implement parser

This commit is contained in:
Josh Thomas 2024-10-16 09:37:04 -05:00
parent ff3de19aa5
commit 3e92f1d0e4
5 changed files with 829 additions and 0 deletions

440
src/ast.rs Normal file
View file

@ -0,0 +1,440 @@
use crate::error::{ASTError, NodeError};
use crate::token::Token;
/// One node of the tree built from a template.
///
/// Element-like variants own their children directly, so a tree is a plain
/// recursive value with no separate arena or indices.
#[derive(Debug, PartialEq)]
pub enum Node {
/// An HTML element carrying a tag name, its attributes and nested child nodes.
HTMLElement {
tag: String,
attributes: Vec<(String, AttributeValue)>,
children: Vec<Node>,
},
/// An HTML element that has a tag name and attributes but no child list.
HTMLVoidElement {
tag: String,
attributes: Vec<(String, AttributeValue)>,
},
/// An HTML comment; the payload is the comment text.
HTMLComment(String),
/// A doctype declaration; `doctype` holds its textual value.
HTMLDoctype {
doctype: String,
},
/// A script element: attributes plus its content kept as one string.
Script {
attributes: Vec<(String, AttributeValue)>,
content: String,
},
/// A style element: attributes plus its content kept as one string.
Style {
attributes: Vec<(String, AttributeValue)>,
content: String,
},
/// A Django variable; the payload is its textual content.
// NOTE(review): presumably the text of a `{{ ... }}` expression - confirm against the lexer.
DjangoVariable(String),
/// A Django block with a name, a list of raw argument strings and nested child nodes.
DjangoBlock {
name: String,
arguments: Vec<String>,
children: Vec<Node>,
},
/// A Django comment; the payload is the comment text.
DjangoComment(String),
/// Plain text content.
Text(String),
}
/// The value side of an attribute pair stored on element-like [`Node`] variants.
#[derive(Debug, PartialEq)]
pub enum AttributeValue {
/// An attribute with an explicit string value (the tests use `class` = `container`).
Value(String),
/// An attribute without a value (the tests use `disabled`).
Boolean,
}
/// Constructors for every [`Node`] variant.
///
/// All of them return `Result` so call sites look the same whether or not a
/// particular constructor can actually fail; only the element and block
/// constructors validate their input.
impl Node {
    /// Builds a [`Node::HTMLElement`].
    ///
    /// `attributes` and `children` default to empty vectors when `None`.
    ///
    /// # Errors
    ///
    /// Returns [`NodeError::NoTagName`] when `tag` is empty.
    pub fn new_html_element(
        tag: String,
        attributes: Option<Vec<(String, AttributeValue)>>,
        children: Option<Vec<Node>>,
    ) -> Result<Self, NodeError> {
        if tag.is_empty() {
            Err(NodeError::NoTagName)
        } else {
            Ok(Self::HTMLElement {
                tag,
                attributes: attributes.unwrap_or_default(),
                children: children.unwrap_or_default(),
            })
        }
    }

    /// Builds a [`Node::HTMLVoidElement`].
    ///
    /// `attributes` defaults to an empty vector when `None`.
    ///
    /// # Errors
    ///
    /// Returns [`NodeError::NoTagName`] when `tag` is empty.
    pub fn new_html_void_element(
        tag: String,
        attributes: Option<Vec<(String, AttributeValue)>>,
    ) -> Result<Self, NodeError> {
        if tag.is_empty() {
            Err(NodeError::NoTagName)
        } else {
            Ok(Self::HTMLVoidElement {
                tag,
                attributes: attributes.unwrap_or_default(),
            })
        }
    }

    /// Wraps `content` in a [`Node::HTMLComment`]. Never fails.
    pub fn new_html_comment(content: String) -> Result<Self, NodeError> {
        Ok(Self::HTMLComment(content))
    }

    /// Wraps `doctype` in a [`Node::HTMLDoctype`]. Never fails.
    pub fn new_html_doctype(doctype: String) -> Result<Self, NodeError> {
        Ok(Self::HTMLDoctype { doctype })
    }

    /// Builds a [`Node::Script`]; `attributes` defaults to empty. Never fails.
    pub fn new_script(
        attributes: Option<Vec<(String, AttributeValue)>>,
        content: String,
    ) -> Result<Self, NodeError> {
        Ok(Self::Script {
            attributes: attributes.unwrap_or_default(),
            content,
        })
    }

    /// Builds a [`Node::Style`]; `attributes` defaults to empty. Never fails.
    pub fn new_style(
        attributes: Option<Vec<(String, AttributeValue)>>,
        content: String,
    ) -> Result<Self, NodeError> {
        Ok(Self::Style {
            attributes: attributes.unwrap_or_default(),
            content,
        })
    }

    /// Wraps `content` in a [`Node::DjangoVariable`]. Never fails.
    pub fn new_django_variable(content: String) -> Result<Self, NodeError> {
        Ok(Self::DjangoVariable(content))
    }

    /// Builds a [`Node::DjangoBlock`].
    ///
    /// `arguments` and `children` default to empty vectors when `None`.
    ///
    /// # Errors
    ///
    /// Returns [`NodeError::NoBlockName`] when `name` is empty.
    pub fn new_django_block(
        name: String,
        arguments: Option<Vec<String>>,
        children: Option<Vec<Node>>,
    ) -> Result<Self, NodeError> {
        if name.is_empty() {
            Err(NodeError::NoBlockName)
        } else {
            Ok(Self::DjangoBlock {
                name,
                arguments: arguments.unwrap_or_default(),
                children: children.unwrap_or_default(),
            })
        }
    }

    /// Wraps `content` in a [`Node::DjangoComment`]. Never fails.
    pub fn new_django_comment(content: String) -> Result<Self, NodeError> {
        Ok(Self::DjangoComment(content))
    }

    /// Wraps `content` in a [`Node::Text`]. Never fails.
    pub fn new_text(content: String) -> Result<Self, NodeError> {
        Ok(Self::Text(content))
    }
}
/// Root container for the nodes produced from a template.
///
/// `Default` is derived so the type can be built with `AST::default()` as well
/// as [`AST::new`]; both yield an empty tree.
#[derive(Debug, Default, PartialEq)]
pub struct AST {
    /// Top-level nodes, in the order they were added via [`AST::add_node`].
    pub nodes: Vec<Node>,
}

impl AST {
    /// Creates a tree with no nodes.
    pub fn new() -> Self {
        Self::default()
    }

    /// Placeholder for turning a token into a node.
    ///
    /// The token is currently ignored and the tree is left untouched; the call
    /// always returns `Ok(())`.
    pub fn match_node(&self, _token: Token) -> Result<(), ASTError> {
        Ok(())
    }

    /// Appends `node` to the end of the top-level node list.
    pub fn add_node(&mut self, node: Node) {
        self.nodes.push(node);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `(name, AttributeValue::Value(value))` attribute pair.
    fn value_attr(name: &str, value: &str) -> (String, AttributeValue) {
        (name.to_string(), AttributeValue::Value(value.to_string()))
    }

    /// Spells out an `HTMLElement` with no attributes and no children, without
    /// going through the constructor under test.
    fn bare_element(tag: &str) -> Node {
        Node::HTMLElement {
            tag: tag.to_string(),
            attributes: Vec::new(),
            children: Vec::new(),
        }
    }

    #[test]
    fn test_new_html_element() {
        let node = Node::new_html_element("html".to_string(), None, None).unwrap();
        assert_eq!(node, bare_element("html"));
    }

    #[test]
    fn test_new_html_element_with_attributes_and_children() {
        let node = Node::new_html_element(
            "div".to_string(),
            Some(vec![
                value_attr("class", "container"),
                ("disabled".to_string(), AttributeValue::Boolean),
            ]),
            Some(vec![
                Node::new_html_element("div".to_string(), None, None).unwrap(),
                Node::new_html_element("span".to_string(), None, None).unwrap(),
            ]),
        )
        .unwrap();

        let expected = Node::HTMLElement {
            tag: "div".to_string(),
            attributes: vec![
                value_attr("class", "container"),
                ("disabled".to_string(), AttributeValue::Boolean),
            ],
            children: vec![bare_element("div"), bare_element("span")],
        };
        assert_eq!(node, expected);
    }

    #[test]
    fn test_new_html_element_empty_tag() {
        let error = Node::new_html_element(String::new(), None, None).unwrap_err();
        assert!(matches!(error, NodeError::NoTagName));
    }

    #[test]
    fn test_new_html_void_element() {
        let node = Node::new_html_void_element("html".to_string(), None).unwrap();
        assert_eq!(
            node,
            Node::HTMLVoidElement {
                tag: "html".to_string(),
                attributes: Vec::new(),
            }
        );
    }

    #[test]
    fn test_new_html_comment() {
        let node = Node::new_html_comment("A comment".to_string()).unwrap();
        assert_eq!(node, Node::HTMLComment("A comment".to_string()));
    }

    #[test]
    fn test_new_html_doctype() {
        let node = Node::new_html_doctype("html".to_string()).unwrap();
        assert_eq!(
            node,
            Node::HTMLDoctype {
                doctype: "html".to_string(),
            }
        );
    }

    #[test]
    fn test_new_script() {
        let node = Node::new_script(None, "console.log('hello');".to_string()).unwrap();
        assert_eq!(
            node,
            Node::Script {
                attributes: Vec::new(),
                content: "console.log('hello');".to_string(),
            }
        );
    }

    #[test]
    fn test_new_script_with_attributes() {
        let node = Node::new_script(
            Some(vec![value_attr("src", "javascript.js")]),
            "console.log('hello');".to_string(),
        )
        .unwrap();
        assert_eq!(
            node,
            Node::Script {
                attributes: vec![value_attr("src", "javascript.js")],
                content: "console.log('hello');".to_string(),
            }
        );
    }

    #[test]
    fn test_new_style() {
        let node = Node::new_style(None, "body { background-color: red; }".to_string()).unwrap();
        assert_eq!(
            node,
            Node::Style {
                attributes: Vec::new(),
                content: "body { background-color: red; }".to_string(),
            }
        );
    }

    #[test]
    fn test_new_style_with_attributes() {
        let node = Node::new_style(
            Some(vec![value_attr("media", "max-width: 500px")]),
            "body { background-color: red; }".to_string(),
        )
        .unwrap();
        assert_eq!(
            node,
            Node::Style {
                attributes: vec![value_attr("media", "max-width: 500px")],
                content: "body { background-color: red; }".to_string(),
            }
        );
    }

    #[test]
    fn test_new_django_variable() {
        let node = Node::new_django_variable("variable".to_string()).unwrap();
        assert_eq!(node, Node::DjangoVariable("variable".to_string()));
    }

    #[test]
    fn test_new_django_block() {
        let node = Node::new_django_block("dj_block".to_string(), None, None).unwrap();
        assert_eq!(
            node,
            Node::DjangoBlock {
                name: "dj_block".to_string(),
                arguments: Vec::new(),
                children: Vec::new(),
            }
        );
    }

    #[test]
    fn test_new_django_block_with_arguments_and_children() {
        let raw_arguments = ["arg1", "arg2=variable", "arg3='string'"];
        let node = Node::new_django_block(
            "dj_block".to_string(),
            Some(raw_arguments.iter().map(|arg| arg.to_string()).collect()),
            Some(vec![
                Node::new_html_element("div".to_string(), None, None).unwrap()
            ]),
        )
        .unwrap();

        let expected = Node::DjangoBlock {
            name: "dj_block".to_string(),
            arguments: vec![
                "arg1".to_string(),
                "arg2=variable".to_string(),
                "arg3='string'".to_string(),
            ],
            children: vec![bare_element("div")],
        };
        assert_eq!(node, expected);
    }

    #[test]
    fn test_new_django_block_empty_name() {
        let error = Node::new_django_block(String::new(), None, None).unwrap_err();
        assert!(matches!(error, NodeError::NoBlockName));
    }

    #[test]
    fn test_new_django_comment() {
        let node = Node::new_django_comment("A comment".to_string()).unwrap();
        assert_eq!(node, Node::DjangoComment("A comment".to_string()));
    }

    #[test]
    fn test_new_text() {
        let node = Node::new_text("Some text".to_string()).unwrap();
        assert_eq!(node, Node::Text("Some text".to_string()));
    }
}

View file

@ -40,3 +40,28 @@ pub enum TokenError {
#[error("cannot call size, token type has dynamic size")]
DynamicTokenSize,
}
/// Validation failures reported by the `Node` constructors.
#[derive(Error, Debug)]
pub enum NodeError {
/// An element constructor was given an empty tag name.
#[error("Tag name cannot be empty")]
NoTagName,
/// The Django block constructor was given an empty block name.
#[error("Block name cannot be empty")]
NoBlockName,
}
/// Errors surfaced by the parser while walking its token list.
#[derive(Error, Debug)]
pub enum ParserError {
/// A token was requested but the token list holds no tokens at all.
#[error("Token stream is empty")]
EmptyTokenStream,
/// A lookup was classified as falling before the start of the token list.
#[error("At beginning of token stream")]
AtBeginningOfStream,
/// A lookup used an index at or beyond the length of the token list.
#[error("At end of token stream")]
AtEndOfStream,
/// Fallback for a failed lookup that matches none of the cases above.
#[error("Invalid token access")]
InvalidTokenAccess,
/// Wraps an `ASTError`; `#[from]` lets `?` convert it automatically.
#[error("AST error: {0}")]
ASTError(#[from] ASTError),
}
/// Errors raised while building the AST.
///
/// The enum has no variants yet, so no value of this type can currently be
/// constructed.
#[derive(Error, Debug)]
pub enum ASTError {}

View file

@ -1,15 +1,20 @@
mod ast;
mod error;
mod lexer;
mod parser;
mod scanner;
mod token;
use lexer::Lexer;
use parser::Parser;
use std::error::Error;
/// Runs the front end (lexing, then parsing) over `template`.
///
/// Work in progress: once both stages succeed, the token list and the AST are
/// printed with `{:?}` and the function hits `todo!`, so it never returns `Ok`
/// yet.
///
/// # Errors
///
/// Propagates any error returned by the lexer or the parser, boxed.
///
/// # Panics
///
/// Always panics via `todo!` when lexing and parsing both succeed.
pub fn compile(template: &str) -> Result<String, Box<dyn Error>> {
    let mut lexer = Lexer::new(template);
    let tokens = lexer.tokenize()?;

    // The parser takes ownership of its input, so it gets a copy and the
    // original list stays available for the debug dump below.
    let mut parser = Parser::new(tokens.clone());
    let ast = parser.parse()?;

    println!("{:?}", tokens);
    println!("{:?}", ast);

    todo!("Implement compilation process")
}

288
src/parser.rs Normal file
View file

@ -0,0 +1,288 @@
use crate::ast::AST;
use crate::error::ParserError;
use crate::scanner::{ParserState, Scanner};
use crate::token::{Token, TokenType};
/// Walks a list of tokens with a cursor and produces an `AST`.
pub struct Parser<'a> {
// The full token list handed to `Parser::new`.
tokens: Vec<Token<'a>>,
// Cursor state; `state.current` is the index of the token returned by `peek`.
state: ParserState,
}
impl<'a> Parser<'a> {
    /// Creates a parser over `tokens` with a freshly created cursor state.
    pub fn new(tokens: Vec<Token<'a>>) -> Self {
        Parser {
            tokens,
            state: ParserState::new(),
        }
    }

    /// Consumes tokens until the cursor reaches the end of the stream and
    /// returns the resulting tree.
    ///
    /// Every token is passed to `AST::match_node`. That call does not yield a
    /// node, so the returned tree has no nodes added by this loop.
    ///
    /// # Errors
    ///
    /// Propagates errors from advancing the cursor, and any `ASTError` from
    /// `match_node` (converted into `ParserError`).
    pub fn parse(&mut self) -> Result<AST, ParserError> {
        let ast = AST::new();
        while !self.is_at_end() {
            let token = self.advance()?;
            // Nothing to add to the tree: `match_node` returns `()`.
            ast.match_node(token)?;
        }
        Ok(ast)
    }
}
/// Cursor operations over the parser's token list.
///
/// `state.current` is the index of the token `peek` returns. Every accessor
/// hands back a clone of the token, so callers never borrow from the parser.
impl<'a> Scanner for Parser<'a> {
    type Item = Token<'a>;
    type Error = ParserError;

    /// Returns the current token and moves the cursor one step forward.
    ///
    /// The cursor stops moving once `is_at_end` is true, so repeated calls at
    /// the end keep returning the same final token.
    fn advance(&mut self) -> Result<Self::Item, Self::Error> {
        let current_token = self.peek()?;
        if !self.is_at_end() {
            self.state.current += 1;
        }
        Ok(current_token)
    }

    /// Returns the token under the cursor without moving it.
    fn peek(&self) -> Result<Self::Item, Self::Error> {
        self.item_at(self.state.current)
    }

    /// Returns the token one position after the cursor without moving it.
    fn peek_next(&self) -> Result<Self::Item, Self::Error> {
        self.item_at(self.state.current + 1)
    }

    /// Returns the token one position before the cursor without moving it.
    ///
    /// # Errors
    ///
    /// With the cursor at index 0 there is no previous token: returns
    /// `ParserError::EmptyTokenStream` when there are no tokens at all and
    /// `ParserError::AtBeginningOfStream` otherwise.
    fn peek_previous(&self) -> Result<Self::Item, Self::Error> {
        // `current - 1` would underflow at index 0 (a panic with overflow
        // checks on, a wrapped index otherwise), so step back checked.
        match self.state.current.checked_sub(1) {
            Some(previous) => self.item_at(previous),
            None if self.tokens.is_empty() => Err(ParserError::EmptyTokenStream),
            None => Err(ParserError::AtBeginningOfStream),
        }
    }

    /// Returns a clone of the token at `index`.
    ///
    /// # Errors
    ///
    /// When `index` is out of range, the error is chosen in this order:
    /// `EmptyTokenStream` if there are no tokens, `AtBeginningOfStream` if
    /// `index` is below the cursor, `AtEndOfStream` if `index` is at or past
    /// the token count, and `InvalidTokenAccess` as the fallback.
    fn item_at(&self, index: usize) -> Result<Self::Item, Self::Error> {
        if let Some(token) = self.tokens.get(index) {
            Ok(token.clone())
        } else {
            let error = if self.tokens.is_empty() {
                ParserError::EmptyTokenStream
            } else if index < self.state.current {
                ParserError::AtBeginningOfStream
            } else if index >= self.tokens.len() {
                ParserError::AtEndOfStream
            } else {
                ParserError::InvalidTokenAccess
            };
            Err(error)
        }
    }

    /// Reports whether the cursor sits on an `Eof` token.
    ///
    /// A failed `peek` (for example on an empty token list) also counts as
    /// being at the end.
    fn is_at_end(&self) -> bool {
        self.peek()
            .map(|token| token.token_type == TokenType::Eof)
            .unwrap_or(true)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture shared by the tests: two text tokens followed by `Eof`.
    fn sample_tokens() -> Vec<Token<'static>> {
        vec![
            Token::new(TokenType::Text, "Text", 1),
            Token::new(TokenType::Text, "More Text", 2),
            Token::new(TokenType::Eof, "", 3),
        ]
    }

    #[test]
    fn test_parser_new() {
        let parser = Parser::new(sample_tokens());
        assert_eq!(parser.tokens.len(), 3);
        assert_eq!(parser.state.current, 0);
    }

    #[test]
    fn test_parser_parse() {
        let mut parser = Parser::new(sample_tokens());
        let ast = parser.parse().unwrap();
        assert_eq!(parser.state.current, 2);
        assert_eq!(ast.nodes, vec![]);
    }

    #[test]
    fn test_scanner_advance() {
        let expected = sample_tokens();
        let mut parser = Parser::new(expected.clone());
        assert_eq!(parser.state.current, 0);

        // (index of the token returned, cursor position after the call);
        // the last step shows the cursor staying on `Eof`.
        for (returned, cursor_after) in [(0, 1), (1, 2), (2, 2)] {
            let token = parser.advance().unwrap();
            assert_eq!(parser.state.current, cursor_after);
            assert_eq!(token, expected[returned]);
        }
    }

    #[test]
    fn test_scanner_advance_is_at_end() {
        let expected = sample_tokens();
        let mut parser = Parser::new(expected.clone());
        assert_eq!(parser.state.current, 0);

        parser.state.current += 2;
        let eof = parser.tokens.get(parser.state.current).unwrap().clone();
        assert_eq!(eof, expected[2]);

        // TODO: should this error?
        let advanced = parser.advance().unwrap();
        assert_eq!(advanced, eof);
    }

    #[test]
    fn test_scanner_peek() {
        let expected = sample_tokens();
        let mut parser = Parser::new(expected.clone());
        assert_eq!(parser.peek().unwrap(), expected[0]);

        parser.state.current += 2;
        assert_eq!(parser.peek().unwrap(), expected[2]);
    }

    #[test]
    fn test_scanner_peek_next() {
        let expected = sample_tokens();
        let mut parser = Parser::new(expected.clone());

        for cursor in [0, 1] {
            parser.state.current = cursor;
            assert_eq!(parser.peek().unwrap(), expected[cursor]);
            assert_eq!(parser.peek_next().unwrap(), expected[cursor + 1]);
        }
    }

    #[test]
    fn test_scanner_peek_previous() {
        let expected = sample_tokens();
        let mut parser = Parser::new(expected.clone());

        for cursor in [2, 1] {
            parser.state.current = cursor;
            assert_eq!(parser.peek().unwrap(), expected[cursor]);
            assert_eq!(parser.peek_previous().unwrap(), expected[cursor - 1]);
        }
    }

    #[test]
    #[should_panic]
    fn test_scanner_peek_previous_at_beginning() {
        let parser = Parser::new(sample_tokens());
        assert_eq!(parser.state.current, 0);
        parser.peek_previous().unwrap();
    }

    #[test]
    fn test_scanner_item_at() {
        let expected = sample_tokens();
        let parser = Parser::new(expected.clone());
        for (index, token) in expected.iter().enumerate() {
            assert_eq!(&parser.item_at(index).unwrap(), token);
        }
    }

    #[test]
    #[should_panic]
    fn test_scanner_item_at_empty() {
        let parser = Parser::new(Vec::new());
        // Unwrapping the error from an empty stream is what panics.
        parser.item_at(0).unwrap();
    }

    #[test]
    #[should_panic]
    fn test_scanner_item_at_end_of_input() {
        let parser = Parser::new(sample_tokens());
        // Index 3 is one past the last token.
        parser.item_at(3).unwrap();
    }

    #[test]
    fn test_scanner_is_at_end() {
        let mut parser = Parser::new(sample_tokens());
        assert!(!parser.is_at_end());

        parser.state.current = 2;
        assert!(parser.is_at_end());
    }
}

View file

@ -1,5 +1,76 @@
use std::fmt::Debug;
use crate::error::ScannerError;
/// Position bookkeeping for a lexer working through an input of known length.
// NOTE(review): whether positions count bytes or characters is not visible here - confirm against the lexer.
pub struct LexerState {
// Position saved by `prepare` (a copy of `current` at that moment).
start: usize,
// Current position; moved forward by `advance`.
current: usize,
// Line counter; starts at 1 and is increased by `advance`.
line: usize,
// Total input length; `is_at_end` compares `current` against it.
length: usize,
}
impl LexerState {
    /// Creates the state for an input of `length` units, positioned at the
    /// start and on line 1.
    pub fn new(length: usize) -> Self {
        Self {
            start: 0,
            current: 0,
            line: 1,
            length,
        }
    }

    /// Records the current position as the new `start`.
    pub fn prepare(&mut self) {
        self.start = self.current;
    }

    /// Moves the cursor forward by `chars` and the line counter by `lines`.
    pub fn advance(&mut self, chars: usize, lines: usize) {
        self.current += chars;
        self.line += lines;
    }

    /// Index one past the current position.
    pub fn next(&self) -> usize {
        self.current + 1
    }

    /// Index of the current position.
    pub fn current(&self) -> usize {
        self.current
    }

    /// Index one before the current position.
    ///
    /// # Panics
    ///
    /// At position 0 the subtraction underflows: it panics when overflow
    /// checks are enabled and wraps around otherwise.
    pub fn previous(&self) -> usize {
        self.current - 1
    }

    /// Current value of the line counter.
    pub fn current_line(&self) -> usize {
        self.line
    }

    /// Line counter once the whole input has been consumed.
    ///
    /// # Errors
    ///
    /// Returns `ScannerError::NotAtEnd` while the cursor is still before the
    /// end of the input.
    pub fn last_line(&self) -> Result<usize, ScannerError> {
        if !self.is_at_end() {
            return Err(ScannerError::NotAtEnd);
        }
        Ok(self.line)
    }

    /// Total length the state was created with.
    pub fn length(&self) -> usize {
        self.length
    }

    /// True when the cursor has reached or passed `length`.
    pub fn is_at_end(&self) -> bool {
        self.length <= self.current
    }
}
/// Cursor state for a parser walking a token list.
///
/// `Default` yields the same value as [`ParserState::new`]: a cursor at
/// index 0.
#[derive(Debug, Default)]
pub struct ParserState {
    /// Index of the token the parser is currently positioned on.
    pub current: usize,
}

impl ParserState {
    /// Creates a state positioned at the first token (index 0).
    pub fn new() -> Self {
        Self::default()
    }
}
pub trait Scanner {
type Item: Debug;
type Error: Debug + std::error::Error;