create djls-ast crate and implement lexer and parser (#8)

Josh Thomas 2024-12-09 17:48:24 -06:00 committed by GitHub
parent 81199d1699
commit f96b3da951
20 changed files with 2255 additions and 0 deletions

Cargo.toml

@@ -4,6 +4,7 @@ resolver = "2"
[workspace.dependencies]
djls = { path = "crates/djls" }
djls-ast = { path = "crates/djls-ast" }
djls-django = { path = "crates/djls-django" }
djls-python = { path = "crates/djls-python" }

crates/djls-ast/Cargo.toml Normal file

@@ -0,0 +1,16 @@
[package]
name = "djls-ast"
version = "0.0.0"
edition = "2021"
[dependencies]
serde = { workspace = true }
thiserror = "2.0.6"
[dev-dependencies]
insta = { version = "1.41.1", features = ["yaml"] }
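# Build insta (and its diffing engine, similar) with optimizations even in dev
# builds; unoptimized snapshot diffing is noticeably slow. This mirrors the
# profile settings insta's documentation suggests.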
[profile.dev.package]
insta.opt-level = 3
similar.opt-level = 3

crates/djls-ast/src/ast.rs Normal file

@@ -0,0 +1,225 @@
use serde::Serialize;
use std::collections::BTreeMap;
use std::str::FromStr;
use thiserror::Error;
#[derive(Clone, Debug, Default, Serialize)]
pub struct Ast {
nodes: Vec<Node>,
}
impl Ast {
pub fn nodes(&self) -> &Vec<Node> {
&self.nodes
}
pub fn add_node(&mut self, node: Node) {
self.nodes.push(node);
}
pub fn finalize(&mut self) -> Result<Ast, AstError> {
if self.nodes.is_empty() {
return Err(AstError::EmptyAst);
}
Ok(self.clone())
}
}
#[derive(Clone, Debug, Serialize)]
pub enum Node {
Django(DjangoNode),
Html(HtmlNode),
Script(ScriptNode),
Style(StyleNode),
Text(String),
}
#[derive(Clone, Debug, Serialize)]
pub enum DjangoNode {
Comment(String),
Tag {
kind: DjangoTagKind,
bits: Vec<String>,
children: Vec<Node>,
},
Variable {
bits: Vec<String>,
filters: Vec<DjangoFilter>,
},
}
#[derive(Clone, Debug, Serialize)]
pub enum DjangoTagKind {
Autoescape,
Block,
Comment,
CsrfToken,
Cycle,
Debug,
Elif,
Else,
Empty,
Extends,
Filter,
FirstOf,
For,
If,
IfChanged,
Include,
Load,
Lorem,
Now,
Other(String),
Querystring, // added in Django 5.1
Regroup,
ResetCycle,
Spaceless,
TemplateTag,
Url,
Verbatim,
WidthRatio,
With,
}
impl DjangoTagKind {
const AUTOESCAPE: &'static str = "autoescape";
const BLOCK: &'static str = "block";
const COMMENT: &'static str = "comment";
const CSRF_TOKEN: &'static str = "csrf_token";
const CYCLE: &'static str = "cycle";
const DEBUG: &'static str = "debug";
const ELIF: &'static str = "elif";
const ELSE: &'static str = "else";
const EMPTY: &'static str = "empty";
const EXTENDS: &'static str = "extends";
const FILTER: &'static str = "filter";
const FIRST_OF: &'static str = "firstof";
const FOR: &'static str = "for";
const IF: &'static str = "if";
const IF_CHANGED: &'static str = "ifchanged";
const INCLUDE: &'static str = "include";
const LOAD: &'static str = "load";
const LOREM: &'static str = "lorem";
const NOW: &'static str = "now";
const QUERYSTRING: &'static str = "querystring";
const REGROUP: &'static str = "regroup";
const RESET_CYCLE: &'static str = "resetcycle";
const SPACELESS: &'static str = "spaceless";
const TEMPLATE_TAG: &'static str = "templatetag";
const URL: &'static str = "url";
const VERBATIM: &'static str = "verbatim";
const WIDTH_RATIO: &'static str = "widthratio";
const WITH: &'static str = "with";
}
impl FromStr for DjangoTagKind {
type Err = AstError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if s.is_empty() {
return Err(AstError::EmptyTag);
}
match s {
Self::AUTOESCAPE => Ok(Self::Autoescape),
Self::BLOCK => Ok(Self::Block),
Self::COMMENT => Ok(Self::Comment),
Self::CSRF_TOKEN => Ok(Self::CsrfToken),
Self::CYCLE => Ok(Self::Cycle),
Self::DEBUG => Ok(Self::Debug),
Self::ELIF => Ok(Self::Elif),
Self::ELSE => Ok(Self::Else),
Self::EMPTY => Ok(Self::Empty),
Self::EXTENDS => Ok(Self::Extends),
Self::FILTER => Ok(Self::Filter),
Self::FIRST_OF => Ok(Self::FirstOf),
Self::FOR => Ok(Self::For),
Self::IF => Ok(Self::If),
Self::IF_CHANGED => Ok(Self::IfChanged),
Self::INCLUDE => Ok(Self::Include),
Self::LOAD => Ok(Self::Load),
Self::LOREM => Ok(Self::Lorem),
Self::NOW => Ok(Self::Now),
Self::QUERYSTRING => Ok(Self::Querystring),
Self::REGROUP => Ok(Self::Regroup),
Self::RESET_CYCLE => Ok(Self::ResetCycle),
Self::SPACELESS => Ok(Self::Spaceless),
Self::TEMPLATE_TAG => Ok(Self::TemplateTag),
Self::URL => Ok(Self::Url),
Self::VERBATIM => Ok(Self::Verbatim),
Self::WIDTH_RATIO => Ok(Self::WidthRatio),
Self::WITH => Ok(Self::With),
other => Ok(Self::Other(other.to_string())),
}
}
}
#[derive(Clone, Debug, Serialize)]
pub struct DjangoFilter {
name: String,
arguments: Vec<String>,
}
impl DjangoFilter {
pub fn new(name: String, arguments: Vec<String>) -> Self {
Self { name, arguments }
}
}
#[derive(Clone, Debug, Serialize)]
pub enum HtmlNode {
Comment(String),
Doctype(String),
Element {
tag_name: String,
attributes: Attributes,
children: Vec<Node>,
},
Void {
tag_name: String,
attributes: Attributes,
},
}
#[derive(Clone, Debug, Serialize)]
pub enum ScriptNode {
Comment {
content: String,
kind: ScriptCommentKind,
},
Element {
attributes: Attributes,
children: Vec<Node>,
},
}
#[derive(Clone, Debug, Serialize)]
pub enum ScriptCommentKind {
SingleLine, // //
MultiLine, // /* */
}
#[derive(Clone, Debug, Serialize)]
pub enum StyleNode {
Comment(String),
Element {
attributes: Attributes,
children: Vec<Node>,
},
}
#[derive(Clone, Debug, Serialize)]
pub enum AttributeValue {
Value(String),
Boolean,
}
pub type Attributes = BTreeMap<String, AttributeValue>;
#[derive(Error, Debug)]
pub enum AstError {
#[error("error parsing django tag, recieved empty tag name")]
EmptyTag,
#[error("empty ast")]
EmptyAst,
}
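For orientation, a minimal crate-internal sketch of how these types compose (illustrative values; lib.rs below keeps the modules private, so this only compiles inside djls-ast):

use std::str::FromStr;

// "{% if user.is_staff %}" surfaces as bits ["if", "user.is_staff"];
// the first bit picks the tag kind, and unknown names fall back to Other.
let kind = DjangoTagKind::from_str("if").unwrap();
let mut ast = Ast::default();
ast.add_node(Node::Django(DjangoNode::Tag {
    kind,
    bits: vec!["if".to_string(), "user.is_staff".to_string()],
    children: vec![Node::Text("Admin".to_string())],
}));
assert_eq!(ast.nodes().len(), 1);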

crates/djls-ast/src/lexer.rs Normal file

@@ -0,0 +1,414 @@
use crate::tokens::{Token, TokenStream, TokenType};
use thiserror::Error;
pub struct Lexer {
source: String,
chars: Vec<char>,
start: usize,
current: usize,
line: usize,
}
impl Lexer {
pub fn new(source: &str) -> Self {
Lexer {
source: String::from(source),
chars: source.chars().collect(),
start: 0,
current: 0,
line: 1,
}
}
pub fn tokenize(&mut self) -> Result<TokenStream, LexerError> {
let mut tokens = TokenStream::default();
while !self.is_at_end() {
let token = self.next_token()?;
tokens.add_token(token);
}
tokens.finalize(self.line);
Ok(tokens)
}
fn next_token(&mut self) -> Result<Token, LexerError> {
self.start = self.current;
let token_type = match self.peek()? {
'{' => match self.peek_next()? {
'%' => {
self.consume_n(2)?; // {%
let content = self.consume_until("%}")?;
self.consume_n(2)?; // %}
TokenType::DjangoBlock(content)
}
'{' => {
self.consume_n(2)?; // {{
let content = self.consume_until("}}")?;
self.consume_n(2)?; // }}
TokenType::DjangoVariable(content)
}
'#' => {
self.consume_n(2)?; // {#
let content = self.consume_until("#}")?;
self.consume_n(2)?; // #}
TokenType::Comment(content, "{#".to_string(), Some("#}".to_string()))
}
_ => {
self.consume()?; // {
TokenType::Text(String::from("{"))
}
},
'<' => match self.peek_next()? {
'/' => {
self.consume_n(2)?; // </
let tag = self.consume_until(">")?;
self.consume()?; // >
TokenType::HtmlTagClose(tag)
}
'!' if self.matches("<!--")? => {
self.consume_n(4)?; // <!--
let content = self.consume_until("-->")?;
self.consume_n(3)?; // -->
TokenType::Comment(content, "<!--".to_string(), Some("-->".to_string()))
}
_ => {
self.consume()?; // consume <
let tag = self.consume_until(">")?;
self.consume()?; // consume >
if tag.starts_with("script") {
TokenType::ScriptTagOpen(tag)
} else if tag.starts_with("style") {
TokenType::StyleTagOpen(tag)
} else if tag.ends_with("/") {
TokenType::HtmlTagVoid(tag.trim_end_matches("/").to_string())
} else {
TokenType::HtmlTagOpen(tag)
}
}
},
'/' => match self.peek_next()? {
'/' => {
self.consume_n(2)?; // //
let content = self.consume_until("\n")?;
TokenType::Comment(content, "//".to_string(), None)
}
'*' => {
self.consume_n(2)?; // /*
let content = self.consume_until("*/")?;
self.consume_n(2)?; // */
TokenType::Comment(content, "/*".to_string(), Some("*/".to_string()))
}
_ => {
self.consume()?;
TokenType::Text("/".to_string())
}
},
c if c.is_whitespace() => {
if c == '\n' || c == '\r' {
self.consume()?; // \r or \n
if c == '\r' && self.peek()? == '\n' {
self.consume()?; // \n of \r\n
}
TokenType::Newline
} else {
self.consume()?; // Consume the first whitespace
while !self.is_at_end() && self.peek()?.is_whitespace() {
if self.peek()? == '\n' || self.peek()? == '\r' {
break;
}
self.consume()?;
}
let whitespace_count = self.current - self.start;
TokenType::Whitespace(whitespace_count)
}
}
_ => {
let mut text = String::new();
while !self.is_at_end() {
let c = self.peek()?;
if c == '{' || c == '<' || c == '\n' {
break;
}
text.push(c);
self.consume()?;
}
TokenType::Text(text)
}
};
let token = Token::new(token_type, self.line, Some(self.start));
match self.peek_previous()? {
'\n' => self.line += 1,
'\r' => {
self.line += 1;
if self.peek()? == '\n' {
self.current += 1;
}
}
_ => {}
}
Ok(token)
}
fn peek(&self) -> Result<char, LexerError> {
self.peek_at(0)
}
fn peek_next(&self) -> Result<char, LexerError> {
self.peek_at(1)
}
fn peek_previous(&self) -> Result<char, LexerError> {
self.peek_at(-1)
}
fn peek_until(&self, end: &str) -> Result<bool, LexerError> {
let mut index = self.current;
let end_chars: Vec<char> = end.chars().collect();
while index < self.chars.len() {
if self.chars[index..].starts_with(&end_chars) {
return Ok(true);
}
index += 1;
}
Ok(false)
}
fn peek_at(&self, offset: isize) -> Result<char, LexerError> {
let index = self.current as isize + offset;
self.item_at(index as usize)
}
fn item_at(&self, index: usize) -> Result<char, LexerError> {
// Return a null character when out of bounds, a bit of a departure from
// idiomatic Rust code, but makes writing the matching above and testing
// much easier. Indexing the pre-collected chars also keeps this correct
// (and O(1)) for multi-byte characters, unlike byte-based source indexing.
Ok(self.chars.get(index).copied().unwrap_or('\0'))
}
fn matches(&mut self, pattern: &str) -> Result<bool, LexerError> {
let mut i = self.current;
for c in pattern.chars() {
if i >= self.chars.len() || self.chars[i] != c {
return Ok(false);
}
i += 1;
}
Ok(true)
}
fn is_at_end(&self) -> bool {
self.current >= self.chars.len()
}
fn consume(&mut self) -> Result<char, LexerError> {
if self.is_at_end() {
return Err(LexerError::AtEndOfSource);
}
self.current += 1;
self.peek_previous()
}
fn consume_n(&mut self, count: usize) -> Result<String, LexerError> {
let start = self.current;
for _ in 0..count {
self.consume()?;
}
// Slice by char indices, not byte offsets, so multi-byte input stays valid
Ok(self.chars[start..self.current]
.iter()
.collect::<String>()
.trim()
.to_string())
}
fn consume_chars(&mut self, s: &str) -> Result<char, LexerError> {
for c in s.chars() {
if c != self.peek()? {
return Err(LexerError::UnexpectedCharacter(c, self.line));
}
self.consume()?;
}
self.peek_previous()
}
fn consume_until(&mut self, s: &str) -> Result<String, LexerError> {
let start = self.current;
let pattern: Vec<char> = s.chars().collect();
while !self.is_at_end() {
if self.chars[self.current..].starts_with(&pattern) {
return Ok(self.chars[start..self.current]
.iter()
.collect::<String>()
.trim()
.to_string());
}
self.consume()?;
}
Err(LexerError::UnexpectedEndOfInput)
}
}
#[derive(Error, Debug)]
pub enum LexerError {
#[error("empty token at line {0}")]
EmptyToken(usize),
#[error("unexpected character '{0}' at line {1}")]
UnexpectedCharacter(char, usize),
#[error("unexpected end of input")]
UnexpectedEndOfInput,
#[error("source is empty")]
EmptySource,
#[error("at beginning of source")]
AtBeginningOfSource,
#[error("at end of source")]
AtEndOfSource,
#[error("invalid character access")]
InvalidCharacterAccess,
#[error("unexpected token type '{0:?}'")]
UnexpectedTokenType(TokenType),
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_tokenize_html() {
let source = r#"<div class="container" id="main" disabled></div>"#;
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
#[test]
fn test_tokenize_django_variable() {
let source = "{{ user.name|default:\"Anonymous\"|title }}";
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
#[test]
fn test_tokenize_django_block() {
let source = "{% if user.is_staff %}Admin{% else %}User{% endif %}";
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
#[test]
fn test_tokenize_comments() {
let source = r#"<!-- HTML comment -->
{# Django comment #}
<script>
// JS single line comment
/* JS multi-line
comment */
</script>
<style>
/* CSS comment */
</style>"#;
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
#[test]
fn test_tokenize_script() {
let source = r#"<script type="text/javascript">
// Single line comment
const x = 1;
/* Multi-line
comment */
console.log(x);
</script>"#;
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
#[test]
fn test_tokenize_style() {
let source = r#"<style type="text/css">
/* Header styles */
.header {
color: blue;
}
</style>"#;
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
#[test]
fn test_tokenize_error_cases() {
// Unterminated tokens
assert!(Lexer::new("{{ user.name").tokenize().is_err()); // No closing }}
assert!(Lexer::new("{% if").tokenize().is_err()); // No closing %}
assert!(Lexer::new("{#").tokenize().is_err()); // No closing #}
assert!(Lexer::new("<div").tokenize().is_err()); // No closing >
// Invalid characters or syntax within tokens
assert!(Lexer::new("{{}}").tokenize().is_ok()); // Empty but valid
assert!(Lexer::new("{% %}").tokenize().is_ok()); // Empty but valid
assert!(Lexer::new("{##}").tokenize().is_ok()); // Empty but valid
}
#[test]
fn test_tokenize_nested_delimiters() {
let source = r#"{{ user.name }}
{% if true %}
{# comment #}
<!-- html comment -->
<div>text</div>"#;
assert!(Lexer::new(source).tokenize().is_ok());
}
#[test]
fn test_tokenize_everything() {
let source = r#"<!DOCTYPE html>
<html>
<head>
<style type="text/css">
/* Style header */
.header { color: blue; }
</style>
<script type="text/javascript">
// Init app
const app = {
/* Config */
debug: true
};
</script>
</head>
<body>
<!-- Header section -->
<div class="header" id="main" data-value="123" disabled>
{% if user.is_authenticated %}
{# Welcome message #}
<h1>Welcome, {{ user.name|default:"Guest"|title }}!</h1>
{% if user.is_staff %}
<span>Admin</span>
{% else %}
<span>User</span>
{% endif %}
{% endif %}
</div>
</body>
</html>"#;
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
}
}
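The '\0' sentinel in item_at keeps the lookahead methods total: peeking past either end of the source yields a null character instead of an error, so the big match in next_token needs no separate end-of-input arms. The same idea in isolation (a hypothetical standalone helper, not part of the crate):

// Out-of-range lookahead collapses to '\0' rather than panicking or
// returning an Option that every call site would have to unwrap.
fn peek_at(chars: &[char], index: usize) -> char {
    chars.get(index).copied().unwrap_or('\0')
}

// peek_at(&['a'], 0) == 'a'
// peek_at(&['a'], 5) == '\0'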

crates/djls-ast/src/lib.rs Normal file

@@ -0,0 +1,4 @@
mod ast;
mod lexer;
mod parser;
mod tokens;

crates/djls-ast/src/parser.rs Normal file

@@ -0,0 +1,638 @@
use crate::ast::{
Ast, AstError, AttributeValue, DjangoFilter, DjangoNode, DjangoTagKind, HtmlNode, Node,
ScriptCommentKind, ScriptNode, StyleNode,
};
use crate::tokens::{Token, TokenStream, TokenType};
use std::collections::BTreeMap;
use std::str::FromStr;
use thiserror::Error;
pub struct Parser {
tokens: TokenStream,
current: usize,
}
impl Parser {
pub fn new(tokens: TokenStream) -> Self {
Parser { tokens, current: 0 }
}
pub fn parse(&mut self) -> Result<Ast, ParserError> {
let mut ast = Ast::default();
while !self.is_at_end() {
let node = self.next_node();
match node {
Ok(node) => {
ast.add_node(node);
}
Err(ParserError::AtEndOfStream) => {
if ast.nodes().is_empty() {
return Err(ParserError::UnexpectedEof);
}
break;
}
Err(_) => {
self.synchronize(&[
TokenType::DjangoBlock(String::new()),
TokenType::HtmlTagOpen(String::new()),
TokenType::HtmlTagVoid(String::new()),
TokenType::ScriptTagOpen(String::new()),
TokenType::StyleTagOpen(String::new()),
TokenType::Newline,
TokenType::Eof,
])?;
continue;
}
}
}
Ok(ast.finalize()?)
}
fn next_node(&mut self) -> Result<Node, ParserError> {
let token = self.peek()?;
let node = match token.token_type() {
TokenType::Comment(s, start, end) => self.parse_comment(s, start, end.as_deref()),
TokenType::DjangoBlock(s) => self.parse_django_block(s),
TokenType::DjangoVariable(s) => self.parse_django_variable(s),
TokenType::Eof => self.parse_eof(),
TokenType::HtmlTagClose(tag) => Err(ParserError::ClosingTagFound(tag.to_string())),
TokenType::HtmlTagOpen(s) => self.parse_html_tag_open(s),
TokenType::HtmlTagVoid(s) => self.parse_html_tag_void(s),
TokenType::Newline => self.parse_newline(),
TokenType::ScriptTagOpen(s) => self.parse_script_tag_open(s),
TokenType::ScriptTagClose(_) => Err(ParserError::ClosingTagFound("script".to_string())),
TokenType::StyleTagOpen(s) => self.parse_style_tag_open(s),
TokenType::StyleTagClose(_) => Err(ParserError::ClosingTagFound("style".to_string())),
TokenType::Text(s) => self.parse_text(s),
TokenType::Whitespace(_) => self.parse_whitespace(),
}?;
Ok(node)
}
fn parse_comment(
&mut self,
content: &str,
start: &str,
end: Option<&str>,
) -> Result<Node, ParserError> {
self.consume()?;
match start {
"{#" => Ok(Node::Django(DjangoNode::Comment(content.to_string()))),
"<!--" => Ok(Node::Html(HtmlNode::Comment(content.to_string()))),
"//" => Ok(Node::Script(ScriptNode::Comment {
content: content.to_string(),
kind: ScriptCommentKind::SingleLine,
})),
"/*" => {
// Look back for script/style context
let token_type = self
.peek_back(self.current)?
.iter()
.find_map(|token| match token.token_type() {
TokenType::ScriptTagOpen(_) => {
Some(TokenType::ScriptTagOpen(String::new()))
}
TokenType::StyleTagOpen(_) => Some(TokenType::StyleTagOpen(String::new())),
TokenType::ScriptTagClose(_) | TokenType::StyleTagClose(_) => None,
_ => None,
})
.ok_or(ParserError::InvalidMultiLineComment)?;
match token_type {
TokenType::ScriptTagOpen(_) => Ok(Node::Script(ScriptNode::Comment {
content: content.to_string(),
kind: ScriptCommentKind::MultiLine,
})),
TokenType::StyleTagOpen(_) => {
Ok(Node::Style(StyleNode::Comment(content.to_string())))
}
_ => unreachable!(),
}
}
_ => Err(ParserError::UnexpectedToken(Token::new(
TokenType::Comment(
content.to_string(),
start.to_string(),
end.map(String::from),
),
0,
None,
))),
}
}
fn parse_django_block(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
let bits: Vec<String> = s.split_whitespace().map(String::from).collect();
let first = bits.first().ok_or(ParserError::Node(AstError::EmptyTag))?;
// If this is an end tag, signal it like we do with HTML closing tags
if first.starts_with("end") {
return Err(ParserError::ClosingTagFound(first.clone()));
}
let kind = DjangoTagKind::from_str(first)?;
let mut children = Vec::new();
let end_tag = format!("end{}", first);
while !self.is_at_end() {
match self.next_node() {
Ok(node) => {
children.push(node);
}
Err(ParserError::ClosingTagFound(tag)) => {
if tag == end_tag {
self.consume()?;
break;
}
// If it's not our end tag, keep collecting children
}
Err(e) => return Err(e),
}
}
Ok(Node::Django(DjangoNode::Tag {
kind,
bits,
children,
}))
}
fn parse_django_variable(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
let parts: Vec<&str> = s.split('|').collect();
let bits: Vec<String> = parts[0].trim().split('.').map(String::from).collect();
let filters: Vec<DjangoFilter> = parts[1..]
.iter()
.map(|filter_str| {
let filter_parts: Vec<&str> = filter_str.trim().split(':').collect();
let name = filter_parts[0].to_string();
let arguments = if filter_parts.len() > 1 {
filter_parts[1]
.trim_matches('"')
.split(',')
.map(|arg| arg.trim().to_string())
.collect()
} else {
Vec::new()
};
DjangoFilter::new(name, arguments)
})
.collect();
Ok(Node::Django(DjangoNode::Variable { bits, filters }))
}
fn parse_eof(&mut self) -> Result<Node, ParserError> {
if self.is_at_end() {
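// Consuming the final Eof token yields AtEndOfStream, which the main
// parse loop treats as a normal stop condition rather than an error.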
self.consume()?;
self.next_node()
} else {
Err(ParserError::UnexpectedEof)
}
}
fn parse_html_tag_open(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
let mut parts = s.split_whitespace();
let tag_name = parts
.next()
.ok_or(ParserError::InvalidTokenAccess)?
.to_string();
let mut attributes = BTreeMap::new();
for attr in parts {
if let Some((key, value)) = attr.split_once('=') {
// Key-value attribute (class="container")
attributes.insert(
key.to_string(),
AttributeValue::Value(value.trim_matches('"').to_string()),
);
} else {
// Boolean attribute (disabled)
attributes.insert(attr.to_string(), AttributeValue::Boolean);
}
}
let mut children = Vec::new();
while !self.is_at_end() {
match self.next_node() {
Ok(node) => {
children.push(node);
}
Err(ParserError::ClosingTagFound(tag)) => {
if tag == tag_name {
self.consume()?;
break;
}
}
Err(e) => return Err(e),
}
}
Ok(Node::Html(HtmlNode::Element {
tag_name,
attributes,
children,
}))
}
fn parse_html_tag_void(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
let mut parts = s.split_whitespace();
let tag_name = parts
.next()
.ok_or(ParserError::InvalidTokenAccess)?
.to_string();
let mut attributes = BTreeMap::new();
for attr in parts {
if let Some((key, value)) = attr.split_once('=') {
attributes.insert(
key.to_string(),
AttributeValue::Value(value.trim_matches('"').to_string()),
);
} else {
attributes.insert(attr.to_string(), AttributeValue::Boolean);
}
}
Ok(Node::Html(HtmlNode::Void {
tag_name,
attributes,
}))
}
fn parse_newline(&mut self) -> Result<Node, ParserError> {
self.consume()?;
self.next_node()
}
fn parse_script_tag_open(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
let parts = s.split_whitespace();
let mut attributes = BTreeMap::new();
for attr in parts {
if let Some((key, value)) = attr.split_once('=') {
attributes.insert(
key.to_string(),
AttributeValue::Value(value.trim_matches('"').to_string()),
);
} else {
attributes.insert(attr.to_string(), AttributeValue::Boolean);
}
}
let mut children = Vec::new();
while !self.is_at_end() {
match self.next_node() {
Ok(node) => {
children.push(node);
}
Err(ParserError::ClosingTagFound(tag)) => {
if tag == "script" {
self.consume()?;
break;
}
// If it's not our closing tag, keep collecting children
}
Err(e) => return Err(e),
}
}
Ok(Node::Script(ScriptNode::Element {
attributes,
children,
}))
}
fn parse_style_tag_open(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
let mut parts = s.split_whitespace();
let _tag_name = parts
.next()
.ok_or(ParserError::InvalidTokenAccess)?
.to_string();
let mut attributes = BTreeMap::new();
for attr in parts {
if let Some((key, value)) = attr.split_once('=') {
attributes.insert(
key.to_string(),
AttributeValue::Value(value.trim_matches('"').to_string()),
);
} else {
attributes.insert(attr.to_string(), AttributeValue::Boolean);
}
}
let mut children = Vec::new();
while !self.is_at_end() {
match self.next_node() {
Ok(node) => {
children.push(node);
}
Err(ParserError::ClosingTagFound(tag)) => {
if tag == "style" {
self.consume()?;
break;
}
// If it's not our closing tag, keep collecting children
}
Err(e) => return Err(e),
}
}
Ok(Node::Style(StyleNode::Element {
attributes,
children,
}))
}
fn parse_text(&mut self, s: &str) -> Result<Node, ParserError> {
self.consume()?;
Ok(Node::Text(s.to_string()))
}
fn parse_whitespace(&mut self) -> Result<Node, ParserError> {
self.consume()?;
self.next_node()
}
fn peek(&self) -> Result<Token, ParserError> {
self.peek_at(0)
}
fn peek_next(&self) -> Result<Token, ParserError> {
self.peek_at(1)
}
fn peek_previous(&self) -> Result<Token, ParserError> {
self.peek_at(-1)
}
fn peek_forward(&self, steps: usize) -> Result<Vec<Token>, ParserError> {
(0..steps).map(|i| self.peek_at(i as isize)).collect()
}
fn peek_back(&self, steps: usize) -> Result<Vec<Token>, ParserError> {
(1..=steps).map(|i| self.peek_at(-(i as isize))).collect()
}
fn peek_at(&self, offset: isize) -> Result<Token, ParserError> {
let index = self.current as isize + offset;
self.item_at(index as usize)
}
fn item_at(&self, index: usize) -> Result<Token, ParserError> {
if let Some(token) = self.tokens.get(index) {
Ok(token.clone())
} else {
let error = if self.tokens.is_empty() {
ParserError::EmptyTokenStream
} else if index < self.current {
ParserError::AtBeginningOfStream
} else if index >= self.tokens.len() {
ParserError::AtEndOfStream
} else {
ParserError::InvalidTokenAccess
};
Err(error)
}
}
fn is_at_end(&self) -> bool {
self.current + 1 >= self.tokens.len()
}
fn consume(&mut self) -> Result<Token, ParserError> {
if self.is_at_end() {
return Err(ParserError::AtEndOfStream);
}
self.current += 1;
self.peek_previous()
}
fn backtrack(&mut self, steps: usize) -> Result<Token, ParserError> {
if self.current < steps {
return Err(ParserError::AtBeginningOfStream);
}
self.current -= steps;
self.peek_next()
}
fn lookahead(&self, types: &[TokenType]) -> Result<bool, ParserError> {
for (i, t) in types.iter().enumerate() {
if !self.peek_at(i as isize)?.is_token_type(t) {
return Ok(false);
}
}
Ok(true)
}
fn consume_if(&mut self, token_type: TokenType) -> Result<Token, ParserError> {
let token = self.peek()?;
if !token.is_token_type(&token_type) {
return Err(ParserError::ExpectedTokenType(token, token_type));
}
self.consume()?;
Ok(token)
}
fn consume_until(&mut self, end_type: TokenType) -> Result<Vec<Token>, ParserError> {
let mut consumed = Vec::new();
while !self.is_at_end() && !self.peek()?.is_token_type(&end_type) {
let token = self.consume()?;
consumed.push(token);
}
Ok(consumed)
}
fn synchronize(&mut self, sync_types: &[TokenType]) -> Result<(), ParserError> {
while !self.is_at_end() {
if sync_types.contains(self.peek()?.token_type()) {
return Ok(());
}
self.consume()?;
}
Ok(())
}
}
#[derive(Error, Debug)]
pub enum ParserError {
#[error("token stream is empty")]
EmptyTokenStream,
#[error("at beginning of token stream")]
AtBeginningOfStream,
#[error("at end of token stream")]
AtEndOfStream,
#[error("invalid token access")]
InvalidTokenAccess,
#[error("unexpected token '{0:?}', expected type '{1:?}'")]
ExpectedTokenType(Token, TokenType),
#[error("unexpected token '{0:?}'")]
UnexpectedToken(Token),
#[error("unexpected end tag: {0}")]
UnexpectedEndTag(String),
#[error("multi-line comment outside of script or style context")]
InvalidMultLineComment,
#[error("unexpected end of file")]
UnexpectedEof,
#[error("found closing tag: {0}")]
ClosingTagFound(String),
#[error(transparent)]
Node(#[from] AstError),
}
#[cfg(test)]
mod tests {
use super::*;
use crate::lexer::Lexer;
#[test]
fn test_parse_comments() {
let source = r#"<!-- HTML comment -->
{# Django comment #}
<script>
// JS single line
/* JS multi
line */
</script>
<style>
/* CSS comment */
</style>"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_django_block() {
let source = r#"{% if user.is_staff %}Admin{% else %}User{% endif %}"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_django_variable() {
let source = r#"{{ user.name|default:"Anonymous"|title }}"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_html_tag() {
let source = r#"<div class="container" id="main" disabled></div>"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_script() {
let source = r#"<script type="text/javascript">
// Single line comment
const x = 1;
/* Multi-line
comment */
console.log(x);
</script>"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_style() {
let source = r#"<style type="text/css">
/* Header styles */
.header {
color: blue;
}
</style>"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_full() {
let source = r#"<!DOCTYPE html>
<html>
<head>
<style type="text/css">
/* Style header */
.header { color: blue; }
</style>
<script type="text/javascript">
// Init app
const app = {
/* Config */
debug: true
};
</script>
</head>
<body>
<!-- Header section -->
<div class="header" id="main" data-value="123" disabled>
{% if user.is_authenticated %}
{# Welcome message #}
<h1>Welcome, {{ user.name|default:"Guest"|title }}!</h1>
{% if user.is_staff %}
<span>Admin</span>
{% else %}
<span>User</span>
{% endif %}
{% endif %}
</div>
</body>
</html>"#;
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
insta::assert_yaml_snapshot!(ast);
}
#[test]
fn test_parse_unexpected_eof() {
let source = "<div>\n";
let tokens = Lexer::new(source).tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse();
assert!(matches!(ast, Err(ParserError::UnexpectedEof)));
}
}
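End to end, the two stages chain exactly as in the tests above; a minimal crate-internal sketch:

// Lexing and parsing have distinct error types (LexerError, ParserError),
// so the stages are unwrapped separately here.
let tokens = Lexer::new("{{ user.name }}").tokenize().unwrap();
let ast = Parser::new(tokens).parse().unwrap();
assert_eq!(ast.nodes().len(), 1); // a single DjangoNode::Variable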

@@ -0,0 +1,94 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
Comment:
- HTML comment
- "<!--"
- "-->"
line: 1
start: 0
- token_type: Newline
line: 1
start: 21
- token_type:
Comment:
- Django comment
- "{#"
- "#}"
line: 2
start: 22
- token_type: Newline
line: 2
start: 42
- token_type:
ScriptTagOpen: script
line: 3
start: 43
- token_type: Newline
line: 3
start: 51
- token_type:
Whitespace: 4
line: 4
start: 52
- token_type:
Comment:
- JS single line comment
- //
- ~
line: 4
start: 56
- token_type: Newline
line: 4
start: 81
- token_type:
Whitespace: 4
line: 5
start: 82
- token_type:
Comment:
- "JS multi-line\n comment"
- /*
- "*/"
line: 5
start: 86
- token_type: Newline
line: 5
start: 120
- token_type:
HtmlTagClose: script
line: 6
start: 121
- token_type: Newline
line: 6
start: 130
- token_type:
StyleTagOpen: style
line: 7
start: 131
- token_type: Newline
line: 7
start: 138
- token_type:
Whitespace: 4
line: 8
start: 139
- token_type:
Comment:
- CSS comment
- /*
- "*/"
line: 8
start: 143
- token_type: Newline
line: 8
start: 160
- token_type:
HtmlTagClose: style
line: 9
start: 161
- token_type: Eof
line: 9
start: ~

@@ -0,0 +1,27 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
DjangoBlock: if user.is_staff
line: 1
start: 0
- token_type:
Text: Admin
line: 1
start: 22
- token_type:
DjangoBlock: else
line: 1
start: 27
- token_type:
Text: User
line: 1
start: 37
- token_type:
DjangoBlock: endif
line: 1
start: 41
- token_type: Eof
line: 1
start: ~

@@ -0,0 +1,11 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
DjangoVariable: "user.name|default:\"Anonymous\"|title"
line: 1
start: 0
- token_type: Eof
line: 1
start: ~

@@ -0,0 +1,369 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
HtmlTagOpen: "!DOCTYPE html"
line: 1
start: 0
- token_type: Newline
line: 1
start: 15
- token_type:
HtmlTagOpen: html
line: 2
start: 16
- token_type: Newline
line: 2
start: 22
- token_type:
HtmlTagOpen: head
line: 3
start: 23
- token_type: Newline
line: 3
start: 29
- token_type:
Whitespace: 4
line: 4
start: 30
- token_type:
StyleTagOpen: "style type=\"text/css\""
line: 4
start: 34
- token_type: Newline
line: 4
start: 57
- token_type:
Whitespace: 8
line: 5
start: 58
- token_type:
Comment:
- Style header
- /*
- "*/"
line: 5
start: 66
- token_type: Newline
line: 5
start: 84
- token_type:
Whitespace: 8
line: 6
start: 85
- token_type:
Text: ".header "
line: 6
start: 93
- token_type:
Text: "{"
line: 6
start: 101
- token_type:
Whitespace: 1
line: 6
start: 102
- token_type:
Text: "color: blue; }"
line: 6
start: 103
- token_type: Newline
line: 6
start: 117
- token_type:
Whitespace: 4
line: 7
start: 118
- token_type:
HtmlTagClose: style
line: 7
start: 122
- token_type: Newline
line: 7
start: 130
- token_type:
Whitespace: 4
line: 8
start: 131
- token_type:
ScriptTagOpen: "script type=\"text/javascript\""
line: 8
start: 135
- token_type: Newline
line: 8
start: 166
- token_type:
Whitespace: 8
line: 9
start: 167
- token_type:
Comment:
- Init app
- //
- ~
line: 9
start: 175
- token_type: Newline
line: 9
start: 186
- token_type:
Whitespace: 8
line: 10
start: 187
- token_type:
Text: "const app = "
line: 10
start: 195
- token_type:
Text: "{"
line: 10
start: 207
- token_type: Newline
line: 10
start: 208
- token_type:
Whitespace: 12
line: 11
start: 209
- token_type:
Comment:
- Config
- /*
- "*/"
line: 11
start: 221
- token_type: Newline
line: 11
start: 233
- token_type:
Whitespace: 12
line: 12
start: 234
- token_type:
Text: "debug: true"
line: 12
start: 246
- token_type: Newline
line: 12
start: 257
- token_type:
Whitespace: 8
line: 13
start: 258
- token_type:
Text: "};"
line: 13
start: 266
- token_type: Newline
line: 13
start: 268
- token_type:
Whitespace: 4
line: 14
start: 269
- token_type:
HtmlTagClose: script
line: 14
start: 273
- token_type: Newline
line: 14
start: 282
- token_type:
HtmlTagClose: head
line: 15
start: 283
- token_type: Newline
line: 15
start: 290
- token_type:
HtmlTagOpen: body
line: 16
start: 291
- token_type: Newline
line: 16
start: 297
- token_type:
Whitespace: 4
line: 17
start: 298
- token_type:
Comment:
- Header section
- "<!--"
- "-->"
line: 17
start: 302
- token_type: Newline
line: 17
start: 325
- token_type:
Whitespace: 4
line: 18
start: 326
- token_type:
HtmlTagOpen: "div class=\"header\" id=\"main\" data-value=\"123\" disabled"
line: 18
start: 330
- token_type: Newline
line: 18
start: 386
- token_type:
Whitespace: 8
line: 19
start: 387
- token_type:
DjangoBlock: if user.is_authenticated
line: 19
start: 395
- token_type: Newline
line: 19
start: 425
- token_type:
Whitespace: 12
line: 20
start: 426
- token_type:
Comment:
- Welcome message
- "{#"
- "#}"
line: 20
start: 438
- token_type: Newline
line: 20
start: 459
- token_type:
Whitespace: 12
line: 21
start: 460
- token_type:
HtmlTagOpen: h1
line: 21
start: 472
- token_type:
Text: "Welcome, "
line: 21
start: 476
- token_type:
DjangoVariable: "user.name|default:\"Guest\"|title"
line: 21
start: 485
- token_type:
Text: "!"
line: 21
start: 522
- token_type:
HtmlTagClose: h1
line: 21
start: 523
- token_type: Newline
line: 21
start: 528
- token_type:
Whitespace: 12
line: 22
start: 529
- token_type:
DjangoBlock: if user.is_staff
line: 22
start: 541
- token_type: Newline
line: 22
start: 563
- token_type:
Whitespace: 16
line: 23
start: 564
- token_type:
HtmlTagOpen: span
line: 23
start: 580
- token_type:
Text: Admin
line: 23
start: 586
- token_type:
HtmlTagClose: span
line: 23
start: 591
- token_type: Newline
line: 23
start: 598
- token_type:
Whitespace: 12
line: 24
start: 599
- token_type:
DjangoBlock: else
line: 24
start: 611
- token_type: Newline
line: 24
start: 621
- token_type:
Whitespace: 16
line: 25
start: 622
- token_type:
HtmlTagOpen: span
line: 25
start: 638
- token_type:
Text: User
line: 25
start: 644
- token_type:
HtmlTagClose: span
line: 25
start: 648
- token_type: Newline
line: 25
start: 655
- token_type:
Whitespace: 12
line: 26
start: 656
- token_type:
DjangoBlock: endif
line: 26
start: 668
- token_type: Newline
line: 26
start: 679
- token_type:
Whitespace: 8
line: 27
start: 680
- token_type:
DjangoBlock: endif
line: 27
start: 688
- token_type: Newline
line: 27
start: 699
- token_type:
Whitespace: 4
line: 28
start: 700
- token_type:
HtmlTagClose: div
line: 28
start: 704
- token_type: Newline
line: 28
start: 710
- token_type:
HtmlTagClose: body
line: 29
start: 711
- token_type: Newline
line: 29
start: 718
- token_type:
HtmlTagClose: html
line: 30
start: 719
- token_type: Eof
line: 30
start: ~

@@ -0,0 +1,15 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
HtmlTagOpen: "div class=\"container\" id=\"main\" disabled"
line: 1
start: 0
- token_type:
HtmlTagClose: div
line: 1
start: 42
- token_type: Eof
line: 1
start: ~

@@ -0,0 +1,68 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
ScriptTagOpen: "script type=\"text/javascript\""
line: 1
start: 0
- token_type: Newline
line: 1
start: 31
- token_type:
Whitespace: 4
line: 2
start: 32
- token_type:
Comment:
- Single line comment
- //
- ~
line: 2
start: 36
- token_type: Newline
line: 2
start: 58
- token_type:
Whitespace: 4
line: 3
start: 59
- token_type:
Text: const x = 1;
line: 3
start: 63
- token_type: Newline
line: 3
start: 75
- token_type:
Whitespace: 4
line: 4
start: 76
- token_type:
Comment:
- "Multi-line\n comment"
- /*
- "*/"
line: 4
start: 80
- token_type: Newline
line: 4
start: 111
- token_type:
Whitespace: 4
line: 5
start: 112
- token_type:
Text: console.log(x);
line: 5
start: 116
- token_type: Newline
line: 5
start: 131
- token_type:
HtmlTagClose: script
line: 6
start: 132
- token_type: Eof
line: 6
start: ~

@@ -0,0 +1,69 @@
---
source: crates/djls-ast/src/lexer.rs
expression: tokens
---
- token_type:
StyleTagOpen: "style type=\"text/css\""
line: 1
start: 0
- token_type: Newline
line: 1
start: 23
- token_type:
Whitespace: 4
line: 2
start: 24
- token_type:
Comment:
- Header styles
- /*
- "*/"
line: 2
start: 28
- token_type: Newline
line: 2
start: 47
- token_type:
Whitespace: 4
line: 3
start: 48
- token_type:
Text: ".header "
line: 3
start: 52
- token_type:
Text: "{"
line: 3
start: 60
- token_type: Newline
line: 3
start: 61
- token_type:
Whitespace: 8
line: 4
start: 62
- token_type:
Text: "color: blue;"
line: 4
start: 70
- token_type: Newline
line: 4
start: 82
- token_type:
Whitespace: 4
line: 5
start: 83
- token_type:
Text: "}"
line: 5
start: 87
- token_type: Newline
line: 5
start: 88
- token_type:
HtmlTagClose: style
line: 6
start: 89
- token_type: Eof
line: 6
start: ~

@@ -0,0 +1,28 @@
---
source: crates/djls-ast/src/parser.rs
expression: ast
---
nodes:
- Html:
Comment: HTML comment
- Django:
Comment: Django comment
- Script:
Element:
attributes:
script: Boolean
children:
- Script:
Comment:
content: JS single line
kind: SingleLine
- Script:
Comment:
content: "JS multi\n line"
kind: MultiLine
- Style:
Element:
attributes: {}
children:
- Style:
Comment: CSS comment

@@ -0,0 +1,20 @@
---
source: crates/djls-ast/src/parser.rs
expression: ast
---
nodes:
- Django:
Tag:
kind: If
bits:
- if
- user.is_staff
children:
- Text: Admin
- Django:
Tag:
kind: Else
bits:
- else
children:
- Text: User

@@ -0,0 +1,16 @@
---
source: crates/djls-ast/src/parser.rs
expression: ast
---
nodes:
- Django:
Variable:
bits:
- user
- name
filters:
- name: default
arguments:
- Anonymous
- name: title
arguments: []

@@ -0,0 +1,15 @@
---
source: crates/djls-ast/src/parser.rs
expression: ast
---
nodes:
- Html:
Element:
tag_name: div
attributes:
class:
Value: container
disabled: Boolean
id:
Value: main
children: []

@@ -0,0 +1,22 @@
---
source: crates/djls-ast/src/parser.rs
expression: ast
---
nodes:
- Script:
Element:
attributes:
script: Boolean
type:
Value: text/javascript
children:
- Script:
Comment:
content: Single line comment
kind: SingleLine
- Text: const x = 1;
- Script:
Comment:
content: "Multi-line\n comment"
kind: MultiLine
- Text: console.log(x);

@@ -0,0 +1,17 @@
---
source: crates/djls-ast/src/parser.rs
expression: ast
---
nodes:
- Style:
Element:
attributes:
type:
Value: text/css
children:
- Style:
Comment: Header styles
- Text: ".header "
- Text: "{"
- Text: "color: blue;"
- Text: "}"

crates/djls-ast/src/tokens.rs Normal file

@@ -0,0 +1,186 @@
use serde::Serialize;
use std::fmt;
use std::ops::{Deref, DerefMut};
#[derive(Clone, Debug, Serialize, PartialEq)]
pub enum TokenType {
Comment(String, String, Option<String>),
DjangoBlock(String),
DjangoVariable(String),
Eof,
HtmlTagOpen(String),
HtmlTagClose(String),
HtmlTagVoid(String),
Newline,
ScriptTagOpen(String),
ScriptTagClose(String),
StyleTagOpen(String),
StyleTagClose(String),
Text(String),
Whitespace(usize),
}
impl TokenType {
pub fn len(&self) -> Option<usize> {
match self {
TokenType::DjangoBlock(s)
| TokenType::DjangoVariable(s)
| TokenType::HtmlTagOpen(s)
| TokenType::HtmlTagClose(s)
| TokenType::HtmlTagVoid(s)
| TokenType::ScriptTagOpen(s)
| TokenType::ScriptTagClose(s)
| TokenType::StyleTagOpen(s)
| TokenType::StyleTagClose(s)
| TokenType::Text(s) => Some(s.len()),
TokenType::Comment(content, start, end) => {
Some(content.len() + start.len() + end.as_ref().map_or(0, |e| e.len()))
}
TokenType::Whitespace(len) => Some(*len),
TokenType::Newline => Some(1),
TokenType::Eof => None,
}
}
pub fn lexeme(&self) -> &str {
match self {
TokenType::DjangoBlock(s)
| TokenType::DjangoVariable(s)
| TokenType::HtmlTagOpen(s)
| TokenType::HtmlTagClose(s)
| TokenType::HtmlTagVoid(s)
| TokenType::ScriptTagOpen(s)
| TokenType::ScriptTagClose(s)
| TokenType::StyleTagOpen(s)
| TokenType::StyleTagClose(s)
| TokenType::Text(s) => s,
TokenType::Comment(content, _, _) => content, // Just return the content
TokenType::Whitespace(_) => " ",
TokenType::Newline => "\n",
TokenType::Eof => "",
}
}
}
impl fmt::Display for TokenType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use TokenType::*;
match self {
Comment(content, start, end) => match end {
Some(end) => write!(f, "{}{}{}", start, content, end),
None => write!(f, "{}{}", start, content),
},
DjangoBlock(s) => write!(f, "{{% {} %}}", s),
DjangoVariable(s) => write!(f, "{{{{ {} }}}}", s),
Eof => Ok(()),
HtmlTagOpen(s) => write!(f, "<{}>", s),
HtmlTagClose(s) => write!(f, "</{}>", s),
HtmlTagVoid(s) => write!(f, "<{}/>", s),
Newline => f.write_str("\n"),
// The stored lexeme already includes the tag name (e.g. "script type=..."),
// so no "script"/"style" prefix is added here
ScriptTagOpen(s) => write!(f, "<{}>", s),
ScriptTagClose(_) => f.write_str("</script>"),
StyleTagOpen(s) => write!(f, "<{}>", s),
StyleTagClose(_) => f.write_str("</style>"),
Text(s) => f.write_str(s),
Whitespace(len) => f.write_str(&" ".repeat(*len)),
}
}
}
#[derive(Clone, Debug, Serialize, PartialEq)]
pub struct Token {
token_type: TokenType,
line: usize,
start: Option<usize>,
}
impl Token {
pub fn new(token_type: TokenType, line: usize, start: Option<usize>) -> Self {
Self {
token_type,
line,
start,
}
}
pub fn lexeme_from_source<'a>(&self, source: &'a str) -> Option<&'a str> {
match (self.start, self.token_type.len()) {
(Some(start), Some(len)) => Some(&source[start..start + len]),
_ => None,
}
}
pub fn lexeme(&self) -> &str {
self.token_type.lexeme()
}
pub fn token_type(&self) -> &TokenType {
&self.token_type
}
pub fn is_token_type(&self, token_type: &TokenType) -> bool {
&self.token_type == token_type
}
}
#[derive(Clone, Debug, Default, Serialize)]
pub struct TokenStream(Vec<Token>);
impl TokenStream {
pub fn tokens(&self) -> &Vec<Token> {
&self.0
}
pub fn add_token(&mut self, token: Token) {
self.0.push(token);
}
pub fn finalize(&mut self, line: usize) -> TokenStream {
let eof_token = Token {
token_type: TokenType::Eof,
line,
start: None,
};
self.add_token(eof_token);
self.clone()
}
}
impl AsRef<[Token]> for TokenStream {
fn as_ref(&self) -> &[Token] {
&self.0
}
}
impl Deref for TokenStream {
type Target = Vec<Token>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for TokenStream {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl IntoIterator for TokenStream {
type Item = Token;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl<'a> IntoIterator for &'a TokenStream {
type Item = &'a Token;
type IntoIter = std::slice::Iter<'a, Token>;
fn into_iter(self) -> Self::IntoIter {
self.0.iter()
}
}
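A small crate-internal sketch of the two lexeme accessors. Note they can differ for delimited tokens (a DjangoVariable stores its content without the surrounding braces), and lexeme_from_source slices by byte offset, so it lines up with the lexer's char-based starts only for ASCII input:

let source = "hello <b>";
let token = Token::new(TokenType::Text("hello ".to_string()), 1, Some(0));
assert_eq!(token.lexeme(), "hello ");                         // from the token itself
assert_eq!(token.lexeme_from_source(source), Some("hello ")); // sliced from source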