This commit is contained in:
Josh Thomas 2025-01-06 16:06:47 -06:00
parent b10df9b8ca
commit a078faabf7
14 changed files with 214 additions and 218 deletions

View file

@ -97,7 +97,7 @@ impl From<Token> for Span {
let token_start = token.start().unwrap_or(0); let token_start = token.start().unwrap_or(0);
match token.token_type() { match token.token_type() {
TokenType::Comment(_, start, _) => token_start + start.len() as u32, TokenType::Comment(_, start, _) => token_start + start.len() as u32,
TokenType::DjangoBlock(_) | TokenType::DjangoVariable(_) => token_start + 2, TokenType::DjangoBlock(_) | TokenType::DjangoVariable(_) => token_start + 3,
_ => token_start, _ => token_start,
} }
}; };

View file

@ -22,140 +22,136 @@ impl Lexer {
pub fn tokenize(&mut self) -> Result<TokenStream, LexerError> { pub fn tokenize(&mut self) -> Result<TokenStream, LexerError> {
let mut tokens = TokenStream::default(); let mut tokens = TokenStream::default();
while !self.is_at_end() { while !self.is_at_end() {
let token = self.next_token()?; self.start = self.current;
let token_type = match self.peek()? {
'{' => match self.peek_next()? {
'%' => {
self.consume_n(2)?; // {%
let content = self.consume_until("%}")?;
self.consume_n(2)?; // %}
TokenType::DjangoBlock(content)
}
'{' => {
self.consume_n(2)?; // {{
let content = self.consume_until("}}")?;
self.consume_n(2)?; // }}
TokenType::DjangoVariable(content)
}
'#' => {
self.consume_n(2)?; // {#
let content = self.consume_until("#}")?;
self.consume_n(2)?; // #}
TokenType::Comment(content, "{#".to_string(), Some("#}".to_string()))
}
_ => {
self.consume()?; // {
TokenType::Text(String::from("{"))
}
},
'<' => match self.peek_next()? {
'/' => {
self.consume_n(2)?; // </
let tag = self.consume_until(">")?;
self.consume()?; // >
TokenType::HtmlTagClose(tag)
}
'!' if self.matches("<!--")? => {
self.consume_n(4)?; // <!--
let content = self.consume_until("-->")?;
self.consume_n(3)?; // -->
TokenType::Comment(content, "<!--".to_string(), Some("-->".to_string()))
}
_ => {
self.consume()?; // consume <
let tag = self.consume_until(">")?;
self.consume()?; // consume >
if tag.starts_with("script") {
TokenType::ScriptTagOpen(tag)
} else if tag.starts_with("style") {
TokenType::StyleTagOpen(tag)
} else if tag.ends_with("/") {
TokenType::HtmlTagVoid(tag.trim_end_matches("/").to_string())
} else {
TokenType::HtmlTagOpen(tag)
}
}
},
'/' => match self.peek_next()? {
'/' => {
self.consume_n(2)?; // //
let content = self.consume_until("\n")?;
TokenType::Comment(content, "//".to_string(), None)
}
'*' => {
self.consume_n(2)?; // /*
let content = self.consume_until("*/")?;
self.consume_n(2)?; // */
TokenType::Comment(content, "/*".to_string(), Some("*/".to_string()))
}
_ => {
self.consume()?;
TokenType::Text("/".to_string())
}
},
c if c.is_whitespace() => {
if c == '\n' || c == '\r' {
self.consume()?; // \r or \n
if c == '\r' && self.peek()? == '\n' {
self.consume()?; // \n of \r\n
}
TokenType::Newline
} else {
self.consume()?; // Consume the first whitespace
while !self.is_at_end() && self.peek()?.is_whitespace() {
if self.peek()? == '\n' || self.peek()? == '\r' {
break;
}
self.consume()?;
}
let whitespace_count = self.current - self.start;
TokenType::Whitespace(whitespace_count)
}
}
_ => {
let mut text = String::new();
while !self.is_at_end() {
let c = self.peek()?;
if c == '{' || c == '<' || c == '\n' {
break;
}
text.push(c);
self.consume()?;
}
TokenType::Text(text)
}
};
let token = Token::new(token_type, self.line, Some(self.start));
match self.peek_previous()? {
'\n' => self.line += 1,
'\r' => {
self.line += 1;
if self.peek()? == '\n' {
self.current += 1;
}
}
_ => {}
}
tokens.add_token(token); tokens.add_token(token);
} }
tokens.finalize(self.line); tokens.finalize(self.line);
Ok(tokens) Ok(tokens)
} }
fn next_token(&mut self) -> Result<Token, LexerError> {
self.start = self.current;
let token_type = match self.peek()? {
'{' => match self.peek_next()? {
'%' => {
self.consume_n(2)?; // {%
let content = self.consume_until("%}")?;
self.consume_n(2)?; // %}
TokenType::DjangoBlock(content)
}
'{' => {
self.consume_n(2)?; // {{
let content = self.consume_until("}}")?;
self.consume_n(2)?; // }}
TokenType::DjangoVariable(content)
}
'#' => {
self.consume_n(2)?; // {#
let content = self.consume_until("#}")?;
self.consume_n(2)?; // #}
TokenType::Comment(content, "{#".to_string(), Some("#}".to_string()))
}
_ => {
self.consume()?; // {
TokenType::Text(String::from("{"))
}
},
'<' => match self.peek_next()? {
'/' => {
self.consume_n(2)?; // </
let tag = self.consume_until(">")?;
self.consume()?; // >
TokenType::HtmlTagClose(tag)
}
'!' if self.matches("<!--")? => {
self.consume_n(4)?; // <!--
let content = self.consume_until("-->")?;
self.consume_n(3)?; // -->
TokenType::Comment(content, "<!--".to_string(), Some("-->".to_string()))
}
_ => {
self.consume()?; // consume <
let tag = self.consume_until(">")?;
self.consume()?; // consume >
if tag.starts_with("script") {
TokenType::ScriptTagOpen(tag)
} else if tag.starts_with("style") {
TokenType::StyleTagOpen(tag)
} else if tag.ends_with("/") {
TokenType::HtmlTagVoid(tag.trim_end_matches("/").to_string())
} else {
TokenType::HtmlTagOpen(tag)
}
}
},
'/' => match self.peek_next()? {
'/' => {
self.consume_n(2)?; // //
let content = self.consume_until("\n")?;
TokenType::Comment(content, "//".to_string(), None)
}
'*' => {
self.consume_n(2)?; // /*
let content = self.consume_until("*/")?;
self.consume_n(2)?; // */
TokenType::Comment(content, "/*".to_string(), Some("*/".to_string()))
}
_ => {
self.consume()?;
TokenType::Text("/".to_string())
}
},
c if c.is_whitespace() => {
if c == '\n' || c == '\r' {
self.consume()?; // \r or \n
if c == '\r' && self.peek()? == '\n' {
self.consume()?; // \n of \r\n
}
TokenType::Newline
} else {
self.consume()?; // Consume the first whitespace
while !self.is_at_end() && self.peek()?.is_whitespace() {
if self.peek()? == '\n' || self.peek()? == '\r' {
break;
}
self.consume()?;
}
let whitespace_count = self.current - self.start;
TokenType::Whitespace(whitespace_count)
}
}
_ => {
let mut text = String::new();
while !self.is_at_end() {
let c = self.peek()?;
if c == '{' || c == '<' || c == '\n' {
break;
}
text.push(c);
self.consume()?;
}
TokenType::Text(text)
}
};
let token = Token::new(token_type, self.line, Some(self.start));
match self.peek_previous()? {
'\n' => self.line += 1,
'\r' => {
self.line += 1;
if self.peek()? == '\n' {
self.current += 1;
}
}
_ => {}
}
Ok(token)
}
fn peek(&self) -> Result<char, LexerError> { fn peek(&self) -> Result<char, LexerError> {
self.peek_at(0) self.peek_at(0)
} }

View file

@ -11,7 +11,7 @@ nodes:
- Comment: - Comment:
content: Django comment content: Django comment
span: span:
start: 21 start: 23
length: 14 length: 14
line_offsets: line_offsets:
- 0 - 0

View file

@ -13,10 +13,10 @@ nodes:
- ">" - ">"
- "0" - "0"
span: span:
start: 0 start: 3
length: 8 length: 8
tag_span: tag_span:
start: 0 start: 3
length: 8 length: 8
assignment: ~ assignment: ~
nodes: nodes:
@ -35,10 +35,10 @@ nodes:
- "<" - "<"
- "0" - "0"
span: span:
start: 22 start: 25
length: 10 length: 10
tag_span: tag_span:
start: 22 start: 25
length: 10 length: 10
assignment: ~ assignment: ~
nodes: nodes:
@ -54,10 +54,10 @@ nodes:
bits: bits:
- else - else
span: span:
start: 46 start: 49
length: 4 length: 4
tag_span: tag_span:
start: 46 start: 49
length: 4 length: 4
assignment: ~ assignment: ~
nodes: nodes:
@ -73,10 +73,10 @@ nodes:
bits: bits:
- endif - endif
span: span:
start: 60 start: 63
length: 5 length: 5
tag_span: tag_span:
start: 60 start: 63
length: 5 length: 5
assignment: ~ assignment: ~
assignments: ~ assignments: ~

View file

@ -13,10 +13,10 @@ nodes:
- in - in
- items - items
span: span:
start: 0 start: 3
length: 17 length: 17
tag_span: tag_span:
start: 0 start: 3
length: 17 length: 17
assignment: ~ assignment: ~
nodes: nodes:
@ -34,10 +34,10 @@ nodes:
bits: bits:
- empty - empty
span: span:
start: 33 start: 36
length: 5 length: 5
tag_span: tag_span:
start: 33 start: 36
length: 5 length: 5
assignment: ~ assignment: ~
nodes: nodes:
@ -53,10 +53,10 @@ nodes:
bits: bits:
- endfor - endfor
span: span:
start: 52 start: 55
length: 6 length: 6
tag_span: tag_span:
start: 52 start: 55
length: 6 length: 6
assignment: ~ assignment: ~
assignments: ~ assignments: ~

View file

@ -11,10 +11,10 @@ nodes:
- if - if
- user.is_authenticated - user.is_authenticated
span: span:
start: 0 start: 3
length: 24 length: 24
tag_span: tag_span:
start: 0 start: 3
length: 24 length: 24
assignment: ~ assignment: ~
nodes: nodes:
@ -30,10 +30,10 @@ nodes:
bits: bits:
- endif - endif
span: span:
start: 37 start: 40
length: 5 length: 5
tag_span: tag_span:
start: 37 start: 40
length: 5 length: 5
assignment: ~ assignment: ~
assignments: ~ assignments: ~

View file

@ -11,8 +11,8 @@ nodes:
- name: title - name: title
args: [] args: []
span: span:
start: 4 start: 10
length: 15 length: 5
span: span:
start: 3 start: 3
length: 15 length: 15

View file

@ -11,18 +11,18 @@ nodes:
args: args:
- "'nothing'" - "'nothing'"
span: span:
start: 4 start: 6
length: 35 length: 17
- name: title - name: title
args: [] args: []
span: span:
start: 4 start: 24
length: 35 length: 5
- name: upper - name: upper
args: [] args: []
span: span:
start: 4 start: 30
length: 35 length: 5
span: span:
start: 3 start: 3
length: 35 length: 35

View file

@ -16,10 +16,10 @@ nodes:
- if - if
- user.is_authenticated - user.is_authenticated
span: span:
start: 9 start: 12
length: 24 length: 24
tag_span: tag_span:
start: 9 start: 12
length: 24 length: 24
assignment: ~ assignment: ~
nodes: nodes:
@ -31,14 +31,14 @@ nodes:
- name: title - name: title
args: [] args: []
span: span:
start: 48 start: 54
length: 31 length: 5
- name: default - name: default
args: args:
- "'Guest'" - "'Guest'"
span: span:
start: 48 start: 60
length: 31 length: 15
span: span:
start: 47 start: 47
length: 31 length: 31
@ -52,10 +52,10 @@ nodes:
- in - in
- user.groups - user.groups
span: span:
start: 86 start: 89
length: 24 length: 24
tag_span: tag_span:
start: 86 start: 89
length: 24 length: 24
assignment: ~ assignment: ~
nodes: nodes:
@ -67,10 +67,10 @@ nodes:
- if - if
- forloop.first - forloop.first
span: span:
start: 125 start: 128
length: 16 length: 16
tag_span: tag_span:
start: 125 start: 128
length: 16 length: 16
assignment: ~ assignment: ~
nodes: nodes:
@ -86,10 +86,10 @@ nodes:
bits: bits:
- endif - endif
span: span:
start: 148 start: 151
length: 5 length: 5
tag_span: tag_span:
start: 148 start: 151
length: 5 length: 5
assignment: ~ assignment: ~
assignments: ~ assignments: ~
@ -110,10 +110,10 @@ nodes:
- not - not
- forloop.last - forloop.last
span: span:
start: 193 start: 196
length: 19 length: 19
tag_span: tag_span:
start: 193 start: 196
length: 19 length: 19
assignment: ~ assignment: ~
nodes: nodes:
@ -129,10 +129,10 @@ nodes:
bits: bits:
- endif - endif
span: span:
start: 220 start: 223
length: 5 length: 5
tag_span: tag_span:
start: 220 start: 223
length: 5 length: 5
assignment: ~ assignment: ~
assignments: ~ assignments: ~
@ -144,10 +144,10 @@ nodes:
- if - if
- forloop.last - forloop.last
span: span:
start: 240 start: 243
length: 15 length: 15
tag_span: tag_span:
start: 240 start: 243
length: 15 length: 15
assignment: ~ assignment: ~
nodes: nodes:
@ -163,10 +163,10 @@ nodes:
bits: bits:
- endif - endif
span: span:
start: 262 start: 265
length: 5 length: 5
tag_span: tag_span:
start: 262 start: 265
length: 5 length: 5
assignment: ~ assignment: ~
assignments: ~ assignments: ~
@ -177,10 +177,10 @@ nodes:
bits: bits:
- empty - empty
span: span:
start: 278 start: 281
length: 5 length: 5
tag_span: tag_span:
start: 278 start: 281
length: 5 length: 5
assignment: ~ assignment: ~
nodes: nodes:
@ -196,10 +196,10 @@ nodes:
bits: bits:
- endfor - endfor
span: span:
start: 314 start: 317
length: 6 length: 6
tag_span: tag_span:
start: 314 start: 317
length: 6 length: 6
assignment: ~ assignment: ~
assignments: ~ assignments: ~
@ -210,10 +210,10 @@ nodes:
bits: bits:
- else - else
span: span:
start: 327 start: 330
length: 4 length: 4
tag_span: tag_span:
start: 327 start: 330
length: 4 length: 4
assignment: ~ assignment: ~
nodes: nodes:
@ -229,10 +229,10 @@ nodes:
bits: bits:
- endif - endif
span: span:
start: 348 start: 351
length: 5 length: 5
tag_span: tag_span:
start: 348 start: 351
length: 5 length: 5
assignment: ~ assignment: ~
assignments: ~ assignments: ~

View file

@ -13,10 +13,10 @@ nodes:
- in - in
- items - items
span: span:
start: 0 start: 3
length: 17 length: 17
tag_span: tag_span:
start: 0 start: 3
length: 17 length: 17
assignment: ~ assignment: ~
nodes: nodes:
@ -28,10 +28,10 @@ nodes:
- if - if
- item.active - item.active
span: span:
start: 23 start: 26
length: 14 length: 14
tag_span: tag_span:
start: 23 start: 26
length: 14 length: 14
assignment: ~ assignment: ~
nodes: nodes:
@ -50,10 +50,10 @@ nodes:
bits: bits:
- endif - endif
span: span:
start: 58 start: 61
length: 5 length: 5
tag_span: tag_span:
start: 58 start: 61
length: 5 length: 5
assignment: ~ assignment: ~
assignments: ~ assignments: ~
@ -64,10 +64,10 @@ nodes:
bits: bits:
- endfor - endfor
span: span:
start: 69 start: 72
length: 6 length: 6
tag_span: tag_span:
start: 69 start: 72
length: 6 length: 6
assignment: ~ assignment: ~
assignments: ~ assignments: ~

View file

@ -21,17 +21,17 @@ nodes:
- if - if
- user.is_authenticated - user.is_authenticated
span: span:
start: 48 start: 51
length: 24 length: 24
tag_span: tag_span:
start: 48 start: 51
length: 24 length: 24
assignment: ~ assignment: ~
nodes: nodes:
- Comment: - Comment:
content: This if is unclosed which does matter content: This if is unclosed which does matter
span: span:
start: 87 start: 89
length: 37 length: 37
- Text: - Text:
content: "<p>Welcome" content: "<p>Welcome"
@ -59,7 +59,7 @@ nodes:
- Comment: - Comment:
content: "This div is unclosed which doesn't matter" content: "This div is unclosed which doesn't matter"
span: span:
start: 196 start: 198
length: 41 length: 41
- Block: - Block:
Block: Block:
@ -71,10 +71,10 @@ nodes:
- in - in
- items - items
span: span:
start: 252 start: 255
length: 17 length: 17
tag_span: tag_span:
start: 252 start: 255
length: 17 length: 17
assignment: ~ assignment: ~
nodes: nodes:
@ -102,10 +102,10 @@ nodes:
bits: bits:
- endfor - endfor
span: span:
start: 320 start: 323
length: 6 length: 6
tag_span: tag_span:
start: 320 start: 323
length: 6 length: 6
assignment: ~ assignment: ~
assignments: ~ assignments: ~

View file

@ -13,10 +13,10 @@ nodes:
- in - in
- items - items
span: span:
start: 0 start: 3
length: 17 length: 17
tag_span: tag_span:
start: 0 start: 3
length: 17 length: 17
assignment: ~ assignment: ~
nodes: nodes:

View file

@ -11,10 +11,10 @@ nodes:
- if - if
- user.is_authenticated - user.is_authenticated
span: span:
start: 0 start: 3
length: 24 length: 24
tag_span: tag_span:
start: 0 start: 3
length: 24 length: 24
assignment: ~ assignment: ~
nodes: nodes:

View file

@ -101,17 +101,17 @@ nodes:
- if - if
- user.is_authenticated - user.is_authenticated
span: span:
start: 463 start: 466
length: 24 length: 24
tag_span: tag_span:
start: 463 start: 466
length: 24 length: 24
assignment: ~ assignment: ~
nodes: nodes:
- Comment: - Comment:
content: Welcome message content: Welcome message
span: span:
start: 510 start: 512
length: 15 length: 15
- Text: - Text:
content: "<h1>Welcome," content: "<h1>Welcome,"
@ -126,14 +126,14 @@ nodes:
- name: title - name: title
args: [] args: []
span: span:
start: 565 start: 571
length: 31 length: 5
- name: default - name: default
args: args:
- "'Guest'" - "'Guest'"
span: span:
start: 565 start: 577
length: 31 length: 15
span: span:
start: 564 start: 564
length: 31 length: 31
@ -150,10 +150,10 @@ nodes:
- if - if
- user.is_staff - user.is_staff
span: span:
start: 621 start: 624
length: 16 length: 16
tag_span: tag_span:
start: 621 start: 624
length: 16 length: 16
assignment: ~ assignment: ~
nodes: nodes:
@ -169,10 +169,10 @@ nodes:
bits: bits:
- else - else
span: span:
start: 699 start: 702
length: 4 length: 4
tag_span: tag_span:
start: 699 start: 702
length: 4 length: 4
assignment: ~ assignment: ~
nodes: nodes:
@ -188,10 +188,10 @@ nodes:
bits: bits:
- endif - endif
span: span:
start: 764 start: 767
length: 5 length: 5
tag_span: tag_span:
start: 764 start: 767
length: 5 length: 5
assignment: ~ assignment: ~
assignments: ~ assignments: ~
@ -202,10 +202,10 @@ nodes:
bits: bits:
- endif - endif
span: span:
start: 788 start: 791
length: 5 length: 5
tag_span: tag_span:
start: 788 start: 791
length: 5 length: 5
assignment: ~ assignment: ~
assignments: ~ assignments: ~