This commit is contained in:
Josh Thomas 2025-01-06 16:06:47 -06:00
parent b10df9b8ca
commit a078faabf7
14 changed files with 214 additions and 218 deletions

View file

@ -97,7 +97,7 @@ impl From<Token> for Span {
let token_start = token.start().unwrap_or(0);
match token.token_type() {
TokenType::Comment(_, start, _) => token_start + start.len() as u32,
TokenType::DjangoBlock(_) | TokenType::DjangoVariable(_) => token_start + 2,
TokenType::DjangoBlock(_) | TokenType::DjangoVariable(_) => token_start + 3,
_ => token_start,
}
};

View file

@ -22,140 +22,136 @@ impl Lexer {
/// Lexes the entire input into a [`TokenStream`].
///
/// Calls `next_token` repeatedly until `is_at_end` reports that the input is
/// exhausted, appending each token in the order it was produced, and then
/// finalizes the stream with the lexer's current line number.
///
/// # Errors
///
/// Returns the first `LexerError` propagated out of `next_token`.
pub fn tokenize(&mut self) -> Result<TokenStream, LexerError> {
    let mut tokens = TokenStream::default();
    while !self.is_at_end() {
        // All per-token work (recording the start offset, dispatching on the
        // next character, line accounting) lives in `next_token`. Repeating
        // that logic inline here would lex a second token per iteration and
        // shadow -- and therefore silently drop -- the one returned below.
        let token = self.next_token()?;
        tokens.add_token(token);
    }
    tokens.finalize(self.line);
    Ok(tokens)
}
/// Lexes exactly one token starting at `self.current`.
///
/// Records the token's start offset in `self.start`, dispatches on the next
/// one or two characters to choose a `TokenType`, and builds the `Token` from
/// that type, the current `self.line`, and `Some(self.start)`. After the token
/// has been built, `self.line` is incremented if the last consumed character
/// was a line terminator.
///
/// # Errors
///
/// Propagates (via `?`) any `LexerError` returned by the `peek*`, `consume*`
/// and `matches` helpers.
fn next_token(&mut self) -> Result<Token, LexerError> {
// Remember where this token begins; used for the token's start offset and
// for the whitespace-run length below.
self.start = self.current;
let token_type = match self.peek()? {
// Django template delimiters: `{% %}`, `{{ }}` and `{# #}`.
// NOTE(review): `consume_until` presumably returns the text before the
// delimiter and leaves the delimiter unconsumed -- every branch below
// consumes the closing delimiter itself afterwards. Confirm against its
// definition.
'{' => match self.peek_next()? {
'%' => {
self.consume_n(2)?; // {%
let content = self.consume_until("%}")?;
self.consume_n(2)?; // %}
TokenType::DjangoBlock(content)
}
'{' => {
self.consume_n(2)?; // {{
let content = self.consume_until("}}")?;
self.consume_n(2)?; // }}
TokenType::DjangoVariable(content)
}
'#' => {
self.consume_n(2)?; // {#
let content = self.consume_until("#}")?;
self.consume_n(2)?; // #}
TokenType::Comment(content, "{#".to_string(), Some("#}".to_string()))
}
// A `{` that does not open a Django construct is emitted as a
// one-character text token.
_ => {
self.consume()?; // {
TokenType::Text(String::from("{"))
}
},
// HTML: closing tag, `<!-- -->` comment, or some kind of opening tag.
'<' => match self.peek_next()? {
'/' => {
self.consume_n(2)?; // </
let tag = self.consume_until(">")?;
self.consume()?; // >
TokenType::HtmlTagClose(tag)
}
// The guard requires the full `<!--` opener; any other `<!...`
// (e.g. a doctype) falls through to the generic tag branch below.
'!' if self.matches("<!--")? => {
self.consume_n(4)?; // <!--
let content = self.consume_until("-->")?;
self.consume_n(3)?; // -->
TokenType::Comment(content, "<!--".to_string(), Some("-->".to_string()))
}
_ => {
self.consume()?; // consume <
let tag = self.consume_until(">")?;
self.consume()?; // consume >
// Classification is by plain prefix/suffix of the text between
// `<` and `>`: any tag text beginning with "script" or "style"
// is treated as a script/style opener, and those checks win
// over the trailing-`/` (void tag) check.
if tag.starts_with("script") {
TokenType::ScriptTagOpen(tag)
} else if tag.starts_with("style") {
TokenType::StyleTagOpen(tag)
} else if tag.ends_with("/") {
// Void tags are stored without their trailing slash(es).
TokenType::HtmlTagVoid(tag.trim_end_matches("/").to_string())
} else {
TokenType::HtmlTagOpen(tag)
}
}
},
// JS/CSS-style comments; a lone `/` is emitted as text.
'/' => match self.peek_next()? {
'/' => {
self.consume_n(2)?; // //
// Line comment: there is no closing delimiter, and nothing is
// consumed after `consume_until("\n")` in this branch.
let content = self.consume_until("\n")?;
TokenType::Comment(content, "//".to_string(), None)
}
'*' => {
self.consume_n(2)?; // /*
let content = self.consume_until("*/")?;
self.consume_n(2)?; // */
TokenType::Comment(content, "/*".to_string(), Some("*/".to_string()))
}
_ => {
self.consume()?;
TokenType::Text("/".to_string())
}
},
c if c.is_whitespace() => {
if c == '\n' || c == '\r' {
// A line terminator becomes a single Newline token; `\r\n` is
// consumed as one unit.
self.consume()?; // \r or \n
// NOTE(review): this `peek` also runs when `\r` is the last
// character of the input; whether that yields an error depends
// on `peek_at` -- confirm.
if c == '\r' && self.peek()? == '\n' {
self.consume()?; // \n of \r\n
}
TokenType::Newline
} else {
// A run of whitespace that stops before any `\n` or `\r`.
self.consume()?; // Consume the first whitespace
while !self.is_at_end() && self.peek()?.is_whitespace() {
if self.peek()? == '\n' || self.peek()? == '\r' {
break;
}
self.consume()?;
}
// Length of the run, measured as how far `current` advanced
// from `start`.
let whitespace_count = self.current - self.start;
TokenType::Whitespace(whitespace_count)
}
}
// Anything else is plain text, accumulated until the next `{`, `<` or
// `\n`. `\r` is not a stop character here, so it can end up inside the
// text.
_ => {
let mut text = String::new();
while !self.is_at_end() {
let c = self.peek()?;
if c == '{' || c == '<' || c == '\n' {
break;
}
text.push(c);
self.consume()?;
}
TokenType::Text(text)
}
};
// The token is built before the line counter is updated, so it carries the
// line number in effect before this token's trailing line terminator is
// counted.
let token = Token::new(token_type, self.line, Some(self.start));
// Line accounting looks only at the last consumed character, so at most one
// line is counted per token.
match self.peek_previous()? {
'\n' => self.line += 1,
'\r' => {
self.line += 1;
// A `\n` directly after the `\r` is skipped by advancing `current`
// directly; no token is emitted for it.
// NOTE(review): like the Newline branch above, this `peek` is
// reached even when `\r` was the final character -- confirm the
// end-of-input behavior of `peek_at`.
if self.peek()? == '\n' {
self.current += 1;
}
}
_ => {}
}
Ok(token)
}
/// Looks at the character at offset 0 via `peek_at(0)`.
///
/// Takes `&self`, so the lexer position is not advanced. Any `LexerError`
/// from `peek_at` is returned unchanged.
fn peek(&self) -> Result<char, LexerError> {
self.peek_at(0)
}

View file

@ -11,7 +11,7 @@ nodes:
- Comment:
content: Django comment
span:
start: 21
start: 23
length: 14
line_offsets:
- 0

View file

@ -13,10 +13,10 @@ nodes:
- ">"
- "0"
span:
start: 0
start: 3
length: 8
tag_span:
start: 0
start: 3
length: 8
assignment: ~
nodes:
@ -35,10 +35,10 @@ nodes:
- "<"
- "0"
span:
start: 22
start: 25
length: 10
tag_span:
start: 22
start: 25
length: 10
assignment: ~
nodes:
@ -54,10 +54,10 @@ nodes:
bits:
- else
span:
start: 46
start: 49
length: 4
tag_span:
start: 46
start: 49
length: 4
assignment: ~
nodes:
@ -73,10 +73,10 @@ nodes:
bits:
- endif
span:
start: 60
start: 63
length: 5
tag_span:
start: 60
start: 63
length: 5
assignment: ~
assignments: ~

View file

@ -13,10 +13,10 @@ nodes:
- in
- items
span:
start: 0
start: 3
length: 17
tag_span:
start: 0
start: 3
length: 17
assignment: ~
nodes:
@ -34,10 +34,10 @@ nodes:
bits:
- empty
span:
start: 33
start: 36
length: 5
tag_span:
start: 33
start: 36
length: 5
assignment: ~
nodes:
@ -53,10 +53,10 @@ nodes:
bits:
- endfor
span:
start: 52
start: 55
length: 6
tag_span:
start: 52
start: 55
length: 6
assignment: ~
assignments: ~

View file

@ -11,10 +11,10 @@ nodes:
- if
- user.is_authenticated
span:
start: 0
start: 3
length: 24
tag_span:
start: 0
start: 3
length: 24
assignment: ~
nodes:
@ -30,10 +30,10 @@ nodes:
bits:
- endif
span:
start: 37
start: 40
length: 5
tag_span:
start: 37
start: 40
length: 5
assignment: ~
assignments: ~

View file

@ -11,8 +11,8 @@ nodes:
- name: title
args: []
span:
start: 4
length: 15
start: 10
length: 5
span:
start: 3
length: 15

View file

@ -11,18 +11,18 @@ nodes:
args:
- "'nothing'"
span:
start: 4
length: 35
start: 6
length: 17
- name: title
args: []
span:
start: 4
length: 35
start: 24
length: 5
- name: upper
args: []
span:
start: 4
length: 35
start: 30
length: 5
span:
start: 3
length: 35

View file

@ -16,10 +16,10 @@ nodes:
- if
- user.is_authenticated
span:
start: 9
start: 12
length: 24
tag_span:
start: 9
start: 12
length: 24
assignment: ~
nodes:
@ -31,14 +31,14 @@ nodes:
- name: title
args: []
span:
start: 48
length: 31
start: 54
length: 5
- name: default
args:
- "'Guest'"
span:
start: 48
length: 31
start: 60
length: 15
span:
start: 47
length: 31
@ -52,10 +52,10 @@ nodes:
- in
- user.groups
span:
start: 86
start: 89
length: 24
tag_span:
start: 86
start: 89
length: 24
assignment: ~
nodes:
@ -67,10 +67,10 @@ nodes:
- if
- forloop.first
span:
start: 125
start: 128
length: 16
tag_span:
start: 125
start: 128
length: 16
assignment: ~
nodes:
@ -86,10 +86,10 @@ nodes:
bits:
- endif
span:
start: 148
start: 151
length: 5
tag_span:
start: 148
start: 151
length: 5
assignment: ~
assignments: ~
@ -110,10 +110,10 @@ nodes:
- not
- forloop.last
span:
start: 193
start: 196
length: 19
tag_span:
start: 193
start: 196
length: 19
assignment: ~
nodes:
@ -129,10 +129,10 @@ nodes:
bits:
- endif
span:
start: 220
start: 223
length: 5
tag_span:
start: 220
start: 223
length: 5
assignment: ~
assignments: ~
@ -144,10 +144,10 @@ nodes:
- if
- forloop.last
span:
start: 240
start: 243
length: 15
tag_span:
start: 240
start: 243
length: 15
assignment: ~
nodes:
@ -163,10 +163,10 @@ nodes:
bits:
- endif
span:
start: 262
start: 265
length: 5
tag_span:
start: 262
start: 265
length: 5
assignment: ~
assignments: ~
@ -177,10 +177,10 @@ nodes:
bits:
- empty
span:
start: 278
start: 281
length: 5
tag_span:
start: 278
start: 281
length: 5
assignment: ~
nodes:
@ -196,10 +196,10 @@ nodes:
bits:
- endfor
span:
start: 314
start: 317
length: 6
tag_span:
start: 314
start: 317
length: 6
assignment: ~
assignments: ~
@ -210,10 +210,10 @@ nodes:
bits:
- else
span:
start: 327
start: 330
length: 4
tag_span:
start: 327
start: 330
length: 4
assignment: ~
nodes:
@ -229,10 +229,10 @@ nodes:
bits:
- endif
span:
start: 348
start: 351
length: 5
tag_span:
start: 348
start: 351
length: 5
assignment: ~
assignments: ~

View file

@ -13,10 +13,10 @@ nodes:
- in
- items
span:
start: 0
start: 3
length: 17
tag_span:
start: 0
start: 3
length: 17
assignment: ~
nodes:
@ -28,10 +28,10 @@ nodes:
- if
- item.active
span:
start: 23
start: 26
length: 14
tag_span:
start: 23
start: 26
length: 14
assignment: ~
nodes:
@ -50,10 +50,10 @@ nodes:
bits:
- endif
span:
start: 58
start: 61
length: 5
tag_span:
start: 58
start: 61
length: 5
assignment: ~
assignments: ~
@ -64,10 +64,10 @@ nodes:
bits:
- endfor
span:
start: 69
start: 72
length: 6
tag_span:
start: 69
start: 72
length: 6
assignment: ~
assignments: ~

View file

@ -21,17 +21,17 @@ nodes:
- if
- user.is_authenticated
span:
start: 48
start: 51
length: 24
tag_span:
start: 48
start: 51
length: 24
assignment: ~
nodes:
- Comment:
content: This if is unclosed which does matter
span:
start: 87
start: 89
length: 37
- Text:
content: "<p>Welcome"
@ -59,7 +59,7 @@ nodes:
- Comment:
content: "This div is unclosed which doesn't matter"
span:
start: 196
start: 198
length: 41
- Block:
Block:
@ -71,10 +71,10 @@ nodes:
- in
- items
span:
start: 252
start: 255
length: 17
tag_span:
start: 252
start: 255
length: 17
assignment: ~
nodes:
@ -102,10 +102,10 @@ nodes:
bits:
- endfor
span:
start: 320
start: 323
length: 6
tag_span:
start: 320
start: 323
length: 6
assignment: ~
assignments: ~

View file

@ -13,10 +13,10 @@ nodes:
- in
- items
span:
start: 0
start: 3
length: 17
tag_span:
start: 0
start: 3
length: 17
assignment: ~
nodes:

View file

@ -11,10 +11,10 @@ nodes:
- if
- user.is_authenticated
span:
start: 0
start: 3
length: 24
tag_span:
start: 0
start: 3
length: 24
assignment: ~
nodes:

View file

@ -101,17 +101,17 @@ nodes:
- if
- user.is_authenticated
span:
start: 463
start: 466
length: 24
tag_span:
start: 463
start: 466
length: 24
assignment: ~
nodes:
- Comment:
content: Welcome message
span:
start: 510
start: 512
length: 15
- Text:
content: "<h1>Welcome,"
@ -126,14 +126,14 @@ nodes:
- name: title
args: []
span:
start: 565
length: 31
start: 571
length: 5
- name: default
args:
- "'Guest'"
span:
start: 565
length: 31
start: 577
length: 15
span:
start: 564
length: 31
@ -150,10 +150,10 @@ nodes:
- if
- user.is_staff
span:
start: 621
start: 624
length: 16
tag_span:
start: 621
start: 624
length: 16
assignment: ~
nodes:
@ -169,10 +169,10 @@ nodes:
bits:
- else
span:
start: 699
start: 702
length: 4
tag_span:
start: 699
start: 702
length: 4
assignment: ~
nodes:
@ -188,10 +188,10 @@ nodes:
bits:
- endif
span:
start: 764
start: 767
length: 5
tag_span:
start: 764
start: 767
length: 5
assignment: ~
assignments: ~
@ -202,10 +202,10 @@ nodes:
bits:
- endif
span:
start: 788
start: 791
length: 5
tag_span:
start: 788
start: 791
length: 5
assignment: ~
assignments: ~