revert changes to lexer

This commit is contained in:
Josh Thomas 2025-01-06 09:36:29 -06:00
parent 725a603483
commit bc54814229
17 changed files with 439 additions and 468 deletions

View file

@ -45,7 +45,7 @@ impl Lexer {
self.consume_n(2)?; // {{
let content = self.consume_until("}}")?;
self.consume_n(2)?; // }}
TokenType::DjangoVariable(content.trim().to_string())
TokenType::DjangoVariable(content)
}
'#' => {
self.consume_n(2)?; // {#
@ -54,42 +54,106 @@ impl Lexer {
TokenType::Comment(content, "{#".to_string(), Some("#}".to_string()))
}
_ => {
self.consume()?;
TokenType::Text("{".to_string())
self.consume()?; // {
TokenType::Text(String::from("{"))
}
},
'\n' => {
self.consume()?;
let token = TokenType::Newline;
self.line += 1;
token
}
' ' | '\t' | '\r' => {
let mut count = 1;
self.consume()?;
while let Ok(c) = self.peek() {
if c != ' ' && c != '\t' && c != '\r' {
break;
}
self.consume()?;
count += 1;
'<' => match self.peek_next()? {
'/' => {
self.consume_n(2)?; // </
let tag = self.consume_until(">")?;
self.consume()?; // >
TokenType::HtmlTagClose(tag)
}
'!' if self.matches("<!--")? => {
self.consume_n(4)?; // <!--
let content = self.consume_until("-->")?;
self.consume_n(3)?; // -->
TokenType::Comment(content, "<!--".to_string(), Some("-->".to_string()))
}
_ => {
self.consume()?; // consume <
let tag = self.consume_until(">")?;
self.consume()?; // consume >
if tag.starts_with("script") {
TokenType::ScriptTagOpen(tag)
} else if tag.starts_with("style") {
TokenType::StyleTagOpen(tag)
} else if tag.ends_with("/") {
TokenType::HtmlTagVoid(tag.trim_end_matches("/").to_string())
} else {
TokenType::HtmlTagOpen(tag)
}
}
},
'/' => match self.peek_next()? {
'/' => {
self.consume_n(2)?; // //
let content = self.consume_until("\n")?;
TokenType::Comment(content, "//".to_string(), None)
}
'*' => {
self.consume_n(2)?; // /*
let content = self.consume_until("*/")?;
self.consume_n(2)?; // */
TokenType::Comment(content, "/*".to_string(), Some("*/".to_string()))
}
_ => {
self.consume()?;
TokenType::Text("/".to_string())
}
},
c if c.is_whitespace() => {
if c == '\n' || c == '\r' {
self.consume()?; // \r or \n
if c == '\r' && self.peek()? == '\n' {
self.consume()?; // \n of \r\n
}
TokenType::Newline
} else {
self.consume()?; // Consume the first whitespace
while !self.is_at_end() && self.peek()?.is_whitespace() {
if self.peek()? == '\n' || self.peek()? == '\r' {
break;
}
self.consume()?;
}
let whitespace_count = self.current - self.start;
TokenType::Whitespace(whitespace_count)
}
TokenType::Whitespace(count)
}
_ => {
let mut text = String::new();
while !self.is_at_end() {
let c = self.peek()?;
if c == '{' || c == '\n' || c == ' ' || c == '\t' || c == '\r' {
if c == '{' || c == '<' || c == '\n' {
break;
}
text.push(self.consume()?);
text.push(c);
self.consume()?;
}
TokenType::Text(text)
}
};
Ok(Token::new(token_type, self.line, Some(self.start)))
let token = Token::new(token_type, self.line, Some(self.start));
match self.peek_previous()? {
'\n' => self.line += 1,
'\r' => {
self.line += 1;
if self.peek()? == '\n' {
self.current += 1;
}
}
_ => {}
}
Ok(token)
}
fn peek(&self) -> Result<char, LexerError> {
@ -246,7 +310,15 @@ mod tests {
#[test]
fn test_tokenize_comments() {
let source = r#"<!-- HTML comment -->
{# Django comment #}"#;
{# Django comment #}
<script>
// JS single line comment
/* JS multi-line
comment */
</script>
<style>
/* CSS comment */
</style>"#;
let mut lexer = Lexer::new(source);
let tokens = lexer.tokenize().unwrap();
insta::assert_yaml_snapshot!(tokens);
@ -285,7 +357,7 @@ mod tests {
assert!(Lexer::new("{{ user.name").tokenize().is_err()); // No closing }}
assert!(Lexer::new("{% if").tokenize().is_err()); // No closing %}
assert!(Lexer::new("{#").tokenize().is_err()); // No closing #}
assert!(Lexer::new("<div").tokenize().is_ok()); // No closing >, but HTML is treated as text
assert!(Lexer::new("<div").tokenize().is_err()); // No closing >
// Invalid characters or syntax within tokens
assert!(Lexer::new("{{}}").tokenize().is_ok()); // Empty but valid

View file

@ -3,35 +3,14 @@ source: crates/djls-template-ast/src/lexer.rs
expression: tokens
---
- token_type:
Text: "<!--"
Comment:
- HTML comment
- "<!--"
- "-->"
line: 1
start: 0
- token_type:
Whitespace: 1
line: 1
start: 4
- token_type:
Text: HTML
line: 1
start: 5
- token_type:
Whitespace: 1
line: 1
start: 9
- token_type:
Text: comment
line: 1
start: 10
- token_type:
Whitespace: 1
line: 1
start: 17
- token_type:
Text: "-->"
line: 1
start: 18
- token_type: Newline
line: 2
line: 1
start: 21
- token_type:
Comment:
@ -40,6 +19,76 @@ expression: tokens
- "#}"
line: 2
start: 22
- token_type: Eof
- token_type: Newline
line: 2
start: 42
- token_type:
ScriptTagOpen: script
line: 3
start: 43
- token_type: Newline
line: 3
start: 51
- token_type:
Whitespace: 4
line: 4
start: 52
- token_type:
Comment:
- JS single line comment
- //
- ~
line: 4
start: 56
- token_type: Newline
line: 4
start: 81
- token_type:
Whitespace: 4
line: 5
start: 82
- token_type:
Comment:
- "JS multi-line\n comment"
- /*
- "*/"
line: 5
start: 86
- token_type: Newline
line: 5
start: 120
- token_type:
HtmlTagClose: script
line: 6
start: 121
- token_type: Newline
line: 6
start: 130
- token_type:
StyleTagOpen: style
line: 7
start: 131
- token_type: Newline
line: 7
start: 138
- token_type:
Whitespace: 4
line: 8
start: 139
- token_type:
Comment:
- CSS comment
- /*
- "*/"
line: 8
start: 143
- token_type: Newline
line: 8
start: 160
- token_type:
HtmlTagClose: style
line: 9
start: 161
- token_type: Eof
line: 9
start: ~

View file

@ -3,100 +3,59 @@ source: crates/djls-template-ast/src/lexer.rs
expression: tokens
---
- token_type:
Text: "<!DOCTYPE"
HtmlTagOpen: "!DOCTYPE html"
line: 1
start: 0
- token_type:
Whitespace: 1
line: 1
start: 9
- token_type:
Text: html>
line: 1
start: 10
- token_type: Newline
line: 2
line: 1
start: 15
- token_type:
Text: "<html>"
HtmlTagOpen: html
line: 2
start: 16
- token_type: Newline
line: 3
line: 2
start: 22
- token_type:
Text: "<head>"
HtmlTagOpen: head
line: 3
start: 23
- token_type: Newline
line: 4
line: 3
start: 29
- token_type:
Whitespace: 4
line: 4
start: 30
- token_type:
Text: "<style"
StyleTagOpen: "style type=\"text/css\""
line: 4
start: 34
- token_type:
Whitespace: 1
line: 4
start: 40
- token_type:
Text: "type=\"text/css\">"
line: 4
start: 41
- token_type: Newline
line: 5
line: 4
start: 57
- token_type:
Whitespace: 8
line: 5
start: 58
- token_type:
Text: /*
Comment:
- Style header
- /*
- "*/"
line: 5
start: 66
- token_type:
Whitespace: 1
line: 5
start: 68
- token_type:
Text: Style
line: 5
start: 69
- token_type:
Whitespace: 1
line: 5
start: 74
- token_type:
Text: header
line: 5
start: 75
- token_type:
Whitespace: 1
line: 5
start: 81
- token_type:
Text: "*/"
line: 5
start: 82
- token_type: Newline
line: 6
line: 5
start: 84
- token_type:
Whitespace: 8
line: 6
start: 85
- token_type:
Text: ".header"
Text: ".header "
line: 6
start: 93
- token_type:
Whitespace: 1
line: 6
start: 100
- token_type:
Text: "{"
line: 6
@ -106,165 +65,87 @@ expression: tokens
line: 6
start: 102
- token_type:
Text: "color:"
Text: "color: blue; }"
line: 6
start: 103
- token_type:
Whitespace: 1
line: 6
start: 109
- token_type:
Text: blue;
line: 6
start: 110
- token_type:
Whitespace: 1
line: 6
start: 115
- token_type:
Text: "}"
line: 6
start: 116
- token_type: Newline
line: 7
line: 6
start: 117
- token_type:
Whitespace: 4
line: 7
start: 118
- token_type:
Text: "</style>"
HtmlTagClose: style
line: 7
start: 122
- token_type: Newline
line: 8
line: 7
start: 130
- token_type:
Whitespace: 4
line: 8
start: 131
- token_type:
Text: "<script"
ScriptTagOpen: "script type=\"text/javascript\""
line: 8
start: 135
- token_type:
Whitespace: 1
line: 8
start: 142
- token_type:
Text: "type=\"text/javascript\">"
line: 8
start: 143
- token_type: Newline
line: 9
line: 8
start: 166
- token_type:
Whitespace: 8
line: 9
start: 167
- token_type:
Text: //
Comment:
- Init app
- //
- ~
line: 9
start: 175
- token_type:
Whitespace: 1
line: 9
start: 177
- token_type:
Text: Init
line: 9
start: 178
- token_type:
Whitespace: 1
line: 9
start: 182
- token_type:
Text: app
line: 9
start: 183
- token_type: Newline
line: 10
line: 9
start: 186
- token_type:
Whitespace: 8
line: 10
start: 187
- token_type:
Text: const
Text: "const app = "
line: 10
start: 195
- token_type:
Whitespace: 1
line: 10
start: 200
- token_type:
Text: app
line: 10
start: 201
- token_type:
Whitespace: 1
line: 10
start: 204
- token_type:
Text: "="
line: 10
start: 205
- token_type:
Whitespace: 1
line: 10
start: 206
- token_type:
Text: "{"
line: 10
start: 207
- token_type: Newline
line: 11
line: 10
start: 208
- token_type:
Whitespace: 12
line: 11
start: 209
- token_type:
Text: /*
Comment:
- Config
- /*
- "*/"
line: 11
start: 221
- token_type:
Whitespace: 1
line: 11
start: 223
- token_type:
Text: Config
line: 11
start: 224
- token_type:
Whitespace: 1
line: 11
start: 230
- token_type:
Text: "*/"
line: 11
start: 231
- token_type: Newline
line: 12
line: 11
start: 233
- token_type:
Whitespace: 12
line: 12
start: 234
- token_type:
Text: "debug:"
Text: "debug: true"
line: 12
start: 246
- token_type:
Whitespace: 1
line: 12
start: 252
- token_type:
Text: "true"
line: 12
start: 253
- token_type: Newline
line: 13
line: 12
start: 257
- token_type:
Whitespace: 8
@ -275,110 +156,57 @@ expression: tokens
line: 13
start: 266
- token_type: Newline
line: 14
line: 13
start: 268
- token_type:
Whitespace: 4
line: 14
start: 269
- token_type:
Text: "</script>"
HtmlTagClose: script
line: 14
start: 273
- token_type: Newline
line: 15
line: 14
start: 282
- token_type:
Text: "</head>"
HtmlTagClose: head
line: 15
start: 283
- token_type: Newline
line: 16
line: 15
start: 290
- token_type:
Text: "<body>"
HtmlTagOpen: body
line: 16
start: 291
- token_type: Newline
line: 17
line: 16
start: 297
- token_type:
Whitespace: 4
line: 17
start: 298
- token_type:
Text: "<!--"
Comment:
- Header section
- "<!--"
- "-->"
line: 17
start: 302
- token_type:
Whitespace: 1
line: 17
start: 306
- token_type:
Text: Header
line: 17
start: 307
- token_type:
Whitespace: 1
line: 17
start: 313
- token_type:
Text: section
line: 17
start: 314
- token_type:
Whitespace: 1
line: 17
start: 321
- token_type:
Text: "-->"
line: 17
start: 322
- token_type: Newline
line: 18
line: 17
start: 325
- token_type:
Whitespace: 4
line: 18
start: 326
- token_type:
Text: "<div"
HtmlTagOpen: "div class=\"header\" id=\"main\" data-value=\"123\" disabled"
line: 18
start: 330
- token_type:
Whitespace: 1
line: 18
start: 334
- token_type:
Text: "class=\"header\""
line: 18
start: 335
- token_type:
Whitespace: 1
line: 18
start: 349
- token_type:
Text: "id=\"main\""
line: 18
start: 350
- token_type:
Whitespace: 1
line: 18
start: 359
- token_type:
Text: "data-value=\"123\""
line: 18
start: 360
- token_type:
Whitespace: 1
line: 18
start: 376
- token_type:
Text: disabled>
line: 18
start: 377
- token_type: Newline
line: 19
line: 18
start: 386
- token_type:
Whitespace: 8
@ -389,7 +217,7 @@ expression: tokens
line: 19
start: 395
- token_type: Newline
line: 20
line: 19
start: 425
- token_type:
Whitespace: 12
@ -403,30 +231,34 @@ expression: tokens
line: 20
start: 438
- token_type: Newline
line: 21
line: 20
start: 459
- token_type:
Whitespace: 12
line: 21
start: 460
- token_type:
Text: "<h1>Welcome,"
HtmlTagOpen: h1
line: 21
start: 472
- token_type:
Whitespace: 1
Text: "Welcome, "
line: 21
start: 484
start: 476
- token_type:
DjangoVariable: "user.name|default:\"Guest\"|title"
line: 21
start: 485
- token_type:
Text: "!</h1>"
Text: "!"
line: 21
start: 522
- token_type:
HtmlTagClose: h1
line: 21
start: 523
- token_type: Newline
line: 22
line: 21
start: 528
- token_type:
Whitespace: 12
@ -437,18 +269,26 @@ expression: tokens
line: 22
start: 541
- token_type: Newline
line: 23
line: 22
start: 563
- token_type:
Whitespace: 16
line: 23
start: 564
- token_type:
Text: "<span>Admin</span>"
HtmlTagOpen: span
line: 23
start: 580
- token_type:
Text: Admin
line: 23
start: 586
- token_type:
HtmlTagClose: span
line: 23
start: 591
- token_type: Newline
line: 24
line: 23
start: 598
- token_type:
Whitespace: 12
@ -459,18 +299,26 @@ expression: tokens
line: 24
start: 611
- token_type: Newline
line: 25
line: 24
start: 621
- token_type:
Whitespace: 16
line: 25
start: 622
- token_type:
Text: "<span>User</span>"
HtmlTagOpen: span
line: 25
start: 638
- token_type:
Text: User
line: 25
start: 644
- token_type:
HtmlTagClose: span
line: 25
start: 648
- token_type: Newline
line: 26
line: 25
start: 655
- token_type:
Whitespace: 12
@ -481,7 +329,7 @@ expression: tokens
line: 26
start: 668
- token_type: Newline
line: 27
line: 26
start: 679
- token_type:
Whitespace: 8
@ -492,28 +340,28 @@ expression: tokens
line: 27
start: 688
- token_type: Newline
line: 28
line: 27
start: 699
- token_type:
Whitespace: 4
line: 28
start: 700
- token_type:
Text: "</div>"
HtmlTagClose: div
line: 28
start: 704
- token_type: Newline
line: 29
line: 28
start: 710
- token_type:
Text: "</body>"
HtmlTagClose: body
line: 29
start: 711
- token_type: Newline
line: 30
line: 29
start: 718
- token_type:
Text: "</html>"
HtmlTagClose: html
line: 30
start: 719
- token_type: Eof

View file

@ -3,33 +3,13 @@ source: crates/djls-template-ast/src/lexer.rs
expression: tokens
---
- token_type:
Text: "<div"
HtmlTagOpen: "div class=\"container\" id=\"main\" disabled"
line: 1
start: 0
- token_type:
Whitespace: 1
HtmlTagClose: div
line: 1
start: 4
- token_type:
Text: "class=\"container\""
line: 1
start: 5
- token_type:
Whitespace: 1
line: 1
start: 22
- token_type:
Text: "id=\"main\""
line: 1
start: 23
- token_type:
Whitespace: 1
line: 1
start: 32
- token_type:
Text: disabled></div>
line: 1
start: 33
start: 42
- token_type: Eof
line: 1
start: ~

View file

@ -3,143 +3,66 @@ source: crates/djls-template-ast/src/lexer.rs
expression: tokens
---
- token_type:
Text: "<script"
ScriptTagOpen: "script type=\"text/javascript\""
line: 1
start: 0
- token_type:
Whitespace: 1
line: 1
start: 7
- token_type:
Text: "type=\"text/javascript\">"
line: 1
start: 8
- token_type: Newline
line: 2
line: 1
start: 31
- token_type:
Whitespace: 4
line: 2
start: 32
- token_type:
Text: //
Comment:
- Single line comment
- //
- ~
line: 2
start: 36
- token_type:
Whitespace: 1
line: 2
start: 38
- token_type:
Text: Single
line: 2
start: 39
- token_type:
Whitespace: 1
line: 2
start: 45
- token_type:
Text: line
line: 2
start: 46
- token_type:
Whitespace: 1
line: 2
start: 50
- token_type:
Text: comment
line: 2
start: 51
- token_type: Newline
line: 3
line: 2
start: 58
- token_type:
Whitespace: 4
line: 3
start: 59
- token_type:
Text: const
Text: const x = 1;
line: 3
start: 63
- token_type:
Whitespace: 1
line: 3
start: 68
- token_type:
Text: x
line: 3
start: 69
- token_type:
Whitespace: 1
line: 3
start: 70
- token_type:
Text: "="
line: 3
start: 71
- token_type:
Whitespace: 1
line: 3
start: 72
- token_type:
Text: 1;
line: 3
start: 73
- token_type: Newline
line: 4
line: 3
start: 75
- token_type:
Whitespace: 4
line: 4
start: 76
- token_type:
Text: /*
Comment:
- "Multi-line\n comment"
- /*
- "*/"
line: 4
start: 80
- token_type:
Whitespace: 1
line: 4
start: 82
- token_type:
Text: Multi-line
line: 4
start: 83
- token_type: Newline
line: 5
start: 93
- token_type:
Whitespace: 7
line: 5
start: 94
- token_type:
Text: comment
line: 5
start: 101
- token_type:
Whitespace: 1
line: 5
start: 108
- token_type:
Text: "*/"
line: 5
start: 109
- token_type: Newline
line: 6
line: 4
start: 111
- token_type:
Whitespace: 4
line: 6
line: 5
start: 112
- token_type:
Text: console.log(x);
line: 6
line: 5
start: 116
- token_type: Newline
line: 7
line: 5
start: 131
- token_type:
Text: "</script>"
line: 7
HtmlTagClose: script
line: 6
start: 132
- token_type: Eof
line: 7
line: 6
start: ~

View file

@ -3,92 +3,51 @@ source: crates/djls-template-ast/src/lexer.rs
expression: tokens
---
- token_type:
Text: "<style"
StyleTagOpen: "style type=\"text/css\""
line: 1
start: 0
- token_type:
Whitespace: 1
line: 1
start: 6
- token_type:
Text: "type=\"text/css\">"
line: 1
start: 7
- token_type: Newline
line: 2
line: 1
start: 23
- token_type:
Whitespace: 4
line: 2
start: 24
- token_type:
Text: /*
Comment:
- Header styles
- /*
- "*/"
line: 2
start: 28
- token_type:
Whitespace: 1
line: 2
start: 30
- token_type:
Text: Header
line: 2
start: 31
- token_type:
Whitespace: 1
line: 2
start: 37
- token_type:
Text: styles
line: 2
start: 38
- token_type:
Whitespace: 1
line: 2
start: 44
- token_type:
Text: "*/"
line: 2
start: 45
- token_type: Newline
line: 3
line: 2
start: 47
- token_type:
Whitespace: 4
line: 3
start: 48
- token_type:
Text: ".header"
Text: ".header "
line: 3
start: 52
- token_type:
Whitespace: 1
line: 3
start: 59
- token_type:
Text: "{"
line: 3
start: 60
- token_type: Newline
line: 4
line: 3
start: 61
- token_type:
Whitespace: 8
line: 4
start: 62
- token_type:
Text: "color:"
Text: "color: blue;"
line: 4
start: 70
- token_type:
Whitespace: 1
line: 4
start: 76
- token_type:
Text: blue;
line: 4
start: 77
- token_type: Newline
line: 5
line: 4
start: 82
- token_type:
Whitespace: 4
@ -99,10 +58,10 @@ expression: tokens
line: 5
start: 87
- token_type: Newline
line: 6
line: 5
start: 88
- token_type:
Text: "</style>"
HtmlTagClose: style
line: 6
start: 89
- token_type: Eof

View file

@ -3,11 +3,11 @@ source: crates/djls-template-ast/src/parser.rs
expression: ast
---
nodes:
- Text:
content: "<!-- HTML comment -->"
- Comment:
content: HTML comment
span:
start: 0
length: 21
length: 19
- Comment:
content: Django comment
span:

View file

@ -0,0 +1,20 @@
---
source: crates/djls-template-ast/src/parser.rs
assertion_line: 643
expression: ast
snapshot_kind: text
---
nodes: []
line_offsets:
- 0
- 24
- 44
- 79
- 131
- 170
- 184
- 244
- 276
- 312
- 333
- 366

View file

@ -0,0 +1,9 @@
---
source: crates/djls-template-ast/src/parser.rs
assertion_line: 581
expression: ast
snapshot_kind: text
---
nodes: []
line_offsets:
- 0

View file

@ -0,0 +1,9 @@
---
source: crates/djls-template-ast/src/parser.rs
assertion_line: 614
expression: ast
snapshot_kind: text
---
nodes: []
line_offsets:
- 0

View file

@ -0,0 +1,9 @@
---
source: crates/djls-template-ast/src/parser.rs
assertion_line: 623
expression: ast
snapshot_kind: text
---
nodes: []
line_offsets:
- 0

View file

@ -0,0 +1,38 @@
---
source: crates/djls-template-ast/src/parser.rs
assertion_line: 688
expression: ast
snapshot_kind: text
---
nodes: []
line_offsets:
- 0
- 16
- 23
- 34
- 66
- 97
- 134
- 151
- 191
- 215
- 241
- 270
- 298
- 313
- 331
- 343
- 354
- 386
- 451
- 494
- 532
- 605
- 644
- 683
- 710
- 748
- 776
- 800
- 815
- 827

View file

@ -0,0 +1,9 @@
---
source: crates/djls-template-ast/src/parser.rs
assertion_line: 425
expression: ast
snapshot_kind: text
---
nodes: []
line_offsets:
- 0

View file

@ -0,0 +1,9 @@
---
source: crates/djls-template-ast/src/parser.rs
assertion_line: 434
expression: ast
snapshot_kind: text
---
nodes: []
line_offsets:
- 0

View file

@ -0,0 +1,9 @@
---
source: crates/djls-template-ast/src/parser.rs
assertion_line: 443
expression: ast
snapshot_kind: text
---
nodes: []
line_offsets:
- 0

View file

@ -0,0 +1,14 @@
---
source: crates/djls-template-ast/src/parser.rs
assertion_line: 540
expression: ast
snapshot_kind: text
---
nodes: []
line_offsets:
- 0
- 32
- 59
- 76
- 113
- 133

View file

@ -0,0 +1,14 @@
---
source: crates/djls-template-ast/src/parser.rs
assertion_line: 557
expression: ast
snapshot_kind: text
---
nodes: []
line_offsets:
- 0
- 24
- 48
- 62
- 83
- 89