mirror of
https://github.com/joshuadavidthomas/django-template-ast.git
synced 2025-09-09 02:00:32 +00:00
add a few more Django and HTML specific tokens (#23)
This commit is contained in:
parent
f00192a8b7
commit
b9d61b4478
2 changed files with 104 additions and 43 deletions
74
src/lexer.rs
74
src/lexer.rs
|
@ -19,8 +19,9 @@ impl<'a> Lexer<'a> {
|
|||
|
||||
fn match_token_type(&mut self, c: char) -> Result<TokenType, LexerError> {
|
||||
match c {
|
||||
'(' | ')' | '[' | ']' | ',' | '.' | '-' | '+' | ':' | ';' | '/' | '*' | '|' | '\''
|
||||
| '"' => self.single_char(c),
|
||||
'(' | ')' | '[' | ']' | ',' | '-' | '+' | ':' | ';' | '*' | '|' | '\'' | '"' => {
|
||||
self.single_char(c)
|
||||
}
|
||||
'{' => self.left_brace(),
|
||||
'}' => self.right_brace(),
|
||||
'%' => self.percent(),
|
||||
|
@ -29,6 +30,8 @@ impl<'a> Lexer<'a> {
|
|||
'=' => self.equal(),
|
||||
'<' => self.left_angle(),
|
||||
'>' => self.right_angle(),
|
||||
'/' => self.slash(),
|
||||
'.' => self.dot(),
|
||||
' ' | '\r' | '\t' | '\n' => self.whitespace(c),
|
||||
_ => self.text(),
|
||||
}
|
||||
|
@ -41,12 +44,10 @@ impl<'a> Lexer<'a> {
|
|||
'[' => TokenType::LeftBracket,
|
||||
']' => TokenType::RightBracket,
|
||||
',' => TokenType::Comma,
|
||||
'.' => TokenType::Dot,
|
||||
'-' => TokenType::Minus,
|
||||
'+' => TokenType::Plus,
|
||||
':' => TokenType::Colon,
|
||||
';' => TokenType::Semicolon,
|
||||
'/' => TokenType::Slash,
|
||||
'*' => TokenType::Star,
|
||||
'|' => TokenType::Pipe,
|
||||
'\'' => TokenType::SingleQuote,
|
||||
|
@ -122,9 +123,20 @@ impl<'a> Lexer<'a> {
|
|||
/// Classifies a token that begins with `<`.
///
/// Decision order, as written below:
/// 1. `advance_if_matches('=')` succeeds -> `TokenType::LeftAngleEqual`.
/// 2. `advance_if_matches('!')` succeeds -> advance over a run of `-`;
///    if `self.state.current` moved by at least 2 the result is
///    `TokenType::LeftAngleBangMinusMinus`, otherwise the position is
///    restored to `start_pos` and the result is `TokenType::LeftAngle`.
/// 3. Neither matched -> `TokenType::LeftAngle`.
///
/// Any `Err` returned by `advance_if_matches` / `advance_while` is
/// propagated to the caller through `?`.
fn left_angle(&mut self) -> Result<TokenType, LexerError> {
|
||||
let token_type = if self.advance_if_matches('=')? {
|
||||
TokenType::LeftAngleEqual
|
||||
} else if self.advance_if_matches('!')? {
|
||||
// Position recorded after the `!` check succeeded; it is both the
// baseline for measuring the dash run and the rewind target below.
let start_pos = self.state.current;
|
||||
self.advance_while(|c| c == '-')?;
|
||||
|
||||
// The run is not capped at two dashes: the test table for this method
// expects `<!---` to produce `LeftAngleBangMinusMinus` as well.
if self.state.current - start_pos >= 2 {
|
||||
TokenType::LeftAngleBangMinusMinus
|
||||
} else {
|
||||
// NOTE(review): this rewinds only to `start_pos`, i.e. not to before
// the `!`. Presumably `advance_if_matches` consumed the `!`, in which
// case it stays consumed while the token is reported as `LeftAngle`
// — confirm this is intended.
self.state.current = start_pos;
|
||||
TokenType::LeftAngle
|
||||
}
|
||||
} else {
|
||||
TokenType::LeftAngle
|
||||
};
|
||||
|
||||
Ok(token_type)
|
||||
}
|
||||
|
||||
|
@ -137,6 +149,26 @@ impl<'a> Lexer<'a> {
|
|||
Ok(token_type)
|
||||
}
|
||||
|
||||
/// Classifies a token that begins with `/` (dispatched from the `'/'` arm
/// of `match_token_type`).
///
/// Checks are tried in this order:
/// - `advance_if_matches('>')` succeeds -> `TokenType::SlashRightAngle` (`/>`).
/// - `advance_if_matches('/')` succeeds -> `TokenType::DoubleSlash` (`//`).
/// - otherwise -> `TokenType::Slash`.
///
/// An `Err` from `advance_if_matches` is propagated through `?`.
// NOTE(review): presumably the leading `/` has already been consumed by the
// caller before this method runs — confirm against the scanning loop.
fn slash(&mut self) -> Result<TokenType, LexerError> {
|
||||
let token_type = if self.advance_if_matches('>')? {
|
||||
TokenType::SlashRightAngle
|
||||
} else if self.advance_if_matches('/')? {
|
||||
TokenType::DoubleSlash
|
||||
} else {
|
||||
TokenType::Slash
|
||||
};
|
||||
Ok(token_type)
|
||||
}
|
||||
|
||||
/// Classifies a token that begins with `.` (dispatched from the `'.'` arm
/// of `match_token_type`).
///
/// Yields `TokenType::DoubleDot` when `advance_if_matches('.')` succeeds,
/// and `TokenType::Dot` otherwise. An `Err` from `advance_if_matches` is
/// propagated through `?`.
fn dot(&mut self) -> Result<TokenType, LexerError> {
|
||||
let token_type = if self.advance_if_matches('.')? {
|
||||
TokenType::DoubleDot
|
||||
} else {
|
||||
TokenType::Dot
|
||||
};
|
||||
Ok(token_type)
|
||||
}
|
||||
|
||||
fn whitespace(&mut self, mut c: char) -> Result<TokenType, LexerError> {
|
||||
while !self.is_at_end() && self.peek()?.is_whitespace() {
|
||||
match c {
|
||||
|
@ -298,12 +330,12 @@ mod tests {
|
|||
F: Fn(&mut Lexer, Option<char>) -> Result<TokenType, LexerError>,
|
||||
{
|
||||
for (input, expected) in test_cases {
|
||||
println!("Testing input: {:?}", input);
|
||||
let mut chars = input.chars();
|
||||
let first_char = chars.next().unwrap();
|
||||
let second_char = chars.next();
|
||||
let rest: String = chars.collect();
|
||||
|
||||
let source = second_char.map_or(String::new(), |c| c.to_string());
|
||||
let mut lexer = Lexer::new(&source);
|
||||
let mut lexer = Lexer::new(&rest);
|
||||
|
||||
match method(&mut lexer, Some(first_char)) {
|
||||
Ok(token_type) => assert_eq!(token_type, expected, "Input: {}", input),
|
||||
|
@ -351,13 +383,16 @@ mod tests {
|
|||
("==", TokenType::DoubleEqual),
|
||||
("<=", TokenType::LeftAngleEqual),
|
||||
(">=", TokenType::RightAngleEqual),
|
||||
("..", TokenType::DoubleDot),
|
||||
("<!--", TokenType::LeftAngleBangMinusMinus),
|
||||
("/>", TokenType::SlashRightAngle),
|
||||
("//", TokenType::DoubleSlash),
|
||||
(" ", TokenType::Whitespace),
|
||||
("\r", TokenType::Whitespace),
|
||||
("\t", TokenType::Whitespace),
|
||||
("\n", TokenType::Whitespace),
|
||||
(" ", TokenType::Whitespace),
|
||||
(" \n", TokenType::Whitespace),
|
||||
(" \r\n", TokenType::Whitespace),
|
||||
("a", TokenType::Text),
|
||||
("1", TokenType::Text),
|
||||
("Hello", TokenType::Text),
|
||||
|
@ -425,6 +460,10 @@ mod tests {
|
|||
let test_cases = vec![
|
||||
("<", TokenType::LeftAngle),
|
||||
("<=", TokenType::LeftAngleEqual),
|
||||
("<!--", TokenType::LeftAngleBangMinusMinus),
|
||||
("<!", TokenType::LeftAngle),
|
||||
("<!-", TokenType::LeftAngle),
|
||||
("<!---", TokenType::LeftAngleBangMinusMinus),
|
||||
];
|
||||
|
||||
assert_token_type(test_cases, |lexer, _| lexer.left_angle());
|
||||
|
@ -440,6 +479,24 @@ mod tests {
|
|||
assert_token_type(test_cases, |lexer, _| lexer.right_angle());
|
||||
}
|
||||
|
||||
// Table-driven check of `Lexer::slash`: `/`, `/>` and `//` must map to
// `Slash`, `SlashRightAngle` and `DoubleSlash` respectively. The closure
// ignores the character argument supplied by `assert_token_type`.
#[test]
|
||||
fn test_slash() {
|
||||
let test_cases = vec![
|
||||
("/", TokenType::Slash),
|
||||
("/>", TokenType::SlashRightAngle),
|
||||
("//", TokenType::DoubleSlash),
|
||||
];
|
||||
|
||||
assert_token_type(test_cases, |lexer, _| lexer.slash());
|
||||
}
|
||||
|
||||
// Table-driven check of `Lexer::dot`: `.` must map to `Dot` and `..` to
// `DoubleDot`. The closure ignores the character argument supplied by
// `assert_token_type`.
#[test]
|
||||
fn test_dot() {
|
||||
let test_cases = vec![(".", TokenType::Dot), ("..", TokenType::DoubleDot)];
|
||||
|
||||
assert_token_type(test_cases, |lexer, _| lexer.dot());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_whitespace() {
|
||||
let test_cases = vec![
|
||||
|
@ -449,7 +506,6 @@ mod tests {
|
|||
("\n", TokenType::Whitespace),
|
||||
(" ", TokenType::Whitespace),
|
||||
(" \n", TokenType::Whitespace),
|
||||
(" \r\n", TokenType::Whitespace),
|
||||
];
|
||||
|
||||
assert_token_type(test_cases, |lexer, c| lexer.whitespace(c.unwrap()));
|
||||
|
|
73
src/token.rs
73
src/token.rs
|
@ -3,40 +3,45 @@ use std::fmt::Debug;
|
|||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum TokenType {
|
||||
LeftParen, // (
|
||||
RightParen, // )
|
||||
LeftBrace, // {
|
||||
RightBrace, // }
|
||||
LeftBracket, // [
|
||||
RightBracket, // ]
|
||||
LeftAngle, // <
|
||||
RightAngle, // >
|
||||
Comma, // ,
|
||||
Dot, // .
|
||||
Minus, // -
|
||||
Plus, // +
|
||||
Colon, // :
|
||||
Semicolon, // ;
|
||||
Slash, // /
|
||||
Star, // *
|
||||
Bang, // !
|
||||
Equal, // =
|
||||
Pipe, // |
|
||||
Percent, // %
|
||||
Hash, // #
|
||||
SingleQuote, // '
|
||||
DoubleQuote, // "
|
||||
DoubleLeftBrace, // {{
|
||||
DoubleRightBrace, // }}
|
||||
LeftBracePercent, // {%
|
||||
PercentRightBrace, // %}
|
||||
LeftBraceHash, // {#
|
||||
HashRightBrace, // #}
|
||||
BangEqual, // !=
|
||||
DoubleEqual, // ==
|
||||
LeftAngleEqual, // <=
|
||||
RightAngleEqual, // >=
|
||||
Whitespace, // special token to account for whitespace
|
||||
LeftParen, // (
|
||||
RightParen, // )
|
||||
LeftBrace, // {
|
||||
RightBrace, // }
|
||||
LeftBracket, // [
|
||||
RightBracket, // ]
|
||||
LeftAngle, // <
|
||||
RightAngle, // >
|
||||
Comma, // ,
|
||||
Dot, // .
|
||||
Minus, // -
|
||||
Plus, // +
|
||||
Colon, // :
|
||||
Semicolon, // ;
|
||||
Slash, // /
|
||||
Star, // *
|
||||
Bang, // !
|
||||
Equal, // =
|
||||
Pipe, // |
|
||||
Percent, // %
|
||||
Hash, // #
|
||||
SingleQuote, // '
|
||||
DoubleQuote, // "
|
||||
DoubleLeftBrace, // {{
|
||||
DoubleRightBrace, // }}
|
||||
LeftBracePercent, // {%
|
||||
PercentRightBrace, // %}
|
||||
LeftBraceHash, // {#
|
||||
HashRightBrace, // #}
|
||||
BangEqual, // !=
|
||||
DoubleEqual, // ==
|
||||
LeftAngleEqual, // <=
|
||||
RightAngleEqual, // >=
|
||||
DoubleDot, // ..
|
||||
LeftAngleBangMinusMinus, // <!--
|
||||
MinusMinusRightAngle, // -->
|
||||
SlashRightAngle, // />
|
||||
DoubleSlash, // //
|
||||
Whitespace, // special token to account for whitespace
|
||||
Text,
|
||||
Eof,
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue