From ff3de19aa52b3c86cb6d7abf7d2a87aee5296c15 Mon Sep 17 00:00:00 2001
From: Josh Thomas
Date: Tue, 22 Oct 2024 18:21:35 -0500
Subject: [PATCH] add new token for /> and tests (#27)

---
 src/lexer.rs | 136 +++++++++++++++++++++++++++++++++++++++++++++++++--
 src/token.rs |  23 +++++++++
 2 files changed, 154 insertions(+), 5 deletions(-)

diff --git a/src/lexer.rs b/src/lexer.rs
index a77ab88..68050d5 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -126,6 +126,7 @@ impl Lexer {
     fn left_angle(&self) -> Result<TokenType, LexerError> {
         let token_type = match (self.peek_next()?, self.peek_at(2)?, self.peek_at(3)?) {
             ('=', _, _) => TokenType::LeftAngleEqual,
+            ('/', _, _) => TokenType::LeftAngleSlash,
             ('!', '-', '-') => TokenType::LeftAngleBangDashDash,
             (c, _, _) if c.is_whitespace() || c.is_alphabetic() || c == '\0' => {
                 TokenType::LeftAngle
@@ -191,11 +192,17 @@ impl Lexer {
                 .take_while(|&c| c.is_whitespace() && c != '\0')
                 .map(|c| c.len_utf8())
                 .sum(),
-            TokenType::Text => remaining_source
-                .chars()
-                .take_while(|&c| !c.is_whitespace() && c != '\0')
-                .map(|c| c.len_utf8())
-                .sum(),
+            TokenType::Text => {
+                const TOKEN_BOUNDARIES: &[char] = &['>', '=', '\'', '"'];
+
+                remaining_source
+                    .chars()
+                    .take_while(|&c| {
+                        !c.is_whitespace() && c != '\0' && !TOKEN_BOUNDARIES.contains(&c)
+                    })
+                    .map(|c| c.len_utf8())
+                    .sum()
+            }
             _ => return Err(LexerError::UnexpectedTokenType(token_type)),
         },
     };
@@ -253,6 +260,125 @@
 mod tests {
     use super::*;
 
+    #[test]
+    fn test_tokenize() {
+        let test_cases = vec![
+            (
+                "<html>",
+                vec![
+                    TokenType::LeftAngle,
+                    TokenType::Text,
+                    TokenType::RightAngle,
+                    TokenType::Eof,
+                ],
+            ),
+            (
+                "</html>",
+                vec![
+                    TokenType::LeftAngleSlash,
+                    TokenType::Text,
+                    TokenType::RightAngle,
+                    TokenType::Eof,
+                ],
+            ),
+            (
+                "<input type='text' />",
+                vec![
+                    TokenType::LeftAngle,
+                    TokenType::Text,
+                    TokenType::Text,
+                    TokenType::Equal,
+                    TokenType::SingleQuote,
+                    TokenType::Text,
+                    TokenType::SingleQuote,
+                    TokenType::SlashRightAngle,
+                    TokenType::Eof,
+                ],
+            ),
+            (
+                "{{ variable }}",
+                vec![
+                    TokenType::DoubleLeftBrace,
+                    TokenType::Text,
+                    TokenType::DoubleRightBrace,
+                    TokenType::Eof,
+                ],
+            ),
+            (
+                "{% if condition %}",
+                vec![
+                    TokenType::LeftBracePercent,
+                    TokenType::Text,
+                    TokenType::Text,
+                    TokenType::PercentRightBrace,
+                    TokenType::Eof,
+                ],
+            ),
+            (
+                "{# A comment #}",
+                vec![
+                    TokenType::LeftBraceHash,
+                    TokenType::Text,
+                    TokenType::Text,
+                    TokenType::HashRightBrace,
+                    TokenType::Eof,
+                ],
+            ),
+            (
+                "{{ value|default:'default' }}",
+                vec![
+                    TokenType::DoubleLeftBrace,
+                    TokenType::Text,
+                    TokenType::SingleQuote,
+                    TokenType::Text,
+                    TokenType::SingleQuote,
+                    TokenType::DoubleRightBrace,
+                    TokenType::Eof,
+                ],
+            ),
+            (
+                r#"'{% url "api:index" %}'"#,
+                vec![
+                    TokenType::SingleQuote,
+                    TokenType::LeftBracePercent,
+                    TokenType::Text,
+                    TokenType::DoubleQuote,
+                    TokenType::Text,
+                    TokenType::DoubleQuote,
+                    TokenType::PercentRightBrace,
+                    TokenType::SingleQuote,
+                    TokenType::Eof,
+                ],
+            ),
+        ];
+
+        for (input, expected_token_types) in test_cases {
+            println!("Testing input: {:?}", input);
+
+            let mut lexer = Lexer::new(input);
+            let tokens = lexer.tokenize().unwrap();
+
+            println!("tokens: {:?}", tokens);
+
+            assert_eq!(
+                tokens.len(),
+                expected_token_types.len(),
+                "Number of tokens doesn't match for input: {}",
+                input
+            );
+
+            for (token, expected_type) in tokens.iter().zip(expected_token_types.iter()) {
+                assert_eq!(
+                    token.token_type, *expected_type,
+                    "Token type mismatch for input: {}",
+                    input
+                );
+            }
+
+            println!("---");
+        }
+    }
+
     #[test]
     fn test_token_from_source() {
         let line = 1;
diff --git a/src/token.rs b/src/token.rs
index ae8fb86..5d269d0 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -1,6 +1,8 @@
 use crate::error::TokenError;
+use std::fmt;
 use std::fmt::Debug;
 use std::ops::{Deref, DerefMut};
+use std::string::ToString;
 
 #[derive(Debug, Clone, Copy, PartialEq)]
 pub enum TokenType {
@@ -30,6 +32,7 @@ pub enum TokenType {
     RightAngleEqual,       // =>
     LeftAngleBangDashDash, // <!--
+    LeftAngleSlash,        // </
     DoubleSlash,           // //
     SlashStar,             // /*
@@ -67,6 +70,7 @@
             | TokenType::DoubleEqual
             | TokenType::LeftAngleEqual
             | TokenType::RightAngleEqual
+            | TokenType::LeftAngleSlash
             | TokenType::SlashRightAngle
             | TokenType::DoubleSlash
             | TokenType::SlashStar
@@ -86,6 +90,12 @@ pub struct Token {
     pub line: usize,
 }
 
+impl fmt::Display for Token {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.lexeme)
+    }
+}
+
 impl<'a> Token {
     pub fn new(token_type: TokenType, lexeme: &'a str, line: usize) -> Self {
         Token {
@@ -127,6 +137,19 @@
     }
 }
 
+pub trait TokenVecToString {
+    fn to_string(&self) -> String;
+}
+
+impl TokenVecToString for Vec<Token> {
+    fn to_string(&self) -> String {
+        self.iter()
+            .map(|token| token.to_string())
+            .collect::<Vec<String>>()
+            .join(" ")
+    }
+}
+
 #[derive(Clone, Debug)]
 pub struct TokenStream {
     tokens: Vec<Token>,
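
A minimal sketch of the new token in action (illustrative only, not part of
the patch; the "</div>" input and the demo function are hypothetical, and the
assertions assume the Lexer and TokenType APIs exercised in the tests above):

    use crate::lexer::Lexer;
    use crate::token::TokenType;

    fn demo() {
        // With this change, a closing-tag opener "</" lexes as a single
        // two-character LeftAngleSlash token, mirroring the existing
        // SlashRightAngle token for "/>".
        let mut lexer = Lexer::new("</div>");
        let tokens = lexer.tokenize().unwrap();

        assert_eq!(tokens[0].token_type, TokenType::LeftAngleSlash); // "</"
        assert_eq!(tokens[1].token_type, TokenType::Text);           // "div"
        assert_eq!(tokens[2].token_type, TokenType::RightAngle);     // ">"
        assert_eq!(tokens[3].token_type, TokenType::Eof);
    }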