add new token for /> and tests (#27)

This commit is contained in:
Josh Thomas 2024-10-22 18:21:35 -05:00 committed by GitHub
parent d76c4f8cc3
commit ff3de19aa5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 154 additions and 5 deletions

View file

@ -126,6 +126,7 @@ impl Lexer {
fn left_angle(&self) -> Result<TokenType, LexerError> {
let token_type = match (self.peek_next()?, self.peek_at(2)?, self.peek_at(3)?) {
('=', _, _) => TokenType::LeftAngleEqual,
('/', _, _) => TokenType::LeftAngleSlash,
('!', '-', '-') => TokenType::LeftAngleBangDashDash,
(c, _, _) if c.is_whitespace() || c.is_alphabetic() || c == '\0' => {
TokenType::LeftAngle
@@ -191,11 +192,17 @@ impl Lexer {
.take_while(|&c| c.is_whitespace() && c != '\0')
.map(|c| c.len_utf8())
.sum(),
TokenType::Text => remaining_source
.chars()
.take_while(|&c| !c.is_whitespace() && c != '\0')
.map(|c| c.len_utf8())
.sum(),
TokenType::Text => {
const TOKEN_BOUNDARIES: &[char] = &['>', '=', '\'', '"'];
remaining_source
.chars()
.take_while(|&c| {
!c.is_whitespace() && c != '\0' && !TOKEN_BOUNDARIES.contains(&c)
})
.map(|c| c.len_utf8())
.sum()
}
_ => return Err(LexerError::UnexpectedTokenType(token_type)),
},
};
@@ -253,6 +260,125 @@ impl Lexer {
mod tests {
use super::*;
#[test]
fn test_tokenize() {
    // Each case pairs a template/HTML input with the exact token-type
    // sequence the lexer must produce for it (always terminated by Eof).
    let test_cases = vec![
        (
            "<html>",
            vec![
                TokenType::LeftAngle,
                TokenType::Text,
                TokenType::RightAngle,
                TokenType::Eof,
            ],
        ),
        (
            "</body>",
            vec![
                TokenType::LeftAngleSlash,
                TokenType::Text,
                TokenType::RightAngle,
                TokenType::Eof,
            ],
        ),
        (
            "<img src='https://example.com/image.jpg' />",
            vec![
                TokenType::LeftAngle,
                TokenType::Text,
                TokenType::Text,
                TokenType::Equal,
                TokenType::SingleQuote,
                TokenType::Text,
                TokenType::SingleQuote,
                TokenType::SlashRightAngle,
                TokenType::Eof,
            ],
        ),
        (
            "{{ variable }}",
            vec![
                TokenType::DoubleLeftBrace,
                TokenType::Text,
                TokenType::DoubleRightBrace,
                TokenType::Eof,
            ],
        ),
        (
            "{% if condition %}",
            vec![
                TokenType::LeftBracePercent,
                TokenType::Text,
                TokenType::Text,
                TokenType::PercentRightBrace,
                TokenType::Eof,
            ],
        ),
        (
            "{# A comment #}",
            vec![
                TokenType::LeftBraceHash,
                TokenType::Text,
                TokenType::Text,
                TokenType::HashRightBrace,
                TokenType::Eof,
            ],
        ),
        (
            "{{ value|default:'default' }}",
            vec![
                TokenType::DoubleLeftBrace,
                TokenType::Text,
                TokenType::SingleQuote,
                TokenType::Text,
                TokenType::SingleQuote,
                TokenType::DoubleRightBrace,
                TokenType::Eof,
            ],
        ),
        (
            r#"'{% url "api:index" %}'"#,
            vec![
                TokenType::SingleQuote,
                TokenType::LeftBracePercent,
                TokenType::Text,
                TokenType::DoubleQuote,
                TokenType::Text,
                TokenType::DoubleQuote,
                TokenType::PercentRightBrace,
                TokenType::SingleQuote,
                TokenType::Eof,
            ],
        ),
    ];

    for (input, expected_token_types) in test_cases {
        let mut lexer = Lexer::new(input);
        let tokens = lexer.tokenize().unwrap();

        // Compare the full token-type sequences in one assertion:
        // on failure, assert_eq! prints both complete sequences, which is
        // strictly more informative than the previous length check plus
        // zipped per-token comparison (zip silently stops at the shorter
        // side, hiding trailing mismatches when lengths differ).
        let actual_token_types: Vec<TokenType> =
            tokens.iter().map(|token| token.token_type).collect();
        assert_eq!(
            actual_token_types, expected_token_types,
            "Token type sequence mismatch for input: {}",
            input
        );
    }
}
#[test]
fn test_token_from_source() {
let line = 1;

View file

@@ -1,6 +1,8 @@
use crate::error::TokenError;
use std::fmt;
use std::fmt::Debug;
use std::ops::{Deref, DerefMut};
use std::string::ToString;
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum TokenType {
@@ -30,6 +32,7 @@ pub enum TokenType {
RightAngleEqual, // =>
LeftAngleBangDashDash, // <!--
DashDashRightAngle, // -->
LeftAngleSlash, // </
SlashRightAngle, // />
DoubleSlash, // //
SlashStar, // /*
@@ -67,6 +70,7 @@ impl TokenType {
| TokenType::DoubleEqual
| TokenType::LeftAngleEqual
| TokenType::RightAngleEqual
| TokenType::LeftAngleSlash
| TokenType::SlashRightAngle
| TokenType::DoubleSlash
| TokenType::SlashStar
@@ -86,6 +90,12 @@ pub struct Token {
pub line: usize,
}
/// `Display` renders a token as its raw source text (the lexeme), so
/// formatting a token reproduces exactly the characters that were lexed.
impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.lexeme)
    }
}
impl<'a> Token {
pub fn new(token_type: TokenType, lexeme: &'a str, line: usize) -> Self {
Token {
@@ -127,6 +137,19 @@ impl<'a> Token {
}
}
/// Joins a sequence of tokens back into a single source-like string.
///
/// NOTE(review): naming this trait method `to_string` shadows the standard
/// `std::string::ToString` convention (which this file imports); callers on
/// `Vec<Token>` get this method rather than a `Display`-backed conversion.
/// Renaming (e.g. to `join_lexemes`) would be clearer, but would break
/// existing callers — flagging for a future API revision.
pub trait TokenVecToString {
    fn to_string(&self) -> String;
}
/// Renders every token via its `Display` impl, in order, separated by a
/// single space.
impl TokenVecToString for Vec<Token> {
    fn to_string(&self) -> String {
        // Preallocate one slot per token, stringify each in sequence,
        // then join with single spaces — same output as a map/collect chain.
        let mut rendered = Vec::with_capacity(self.len());
        for token in self {
            rendered.push(token.to_string());
        }
        rendered.join(" ")
    }
}
#[derive(Clone, Debug)]
pub struct TokenStream {
tokens: Vec<Token>,