move token matching from lexer to token

2025-09-09 18:20:31 +00:00 · 2024-10-16 23:14:44 -05:00 · 2024-10-16 23:14:44 -05:00 · da5c34fd0f
commit da5c34fd0f
parent b4c7688f65
4 changed files with 603 additions and 597 deletions
--- a/src/error.rs
+++ b/src/error.rs
@ -2,16 +2,53 @@ use thiserror::Error;
 #[derive(Error, Debug)]
 pub enum LexerError {
-    #[error("Empty token at line {line:?}")]
+    #[error("empty token at line {line:?}")]
    EmptyToken { line: usize },
-    #[error("Unexpected character '{character}' at line {line}")]
+    #[error("unexpected character '{character}' at line {line}")]
    UnexpectedCharacter { character: char, line: usize },
-    #[error("Source is empty")]
+    #[error("source is empty")]
    EmptySource,
-    #[error("At beginning of source")]
+    #[error("at beginning of source")]
    AtBeginningOfSource,
-    #[error("At end of source")]
+    #[error("at end of source")]
    AtEndOfSource,
-    #[error("Invalid character access")]
+    #[error("invalid character access")]
    InvalidCharacterAccess,
    #[error(transparent)] // Display the inner TokenError directly
    TokenError(#[from] TokenError), // This automatically implements From<TokenError>
 }
 /// Errors raised while matching a single token at the start of an input string.
 #[derive(Error, Debug)]
 pub enum TokenError {
    /// A character was passed to a matcher that has no token for it.
    #[error("unexpected character '{character}'")]
    UnexpectedCharacter { character: char },
    /// The input did not begin with anything a matcher could consume.
    #[error("string did not match a token")]
    NoTokenMatch,
    /// Input ended where a string literal was expected (per the message text).
    // NOTE(review): nothing visible in this change constructs this variant — confirm it is used.
    #[error("unexpected end of input, expected string literal")]
    UnexpectedEndOfInput,
 }
 /// Errors describing a node that is missing a required name.
 // NOTE(review): the constructing code is not visible here; meanings are taken from the messages.
 #[derive(Error, Debug)]
 pub enum NodeError {
    /// A tag was given an empty name.
    #[error("Tag name cannot be empty")]
    NoTagName,
    /// A block was given an empty name.
    #[error("Block name cannot be empty")]
    NoBlockName,
 }
 /// Errors reported by the parser while walking a token stream.
 // NOTE(review): the parser itself is not visible here; meanings are taken from the messages.
 #[derive(Error, Debug)]
 pub enum ParserError {
    /// The token stream contained no tokens.
    #[error("Token stream is empty")]
    EmptyTokenStream,
    /// The cursor is already at the first token.
    #[error("At beginning of token stream")]
    AtBeginningOfStream,
    /// The cursor is already past the last token.
    #[error("At end of token stream")]
    AtEndOfStream,
    /// A token was requested at a position that cannot be accessed.
    #[error("Invalid token access")]
    InvalidTokenAccess,
    /// Wraps an `ASTError`; `#[from]` generates the `From<ASTError>` conversion.
    #[error("AST error: {0}")]
    ASTError(#[from] ASTError),
 }
 /// Error type for AST construction. It currently has no variants, so no value of it can exist.
 #[derive(Error, Debug)]
 pub enum ASTError {}
--- a/src/lexer.rs
+++ b/src/lexer.rs
@ -1,6 +1,6 @@
 use crate::error::LexerError;
 use crate::scanner::{LexerState, Scanner};
-use crate::token::{Token, TokenType, Tokenizer};
+use crate::token::{Token, TokenType};
 pub struct Lexer<'a> {
    source: &'a str,
@ -17,227 +17,33 @@ impl<'a> Lexer<'a> {
        }
    }
-    fn match_token_type(&mut self, c: char) -> Result<TokenType, LexerError> {
+    pub fn tokenize(&mut self) -> Result<Vec<Token>, LexerError> {
        match c {
            ',' | '.' | '+' | ':' | '|' | '\'' | '"' => self.single_char(c),
            '{' => self.left_brace(),
            '}' => self.right_brace(),
            '%' => self.percent(),
            '#' => self.hash(),
            '!' => self.bang(),
            '=' => self.equal(),
            '<' => self.left_angle(),
            '>' => self.right_angle(),
            '/' => self.slash(),
            '-' => self.dash(),
            '*' => self.star(),
            ' ' | '\r' | '\t' | '\n' => self.whitespace(c),
            _ => self.text(),
        }
    }
    /// Translates a one-character punctuation lexeme into its `TokenType`.
    ///
    /// Any character outside the punctuation set yields
    /// `LexerError::UnexpectedCharacter` tagged with the current line.
    fn single_char(&mut self, c: char) -> Result<TokenType, LexerError> {
        match c {
            ',' => Ok(TokenType::Comma),
            '.' => Ok(TokenType::Dot),
            '+' => Ok(TokenType::Plus),
            ':' => Ok(TokenType::Colon),
            '|' => Ok(TokenType::Pipe),
            '\'' => Ok(TokenType::SingleQuote),
            '"' => Ok(TokenType::DoubleQuote),
            other => Err(LexerError::UnexpectedCharacter {
                character: other,
                line: self.state.line,
            }),
        }
    }
    /// Classifies a token that began with `{`.
    ///
    /// Tries the second character against `{`, `%` and `#` in that order;
    /// when none follows, the brace is lexed as ordinary text.
    fn left_brace(&mut self) -> Result<TokenType, LexerError> {
        if self.advance_if_matches('{')? {
            return Ok(TokenType::DoubleLeftBrace);
        }
        if self.advance_if_matches('%')? {
            return Ok(TokenType::LeftBracePercent);
        }
        if self.advance_if_matches('#')? {
            return Ok(TokenType::LeftBraceHash);
        }
        self.text()
    }
    /// Classifies a token that began with `}`: `}}` closes a variable,
    /// anything else is lexed as ordinary text.
    fn right_brace(&mut self) -> Result<TokenType, LexerError> {
        if self.advance_if_matches('}')? {
            return Ok(TokenType::DoubleRightBrace);
        }
        self.text()
    }
    /// Classifies a token that began with `%`: `%}` or a bare percent sign.
    fn percent(&mut self) -> Result<TokenType, LexerError> {
        let closes_tag = self.advance_if_matches('}')?;
        if closes_tag {
            Ok(TokenType::PercentRightBrace)
        } else {
            Ok(TokenType::Percent)
        }
    }
    /// Classifies a token that began with `#`: `#}` closes a comment,
    /// anything else is lexed as ordinary text.
    fn hash(&mut self) -> Result<TokenType, LexerError> {
        if self.advance_if_matches('}')? {
            return Ok(TokenType::HashRightBrace);
        }
        self.text()
    }
    /// Classifies a token that began with `!`: `!=` or a bare bang.
    fn bang(&mut self) -> Result<TokenType, LexerError> {
        let followed_by_equal = self.advance_if_matches('=')?;
        if followed_by_equal {
            Ok(TokenType::BangEqual)
        } else {
            Ok(TokenType::Bang)
        }
    }
    /// Classifies a token that began with `=`: `==` or a single equals sign.
    fn equal(&mut self) -> Result<TokenType, LexerError> {
        let doubled = self.advance_if_matches('=')?;
        if doubled {
            Ok(TokenType::DoubleEqual)
        } else {
            Ok(TokenType::Equal)
        }
    }
    /// Classifies a token that began with `<`.
    ///
    /// `<=` yields `LeftAngleEqual`. `<!` followed by two or more dashes
    /// yields `LeftAngleBangDashDash` (all dashes are consumed). With fewer
    /// than two dashes the cursor is rewound to just after the `!` and the
    /// token is a plain `LeftAngle`.
    fn left_angle(&mut self) -> Result<TokenType, LexerError> {
        if self.advance_if_matches('=')? {
            return Ok(TokenType::LeftAngleEqual);
        }
        if !self.advance_if_matches('!')? {
            return Ok(TokenType::LeftAngle);
        }

        let after_bang = self.state.current;
        self.advance_while(|c| c == '-')?;
        let dash_count = self.state.current - after_bang;
        if dash_count < 2 {
            // Not a comment opener: give back any dash that was consumed.
            self.state.current = after_bang;
            return Ok(TokenType::LeftAngle);
        }
        Ok(TokenType::LeftAngleBangDashDash)
    }
    /// Classifies a token that began with `>`: `>=` or a bare right angle.
    fn right_angle(&mut self) -> Result<TokenType, LexerError> {
        let followed_by_equal = self.advance_if_matches('=')?;
        if followed_by_equal {
            Ok(TokenType::RightAngleEqual)
        } else {
            Ok(TokenType::RightAngle)
        }
    }
    /// Classifies a token that began with `/`.
    ///
    /// The second character is tried against `>`, `/` and `*` in that order;
    /// when none follows, the token is a bare `Slash`.
    fn slash(&mut self) -> Result<TokenType, LexerError> {
        if self.advance_if_matches('>')? {
            return Ok(TokenType::SlashRightAngle);
        }
        if self.advance_if_matches('/')? {
            return Ok(TokenType::DoubleSlash);
        }
        if self.advance_if_matches('*')? {
            return Ok(TokenType::SlashStar);
        }
        Ok(TokenType::Slash)
    }
    /// Classifies a token that began with `-`.
    ///
    /// A single dash is `Dash`; `-->` is `DashDashRightAngle`; `--` followed
    /// by anything else is lexed as ordinary text.
    fn dash(&mut self) -> Result<TokenType, LexerError> {
        if !self.advance_if_matches('-')? {
            return Ok(TokenType::Dash);
        }
        if self.advance_if_matches('>')? {
            return Ok(TokenType::DashDashRightAngle);
        }
        self.text()
    }
    /// Classifies a token that began with `*`: `*/` closes a block comment,
    /// anything else is lexed as ordinary text.
    fn star(&mut self) -> Result<TokenType, LexerError> {
        if self.advance_if_matches('/')? {
            return Ok(TokenType::StarSlash);
        }
        self.text()
    }
    /// Consumes a run of whitespace and reports it as `TokenType::Whitespace`.
    ///
    /// `c` is the whitespace character the caller has already taken from the
    /// source. The loop keeps going while the next unread character is
    /// whitespace; each iteration inspects the character held in `c`, updates
    /// the line counter, then pulls the next character into `c`.
    ///
    /// Returns `LexerError::UnexpectedCharacter` if `c` is none of space, tab,
    /// `\r` or `\n`.
    // NOTE(review): the character fetched by the final `advance()` is never run
    // through the `match`, and nothing is matched when the loop body never runs,
    // so a trailing or lone '\n' does not appear to bump `state.line` — confirm
    // against the `Scanner` implementation.
    fn whitespace(&mut self, mut c: char) -> Result<TokenType, LexerError> {
        while !self.is_at_end() && self.peek()?.is_whitespace() {
            match c {
                '\n' => {
                    self.state.line += 1;
                }
                // A "\r\n" pair is one line break: swallow the '\n' here.
                '\r' if self.peek()? == '\n' => {
                    self.advance()?;
                    self.state.line += 1;
                }
                ' ' | '\t' | '\r' => {}
                _ => {
                    return Err(LexerError::UnexpectedCharacter {
                        character: c,
                        line: self.state.line,
                    })
                }
            }
            c = self.advance()?;
        }
        Ok(TokenType::Whitespace)
    }
    /// Consumes characters up to the next token boundary and reports the run
    /// as `TokenType::Text`.
    fn text(&mut self) -> Result<TokenType, LexerError> {
        let inside_text = |c: char| !Self::is_token_boundary(c);
        self.advance_while(inside_text)?;
        Ok(TokenType::Text)
    }
    /// Steps over the next character only if it equals `expected`.
    ///
    /// Returns `Ok(true)` when the character was consumed, and `Ok(false)` at
    /// end of input or on a mismatch (the cursor is left untouched).
    fn advance_if_matches(&mut self, expected: char) -> Result<bool, LexerError> {
        if self.is_at_end() {
            return Ok(false);
        }
        if self.peek()? != expected {
            return Ok(false);
        }
        self.state.current += 1;
        Ok(true)
    }
    fn advance_while<F>(&mut self, condition: F) -> Result<(), LexerError>
    where
        F: Fn(char) -> bool,
    {
        while !self.is_at_end() {
-            let current_char = self.peek()?;
+            self.state.start = self.state.current;
-            if !condition(current_char) {
+            let (token, size, lines_consumed) = self.next_token()?;
-                break;
+            self.add_token(token);
-            }
+
-            if current_char == '\n' {
+            self.state.current += size;
-                self.state.line += 1;
+            self.state.line += lines_consumed;
            }
            self.advance()?;
        }
-        Ok(())
+
        self.add_token(Token::new(TokenType::Eof, "", self.state.line));
        Ok(self.tokens.clone())
    }
-    fn is_token_boundary(c: char) -> bool {
+    fn next_token(&mut self) -> Result<(Token<'a>, usize, usize), LexerError> {
-        const TOKEN_BOUNDARIES: &[char] = &[
+        self.advance()?;
-            '(', ')', '[', ']', '{', '}', ',', '.', '-', '+', ':', ';', '*', '|', '%', '#', '!',
+        let remaining_source = &self.source[self.state.current..];
            '=', '<', '>', '/', ' ', '\r', '\t', '\n', '"', '\'',
        ];
-        TOKEN_BOUNDARIES.contains(&c)
+        let (token, size, lines_traversed) = Token::from_input(remaining_source, self.state.line)?;
        Ok((token, size, lines_traversed))
    }
    /// Appends `token` to the output, silently dropping whitespace tokens.
    fn add_token(&mut self, token: Token<'a>) {
        if token.token_type == TokenType::Whitespace {
            return;
        }
        self.tokens.push(token);
    }
 }
@ -284,367 +90,3 @@ impl<'a> Scanner for Lexer<'a> {
        self.state.current >= self.source.len()
    }
 }
 /// `Tokenizer` implementation that drives the lexer over its whole source.
 impl<'a> Tokenizer<'a> for Lexer<'a> {
    type Token = Token<'a>;
    type TokenType = TokenType;

    /// Lexes the entire source, appends a final `Eof` token, and returns a
    /// copy of the collected tokens.
    fn tokenize(&mut self) -> Result<Vec<Self::Token>, Self::Error> {
        loop {
            if self.is_at_end() {
                break;
            }
            // Each token's lexeme starts where the previous one stopped.
            self.state.start = self.state.current;
            let (kind, lexeme) = self.next_token()?;
            self.add_token(kind, lexeme);
        }
        self.add_token(TokenType::Eof, "");
        Ok(self.tokens.clone())
    }

    /// Consumes one token and returns its type together with the source
    /// slice between `state.start` and the new `state.current`.
    fn next_token(&mut self) -> Result<(Self::TokenType, &'a str), Self::Error> {
        let first = self.advance()?;
        let kind = self.match_token_type(first)?;
        let lexeme = &self.source[self.state.start..self.state.current];
        Ok((kind, lexeme))
    }

    /// Stores a token tagged with the current line; whitespace is discarded.
    fn add_token(&mut self, token_type: Self::TokenType, text: &'a str) {
        if token_type == TokenType::Whitespace {
            return;
        }
        let token = Token::new(token_type, text, self.state.line);
        self.tokens.push(token);
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    mod lexer {
        use super::*;
        /// A fresh lexer keeps the source, holds no tokens, and starts at line 1.
        #[test]
        fn test_lexer_new() {
            let fresh = Lexer::new("");
            assert_eq!(fresh.source, "");
            assert!(fresh.tokens.is_empty());
            let cursor = (fresh.state.start, fresh.state.current, fresh.state.line);
            assert_eq!(cursor, (0, 0, 1));
        }
        /// Runs `method` once per case and checks the token type it returns.
        ///
        /// Each input is split into its first character (handed to `method`)
        /// and the remainder (used as the lexer's source), mirroring a lexer
        /// that has already consumed the first character.
        fn assert_token_type<F>(test_cases: Vec<(&str, TokenType)>, method: F)
        where
            F: Fn(&mut Lexer, Option<char>) -> Result<TokenType, LexerError>,
        {
            for (input, expected) in test_cases {
                println!("Testing input: {:?}", input);
                let mut remaining = input.chars();
                let head = remaining.next().unwrap();
                let tail: String = remaining.as_str().to_owned();
                let mut lexer = Lexer::new(&tail);
                let actual = method(&mut lexer, Some(head)).unwrap_or_else(|e| {
                    panic!(
                        "Expected {:?}, but got Err({:?}) for input: {}",
                        expected, e, input
                    )
                });
                assert_eq!(actual, expected, "Input: {}", input);
            }
        }
        /// Every lexeme the dispatcher recognises, paired with the token it must yield.
        #[test]
        fn test_match_token_type() {
            let expectations = vec![
                ("<", TokenType::LeftAngle),
                (">", TokenType::RightAngle),
                (",", TokenType::Comma),
                (".", TokenType::Dot),
                ("-", TokenType::Dash),
                ("+", TokenType::Plus),
                (":", TokenType::Colon),
                ("/", TokenType::Slash),
                ("!", TokenType::Bang),
                ("=", TokenType::Equal),
                ("|", TokenType::Pipe),
                ("%", TokenType::Percent),
                ("'", TokenType::SingleQuote),
                ("\"", TokenType::DoubleQuote),
                ("{{", TokenType::DoubleLeftBrace),
                ("}}", TokenType::DoubleRightBrace),
                ("{%", TokenType::LeftBracePercent),
                ("%}", TokenType::PercentRightBrace),
                ("{#", TokenType::LeftBraceHash),
                ("#}", TokenType::HashRightBrace),
                ("!=", TokenType::BangEqual),
                ("==", TokenType::DoubleEqual),
                ("<=", TokenType::LeftAngleEqual),
                (">=", TokenType::RightAngleEqual),
                ("<!--", TokenType::LeftAngleBangDashDash),
                ("-->", TokenType::DashDashRightAngle),
                ("/>", TokenType::SlashRightAngle),
                ("//", TokenType::DoubleSlash),
                ("/*", TokenType::SlashStar),
                ("*/", TokenType::StarSlash),
                (" ", TokenType::Whitespace),
                ("\r", TokenType::Whitespace),
                ("\t", TokenType::Whitespace),
                ("\n", TokenType::Whitespace),
                ("  ", TokenType::Whitespace),
                (" \n", TokenType::Whitespace),
                ("a", TokenType::Text),
                ("1", TokenType::Text),
                ("Hello", TokenType::Text),
            ];
            assert_token_type(expectations, |lexer, first| {
                lexer.match_token_type(first.unwrap())
            });
        }
        /// `{` pairs with `{`, `%` or `#`; a lone brace is plain text.
        #[test]
        fn test_left_brace() {
            let cases = vec![
                ("{{", TokenType::DoubleLeftBrace),
                ("{%", TokenType::LeftBracePercent),
                ("{#", TokenType::LeftBraceHash),
                ("{", TokenType::Text),
            ];
            assert_token_type(cases, |lexer, _first| lexer.left_brace());
        }
        /// `}}` is a delimiter; a lone `}` is plain text.
        #[test]
        fn test_right_brace() {
            let cases = vec![
                ("}}", TokenType::DoubleRightBrace),
                ("}", TokenType::Text),
            ];
            assert_token_type(cases, |lexer, _first| lexer.right_brace());
        }
        /// `%` alone versus the `%}` tag closer.
        #[test]
        fn test_percent() {
            let cases = vec![
                ("%", TokenType::Percent),
                ("%}", TokenType::PercentRightBrace),
            ];
            assert_token_type(cases, |lexer, _first| lexer.percent());
        }
        /// `!` alone versus `!=`.
        #[test]
        fn test_bang() {
            let cases = vec![
                ("!", TokenType::Bang),
                ("!=", TokenType::BangEqual),
            ];
            assert_token_type(cases, |lexer, _first| lexer.bang());
        }
        /// `=` alone versus `==`.
        #[test]
        fn test_equal() {
            let cases = vec![
                ("=", TokenType::Equal),
                ("==", TokenType::DoubleEqual),
            ];
            assert_token_type(cases, |lexer, _first| lexer.equal());
        }
        /// `<` variants, including comment openers with too few or extra dashes.
        #[test]
        fn test_left_angle() {
            let cases = vec![
                ("<", TokenType::LeftAngle),
                ("<=", TokenType::LeftAngleEqual),
                ("<!--", TokenType::LeftAngleBangDashDash),
                ("<!", TokenType::LeftAngle),
                ("<!-", TokenType::LeftAngle),
                ("<!---", TokenType::LeftAngleBangDashDash),
            ];
            assert_token_type(cases, |lexer, _first| lexer.left_angle());
        }
        /// `>` alone versus `>=`.
        #[test]
        fn test_right_angle() {
            let cases = vec![
                (">", TokenType::RightAngle),
                (">=", TokenType::RightAngleEqual),
            ];
            assert_token_type(cases, |lexer, _first| lexer.right_angle());
        }
        /// `/` alone and its two-character combinations.
        #[test]
        fn test_slash() {
            let cases = vec![
                ("/", TokenType::Slash),
                ("/>", TokenType::SlashRightAngle),
                ("//", TokenType::DoubleSlash),
                ("/*", TokenType::SlashStar),
            ];
            assert_token_type(cases, |lexer, _first| lexer.slash());
        }
        /// `-` alone, the `-->` comment closer, and `--` falling back to text.
        #[test]
        fn test_dash() {
            let cases = vec![
                ("-", TokenType::Dash),
                ("-->", TokenType::DashDashRightAngle),
                ("--", TokenType::Text),
            ];
            assert_token_type(cases, |lexer, _first| lexer.dash());
        }
        /// `*/` is a delimiter; a lone `*` is plain text.
        #[test]
        fn test_star() {
            let cases = vec![
                ("*/", TokenType::StarSlash),
                ("*", TokenType::Text),
            ];
            assert_token_type(cases, |lexer, _first| lexer.star());
        }
        /// Single whitespace characters and short runs all lex as `Whitespace`.
        #[test]
        fn test_whitespace() {
            let cases = vec![
                (" ", TokenType::Whitespace),
                ("\r", TokenType::Whitespace),
                ("\t", TokenType::Whitespace),
                ("\n", TokenType::Whitespace),
                ("  ", TokenType::Whitespace),
                (" \n", TokenType::Whitespace),
            ];
            assert_token_type(cases, |lexer, first| {
                lexer.whitespace(first.unwrap())
            });
        }
        /// Letters, digits and words all lex as `Text`.
        #[test]
        fn test_text() {
            let cases = vec![
                ("a", TokenType::Text),
                ("1", TokenType::Text),
                ("Hello", TokenType::Text),
            ];
            assert_token_type(cases, |lexer, _first| lexer.text());
        }
    }
    /// Test helper: lexes `input`, prints every token for debugging, and
    /// panics (after printing the error and the input) if lexing fails.
    fn tokenize(input: &str) -> Vec<Token> {
        let mut lexer = Lexer::new(input);
        let tokens = lexer.tokenize().unwrap_or_else(|e| {
            eprintln!("Tokenization error: {:?}", e);
            eprintln!("Input that caused the error: {}", input);
            panic!("Tokenization failed. See error output above.");
        });
        // Debug print all tokens
        tokens.iter().for_each(|token| println!("{:?}", token));
        tokens
    }
    /// `<html>` lexes to angle, text, angle.
    #[test]
    fn test_opening_tag() {
        let tokens = tokenize("<html>");
        let expected = [
            TokenType::LeftAngle,
            TokenType::Text,
            TokenType::RightAngle,
        ];
        for (index, token_type) in expected.iter().enumerate() {
            assert_eq!(&tokens[index].token_type, token_type);
        }
    }
    /// `</body>` lexes to angle, slash, text, angle.
    #[test]
    fn test_closing_tag() {
        let tokens = tokenize("</body>");
        let expected = [
            TokenType::LeftAngle,
            TokenType::Slash,
            TokenType::Text,
            TokenType::RightAngle,
        ];
        for (index, token_type) in expected.iter().enumerate() {
            assert_eq!(&tokens[index].token_type, token_type);
        }
    }
    /// An anchor tag with a quoted attribute value.
    #[test]
    fn test_html_attribute() {
        let tokens = tokenize(r#"<a href="link">"#);
        let expected = [
            TokenType::LeftAngle,
            TokenType::Text,
            TokenType::Text,
            TokenType::Equal,
            TokenType::DoubleQuote,
            TokenType::Text,
            TokenType::DoubleQuote,
            TokenType::RightAngle,
        ];
        for (index, token_type) in expected.iter().enumerate() {
            assert_eq!(&tokens[index].token_type, token_type);
        }
    }
    /// A `{{ ... }}` variable; the surrounding whitespace produces no tokens.
    #[test]
    fn test_django_variable() {
        let tokens = tokenize("{{ variable }}");
        let expected = [
            TokenType::DoubleLeftBrace,
            TokenType::Text,
            TokenType::DoubleRightBrace,
        ];
        for (index, token_type) in expected.iter().enumerate() {
            assert_eq!(&tokens[index].token_type, token_type);
        }
    }
    /// A `{% ... %}` tag with two words inside.
    #[test]
    fn test_django_templatetag() {
        let tokens = tokenize("{% if condition %}");
        let expected = [
            TokenType::LeftBracePercent,
            TokenType::Text,
            TokenType::Text,
            TokenType::PercentRightBrace,
        ];
        for (index, token_type) in expected.iter().enumerate() {
            assert_eq!(&tokens[index].token_type, token_type);
        }
    }
    /// A `{# ... #}` comment whose four words each become a text token.
    #[test]
    fn test_django_comment() {
        let tokens = tokenize("{# This is a comment #}");
        let expected = [
            TokenType::LeftBraceHash,
            TokenType::Text,
            TokenType::Text,
            TokenType::Text,
            TokenType::Text,
            TokenType::HashRightBrace,
        ];
        for (index, token_type) in expected.iter().enumerate() {
            assert_eq!(&tokens[index].token_type, token_type);
        }
    }
    /// A variable piped through a filter with a single-quoted argument.
    #[test]
    fn test_django_filter() {
        let tokens = tokenize("{{ value|default:'default' }}");
        let expected = [
            TokenType::DoubleLeftBrace,
            TokenType::Text,
            TokenType::Pipe,
            TokenType::Text,
            TokenType::Colon,
            TokenType::SingleQuote,
            TokenType::Text,
            TokenType::SingleQuote,
            TokenType::DoubleRightBrace,
        ];
        for (index, token_type) in expected.iter().enumerate() {
            assert_eq!(&tokens[index].token_type, token_type);
        }
    }
    /// A template tag wrapped in single quotes with a double-quoted argument.
    #[test]
    fn test_quoted_django_templatetag() {
        let tokens = tokenize(r#"'{% url "api:index" %}'"#);
        let expected = [
            TokenType::SingleQuote,
            TokenType::LeftBracePercent,
            TokenType::Text,
            TokenType::DoubleQuote,
            TokenType::Text,
            TokenType::Colon,
            TokenType::Text,
            TokenType::DoubleQuote,
            TokenType::PercentRightBrace,
            TokenType::SingleQuote,
        ];
        for (index, token_type) in expected.iter().enumerate() {
            assert_eq!(&tokens[index].token_type, token_type);
        }
    }
    /// Spot-checks the line number recorded on selected tokens of a
    /// multi-line template.
    #[test]
    fn test_multiline_template() {
        let source = r#"\
        {% if user.is_authenticated %}
            Hello, {{ user.name }}!
        {% else %}
            Please log in.
        {% endif %}
    "#;
        let tokens = tokenize(source);
        // (token index, expected line)
        let expected_lines = [(0, 1), (6, 2), (14, 3), (17, 4), (21, 5)];
        for &(index, line) in expected_lines.iter() {
            assert_eq!(tokens[index].line, line);
        }
    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -5,10 +5,11 @@ mod token;
 use lexer::Lexer;
 use std::error::Error;
 use token::Tokenizer;
 pub fn compile(template: &str) -> Result<String, Box<dyn Error>> {
-    let tokens = Lexer::new(template).tokenize()?;
+    let mut lexer = Lexer::new(template);
    let tokens = lexer.tokenize()?;
    let ast = Parser::new(tokens.clone()).parse()?;
    println!("{:?}", tokens);
    todo!("Implement compilation process")
 }
--- a/src/token.rs
+++ b/src/token.rs
@ -1,4 +1,4 @@
-use crate::scanner::Scanner;
+use crate::error::TokenError;
 use std::fmt::Debug;
 #[derive(Debug, Clone, PartialEq)]
@ -38,7 +38,275 @@ pub enum TokenType {
    Eof,
 }
-#[derive(Debug, Clone)]
+impl TokenType {
    /// Maps a one-character punctuation lexeme to its token type.
    ///
    /// Returns the token type with a size of 1, or
    /// `TokenError::UnexpectedCharacter` for any other character.
    fn single_char(c: char) -> Result<(Self, usize), TokenError> {
        let kind = match c {
            ',' => Self::Comma,
            '.' => Self::Dot,
            '+' => Self::Plus,
            ':' => Self::Colon,
            '|' => Self::Pipe,
            '\'' => Self::SingleQuote,
            '"' => Self::DoubleQuote,
            '/' => Self::Slash,
            '%' => Self::Percent,
            _ => return Err(TokenError::UnexpectedCharacter { character: c }),
        };
        Ok((kind, 1))
    }
    /// Classifies input beginning with `{`.
    ///
    /// Returns the token type and its size: `{{`, `{%` and `{#` are
    /// two-character delimiters; otherwise one character of text.
    fn left_brace(s: &str) -> Result<(Self, usize), TokenError> {
        let matched = if s.starts_with("{{") {
            (Self::DoubleLeftBrace, 2)
        } else if s.starts_with("{%") {
            (Self::LeftBracePercent, 2)
        } else if s.starts_with("{#") {
            (Self::LeftBraceHash, 2)
        } else {
            (Self::Text, 1)
        };
        Ok(matched)
    }
    /// Classifies input beginning with `}`: `}}` is a two-character
    /// delimiter, anything else is one character of text.
    fn right_brace(s: &str) -> Result<(Self, usize), TokenError> {
        let matched = if s.starts_with("}}") {
            (Self::DoubleRightBrace, 2)
        } else {
            (Self::Text, 1)
        };
        Ok(matched)
    }
    /// Classifies input beginning with `%`: the `%}` tag closer (size 2) or
    /// a bare percent sign (size 1).
    fn percent(s: &str) -> Result<(Self, usize), TokenError> {
        let matched = if s.starts_with("%}") {
            (Self::PercentRightBrace, 2)
        } else {
            (Self::Percent, 1)
        };
        Ok(matched)
    }
    /// Classifies input beginning with `#`: the `#}` comment closer (size 2)
    /// or one character of text.
    fn hash(s: &str) -> Result<(Self, usize), TokenError> {
        let matched = if s.starts_with("#}") {
            (Self::HashRightBrace, 2)
        } else {
            (Self::Text, 1)
        };
        Ok(matched)
    }
    /// Classifies input beginning with `!`: `!=` (size 2) or a bare bang
    /// (size 1).
    fn bang(s: &str) -> Result<(Self, usize), TokenError> {
        let matched = if s.starts_with("!=") {
            (Self::BangEqual, 2)
        } else {
            (Self::Bang, 1)
        };
        Ok(matched)
    }
    /// Classifies input beginning with `=`: `==` (size 2) or a single equals
    /// sign (size 1).
    fn equal(s: &str) -> Result<(Self, usize), TokenError> {
        let matched = if s.starts_with("==") {
            (Self::DoubleEqual, 2)
        } else {
            (Self::Equal, 1)
        };
        Ok(matched)
    }
    /// Classifies input beginning with `<`.
    ///
    /// Returns the token type and its size in bytes: `<=`, the `<!--`
    /// comment opener, or a bare left angle.
    fn left_angle(s: &str) -> Result<(Self, usize), TokenError> {
        // Sizes are taken from the literals so the reported length can never
        // drift from the text actually matched (the opener is 4 bytes, not 5).
        const LESS_EQUAL: &str = "<=";
        const COMMENT_OPEN: &str = "<!--";

        let matched = if s.starts_with(LESS_EQUAL) {
            (Self::LeftAngleEqual, LESS_EQUAL.len())
        } else if s.starts_with(COMMENT_OPEN) {
            (Self::LeftAngleBangDashDash, COMMENT_OPEN.len())
        } else {
            (Self::LeftAngle, 1)
        };
        Ok(matched)
    }
    /// Classifies input beginning with `>`: `>=` (size 2) or a bare right
    /// angle (size 1).
    fn right_angle(s: &str) -> Result<(Self, usize), TokenError> {
        let matched = if s.starts_with(">=") {
            (Self::RightAngleEqual, 2)
        } else {
            (Self::RightAngle, 1)
        };
        Ok(matched)
    }
    /// Classifies input beginning with `/`.
    ///
    /// Returns the token type and its size: `/>`, `//` and `/*` are
    /// two-character tokens; otherwise a bare slash.
    fn slash(s: &str) -> Result<(Self, usize), TokenError> {
        let matched = if s.starts_with("/>") {
            (Self::SlashRightAngle, 2)
        } else if s.starts_with("//") {
            (Self::DoubleSlash, 2)
        } else if s.starts_with("/*") {
            (Self::SlashStar, 2)
        } else if s.starts_with("*/") {
            // NOTE(review): `Token::from_input` only dispatches here when the
            // input starts with '/', so this branch looks unreachable from
            // that path (`star` handles "*/") — confirm before removing.
            (Self::StarSlash, 2)
        } else {
            (Self::Slash, 1)
        };
        Ok(matched)
    }
    /// Classifies input beginning with `-`.
    ///
    /// `-->` is the three-character comment closer, `--` followed by anything
    /// else is two characters of text, and a single dash is `Dash`.
    fn dash(s: &str) -> Result<(Self, usize), TokenError> {
        let matched = if s.starts_with("-->") {
            (Self::DashDashRightAngle, 3)
        } else if s.starts_with("--") {
            (Self::Text, 2)
        } else {
            (Self::Dash, 1)
        };
        Ok(matched)
    }
    /// Classifies input beginning with `*`: the `*/` comment closer (size 2)
    /// or one character of text.
    fn star(s: &str) -> Result<(Self, usize), TokenError> {
        let matched = if s.starts_with("*/") {
            (Self::StarSlash, 2)
        } else {
            (Self::Text, 1)
        };
        Ok(matched)
    }
    /// Measures the run of whitespace at the start of `s`.
    ///
    /// Recognises space, tab, `\n` and `\r`. Returns
    /// `(Whitespace, bytes consumed, line breaks seen)`, where `\n`, `\r\n`
    /// and a lone `\r` each count as one line break.
    ///
    /// # Errors
    /// `TokenError::NoTokenMatch` when `s` does not start with one of those
    /// characters.
    fn whitespace(s: &str) -> Result<(Self, usize, usize), TokenError> {
        // Every recognised character is a single ASCII byte, and ASCII bytes
        // never occur inside a multi-byte UTF-8 sequence, so scanning bytes is
        // safe and `size` always lands on a character boundary.
        let bytes = s.as_bytes();
        let mut size = 0;
        let mut lines = 0;
        while let Some(&byte) = bytes.get(size) {
            match byte {
                b' ' | b'\t' => size += 1,
                b'\n' => {
                    size += 1;
                    lines += 1;
                }
                b'\r' => {
                    size += 1;
                    // Treat "\r\n" as one line break and count both bytes.
                    if bytes.get(size) == Some(&b'\n') {
                        size += 1;
                    }
                    lines += 1;
                }
                _ => break,
            }
        }
        if size > 0 {
            Ok((Self::Whitespace, size, lines))
        } else {
            Err(TokenError::NoTokenMatch)
        }
    }
    /// Measures the run of plain text at the start of `s`.
    ///
    /// Text extends up to (not including) the first token-boundary character.
    /// Returns `(Text, length in bytes)` so the caller can slice `s` with it
    /// even when the text contains multi-byte characters.
    ///
    /// # Errors
    /// `TokenError::NoTokenMatch` when `s` is empty or starts with a boundary
    /// character.
    fn text(s: &str) -> Result<(Self, usize), TokenError> {
        // `str::find` yields a byte offset, unlike a `chars().enumerate()`
        // index, which counts characters and mis-sizes non-ASCII text.
        let size = s.find(Self::is_token_boundary).unwrap_or(s.len());
        if size > 0 {
            Ok((Self::Text, size))
        } else {
            Err(TokenError::NoTokenMatch)
        }
    }
    /// Reports whether `c` ends a run of plain text: brackets, punctuation
    /// and operator characters, quotes, and ASCII whitespace.
    fn is_token_boundary(c: char) -> bool {
        matches!(
            c,
            '(' | ')' | '[' | ']' | '{' | '}' | ',' | '.' | '-' | '+' | ':' | ';' | '*' | '|'
                | '%' | '#' | '!' | '=' | '<' | '>' | '/' | ' ' | '\r' | '\t' | '\n' | '"'
                | '\''
        )
    }
 }
 #[derive(Clone, Debug, PartialEq)]
 pub struct Token<'a> {
    pub token_type: TokenType,
    pub lexeme: &'a str,
@ -53,13 +321,271 @@ impl<'a> Token<'a> {
            line,
        }
    }
    /// Builds the token found at the very start of `input`.
    ///
    /// The first character selects a `TokenType` matcher; the matcher decides
    /// the token type and how much of `input` the token covers. `line` is
    /// passed through to `Self::new` unchanged.
    ///
    /// Returns `(token, size, lines_consumed)`: the token whose lexeme is the
    /// leading slice of `input`, the size reported by the matcher, and the
    /// number of line endings consumed (non-zero only for whitespace tokens).
    ///
    /// # Errors
    ///
    /// Returns `TokenError::NoTokenMatch` when `input` is empty or when the
    /// size reported by a matcher does not fall on a character boundary of
    /// `input`. Any error returned by the selected matcher is propagated.
    pub fn from_input(input: &'a str, line: usize) -> Result<(Self, usize, usize), TokenError> {
        let c = input.chars().next().ok_or(TokenError::NoTokenMatch)?;
        // Route only the characters `TokenType::whitespace` actually consumes.
        // `char::is_whitespace` also accepts other Unicode whitespace (NBSP,
        // form feed, ...) that the whitespace matcher rejects with
        // `NoTokenMatch`; those characters are not token boundaries, so they
        // belong to the text matcher instead.
        let (token_type, size, lines_consumed) = if matches!(c, ' ' | '\t' | '\n' | '\r') {
            TokenType::whitespace(input)?
        } else {
            let (token_type, size) = match c {
                ',' | '.' | '+' | ':' | '|' | '\'' | '"' => TokenType::single_char(c)?,
                '{' => TokenType::left_brace(input)?,
                '}' => TokenType::right_brace(input)?,
                '%' => TokenType::percent(input)?,
                '#' => TokenType::hash(input)?,
                '!' => TokenType::bang(input)?,
                '=' => TokenType::equal(input)?,
                '<' => TokenType::left_angle(input)?,
                '>' => TokenType::right_angle(input)?,
                '/' => TokenType::slash(input)?,
                '-' => TokenType::dash(input)?,
                '*' => TokenType::star(input)?,
                _ => TokenType::text(input)?,
            };
            (token_type, size, 0)
        };
        // Checked slicing: report an error rather than panic if a matcher ever
        // hands back a size that splits a multi-byte character.
        let lexeme = input
            .get(..size.min(input.len()))
            .ok_or(TokenError::NoTokenMatch)?;
        Ok((Self::new(token_type, lexeme, line), size, lines_consumed))
    }
 }
-pub trait Tokenizer<'a>: Scanner {
+#[cfg(test)]
-    type Token: Debug;
+mod tests {
-    type TokenType: Debug;
+    use super::*;
-    fn tokenize(&mut self) -> Result<Vec<Self::Token>, Self::Error>;
+    fn assert_token_instance<F>(test_cases: Vec<(&str, TokenType)>, method: F)
-    fn next_token(&mut self) -> Result<(Self::TokenType, &'a str), Self::Error>;
+    where
-    fn add_token(&mut self, token_type: Self::TokenType, text: &'a str);
+        F: Fn(&str) -> Result<(Token<'_>, usize, usize), TokenError>,
    {
        // Test helper: runs `method` on every input in `test_cases` and checks
        // that the token it returns has the expected token type. The consumed
        // size and line count returned alongside the token are ignored.
        for (input, expected_token_type) in test_cases {
            // Printed so the offending input is visible in the test output.
            println!("Testing input: {:?}", input);
            // Call the token-based method
            match method(input) {
                Ok((token, _size_consumed, _lines_consumed)) => {
                    assert_eq!(token.token_type, expected_token_type, "Input: {}", input);
                }
                // An error from `method` fails the test for this input.
                Err(e) => panic!(
                    "Expected {:?}, but got Err({:?}) for input: {}",
                    expected_token_type, e, input,
                ),
            }
        }
    }
    /// Checks the token type `Token::from_input` reports for each kind of
    /// input: single-character tokens, multi-character tokens, whitespace,
    /// and plain text.
    #[test]
    fn test_match_token() {
        let single_char = vec![
            ("<", TokenType::LeftAngle),
            (">", TokenType::RightAngle),
            (",", TokenType::Comma),
            (".", TokenType::Dot),
            ("-", TokenType::Dash),
            ("+", TokenType::Plus),
            (":", TokenType::Colon),
            ("/", TokenType::Slash),
            ("!", TokenType::Bang),
            ("=", TokenType::Equal),
            ("|", TokenType::Pipe),
            ("%", TokenType::Percent),
            ("'", TokenType::SingleQuote),
            ("\"", TokenType::DoubleQuote),
        ];
        let multi_char = vec![
            ("{{", TokenType::DoubleLeftBrace),
            ("}}", TokenType::DoubleRightBrace),
            ("{%", TokenType::LeftBracePercent),
            ("%}", TokenType::PercentRightBrace),
            ("{#", TokenType::LeftBraceHash),
            ("#}", TokenType::HashRightBrace),
            ("!=", TokenType::BangEqual),
            ("==", TokenType::DoubleEqual),
            ("<=", TokenType::LeftAngleEqual),
            (">=", TokenType::RightAngleEqual),
            ("<!--", TokenType::LeftAngleBangDashDash),
            ("-->", TokenType::DashDashRightAngle),
            ("/>", TokenType::SlashRightAngle),
            ("//", TokenType::DoubleSlash),
            ("/*", TokenType::SlashStar),
            ("*/", TokenType::StarSlash),
        ];
        let whitespace = vec![
            (" ", TokenType::Whitespace),
            ("\r", TokenType::Whitespace),
            ("\t", TokenType::Whitespace),
            ("\n", TokenType::Whitespace),
            ("  ", TokenType::Whitespace),
            (" \n", TokenType::Whitespace),
        ];
        let text = vec![
            ("a", TokenType::Text),
            ("1", TokenType::Text),
            ("Hello", TokenType::Text),
        ];
        // Concatenate the groups in their listed order into one case list.
        let cases: Vec<(&str, TokenType)> = single_char
            .into_iter()
            .chain(multi_char)
            .chain(whitespace)
            .chain(text)
            .collect();
        assert_token_instance(cases, |input| Token::from_input(input, 0));
    }
    /// Test helper: runs `method` on every input in `test_cases` and asserts
    /// that it returns the expected token type. The reported size is ignored;
    /// an `Err` from `method` fails the test.
    fn assert_token_type<F>(test_cases: Vec<(&str, TokenType)>, method: F)
    where
        F: Fn(&str) -> Result<(TokenType, usize), TokenError>,
    {
        for (input, expected_token_type) in test_cases {
            println!("Testing input: {:?}", input);
            let (actual, _size_consumed) = method(input).unwrap_or_else(|e| {
                panic!(
                    "Expected {:?}, but got Err({:?}) for input: {}",
                    expected_token_type, e, input,
                )
            });
            assert_eq!(actual, expected_token_type, "Input: {}", input);
        }
    }
    /// Expects `left_brace` to report `{{`, `{%` and `{#` as their dedicated
    /// token types and a lone `{` as text.
    #[test]
    fn test_left_brace() {
        assert_token_type(
            vec![
                ("{{", TokenType::DoubleLeftBrace),
                ("{%", TokenType::LeftBracePercent),
                ("{#", TokenType::LeftBraceHash),
                ("{", TokenType::Text),
            ],
            TokenType::left_brace,
        );
    }
    /// Expects `right_brace` to report `}}` as a double right brace and a
    /// lone `}` as text.
    #[test]
    fn test_right_brace() {
        assert_token_type(
            vec![
                ("}}", TokenType::DoubleRightBrace),
                ("}", TokenType::Text),
            ],
            TokenType::right_brace,
        );
    }
    /// Expects `percent` to distinguish a lone `%` from the `%}` pair.
    #[test]
    fn test_percent() {
        assert_token_type(
            vec![
                ("%", TokenType::Percent),
                ("%}", TokenType::PercentRightBrace),
            ],
            TokenType::percent,
        );
    }
    /// Expects `bang` to distinguish a lone `!` from `!=`.
    #[test]
    fn test_bang() {
        assert_token_type(
            vec![
                ("!", TokenType::Bang),
                ("!=", TokenType::BangEqual),
            ],
            TokenType::bang,
        );
    }
    /// Expects `equal` to distinguish a lone `=` from `==`.
    #[test]
    fn test_equal() {
        assert_token_type(
            vec![
                ("=", TokenType::Equal),
                ("==", TokenType::DoubleEqual),
            ],
            TokenType::equal,
        );
    }
    /// Expects `left_angle` to report `<=` and `<!--` as their dedicated
    /// token types, and to report `<` for a lone `<` as well as for the
    /// incomplete comment openers `<!` and `<!-`.
    #[test]
    fn test_left_angle() {
        assert_token_type(
            vec![
                ("<", TokenType::LeftAngle),
                ("<=", TokenType::LeftAngleEqual),
                ("<!--", TokenType::LeftAngleBangDashDash),
                ("<!", TokenType::LeftAngle),
                ("<!-", TokenType::LeftAngle),
                ("<!---", TokenType::LeftAngleBangDashDash),
            ],
            TokenType::left_angle,
        );
    }
    /// Expects `right_angle` to distinguish a lone `>` from `>=`.
    #[test]
    fn test_right_angle() {
        assert_token_type(
            vec![
                (">", TokenType::RightAngle),
                (">=", TokenType::RightAngleEqual),
            ],
            TokenType::right_angle,
        );
    }
    /// Expects `slash` to report `/>`, `//` and `/*` as their dedicated token
    /// types and a lone `/` as a slash.
    #[test]
    fn test_slash() {
        assert_token_type(
            vec![
                ("/", TokenType::Slash),
                ("/>", TokenType::SlashRightAngle),
                ("//", TokenType::DoubleSlash),
                ("/*", TokenType::SlashStar),
            ],
            TokenType::slash,
        );
    }
    /// Expects `dash` to report a lone `-` as a dash, `-->` as the comment
    /// closer, and `--` without the closing `>` as text.
    #[test]
    fn test_dash() {
        assert_token_type(
            vec![
                ("-", TokenType::Dash),
                ("-->", TokenType::DashDashRightAngle),
                ("--", TokenType::Text),
            ],
            TokenType::dash,
        );
    }
    /// Expects `star` to report `*/` as a star-slash and a lone `*` as text.
    #[test]
    fn test_star() {
        assert_token_type(
            vec![
                ("*/", TokenType::StarSlash),
                ("*", TokenType::Text),
            ],
            TokenType::star,
        );
    }
    /// Expects `text` to report letters, digits, and words as text.
    #[test]
    fn test_text() {
        assert_token_type(
            vec![
                ("a", TokenType::Text),
                ("1", TokenType::Text),
                ("Hello", TokenType::Text),
            ],
            TokenType::text,
        );
    }
    /// Test helper: runs `method` on every input in `test_cases` and asserts
    /// that it returns a whitespace token type together with the expected
    /// number of consumed lines. The reported size is ignored; an `Err` from
    /// `method` fails the test.
    fn assert_whitespace_token_type<F>(test_cases: Vec<(&str, usize)>, method: F)
    where
        F: Fn(&str) -> Result<(TokenType, usize, usize), TokenError>,
    {
        for (input, expected_lines) in test_cases {
            println!("Testing input: {:?}", input);
            // Run the token matcher; any error is a test failure.
            let (actual_type, _size_consumed, actual_lines) =
                method(input).unwrap_or_else(|e| {
                    panic!(
                        "Expected Whitespace, but got Err({:?}) for input: {}",
                        e, input
                    )
                });
            assert_eq!(actual_type, TokenType::Whitespace, "Input: {}", input);
            assert_eq!(actual_lines, expected_lines, "Input: {}", input);
        }
    }
    /// Expects `whitespace` to count one line for each of `\n`, `\r` and
    /// `\r\n`, and none for spaces or tabs.
    #[test]
    fn test_whitespace_token_type() {
        assert_whitespace_token_type(
            vec![
                (" ", 0),
                ("\n", 1),
                ("\t", 0),
                ("\r", 1),
                (" \n", 1),
                ("\r\n", 1),
            ],
            TokenType::whitespace,
        );
    }
 }