move scanner state to separate struct (#6)

This commit is contained in:
Josh Thomas 2024-10-13 17:58:52 -05:00 committed by GitHub
parent 03c0c19dd9
commit aa7913b988
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 37 additions and 21 deletions

View file

@ -1,7 +1,7 @@
use std::fmt; use std::fmt;
use std::fmt::Debug; use std::fmt::Debug;
use crate::scanner::Scanner; use crate::scanner::{Scanner, ScannerState};
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum TokenType { pub enum TokenType {
@ -92,9 +92,7 @@ impl std::error::Error for LexerError {}
pub struct Lexer { pub struct Lexer {
source: String, source: String,
tokens: Vec<Token>, tokens: Vec<Token>,
start: usize, state: ScannerState,
current: usize,
line: usize,
} }
impl Lexer { impl Lexer {
@ -102,9 +100,7 @@ impl Lexer {
Lexer { Lexer {
source, source,
tokens: Vec::new(), tokens: Vec::new(),
start: 0, state: ScannerState::new(),
current: 0,
line: 1,
} }
} }
@ -149,7 +145,7 @@ impl Lexer {
'|' => TokenType::Pipe, '|' => TokenType::Pipe,
'\'' => TokenType::SingleQuote, '\'' => TokenType::SingleQuote,
'"' => TokenType::DoubleQuote, '"' => TokenType::DoubleQuote,
_ => return Err(LexerError::UnexpectedCharacter(c, self.line)), _ => return Err(LexerError::UnexpectedCharacter(c, self.state.line)),
}; };
Ok(token_type) Ok(token_type)
} }
@ -245,7 +241,7 @@ impl Lexer {
fn handle_whitespace(&mut self) -> Result<TokenType, LexerError> { fn handle_whitespace(&mut self) -> Result<TokenType, LexerError> {
while !self.is_at_end() && self.peek().is_whitespace() { while !self.is_at_end() && self.peek().is_whitespace() {
if self.peek() == '\n' { if self.peek() == '\n' {
self.line += 1; self.state.line += 1;
} }
self.advance(); self.advance();
} }
@ -255,8 +251,8 @@ impl Lexer {
fn handle_text(&mut self) -> Result<TokenType, LexerError> { fn handle_text(&mut self) -> Result<TokenType, LexerError> {
self.advance_while(|c| !Self::is_token_boundary(c)); self.advance_while(|c| !Self::is_token_boundary(c));
if self.start == self.current { if self.state.start == self.state.current {
Err(LexerError::EmptyToken(self.line)) Err(LexerError::EmptyToken(self.state.line))
} else { } else {
Ok(TokenType::Text) Ok(TokenType::Text)
} }
@ -266,7 +262,7 @@ impl Lexer {
if self.is_at_end() || self.peek() != expected { if self.is_at_end() || self.peek() != expected {
false false
} else { } else {
self.current += 1; self.state.current += 1;
true true
} }
} }
@ -277,7 +273,7 @@ impl Lexer {
{ {
while !self.is_at_end() && condition(self.peek()) { while !self.is_at_end() && condition(self.peek()) {
if self.peek() == '\n' { if self.peek() == '\n' {
self.line += 1; self.state.line += 1;
} }
self.advance(); self.advance();
} }
@ -300,12 +296,12 @@ impl Tokenizer for Lexer {
fn tokenize(&mut self) -> Result<Vec<Self::Token>, Self::Error> { fn tokenize(&mut self) -> Result<Vec<Self::Token>, Self::Error> {
while !self.is_at_end() { while !self.is_at_end() {
self.start = self.current; self.state.start = self.state.current;
self.scan_token()?; self.scan_token()?;
} }
self.tokens self.tokens
.push(Token::new(TokenType::Eof, String::new(), self.line)); .push(Token::new(TokenType::Eof, String::new(), self.state.line));
Ok(self.tokens.clone()) Ok(self.tokens.clone())
} }
@ -314,9 +310,10 @@ impl Tokenizer for Lexer {
} }
fn add_token(&mut self, token_type: Self::TokenType) { fn add_token(&mut self, token_type: Self::TokenType) {
let text = self.source[self.start..self.current].to_string(); let text = self.source[self.state.start..self.state.current].to_string();
if token_type != TokenType::Whitespace { if token_type != TokenType::Whitespace {
self.tokens.push(Token::new(token_type, text, self.line)); self.tokens
.push(Token::new(token_type, text, self.state.line));
} }
} }
} }
@ -326,20 +323,23 @@ impl Scanner for Lexer {
fn advance(&mut self) -> Self::Item { fn advance(&mut self) -> Self::Item {
let current_char = self.peek(); let current_char = self.peek();
self.current += 1; self.state.current += 1;
current_char current_char
} }
fn peek(&self) -> Self::Item { fn peek(&self) -> Self::Item {
self.source.chars().nth(self.current).unwrap_or('\0') self.source.chars().nth(self.state.current).unwrap_or('\0')
} }
fn peek_next(&self) -> Self::Item { fn peek_next(&self) -> Self::Item {
self.source.chars().nth(self.current + 1).unwrap_or('\0') self.source
.chars()
.nth(self.state.current + 1)
.unwrap_or('\0')
} }
fn is_at_end(&self) -> bool { fn is_at_end(&self) -> bool {
self.current >= self.source.len() self.state.current >= self.source.len()
} }
} }

View file

@ -1,5 +1,21 @@
use std::fmt::Debug; use std::fmt::Debug;
/// Cursor bookkeeping shared by scanners: where the token being scanned
/// began, how far scanning has progressed, and which line it is on.
///
/// The fields are public so that an owning scanner can read and update them
/// directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScannerState {
    /// Offset into the source at which the token currently being scanned starts.
    pub start: usize,
    /// Offset into the source of the next character to be consumed.
    pub current: usize,
    /// Line number of the current position; counting starts at 1.
    pub line: usize,
}

impl ScannerState {
    /// Creates the state for a scan that has not consumed any input yet:
    /// both offsets are `0` and the line number is `1`.
    pub fn new() -> Self {
        ScannerState {
            start: 0,
            current: 0,
            line: 1,
        }
    }
}

impl Default for ScannerState {
    /// Equivalent to [`ScannerState::new`].
    ///
    /// Implemented by hand rather than derived because a derived `Default`
    /// would set `line` to `0`, whereas line numbering starts at `1`.
    fn default() -> Self {
        Self::new()
    }
}
pub trait Scanner { pub trait Scanner {
type Item: Debug; type Item: Debug;