Simple example of a custom tokenizer

This commit is contained in:
Andy Grove 2018-02-09 06:53:49 -07:00
parent fcf6b1150e
commit f56846098e
6 changed files with 145 additions and 29 deletions

View file

@ -1,24 +1,40 @@
use std::cmp::PartialEq;
use std::fmt::Debug;
use std::iter::Peekable;
use std::str::Chars;
/// A location in the SQL source text, carried by tokenizer errors for
/// error reporting.
///
/// NOTE(review): whether `line`/`col` are 0- or 1-based is not established
/// by this file — confirm against the tokenizer that produces them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Position {
    line: usize,
    col: usize,
}

impl Position {
    /// Construct a `Position` from a line number and column number.
    pub fn new(line: usize, col: usize) -> Self {
        Position { line, col }
    }
}
/// Errors that can occur while tokenizing SQL text.
///
/// Generic over `T` so that a custom tokenizer implementation can surface
/// its own error payload through the `Custom` variant.
#[derive(Debug)]
pub enum TokenizerError<T> {
    /// A character was read that no tokenization rule accepts, at the
    /// given source position.
    UnexpectedChar(char,Position),
    /// The input ended while more characters were still expected.
    UnexpectedEof(Position),
    /// A string literal was opened but never closed before end of input.
    UnterminatedStringLiteral(Position),
    /// Implementation-specific error supplied by a custom tokenizer.
    Custom(T)
}
/// SQL Tokens
#[derive(Debug)]
pub enum SQLToken<T> {
#[derive(Debug,PartialEq)]
pub enum SQLToken<T: Debug + PartialEq> {
Whitespace(char),
Keyword(String), //TODO: &str ?
Identifier(String), //TODO: &str ?
Literal(String), //TODO: need to model different types of literal
Plus,
Minus,
Mult,
Divide,
Eq,
Not,
NotEq,
Gt,
GtEq,
@ -31,9 +47,32 @@ pub enum SQLToken<T> {
Custom(T)
}
// NOTE(review): this span is a rendered diff, not a single coherent
// definition — it interleaves the old and the new version of the trait,
// which share the closing brace below. The lines are kept exactly as
// they appear in the diff.
//
// Old declaration (removed by this commit): generic error parameter `T`,
// with `peek_token`/`next_token` taking `&mut self` and owning the cursor.
pub trait SQLTokenizer<S, T> {
/// return a reference to the next token without consuming it (look ahead)
fn peek_token(&mut self) -> Result<Option<SQLToken<S>>, TokenizerError<T>>;
// New declaration (added by this commit): error parameter renamed to `TE`,
// `S` bounded, and the character cursor passed in explicitly as a
// `Peekable<Chars>` — this is the signature the `tokenize` function
// further down calls.
pub trait SQLTokenizer<S, TE>
where S: Debug + PartialEq {
/// return a reference to the next token and advance the index
fn next_token(&mut self) -> Result<Option<SQLToken<S>>, TokenizerError<T>>;
fn next_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<S>>, TokenizerError<TE>>;
}
/// Tokenize an SQL string by driving `tokenizer` over a peekable character
/// cursor until it signals end of input (`Ok(None)`).
///
/// Whitespace tokens are discarded; all other tokens are collected in
/// source order.
///
/// # Errors
///
/// Propagates the first `TokenizerError` returned by
/// `tokenizer.next_token`.
pub fn tokenize<S,TE>(sql: &str, tokenizer: &mut SQLTokenizer<S,TE>) -> Result<Vec<SQLToken<S>>, TokenizerError<TE>>
    where S: Debug + PartialEq
{
    let mut chars = sql.chars().peekable();
    let mut tokens: Vec<SQLToken<S>> = vec![];
    // Pull tokens until `Ok(None)`; `?` propagates errors immediately.
    // (A leftover debug `println!` per token was removed here — library
    // code should not write to stdout.)
    while let Some(token) = tokenizer.next_token(&mut chars)? {
        match token {
            // Whitespace is not significant to callers; drop it here.
            SQLToken::Whitespace(_) => {}
            token => tokens.push(token),
        }
    }
    Ok(tokens)
}