simple example of custom tokenizer

Andy Grove 2018-02-09 06:53:49 -07:00
parent fcf6b1150e
commit f56846098e
6 changed files with 145 additions and 29 deletions


@@ -1,17 +1,23 @@
+use std::str::Chars;
+use std::iter::Peekable;
+
 extern crate datafusion_sql;
 
 use datafusion_sql::tokenizer::*;
+use datafusion_sql::generic_tokenizer::*;
 use datafusion_sql::parser::*;
 
-#[derive(Debug)]
+///
+/// This example demonstrates building a custom ACME parser that extends the generic parser
+/// by adding support for a factorial operator !!
+///
+#[derive(Debug,PartialEq)]
 enum AcmeToken {
     /// Factorial operator `!!`
     Factorial
 }
 
 #[derive(Debug)]
 enum AcmeOperator {
     Factorial
@@ -19,21 +25,35 @@ enum AcmeOperator {
 #[derive(Debug)]
 enum AcmeTokenizerError {
 }
 
 struct AcmeTokenizer {
-    //chars: &'a Chars
+    generic: GenericTokenizer
 }
 
+/// The ACME tokenizer looks for the factorial operator `!!` but delegates everything else
 impl SQLTokenizer<AcmeToken, AcmeTokenizerError> for AcmeTokenizer {
 
-    fn peek_token(&mut self) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError<AcmeTokenizerError>> {
-        Ok(Some(SQLToken::Custom(AcmeToken::Factorial)))
-    }
-
-    fn next_token(&mut self) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError<AcmeTokenizerError>> {
-        Ok(Some(SQLToken::Custom(AcmeToken::Factorial)))
+    fn next_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError<AcmeTokenizerError>> {
+        match chars.peek() {
+            Some(&ch) => match ch {
+                '!' => {
+                    chars.next(); // consume the first `!`
+                    match chars.peek() {
+                        Some(&ch) => match ch {
+                            '!' => {
+                                chars.next(); // consume the second `!`
+                                Ok(Some(SQLToken::Custom(AcmeToken::Factorial)))
+                            },
+                            _ => Err(TokenizerError::UnexpectedChar(ch,Position::new(0,0)))
+                        },
+                        None => Ok(Some(SQLToken::Not))
+                    }
+                },
+                _ => self.generic.next_token(chars)
+            }
+            _ => self.generic.next_token(chars)
+        }
     }
 }
@@ -43,9 +63,13 @@ fn main() {
     let sql = "1 + !! 5 * 2";
 
-    let mut tokenizer = AcmeTokenizer { };
+    let mut acme_tokenizer = AcmeTokenizer {
+        generic: GenericTokenizer { }
+    };
 
-    println!("token = {:?}", tokenizer.peek_token().unwrap());
+    let tokens = tokenize(&sql, &mut acme_tokenizer).unwrap();
+    println!("tokens = {:?}", tokens);