From 7ff413332731185a9aecc93a3c0193d9fd6a064e Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Sat, 1 Sep 2018 12:59:18 -0600
Subject: [PATCH] Refactoring

---
 examples/acme_parser.rs | 88 ++++++++++++++++++-----------------------
 src/ansi/parser.rs      | 24 +++++++----
 src/ansi/tokenizer.rs   | 24 +++++++----
 src/parser.rs           |  9 ++++-
 src/tokenizer.rs        | 56 ++++++++++++++------------
 5 files changed, 108 insertions(+), 93 deletions(-)

diff --git a/examples/acme_parser.rs b/examples/acme_parser.rs
index 92d3bb8e..152215fa 100644
--- a/examples/acme_parser.rs
+++ b/examples/acme_parser.rs
@@ -1,10 +1,9 @@
-use std::str::Chars;
-use std::iter::Peekable;
-use std::rc::Rc;
+use std::sync::{Arc, Mutex};
 
 extern crate datafusion_sql;
 
 use datafusion_sql::ansi::tokenizer::ANSISQLTokenizer;
+use datafusion_sql::ansi::parser::ANSISQLParser;
 use datafusion_sql::tokenizer::*;
 use datafusion_sql::parser::*;
 
@@ -26,29 +25,30 @@ enum AcmeExpr {
 }
 
 struct AcmeTokenizer {
-    generic: ANSISQLTokenizer
+    ansi_tokenizer: Arc<Mutex<SQLTokenizer<AcmeToken>>>
 }
 
 /// The ACME tokenizer looks for the factorial operator `!!` but delegates everything else
 impl SQLTokenizer<AcmeToken> for AcmeTokenizer {
 
-    fn precedence(&self, token: &SQLToken<AcmeToken>) -> usize {
+    fn precedence(&self, _token: &SQLToken<AcmeToken>) -> usize {
         unimplemented!()
     }
 
-    fn peek_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError> {
+    fn peek_token(&mut self) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError> {
         unimplemented!()
     }
 
-    fn next_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError> {
-        match chars.peek() {
+    fn next_token(&mut self) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError> {
+        let mut arc = self.ansi_tokenizer.lock().unwrap();
+        match arc.peek_char() {
             Some(&ch) => match ch {
                 '!' => {
-                    chars.next(); // consume the first `!`
-                    match chars.peek() {
+                    arc.next_char(); // consume the first `!`
+                    match arc.peek_char() {
                         Some(&ch) => match ch {
                             '!' => {
-                                chars.next(); // consume the second `!`
+                                arc.next_char(); // consume the second `!`
                                 Ok(Some(SQLToken::Custom(AcmeToken::Factorial)))
                             },
                             _ => Err(TokenizerError::UnexpectedChar(ch,Position::new(0,0)))
@@ -56,34 +56,35 @@ impl SQLTokenizer<AcmeToken> for AcmeTokenizer {
                         None => Ok(Some(SQLToken::Not))
                     }
                 }
-                _ => self.generic.next_token(chars)
+                _ => arc.next_token()
             }
-            _ => self.generic.next_token(chars)
+            _ => arc.next_token()
         }
     }
+
+    fn peek_char(&mut self) -> Option<&char> {
+        unimplemented!()
+    }
+
+    fn next_char(&mut self) -> Option<&char> {
+        unimplemented!()
+    }
 }
 
 struct AcmeParser {
-    tokenizer: Rc<SQLTokenizer<AcmeToken>>
+    ansi_parser: Arc<Mutex<SQLParser<AcmeToken, AcmeExpr>>>
 }
 
-//
-//impl<'a> AcmeParser<'a> {
-//
-//    pub fn new(sql: &'a str) -> Self {
-//        AcmeParser {
-//            chars: sql.chars().peekable()
-//        }
-//    }
-//}
 
 impl SQLParser<AcmeToken, AcmeExpr> for AcmeParser {
 
     fn parse_prefix(&mut self) -> Result<Box<SQLExpr<AcmeExpr>>, ParserError> {
-        unimplemented!()
+        //TODO: add custom overrides
+        self.ansi_parser.lock().unwrap().parse_prefix()
    }
 
-    fn parse_infix(&mut self, left: &SQLExpr<AcmeExpr>, _precedence: usize) -> Result<Option<Box<SQLExpr<AcmeExpr>>>, ParserError> {
-        unimplemented!()
+    fn parse_infix(&mut self, left: &SQLExpr<AcmeExpr>, precedence: usize) -> Result<Option<Box<SQLExpr<AcmeExpr>>>, ParserError> {
+        //TODO: add custom overrides
+        self.ansi_parser.lock().unwrap().parse_infix(left, precedence)
    }
 }
 
@@ -91,33 +92,20 @@ fn main() {
 
     let sql = "1 + !! 5 * 2";
 
-//    let acme_parser = AcmeParser::new(sql);
+    // ANSI SQL tokenizer
+    let ansi_tokenizer = Arc::new(Mutex::new(ANSISQLTokenizer { chars: sql.chars().peekable() }));
 
+    // Custom ACME tokenizer
+    let mut acme_tokenizer = Arc::new(Mutex::new(AcmeTokenizer {
+        ansi_tokenizer: ansi_tokenizer.clone()
+    }));
 
-    //acme_parser
+    // Custom ACME parser
+    let acme_parser: Arc<Mutex<SQLParser<AcmeToken, AcmeExpr>>> = Arc::new(Mutex::new(AcmeParser {
+        ansi_parser: Arc::new(Mutex::new(ANSISQLParser::new(acme_tokenizer)))
+    }));
 
-    let mut acme_tokenizer: Rc<SQLTokenizer<AcmeToken>> = Rc::new(AcmeTokenizer {
-        generic: ANSISQLTokenizer { }
-    });
-
-    let mut acme_parser: Rc<SQLParser<AcmeToken, AcmeExpr>> = Rc::new(AcmeParser {
-        tokenizer: acme_tokenizer.clone()
-    });
-
-//    let mut pratt_parser = Rc::new(PrattParser {
-//        chars: sql.chars().peekable(),
-//        tokenizer: acme_tokenizer.clone(),
-//        parser: acme_parser.clone()
-//    });
-
-    let mut chars = sql.chars().peekable();
-
-    let expr = parse_expr(acme_tokenizer, acme_parser, &mut chars);
+    let expr = parse_expr(acme_parser).unwrap();
 
     println!("Parsed: {:?}", expr);
-//
-//    let tokens = tokenize(&sql, &mut acme_tokenizer).unwrap();
-//
-//    println!("tokens = {:?}", tokens);
-
 }
diff --git a/src/ansi/parser.rs b/src/ansi/parser.rs
index 3a81e7b4..8ed91029 100644
--- a/src/ansi/parser.rs
+++ b/src/ansi/parser.rs
@@ -1,22 +1,30 @@
 use std::cmp::PartialEq;
 use std::fmt::Debug;
-use std::iter::Peekable;
-use std::str::Chars;
+//use std::iter::Peekable;
+//use std::str::Chars;
+
+use std::sync::{Arc, Mutex};
 
 use super::super::tokenizer::*;
 use super::super::parser::*;
 
-pub struct ANSISQLParser<'a, TokenType> {
-    chars: Peekable<Chars<'a>>,
-    tokenizer: SQLTokenizer<TokenType>
+pub struct ANSISQLParser<TokenType> {
+    tokenizer: Arc<Mutex<SQLTokenizer<TokenType>>>
 }
 
-impl<'a, TokenType, ExprType> SQLParser<TokenType, ExprType> for ANSISQLParser<'a, TokenType>
-    where TokenType: Debug + PartialEq, ExprType: Debug + PartialEq {
+impl<TokenType> ANSISQLParser<TokenType> where TokenType: Debug + PartialEq {
+
+    pub fn new(tokenizer: Arc<Mutex<SQLTokenizer<TokenType>>>) -> Self {
+        ANSISQLParser { tokenizer: tokenizer.clone() }
+    }
+}
+
+impl<TokenType, ExprType> SQLParser<TokenType, ExprType> for ANSISQLParser<TokenType>
+    where TokenType: Debug + PartialEq, ExprType: Debug {
 
     fn parse_prefix(&mut self) -> Result<Box<SQLExpr<ExprType>>, ParserError> {
-        match self.tokenizer.peek_token(&mut self.chars)? {
+        match self.tokenizer.lock().unwrap().peek_token()? {
             Some(SQLToken::Keyword(ref k)) => match k.to_uppercase().as_ref() {
                 "INSERT" => unimplemented!(),
                 "UPDATE" => unimplemented!(),
diff --git a/src/ansi/tokenizer.rs b/src/ansi/tokenizer.rs
index 4ce44024..ef744f27 100644
--- a/src/ansi/tokenizer.rs
+++ b/src/ansi/tokenizer.rs
@@ -5,30 +5,32 @@ use std::str::Chars;
 
 use super::super::tokenizer::*;
 
-pub struct ANSISQLTokenizer {}
+pub struct ANSISQLTokenizer<'a> {
+    pub chars: Peekable<Chars<'a>>
+}
 
-impl<TokenType> SQLTokenizer<TokenType> for ANSISQLTokenizer
+impl<'a, TokenType> SQLTokenizer<TokenType> for ANSISQLTokenizer<'a>
     where TokenType: Debug + PartialEq {
 
     fn precedence(&self, _token: &SQLToken<TokenType>) -> usize {
         unimplemented!()
     }
 
-    fn peek_token(&self, _chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<TokenType>>, TokenizerError> {
+    fn peek_token(&mut self) -> Result<Option<SQLToken<TokenType>>, TokenizerError> {
         unimplemented!()
     }
 
-    fn next_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<TokenType>>, TokenizerError> {
-        match chars.next() {
+    fn next_token(&mut self) -> Result<Option<SQLToken<TokenType>>, TokenizerError> {
+        match self.chars.next() {
             Some(ch) => match ch {
                 ' ' | '\t' | '\n' => Ok(Some(SQLToken::Whitespace(ch))),
                 '0' ... '9' => {
                     let mut s = String::new();
                     s.push(ch);
-                    while let Some(&ch) = chars.peek() {
+                    while let Some(&ch) = self.chars.peek() {
                         match ch {
                             '0' ... '9' => {
-                                chars.next(); // consume
+                                self.chars.next(); // consume
                                 s.push(ch);
                             },
                             _ => break
@@ -45,5 +47,13 @@ impl<TokenType> SQLTokenizer<TokenType> for ANSISQLTokenizer
             None => Ok(None)
         }
     }
+
+    fn peek_char(&mut self) -> Option<&char> {
+        unimplemented!()
+    }
+
+    fn next_char(&mut self) -> Option<&char> {
+        unimplemented!()
+    }
 }
diff --git a/src/parser.rs b/src/parser.rs
index b08a8eba..a310ebed 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -3,6 +3,7 @@ use std::fmt::Debug;
 use std::rc::Rc;
 use std::str::Chars;
 use std::iter::Peekable;
+use std::sync::{Arc, Mutex};
 
 use super::tokenizer::*;
 
@@ -117,9 +118,13 @@ pub trait SQLParser
 }
 
-pub fn parse_expr<'a, TokenType, ExprType>(_tokenizer: Rc<SQLTokenizer<TokenType>>, _parser: Rc<SQLParser<TokenType, ExprType>>, _chars: &mut Peekable<Chars<'a>>)
+pub fn parse_expr<'a, TokenType, ExprType>(parser: Arc<Mutex<SQLParser<TokenType, ExprType>>>)
     -> Result<Box<SQLExpr<ExprType>>, ParserError>
     where TokenType: Debug + PartialEq, ExprType: Debug {
-    unimplemented!()
+    let mut guard = parser.lock().unwrap();
+
+    //Result<Box<SQLExpr<ExprType>>, ParserError>
+    let x = guard.parse_prefix();
+    x
 }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 55604aa9..6db89375 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1,7 +1,7 @@
 use std::cmp::PartialEq;
 use std::fmt::Debug;
-use std::iter::Peekable;
-use std::str::Chars;
+//use std::iter::Peekable;
+//use std::str::Chars;
 
 #[derive(Debug)]
 pub struct Position {
@@ -54,31 +54,35 @@ pub trait SQLTokenizer
     fn precedence(&self, token: &SQLToken<TokenType>) -> usize;
 
     /// return a reference to the next token but do not advance the index
-    fn peek_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<TokenType>>, TokenizerError>;
+    fn peek_token(&mut self) -> Result<Option<SQLToken<TokenType>>, TokenizerError>;
 
     /// return a reference to the next token and advance the index
-    fn next_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<TokenType>>, TokenizerError>;
+    fn next_token(&mut self) -> Result<Option<SQLToken<TokenType>>, TokenizerError>;
+
+    fn peek_char(&mut self) -> Option<&char>;
+
+    fn next_char(&mut self) -> Option<&char>;
 }
 
-
-pub fn tokenize<TokenType>(sql: &str, tokenizer: &mut SQLTokenizer<TokenType>) -> Result<Vec<SQLToken<TokenType>>, TokenizerError>
-    where TokenType: Debug + PartialEq
-    {
-
-    let mut peekable = sql.chars().peekable();
-
-    let mut tokens : Vec<SQLToken<TokenType>> = vec![];
-
-    loop {
-        match tokenizer.next_token(&mut peekable)? {
-            Some(SQLToken::Whitespace(_)) => { /* ignore */ },
-            Some(token) => {
-                println!("Token: {:?}", token);
-                tokens.push(token)
-            },
-            None => break
-        }
-    }
-
-    Ok(tokens)
-}
\ No newline at end of file
+//
+//pub fn tokenize<TokenType>(sql: &str, tokenizer: &mut SQLTokenizer<TokenType>) -> Result<Vec<SQLToken<TokenType>>, TokenizerError>
+//    where TokenType: Debug + PartialEq
+//    {
+//
+//    let mut peekable = sql.chars().peekable();
+//
+//    let mut tokens : Vec<SQLToken<TokenType>> = vec![];
+//
+//    loop {
+//        match tokenizer.next_token(&mut peekable)? {
+//            Some(SQLToken::Whitespace(_)) => { /* ignore */ },
+//            Some(token) => {
+//                println!("Token: {:?}", token);
+//                tokens.push(token)
+//            },
+//            None => break
+//        }
+//    }
+//
+//    Ok(tokens)
+//}
\ No newline at end of file
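
Note on the pattern this patch introduces: tokenizer and parser state now lives behind an Arc<Mutex<...>> handle instead of a `chars: &mut Peekable<Chars>` argument threaded through every call, so a wrapping implementation such as AcmeTokenizer can hold its ANSI delegate and forward whatever it does not handle itself. The following is a minimal, self-contained sketch of that delegation shape, not code from this patch; the `Tokenizer`, `Inner`, and `Wrapper` names are illustrative only.

use std::sync::{Arc, Mutex};

// A tokenizer that owns its own state and is advanced via `&mut self`,
// mirroring the new shape of the SQLTokenizer trait.
trait Tokenizer {
    fn next_token(&mut self) -> Option<String>;
}

// Stand-in for the ANSI delegate: hands out pre-split tokens in order.
struct Inner {
    tokens: Vec<String>,
    pos: usize,
}

impl Tokenizer for Inner {
    fn next_token(&mut self) -> Option<String> {
        let tok = self.tokens.get(self.pos).cloned();
        self.pos += 1;
        tok
    }
}

// Stand-in for the ACME wrapper: shares the delegate behind Arc<Mutex<...>>
// and forwards every call it does not handle itself.
struct Wrapper {
    inner: Arc<Mutex<Inner>>,
}

impl Tokenizer for Wrapper {
    fn next_token(&mut self) -> Option<String> {
        // Lock the shared delegate for the duration of the call, then forward.
        self.inner.lock().unwrap().next_token()
    }
}

fn main() {
    let inner = Arc::new(Mutex::new(Inner {
        tokens: vec!["1".to_string(), "+".to_string(), "2".to_string()],
        pos: 0,
    }));
    let mut wrapper = Wrapper { inner: inner.clone() };
    while let Some(tok) = wrapper.next_token() {
        println!("token: {}", tok);
    }
}

Sharing via Arc<Mutex<...>> rather than the previous Rc<...> is what lets both main() and the wrapper keep a handle to the same delegate while still invoking `&mut self` methods through it.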
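
The peek_char/next_char methods added to SQLTokenizer exist so a wrapper can do character-level lookahead through its delegate: the `!!` path peeks, consumes the first `!`, peeks again, and then either emits Factorial or falls back to Not. A standalone sketch of that two-character lookahead on a plain Peekable<Chars> (assumed behavior, not code from the patch):

fn main() {
    let sql = "!!5";
    let mut chars = sql.chars().peekable();

    // Peek at the first character without consuming it.
    if chars.peek() == Some(&'!') {
        chars.next(); // consume the first `!`
        if chars.peek() == Some(&'!') {
            chars.next(); // consume the second `!`
            println!("factorial operator");
        } else {
            // A single `!` is tokenized as NOT instead.
            println!("not operator");
        }
    }

    // Whatever remains would be handed to the delegate tokenizer.
    let rest: String = chars.collect();
    println!("rest = {:?}", rest);
}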