diff --git a/examples/acme_parser.rs b/examples/acme_parser.rs
deleted file mode 100644
index a69cc6e0..00000000
--- a/examples/acme_parser.rs
+++ /dev/null
@@ -1,115 +0,0 @@
-use std::sync::{Arc, Mutex};
-
-extern crate datafusion_sql;
-
-use datafusion_sql::ansi::tokenizer::ANSISQLTokenizer;
-use datafusion_sql::ansi::parser::ANSISQLParser;
-use datafusion_sql::tokenizer::*;
-use datafusion_sql::parser::*;
-
-/// This example demonstrates building a custom ACME parser that extends the generic parser
-/// by adding support for a factorial expression `!! expr`.
-
-/// Custom SQLToken
-#[derive(Debug,PartialEq)]
-enum AcmeToken {
-    /// Factorial token `!!`
-    Factorial
-}
-
-/// Custom SQLExpr
-#[derive(Debug)]
-enum AcmeExpr {
-    /// Factorial expression
-    Factorial(Box<SQLExpr<AcmeExpr>>)
-}
-
-struct AcmeTokenizer {
-    ansi_tokenizer: Arc<Mutex<SQLTokenizer<AcmeToken>>>
-}
-
-/// The ACME tokenizer looks for the factorial operator `!!` but delegates everything else
-impl SQLTokenizer<AcmeToken> for AcmeTokenizer {
-
-    fn precedence(&self, _token: &SQLToken<AcmeToken>) -> usize {
-        unimplemented!()
-    }
-
-    fn next_token(&mut self, chars: &mut CharSeq) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError> {
-        let mut ansi = self.ansi_tokenizer.lock().unwrap();
-        match chars.peek() {
-            Some(&ch) => match ch {
-                '!' => {
-                    chars.mark();
-                    chars.next(); // consume the first `!`
-                    match chars.peek() {
-                        Some(&ch) => match ch {
-                            '!' => {
-                                chars.next(); // consume the second `!`
-                                Ok(Some(SQLToken::Custom(AcmeToken::Factorial)))
-                            },
-                            _ => {
-                                chars.reset();
-                                ansi.next_token(chars)
-                            }
-                        },
-                        None => {
-                            chars.reset();
-                            ansi.next_token(chars)
-                        }
-                    }
-                }
-                _ => ansi.next_token(chars)
-            }
-            _ => ansi.next_token(chars)
-        }
-    }
-
-}
-
-struct AcmeParser {
-    tokenizer: Arc<Mutex<SQLTokenizer<AcmeToken>>>
-}
-
-impl AcmeParser {
-
-    pub fn new(tokenizer: Arc<Mutex<SQLTokenizer<AcmeToken>>>) -> Self {
-        AcmeParser { tokenizer: tokenizer.clone() }
-    }
-
-}
-
-impl SQLParser<AcmeToken, AcmeExpr> for AcmeParser {
-
-    fn parse_prefix(&mut self, chars: &mut CharSeq) -> Result<Option<Box<SQLExpr<AcmeExpr>>>, ParserError<AcmeToken>> {
-        Ok(None)
-    }
-
-    fn parse_infix(&mut self, chars: &mut CharSeq, left: &SQLExpr<AcmeExpr>, precedence: usize) -> Result<Option<Box<SQLExpr<AcmeExpr>>>, ParserError<AcmeToken>> {
-        Ok(None)
-    }
-}
-
-fn main() {
-
-    let sql = "1 + !! 5 * 2";
-
-    // ANSI SQL tokenizer
-    let ansi_tokenizer = Arc::new(Mutex::new(ANSISQLTokenizer { }));
-
-    // Custom ACME tokenizer
-    let mut acme_tokenizer = Arc::new(Mutex::new(AcmeTokenizer {
-        ansi_tokenizer: ansi_tokenizer.clone()
-    }));
-
-    // Create parsers
-    let ansi_parser = Arc::new(Mutex::new(ANSISQLParser::new(acme_tokenizer.clone())));
-    let acme_parser = Arc::new(Mutex::new(AcmeParser::new(acme_tokenizer.clone())));
-
-    let mut pratt_parser = PrattParser {
-        chars: CharSeq::new(sql),
-        parsers: vec![acme_parser, ansi_parser]
-    };
-
-    let expr = pratt_parser.parse_expr().unwrap();
-    println!("{:?}", expr);
-}
diff --git a/examples/parse_sql.rs b/examples/parse_sql.rs
new file mode 100644
index 00000000..b1b85e72
--- /dev/null
+++ b/examples/parse_sql.rs
@@ -0,0 +1,20 @@
+use std::sync::{Arc, Mutex};
+
+extern crate datafusion_sql;
+
+use datafusion_sql::ansi::tokenizer::ANSISQLTokenizer;
+use datafusion_sql::ansi::parser::ANSISQLParser;
+use datafusion_sql::tokenizer::*;
+use datafusion_sql::parser::*;
+
+
+fn main() {
+
+    let sql = "SELECT 1 + 1";
+
+    // Create parsers
+    match ANSISQLParser::parse(sql).unwrap() {
+        Some(ast) => println!("{:?}", ast),
+        _ => {}
+    }
+}
diff --git a/src/ansi/parser.rs b/src/ansi/parser.rs
index 5af62feb..f0259c31 100644
--- a/src/ansi/parser.rs
+++ b/src/ansi/parser.rs
@@ -1,30 +1,56 @@
 use std::cmp::PartialEq;
 use std::fmt::Debug;
-//use std::iter::Peekable;
-//use std::str::Chars;
-
-use std::sync::{Arc, Mutex};
+//use std::rc::Rc;
+//use std::sync::{Arc, Mutex};
 
+use super::tokenizer::ANSISQLTokenizer;
 use super::super::tokenizer::*;
 use super::super::parser::*;
 
-pub struct ANSISQLParser<TokenType> {
-    tokenizer: Arc<Mutex<SQLTokenizer<TokenType>>>
+pub struct ANSISQLParser {
+    tokenizer: Box<SQLTokenizer>
 }
 
-impl<TokenType> ANSISQLParser<TokenType> where TokenType: Debug + PartialEq {
+impl ANSISQLParser where {
 
-    pub fn new(tokenizer: Arc<Mutex<SQLTokenizer<TokenType>>>) -> Self {
-        ANSISQLParser { tokenizer: tokenizer.clone() }
+    pub fn parse(sql: &str) -> Result<Option<Box<SQLExpr>>, ParserError> {
+        let mut parser = ANSISQLParser { tokenizer: Box::new(ANSISQLTokenizer::new(sql)) };
+        parser.parse_expr()
     }
 }
 
-impl<TokenType, ExprType> SQLParser<TokenType, ExprType> for ANSISQLParser<TokenType>
-    where TokenType: Debug + PartialEq, ExprType: Debug {
+impl SQLParser for ANSISQLParser {
 
-    fn parse_prefix(&mut self, chars: &mut CharSeq) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>> {
+    fn parse_expr(&mut self) -> Result<Option<Box<SQLExpr>>, ParserError> {
 
-        match self.tokenizer.lock().unwrap().next_token(chars)? {
+        let precedence: usize = 0;
+
+        let mut e = self.parse_prefix()?;
+
+        match e {
+            Some(mut expr) => {
+                while let Some(token) = self.tokenizer.peek_token()? {
+                    let next_precedence = self.tokenizer.precedence(&token);
+
+                    if precedence >= next_precedence {
+                        break;
+                    }
+
+                    expr = self.parse_infix(&expr, next_precedence)?.unwrap(); //TODO: fix me
+                }
+
+                Ok(Some(expr))
+            }
+            _ => {
+                Ok(None)
+            }
+        }
+
+    }
+
+    fn parse_prefix(&mut self) -> Result<Option<Box<SQLExpr>>, ParserError> {
+
+        match self.tokenizer.next_token()? {
             Some(SQLToken::Keyword(ref k)) => match k.to_uppercase().as_ref() {
                 "INSERT" => unimplemented!(),
                 "UPDATE" => unimplemented!(),
@@ -37,7 +63,7 @@ impl<TokenType, ExprType> SQLParser<TokenType, ExprType> for ANSISQLParser<TokenType>
         }
     }
 
-    fn parse_infix(&mut self, _chars: &mut CharSeq, _left: &SQLExpr<ExprType>, _precedence: usize) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>> {
+    fn parse_infix(&mut self, _left: &SQLExpr, _precedence: usize) -> Result<Option<Box<SQLExpr>>, ParserError> {
         unimplemented!()
     }
 }
diff --git a/src/ansi/tokenizer.rs b/src/ansi/tokenizer.rs
index f34965f3..9714559f 100644
--- a/src/ansi/tokenizer.rs
+++ b/src/ansi/tokenizer.rs
@@ -4,26 +4,37 @@ use std::fmt::Debug;
 use super::super::tokenizer::*;
 
 pub struct ANSISQLTokenizer {
+    chars: CharSeq
 }
 
-impl<TokenType> SQLTokenizer<TokenType> for ANSISQLTokenizer
-    where TokenType: Debug + PartialEq {
+impl ANSISQLTokenizer {
+    pub fn new(sql: &str) -> Self {
+        ANSISQLTokenizer { chars: CharSeq::new(sql) }
+    }
+}
 
-    fn precedence(&self, _token: &SQLToken<TokenType>) -> usize {
+impl SQLTokenizer for ANSISQLTokenizer {
+
+    fn precedence(&self, _token: &SQLToken) -> usize {
         unimplemented!()
     }
 
-    fn next_token(&mut self, chars: &mut CharSeq) -> Result<Option<SQLToken<TokenType>>, TokenizerError> {
-        match chars.next() {
+    fn peek_token(&mut self) -> Result<Option<SQLToken>, TokenizerError> {
+        unimplemented!()
+    }
+
+
+    fn next_token(&mut self) -> Result<Option<SQLToken>, TokenizerError> {
+        match self.chars.next() {
             Some(ch) => match ch {
                 ' ' | '\t' | '\n' => Ok(Some(SQLToken::Whitespace(ch))),
                 '0' ... '9' => {
                     let mut s = String::new();
                     s.push(ch);
-                    while let Some(&ch) = chars.peek() {
+                    while let Some(&ch) = self.chars.peek() {
                         match ch {
                             '0' ... '9' => {
-                                chars.next(); // consume
+                                self.chars.next(); // consume
                                 s.push(ch);
                             },
                             _ => break
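The new `parse_expr` above inlines the Pratt loop that previously lived in the now-deleted `PrattParser` (see src/parser.rs below), but it still pins `precedence` at zero and unwraps the `parse_infix` result (the `//TODO: fix me`). For reference, a minimal self-contained sketch of the full precedence-climbing recursion that the TODO points toward; every name in it is illustrative, not part of this crate's API:

    // Minimal token and expression model.
    #[derive(Debug, Clone, Copy, PartialEq)]
    enum Tok { Num(i64), Plus, Star }

    #[derive(Debug)]
    enum Expr { Num(i64), Binary(Box<Expr>, Tok, Box<Expr>) }

    // `*` binds tighter than `+`.
    fn precedence_of(t: &Tok) -> usize {
        match t { Tok::Plus => 10, Tok::Star => 20, _ => 0 }
    }

    struct Parser { tokens: Vec<Tok>, pos: usize }

    impl Parser {
        fn peek(&self) -> Option<Tok> { self.tokens.get(self.pos).copied() }
        fn advance(&mut self) -> Option<Tok> { let t = self.peek(); self.pos += 1; t }

        // The loop the patch inlines into ANSISQLParser::parse_expr, with the
        // caller's precedence threaded through instead of pinned at zero.
        fn parse_expr(&mut self, precedence: usize) -> Expr {
            let mut expr = self.parse_prefix();
            while let Some(tok) = self.peek() {
                let next_precedence = precedence_of(&tok);
                if precedence >= next_precedence { break; }
                expr = self.parse_infix(expr, next_precedence);
            }
            expr
        }

        fn parse_prefix(&mut self) -> Expr {
            match self.advance() {
                Some(Tok::Num(n)) => Expr::Num(n),
                t => panic!("unexpected prefix token: {:?}", t),
            }
        }

        // Consume the operator, then recurse at its precedence.
        fn parse_infix(&mut self, left: Expr, precedence: usize) -> Expr {
            let op = self.advance().unwrap();
            let right = self.parse_expr(precedence);
            Expr::Binary(Box::new(left), op, Box::new(right))
        }
    }

    fn main() {
        // 1 + 2 * 3
        let mut p = Parser { tokens: vec![Tok::Num(1), Tok::Plus, Tok::Num(2), Tok::Star, Tok::Num(3)], pos: 0 };
        println!("{:?}", p.parse_expr(0));
    }

Running it prints `Binary(Num(1), Plus, Binary(Num(2), Star, Num(3)))`: threading each operator's precedence into the recursive call is what makes `*` bind tighter than `+`.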
diff --git a/src/parser.rs b/src/parser.rs
index d514692a..afbfda1d 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,6 +1,5 @@
 use std::cmp::PartialEq;
 use std::fmt::Debug;
-use std::sync::{Arc, Mutex};
 
 use super::tokenizer::*;
 
@@ -8,7 +7,7 @@ use super::tokenizer::*;
 
 /// ANSI SQL:2011 Data Types
 #[derive(Debug)]
-pub enum SQLDataType<T> {
+pub enum SQLDataType {
     /// BOOLEAN
     Boolean,
     /// NUMERIC, DECIMAL, DEC
@@ -49,14 +48,12 @@ pub enum SQLDataType
     Time { precision: usize, tz: bool },
     /// Time: `TIMESTAMP [(precision)] [WITH TIME ZONE | WITHOUT TIME ZONE]`
     Timestamp { precision: usize, tz: bool },
-    /// Custom data type
-    Custom(T)
 }
 
 
 #[derive(Debug)]
-pub enum SQLOperator<T> {
+pub enum SQLOperator {
     Plus,
     Minus,
     Mult,
@@ -66,217 +63,44 @@ pub enum SQLOperator
     GtEq,
     Lt,
     LtEq,
-    Custom(T) // extension point for vendor-specific operators
 }
 
 /// SQL Expressions
 #[derive(Debug)]
-pub enum SQLExpr<ExprType> {
+pub enum SQLExpr{
     /// Identifier e.g. table name or column name
     Identifier(String),
     /// Literal value
     Literal(String),
     /// Binary expression e.g. `1 + 2` or `fname LIKE "A%"`
-    Binary(Box<SQLExpr<ExprType>>, SQLOperator<ExprType>, Box<SQLExpr<ExprType>>),
+    Binary(Box<SQLExpr>, SQLOperator, Box<SQLExpr>),
     /// Function invocation with function name and list of argument expressions
-    FunctionCall(String, Vec<SQLExpr<ExprType>>),
+    FunctionCall(String, Vec<SQLExpr>),
     Insert,
     Update,
     Delete,
     Select,
     CreateTable,
-    /// Custom expression (vendor-specific)
-    Custom(ExprType)
 }
 
 #[derive(Debug)]
-pub enum ParserError<TokenType>
-    where TokenType: Debug + PartialEq {
-    WrongToken { expected: Vec<SQLToken<TokenType>>, actual: SQLToken<TokenType>, line: usize, col: usize },
+pub enum ParserError {
+    WrongToken { expected: Vec<SQLToken>, actual: SQLToken, line: usize, col: usize },
     Custom(String)
 }
 
-impl<TokenType> From<TokenizerError> for ParserError<TokenType>
-    where TokenType: Debug + PartialEq {
-
-    fn from(_: TokenizerError) -> Self {
-        unimplemented!()
+impl From<TokenizerError> for ParserError {
+    fn from(e: TokenizerError) -> Self {
+        ParserError::Custom(format!("{:?}", e))
     }
 }
 
-pub trait SQLParser<TokenType, ExprType>
-    where TokenType: Debug + PartialEq, ExprType: Debug {
-
+pub trait SQLParser {
+    fn parse_expr(&mut self) -> Result<Option<Box<SQLExpr>>, ParserError>;
     /// parse the prefix and stop once an infix operator is reached
-    fn parse_prefix(&mut self, chars: &mut CharSeq) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>> ;
+    fn parse_prefix(&mut self) -> Result<Option<Box<SQLExpr>>, ParserError> ;
     /// parse the next infix expression, returning None if the precedence has changed
-    fn parse_infix(&mut self, chars: &mut CharSeq, left: &SQLExpr<ExprType>, precedence: usize) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>>;
+    fn parse_infix(&mut self, left: &SQLExpr, precedence: usize) -> Result<Option<Box<SQLExpr>>, ParserError>;
 }
 
-pub struct PrattParser<TokenType, ExprType> {
-    pub chars: CharSeq,
-    pub parsers: Vec<Arc<Mutex<SQLParser<TokenType, ExprType>>>>
-}
-
-impl<TokenType, ExprType> PrattParser<TokenType, ExprType>
-    where TokenType: Debug + PartialEq, ExprType: Debug {
-
-    pub fn parse_expr(&mut self) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>> {
-
-        for i in 0..self.parsers.len() {
-            let mut p = self.parsers[i].lock().unwrap();
-            let expr = p.parse_prefix(&mut self.chars)?;
-
-            // return as soon as we have a match
-            match expr {
-                Some(_) => return Ok(expr),
-                _ => {}
-            }
-        }
-
-        // found no valid token
-        Ok(None)
-    }
-
-}
-
-//
-//pub fn parse_expr<'a, TokenType, ExprType>(parser: Arc<Mutex<SQLParser<TokenType, ExprType>>>)
-//    -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>> where TokenType: Debug + PartialEq, ExprType: Debug {
-//    let mut guard = parser.lock().unwrap();
-//
-//    //Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>>
-//    let x = guard.parse_prefix();
-//    x
-//}
-
-
-//impl<'a, TokenType, ExprType> PrattParser<'a, TokenType, ExprType>
-//    where TokenType: Debug + PartialEq, ExprType: Debug {
-//
-//    pub fn parse_expr(&mut self) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>> {
-//
-//        let precedence: usize = 0;
-//        let parser_ref = self.parser.as_ref();
-//
-//        let mut expr = parser_ref.parse_prefix()?;
-//
-//        while let Some(token) = self.tokenizer.peek_token(&mut self.chars)? {
-//
-//            let next_precedence = self.tokenizer.precedence(&token);
-//
-//            if precedence >= next_precedence {
-//                break;
-//            }
-//
-//            expr = parser_ref.parse_infix(&expr, next_precedence)?.unwrap(); //TODO: fix me
-//        }
-//
-//        Ok(expr)
-//    }
-//
-//}
-
-// fn parse_prefix(&mut self) -> Result<Box<SQLExpr>, ParserError> {
-//
-//     match self.tokenizer.peek_token()? {
-//         Some(SQLToken::Keyword(ref k)) => match k.to_uppercase().as_ref() {
-//             "INSERT" => unimplemented!(),
-//             "UPDATE" => unimplemented!(),
-//             "DELETE" => unimplemented!(),
-//             "SELECT" => unimplemented!(),
-//             "CREATE" => unimplemented!(),
-//             _ => unimplemented!()
-//         },
-//         _ => unimplemented!()
-//     }
-//     unimplemented!()
-// }
-//
-// fn parse_infix(&mut self, expr: Box<SQLExpr>, precedence: u8) -> Result<Box<SQLExpr>, ParserError> {
-//
-//     match self.tokenizer.next_token()? {
-//         Some(tok) => {
-//             match tok {
-//                 SQLToken::Eq | SQLToken::Gt | SQLToken::GtEq |
-//                 SQLToken::Lt | SQLToken::LtEq => Ok(Box::new(SQLExpr::Binary(
-//                     expr,
-//                     self.to_sql_operator(&tok),
-//                     self.parse_expr(precedence)?
-//                 ))),
-//                 _ => Err(ParserError::WrongToken {
-//                     expected: vec![SQLToken::Eq, SQLToken::Gt], //TODO: complete
-//                     actual: tok,
-//                     line: 0,
-//                     col: 0
-//                 })
-//             }
-//         },
-//         None => Err(ParserError::TBD)
-//     }
-// }
-//
-// fn to_sql_operator(&self, token: &SQLToken) -> SQLOperator {
-//     unimplemented!()
-// }
-//
-// fn get_precedence(&self, token: &SQLToken) -> u8 {
-//     unimplemented!()
-// }
-//
-// /// parse a list of SQL expressions separated by a comma
-// fn parse_expr_list(&mut self, precedence: u8) -> Result<Vec<SQLExpr>, ParserError> {
-//     unimplemented!()
-// }
-//
-//}
-//
-////impl GenericParser {
-////
-////    fn tokenizer(&mut self) -> &mut SQLTokenizer {
-////        &mut self.tokenizer
-////    }
-////
-////    fn parse_keywords(&mut self, keywords: Vec<&str>) -> Result<bool, ParserError> {
-////        unimplemented!()
-////    }
-////
-//////    fn parse_identifier(&mut self) -> Result<String, ParserError>;
-////
-////}
-//
-//
-//
-//#[cfg(test)]
-//mod tests {
-//
-//    use super::SQLToken::*;
-//    use super::*;
-//    #[test]
-//    fn parse_Acme_create_table() {
-//
-//        // CREATE TABLE test (col1 int8) HASH (col1)
-//        let tokens = vec![
-//            k("CREATE"), k("TABLE"), i("test"), LParen,
-//            i("col1"), k("int8"),
-//            RParen,
-//            k("HASH"), LParen, i("col1"), RParen
-//        ];
-//
-//        //let parser = AcmeParser { generic_parser: }
-//    }
-//  }
-//
-//  fn k(s: &str) -> SQLToken {
-//    Keyword(s.to_string())
-//  }
-//
-//  fn i(s: &str) -> SQLToken {
-//    Identifier(s.to_string())
-//  }
-//
-//
-//}
-
-
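One substantive fix in this file: `From<TokenizerError> for ParserError` now produces a real value instead of `unimplemented!()`, so the `?` operator inside the parser can propagate tokenizer failures with their message intact. A standalone model of that pattern, with stand-in types rather than the crate's:

    #[derive(Debug)]
    struct TokenizerError(String);

    #[derive(Debug)]
    enum ParserError {
        Custom(String),
    }

    impl From<TokenizerError> for ParserError {
        fn from(e: TokenizerError) -> Self {
            ParserError::Custom(format!("{:?}", e))
        }
    }

    fn next_token() -> Result<char, TokenizerError> {
        Err(TokenizerError("unexpected end of input".to_string()))
    }

    fn parse() -> Result<char, ParserError> {
        let t = next_token()?; // TokenizerError -> ParserError via From
        Ok(t)
    }

    fn main() {
        // prints Err(Custom("TokenizerError(\"unexpected end of input\")"))
        println!("{:?}", parse());
    }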
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 00bb0df4..73eb344f 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -70,7 +70,7 @@ pub enum TokenizerError
 
 /// SQL Tokens
 #[derive(Debug,PartialEq)]
-pub enum SQLToken<T: Debug + PartialEq> {
+pub enum SQLToken {
     Whitespace(char),
     Keyword(String),
     Identifier(String),
@@ -89,31 +89,28 @@ pub enum SQLToken
     LParen,
     RParen,
     Comma,
-    /// Custom token (dialect-specific)
-    Custom(T)
 }
 
-pub trait SQLTokenizer<TokenType>
-    where TokenType: Debug + PartialEq {
+pub trait SQLTokenizer {
 
     /// get the precendence of a token
-    fn precedence(&self, token: &SQLToken<TokenType>) -> usize;
+    fn precedence(&self, token: &SQLToken) -> usize;
+
+    fn peek_token(&mut self) -> Result<Option<SQLToken>, TokenizerError>;
 
     /// return a reference to the next token and advance the index
-    fn next_token(&mut self, chars: &mut CharSeq) -> Result<Option<SQLToken<TokenType>>, TokenizerError>;
+    fn next_token(&mut self) -> Result<Option<SQLToken>, TokenizerError>;
 }
 
-pub fn tokenize<TokenType>(sql: &str, tokenizer: &mut SQLTokenizer<TokenType>) -> Result<Vec<SQLToken<TokenType>>, TokenizerError>
-    where TokenType: Debug + PartialEq
-    {
+pub fn tokenize(sql: &str, tokenizer: &mut SQLTokenizer) -> Result<Vec<SQLToken>, TokenizerError> {
 
     let mut chars = CharSeq::new(sql);
 
-    let mut tokens : Vec<SQLToken<TokenType>> = vec![];
+    let mut tokens : Vec<SQLToken> = vec![];
 
     loop {
-        match tokenizer.next_token(&mut chars)? {
+        match tokenizer.next_token()? {
             Some(SQLToken::Whitespace(_)) => { /* ignore */ },
             Some(token) => {
                 println!("Token: {:?}", token);
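`peek_token` joins the trait here but is still `unimplemented!()` in `ANSISQLTokenizer`. One plausible way to back it is the mark/rewind trick that `CharSeq` already supports (its `mark()`/`reset()` appear in the deleted acme_parser.rs): remember the position, read a token ahead, then rewind. A self-contained toy model of the idea, with none of the names taken from the crate:

    struct Chars { data: Vec<char>, pos: usize, marked: usize }

    impl Chars {
        fn new(s: &str) -> Self { Chars { data: s.chars().collect(), pos: 0, marked: 0 } }
        fn mark(&mut self) { self.marked = self.pos; }  // remember position
        fn reset(&mut self) { self.pos = self.marked; } // rewind to mark
        fn next(&mut self) -> Option<char> {
            let ch = self.data.get(self.pos).copied();
            if ch.is_some() { self.pos += 1; }
            ch
        }
    }

    // peek = mark, read ahead with next_token, then rewind.
    fn peek_token(chars: &mut Chars) -> Option<String> {
        chars.mark();
        let tok = next_token(chars);
        chars.reset();
        tok
    }

    // Toy next_token: a run of non-whitespace characters.
    fn next_token(chars: &mut Chars) -> Option<String> {
        let mut s = String::new();
        while let Some(ch) = chars.next() {
            if ch.is_whitespace() { if s.is_empty() { continue } else { break } }
            s.push(ch);
        }
        if s.is_empty() { None } else { Some(s) }
    }

    fn main() {
        let mut chars = Chars::new("SELECT 1");
        assert_eq!(peek_token(&mut chars), Some("SELECT".to_string()));
        // the peek consumed nothing:
        assert_eq!(next_token(&mut chars), Some("SELECT".to_string()));
    }

This keeps `peek_token` and `next_token` trivially consistent, since peeking is literally a rewound call to `next_token`.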