mirror of https://github.com/apache/datafusion-sqlparser-rs.git

Refactoring

commit 7ff4133327
parent 2ddc9f7a49

5 changed files with 108 additions and 93 deletions
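This commit's through-line: tokenizer and parser state stops being threaded into every call as a `&mut Peekable<Chars>` argument. The tokenizer now owns its character stream, char-level access (`peek_char`/`next_char`) moves into the `SQLTokenizer` trait, and tokenizer and parser are shared behind `Arc<Mutex<...>>` handles. A minimal sketch of the signature change, with the token type simplified to `char` (illustrative only, not the crate's API):

use std::iter::Peekable;
use std::str::Chars;

// Before: the caller owned the character stream and passed it to every call.
trait TokenizerBefore {
    fn next_token(&self, chars: &mut Peekable<Chars>) -> Option<char>;
}

// After: the tokenizer owns (or shares) its stream, so methods take
// `&mut self`, and char-level access joins the trait itself.
trait TokenizerAfter {
    fn next_token(&mut self) -> Option<char>;
    fn peek_char(&mut self) -> Option<&char>;
    fn next_char(&mut self) -> Option<&char>;
}

The five diffs below appear to cover, in order: the ACME example program, the ANSI parser, the ANSI tokenizer, the parser trait module, and the tokenizer trait module.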
@@ -1,10 +1,9 @@
-use std::str::Chars;
-use std::iter::Peekable;
-use std::rc::Rc;
+use std::sync::{Arc, Mutex};
 
 extern crate datafusion_sql;
 
+use datafusion_sql::ansi::tokenizer::ANSISQLTokenizer;
+use datafusion_sql::ansi::parser::ANSISQLParser;
 use datafusion_sql::tokenizer::*;
 use datafusion_sql::parser::*;
@@ -26,29 +25,30 @@ enum AcmeExpr {
 }
 
 struct AcmeTokenizer {
-    generic: ANSISQLTokenizer
+    ansi_tokenizer: Arc<Mutex<SQLTokenizer<AcmeToken>>>
 }
 
 /// The ACME tokenizer looks for the factorial operator `!!` but delegates everything else
 impl SQLTokenizer<AcmeToken> for AcmeTokenizer {
 
-    fn precedence(&self, token: &SQLToken<AcmeToken>) -> usize {
+    fn precedence(&self, _token: &SQLToken<AcmeToken>) -> usize {
         unimplemented!()
     }
 
-    fn peek_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError<AcmeToken>> {
+    fn peek_token(&mut self) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError<AcmeToken>> {
         unimplemented!()
     }
 
-    fn next_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError<AcmeToken>> {
-        match chars.peek() {
+    fn next_token(&mut self) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError<AcmeToken>> {
+        let mut arc = self.ansi_tokenizer.lock().unwrap();
+        match arc.peek_char() {
             Some(&ch) => match ch {
                 '!' => {
-                    chars.next(); // consume the first `!`
-                    match chars.peek() {
+                    arc.next_char(); // consume the first `!`
+                    match arc.peek_char() {
                         Some(&ch) => match ch {
                             '!' => {
-                                chars.next(); // consume the second `!`
+                                arc.next_char(); // consume the second `!`
                                 Ok(Some(SQLToken::Custom(AcmeToken::Factorial)))
                             },
                             _ => Err(TokenizerError::UnexpectedChar(ch,Position::new(0,0)))
@@ -56,34 +56,35 @@ impl SQLTokenizer<AcmeToken> for AcmeTokenizer {
                         None => Ok(Some(SQLToken::Not))
                     }
                 }
-                _ => self.generic.next_token(chars)
+                _ => arc.next_token()
             }
-            _ => self.generic.next_token(chars)
+            _ => arc.next_token()
         }
     }
+
+    fn peek_char(&mut self) -> Option<&char> {
+        unimplemented!()
+    }
+
+    fn next_char(&mut self) -> Option<&char> {
+        unimplemented!()
+    }
 }
 
 struct AcmeParser {
-    tokenizer: Rc<SQLTokenizer<AcmeToken>>
+    ansi_parser: Arc<Mutex<SQLParser<AcmeToken, AcmeExpr>>>
 }
+//
+//impl<'a> AcmeParser<'a> {
+//
+//    pub fn new(sql: &'a str) -> Self {
+//        AcmeParser {
+//            chars: sql.chars().peekable()
+//        }
+//    }
+//}
 
 impl SQLParser<AcmeToken, AcmeExpr> for AcmeParser {
 
     fn parse_prefix(&mut self) -> Result<Box<SQLExpr<AcmeExpr>>, ParserError<AcmeToken>> {
-        unimplemented!()
+        //TODO: add custom overrides
+        self.ansi_parser.lock().unwrap().parse_prefix()
     }
 
-    fn parse_infix(&mut self, left: &SQLExpr<AcmeExpr>, _precedence: usize) -> Result<Option<Box<SQLExpr<AcmeExpr>>>, ParserError<AcmeToken>> {
-        unimplemented!()
+    fn parse_infix(&mut self, left: &SQLExpr<AcmeExpr>, precedence: usize) -> Result<Option<Box<SQLExpr<AcmeExpr>>>, ParserError<AcmeToken>> {
+        //TODO: add custom overrides
+        self.ansi_parser.lock().unwrap().parse_infix(left, precedence)
    }
 }
@@ -91,35 +92,22 @@ fn main() {
 
     let sql = "1 + !! 5 * 2";
 
-    // let acme_parser = AcmeParser::new(sql);
+    // ANSI SQL tokenizer
+    let ansi_tokenizer = Arc::new(Mutex::new(ANSISQLTokenizer { chars: sql.chars().peekable() }));
 
-    //acme_parser
+    // Custom ACME tokenizer
+    let mut acme_tokenizer = Arc::new(Mutex::new(AcmeTokenizer {
+        ansi_tokenizer: ansi_tokenizer.clone()
+    }));
 
-    let mut acme_tokenizer: Rc<SQLTokenizer<AcmeToken>> = Rc::new(AcmeTokenizer {
-        generic: ANSISQLTokenizer { }
-    });
+    // Custom ACME parser
+    let acme_parser: Arc<Mutex<SQLParser<AcmeToken, AcmeExpr>>> = Arc::new(Mutex::new(AcmeParser {
+        ansi_parser: Arc::new(Mutex::new(ANSISQLParser::new(acme_tokenizer)))
+    }));
 
-    let mut acme_parser: Rc<SQLParser<AcmeToken, AcmeExpr>> = Rc::new(AcmeParser {
-        tokenizer: acme_tokenizer.clone()
-    });
-
-    // let mut pratt_parser = Rc::new(PrattParser {
-    //     chars: sql.chars().peekable(),
-    //     tokenizer: acme_tokenizer.clone(),
-    //     parser: acme_parser.clone()
-    // });
-
-    let mut chars = sql.chars().peekable();
-
-    let expr = parse_expr(acme_tokenizer, acme_parser, &mut chars);
+    let expr = parse_expr(acme_parser).unwrap();
 
     println!("Parsed: {:?}", expr);
-    //
-    // let tokens = tokenize(&sql, &mut acme_tokenizer).unwrap();
-    //
-    // println!("tokens = {:?}", tokens);
 
 }
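The example above wires the new shape together: `AcmeTokenizer` intercepts the `!!` factorial operator and delegates everything else to the shared ANSI tokenizer it locks. A self-contained sketch of that delegation pattern, with an invented `Inner`/`Wrapper` pair and a simplified `Token` enum standing in for the real types:

use std::sync::{Arc, Mutex};

#[derive(Debug, PartialEq)]
enum Token {
    Factorial,
    Not,
    Char(char),
}

// Stand-in for the ANSI tokenizer: owns its character stream.
struct Inner {
    chars: Vec<char>,
    pos: usize,
}

impl Inner {
    fn peek_char(&self) -> Option<char> {
        self.chars.get(self.pos).copied()
    }
    fn next_char(&mut self) -> Option<char> {
        let c = self.peek_char();
        if c.is_some() {
            self.pos += 1;
        }
        c
    }
    fn next_token(&mut self) -> Option<Token> {
        self.next_char().map(Token::Char)
    }
}

// Stand-in for AcmeTokenizer: handles `!!`, delegates the rest.
struct Wrapper {
    inner: Arc<Mutex<Inner>>,
}

impl Wrapper {
    fn next_token(&mut self) -> Option<Token> {
        let mut inner = self.inner.lock().unwrap();
        match inner.peek_char() {
            Some('!') => {
                inner.next_char(); // consume the first `!`
                match inner.peek_char() {
                    Some('!') => {
                        inner.next_char(); // consume the second `!`
                        Some(Token::Factorial)
                    }
                    _ => Some(Token::Not),
                }
            }
            _ => inner.next_token(), // delegate everything else
        }
    }
}

fn main() {
    let inner = Arc::new(Mutex::new(Inner { chars: "1!!2".chars().collect(), pos: 0 }));
    // Cloning the Arc mirrors how the example shares one tokenizer handle.
    let mut acme = Wrapper { inner: inner.clone() };
    while let Some(token) = acme.next_token() {
        println!("{:?}", token); // Char('1'), Factorial, Char('2')
    }
}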
@@ -1,22 +1,30 @@
 use std::cmp::PartialEq;
 use std::fmt::Debug;
-use std::iter::Peekable;
-use std::str::Chars;
+//use std::iter::Peekable;
+//use std::str::Chars;
+
+use std::sync::{Arc, Mutex};
 
 use super::super::tokenizer::*;
 use super::super::parser::*;
 
-pub struct ANSISQLParser<'a, TokenType> {
-    chars: Peekable<Chars<'a>>,
-    tokenizer: SQLTokenizer<TokenType>
+pub struct ANSISQLParser<TokenType> {
+    tokenizer: Arc<Mutex<SQLTokenizer<TokenType>>>
 }
 
-impl<'a, TokenType, ExprType> SQLParser<TokenType, ExprType> for ANSISQLParser<'a, TokenType>
-    where TokenType: Debug + PartialEq, ExprType: Debug + PartialEq {
+impl<TokenType> ANSISQLParser<TokenType> where TokenType: Debug + PartialEq {
+
+    pub fn new(tokenizer: Arc<Mutex<SQLTokenizer<TokenType>>>) -> Self {
+        ANSISQLParser { tokenizer: tokenizer.clone() }
+    }
+}
+
+impl<TokenType, ExprType> SQLParser<TokenType, ExprType> for ANSISQLParser<TokenType>
+    where TokenType: Debug + PartialEq, ExprType: Debug {
 
     fn parse_prefix(&mut self) -> Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>> {
 
-        match self.tokenizer.peek_token(&mut self.chars)? {
+        match self.tokenizer.lock().unwrap().peek_token()? {
             Some(SQLToken::Keyword(ref k)) => match k.to_uppercase().as_ref() {
                 "INSERT" => unimplemented!(),
                 "UPDATE" => unimplemented!(),
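Here the ANSI parser drops its `'a` lifetime and owned char stream; it now receives a shared tokenizer handle through a new inherent constructor, while the `SQLParser` trait impl stays separate. A hedged sketch of that split, using placeholder types rather than the crate's:

use std::sync::{Arc, Mutex};

// Placeholder stand-in for the SQLTokenizer trait.
trait Tok {
    fn peek(&mut self) -> Option<char>;
}

struct CharTok {
    chars: Vec<char>,
}

impl Tok for CharTok {
    fn peek(&mut self) -> Option<char> {
        self.chars.first().copied()
    }
}

// The parser holds a shared, lockable tokenizer handle ...
struct Parser {
    tokenizer: Arc<Mutex<dyn Tok>>,
}

// ... and gains an inherent `new` constructor, separate from any trait impl.
impl Parser {
    fn new(tokenizer: Arc<Mutex<dyn Tok>>) -> Self {
        Parser { tokenizer }
    }

    fn peek(&mut self) -> Option<char> {
        self.tokenizer.lock().unwrap().peek()
    }
}

fn main() {
    let tok: Arc<Mutex<dyn Tok>> = Arc::new(Mutex::new(CharTok { chars: vec!['S'] }));
    let mut parser = Parser::new(tok);
    println!("{:?}", parser.peek()); // Some('S')
}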
@@ -5,30 +5,32 @@ use std::str::Chars;
 
 use super::super::tokenizer::*;
 
-pub struct ANSISQLTokenizer {}
+pub struct ANSISQLTokenizer<'a> {
+    pub chars: Peekable<Chars<'a>>
+}
 
-impl<TokenType> SQLTokenizer<TokenType> for ANSISQLTokenizer
+impl<'a, TokenType> SQLTokenizer<TokenType> for ANSISQLTokenizer<'a>
     where TokenType: Debug + PartialEq {
 
     fn precedence(&self, _token: &SQLToken<TokenType>) -> usize {
         unimplemented!()
     }
 
-    fn peek_token(&self, _chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<TokenType>>, TokenizerError<TokenType>> {
+    fn peek_token(&mut self) -> Result<Option<SQLToken<TokenType>>, TokenizerError<TokenType>> {
         unimplemented!()
     }
 
-    fn next_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<TokenType>>, TokenizerError<TokenType>> {
-        match chars.next() {
+    fn next_token(&mut self) -> Result<Option<SQLToken<TokenType>>, TokenizerError<TokenType>> {
+        match self.chars.next() {
             Some(ch) => match ch {
                 ' ' | '\t' | '\n' => Ok(Some(SQLToken::Whitespace(ch))),
                 '0' ... '9' => {
                     let mut s = String::new();
                     s.push(ch);
-                    while let Some(&ch) = chars.peek() {
+                    while let Some(&ch) = self.chars.peek() {
                         match ch {
                             '0' ... '9' => {
-                                chars.next(); // consume
+                                self.chars.next(); // consume
                                 s.push(ch);
                             },
                             _ => break
@@ -45,5 +47,13 @@ impl<TokenType> SQLTokenizer<TokenType> for ANSISQLTokenizer
             None => Ok(None)
         }
     }
+
+    fn peek_char(&mut self) -> Option<&char> {
+        unimplemented!()
+    }
+
+    fn next_char(&mut self) -> Option<&char> {
+        unimplemented!()
+    }
 }
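With the character stream now stored in `self.chars`, the digit-scanning loop reads the tokenizer's own state. A runnable sketch of that loop; note the diff's `'0' ... '9'` range pattern is the old syntax that modern Rust spells `'0'..='9'`:

use std::iter::Peekable;
use std::str::Chars;

struct Scanner<'a> {
    chars: Peekable<Chars<'a>>,
}

impl<'a> Scanner<'a> {
    // Accumulate consecutive digits from the tokenizer's own stream.
    fn scan_number(&mut self, first: char) -> String {
        let mut s = String::new();
        s.push(first);
        while let Some(&ch) = self.chars.peek() {
            match ch {
                '0'..='9' => {
                    self.chars.next(); // consume
                    s.push(ch);
                }
                _ => break,
            }
        }
        s
    }
}

fn main() {
    let mut sc = Scanner { chars: "123+4".chars().peekable() };
    let first = sc.chars.next().unwrap();
    println!("{}", sc.scan_number(first)); // prints "123"
}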
@@ -3,6 +3,7 @@ use std::fmt::Debug;
 use std::rc::Rc;
 use std::str::Chars;
 use std::iter::Peekable;
+use std::sync::{Arc, Mutex};
 
 use super::tokenizer::*;
@@ -117,9 +118,13 @@ pub trait SQLParser<TokenType, ExprType>
 }
 
-pub fn parse_expr<'a, TokenType, ExprType>(_tokenizer: Rc<SQLTokenizer<TokenType>>, _parser: Rc<SQLParser<TokenType, ExprType>>, _chars: &mut Peekable<Chars<'a>>)
+pub fn parse_expr<'a, TokenType, ExprType>(parser: Arc<Mutex<SQLParser<TokenType, ExprType>>>)
     -> Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>> where TokenType: Debug + PartialEq, ExprType: Debug {
-    unimplemented!()
+    let mut guard = parser.lock().unwrap();
+
+    //Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>>
+    let x = guard.parse_prefix();
+    x
 }
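`parse_expr` collapses to lock-and-delegate: take the shared parser, lock it, and return whatever `parse_prefix` produces. Roughly, with placeholder trait and error types:

use std::sync::{Arc, Mutex};

// Placeholder stand-in for the SQLParser trait.
trait Parser {
    fn parse_prefix(&mut self) -> Result<String, String>;
}

struct Fixed;

impl Parser for Fixed {
    fn parse_prefix(&mut self) -> Result<String, String> {
        Ok("expr".to_string())
    }
}

// Lock the shared parser, delegate, return the result.
fn parse_expr(parser: Arc<Mutex<dyn Parser>>) -> Result<String, String> {
    let mut guard = parser.lock().unwrap();
    guard.parse_prefix()
}

fn main() {
    let parser: Arc<Mutex<dyn Parser>> = Arc::new(Mutex::new(Fixed));
    println!("{:?}", parse_expr(parser)); // Ok("expr")
}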
@@ -1,7 +1,7 @@
 use std::cmp::PartialEq;
 use std::fmt::Debug;
-use std::iter::Peekable;
-use std::str::Chars;
+//use std::iter::Peekable;
+//use std::str::Chars;
 
 #[derive(Debug)]
 pub struct Position {
@@ -54,31 +54,35 @@ pub trait SQLTokenizer<TokenType>
     fn precedence(&self, token: &SQLToken<TokenType>) -> usize;
 
     /// return a reference to the next token but do not advance the index
-    fn peek_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<TokenType>>, TokenizerError<TokenType>>;
+    fn peek_token(&mut self) -> Result<Option<SQLToken<TokenType>>, TokenizerError<TokenType>>;
 
     /// return a reference to the next token and advance the index
-    fn next_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<TokenType>>, TokenizerError<TokenType>>;
+    fn next_token(&mut self) -> Result<Option<SQLToken<TokenType>>, TokenizerError<TokenType>>;
+
+    fn peek_char(&mut self) -> Option<&char>;
+
+    fn next_char(&mut self) -> Option<&char>;
 }
 
-pub fn tokenize<TokenType>(sql: &str, tokenizer: &mut SQLTokenizer<TokenType>) -> Result<Vec<SQLToken<TokenType>>, TokenizerError<TokenType>>
-    where TokenType: Debug + PartialEq
-{
-    let mut peekable = sql.chars().peekable();
-
-    let mut tokens : Vec<SQLToken<TokenType>> = vec![];
-
-    loop {
-        match tokenizer.next_token(&mut peekable)? {
-            Some(SQLToken::Whitespace(_)) => { /* ignore */ },
-            Some(token) => {
-                println!("Token: {:?}", token);
-                tokens.push(token)
-            },
-            None => break
-        }
-    }
-
-    Ok(tokens)
-}
+//
+//pub fn tokenize<TokenType>(sql: &str, tokenizer: &mut SQLTokenizer<TokenType>) -> Result<Vec<SQLToken<TokenType>>, TokenizerError<TokenType>>
+//    where TokenType: Debug + PartialEq
+//    {
+//
+//    let mut peekable = sql.chars().peekable();
+//
+//    let mut tokens : Vec<SQLToken<TokenType>> = vec![];
+//
+//    loop {
+//        match tokenizer.next_token(&mut peekable)? {
+//            Some(SQLToken::Whitespace(_)) => { /* ignore */ },
+//            Some(token) => {
+//                println!("Token: {:?}", token);
+//                tokens.push(token)
+//            },
+//            None => break
+//        }
+//    }
+//
+//    Ok(tokens)
+//}
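The free `tokenize` function is commented out here because it assumed the old convention of passing the char iterator in. An equivalent loop against the new self-contained trait might look like this sketch (simplified token type and invented names, not the crate's code):

// Simplified stand-in for the revised SQLTokenizer trait.
trait Tokenizer {
    fn next_token(&mut self) -> Result<Option<char>, String>;
}

// Collect all tokens until the stream is exhausted, skipping whitespace
// as the old `tokenize` loop did, minus the `Peekable<Chars>` argument.
fn tokenize_all<T: Tokenizer>(tokenizer: &mut T) -> Result<Vec<char>, String> {
    let mut tokens = Vec::new();
    while let Some(token) = tokenizer.next_token()? {
        if token.is_whitespace() {
            continue; // ignore whitespace
        }
        tokens.push(token);
    }
    Ok(tokens)
}

struct CharStream(Vec<char>);

impl Tokenizer for CharStream {
    fn next_token(&mut self) -> Result<Option<char>, String> {
        Ok(if self.0.is_empty() { None } else { Some(self.0.remove(0)) })
    }
}

fn main() {
    let mut t = CharStream("a b".chars().collect());
    println!("{:?}", tokenize_all(&mut t)); // Ok(['a', 'b'])
}

As an aside, handing out `Option<&char>` from a `&mut self` method tends to fight the borrow checker, which may be why `peek_char`/`next_char` remain `unimplemented!()` stubs in several impls in this commit.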