mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
Refactoring
This commit is contained in:
parent
69a140a820
commit
2e20b15c2e
5 changed files with 68 additions and 58 deletions
|
@ -35,7 +35,7 @@ impl SQLTokenizer<AcmeToken> for AcmeTokenizer {
|
|||
unimplemented!()
|
||||
}
|
||||
|
||||
fn next_token(&mut self) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError> {
|
||||
fn next_token(&mut self, chars: &mut CharSeq) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError> {
|
||||
// let mut arc = self.ansi_tokenizer.lock().unwrap();
|
||||
// match arc.peek_char() {
|
||||
// Some(&ch) => match ch {
|
||||
|
@ -67,14 +67,14 @@ struct AcmeParser {
|
|||
|
||||
impl SQLParser<AcmeToken, AcmeExpr> for AcmeParser {
|
||||
|
||||
fn parse_prefix(&mut self) -> Result<Box<SQLExpr<AcmeExpr>>, ParserError<AcmeToken>> {
|
||||
fn parse_prefix(&mut self, chars: &mut CharSeq) -> Result<Box<SQLExpr<AcmeExpr>>, ParserError<AcmeToken>> {
|
||||
//TODO: add custom overrides
|
||||
self.ansi_parser.lock().unwrap().parse_prefix()
|
||||
self.ansi_parser.lock().unwrap().parse_prefix(chars)
|
||||
}
|
||||
|
||||
fn parse_infix(&mut self, left: &SQLExpr<AcmeExpr>, precedence: usize) -> Result<Option<Box<SQLExpr<AcmeExpr>>>, ParserError<AcmeToken>> {
|
||||
fn parse_infix(&mut self, chars: &mut CharSeq, left: &SQLExpr<AcmeExpr>, precedence: usize) -> Result<Option<Box<SQLExpr<AcmeExpr>>>, ParserError<AcmeToken>> {
|
||||
//TODO: add custom overrides
|
||||
self.ansi_parser.lock().unwrap().parse_infix(left, precedence)
|
||||
self.ansi_parser.lock().unwrap().parse_infix(chars, left, precedence)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -83,7 +83,7 @@ fn main() {
|
|||
let sql = "1 + !! 5 * 2";
|
||||
|
||||
// ANSI SQL tokenizer
|
||||
let ansi_tokenizer = Arc::new(Mutex::new(ANSISQLTokenizer { chars: sql.chars().peekable() }));
|
||||
let ansi_tokenizer = Arc::new(Mutex::new(ANSISQLTokenizer { }));
|
||||
|
||||
// Custom ACME tokenizer
|
||||
let mut acme_tokenizer = Arc::new(Mutex::new(AcmeTokenizer {
|
||||
|
@ -95,9 +95,9 @@ fn main() {
|
|||
ansi_parser: Arc::new(Mutex::new(ANSISQLParser::new(acme_tokenizer)))
|
||||
}));
|
||||
|
||||
let expr = parse_expr(acme_parser).unwrap();
|
||||
|
||||
println!("Parsed: {:?}", expr);
|
||||
// let expr = parse_expr(acme_parser).unwrap();
|
||||
//
|
||||
// println!("Parsed: {:?}", expr);
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -22,9 +22,9 @@ impl<TokenType> ANSISQLParser<TokenType> where TokenType: Debug + PartialEq {
|
|||
impl<TokenType, ExprType> SQLParser<TokenType, ExprType> for ANSISQLParser<TokenType>
|
||||
where TokenType: Debug + PartialEq, ExprType: Debug {
|
||||
|
||||
fn parse_prefix(&mut self) -> Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>> {
|
||||
fn parse_prefix(&mut self, chars: &mut CharSeq) -> Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>> {
|
||||
|
||||
match self.tokenizer.lock().unwrap().next_token()? {
|
||||
match self.tokenizer.lock().unwrap().next_token(chars)? {
|
||||
Some(SQLToken::Keyword(ref k)) => match k.to_uppercase().as_ref() {
|
||||
"INSERT" => unimplemented!(),
|
||||
"UPDATE" => unimplemented!(),
|
||||
|
@ -37,7 +37,7 @@ impl<TokenType, ExprType> SQLParser<TokenType, ExprType> for ANSISQLParser<Token
|
|||
}
|
||||
}
|
||||
|
||||
fn parse_infix(&mut self, _left: &SQLExpr<ExprType>, _precedence: usize) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>> {
|
||||
fn parse_infix(&mut self, _chars: &mut CharSeq, _left: &SQLExpr<ExprType>, _precedence: usize) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,28 +5,27 @@ use std::str::Chars;
|
|||
|
||||
use super::super::tokenizer::*;
|
||||
|
||||
pub struct ANSISQLTokenizer<'a> {
|
||||
pub chars: Peekable<Chars<'a>>
|
||||
pub struct ANSISQLTokenizer {
|
||||
}
|
||||
|
||||
impl<'a, TokenType> SQLTokenizer<TokenType> for ANSISQLTokenizer<'a>
|
||||
impl<TokenType> SQLTokenizer<TokenType> for ANSISQLTokenizer
|
||||
where TokenType: Debug + PartialEq {
|
||||
|
||||
fn precedence(&self, _token: &SQLToken<TokenType>) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn next_token(&mut self) -> Result<Option<SQLToken<TokenType>>, TokenizerError> {
|
||||
match self.chars.next() {
|
||||
fn next_token(&mut self, chars: &mut CharSeq) -> Result<Option<SQLToken<TokenType>>, TokenizerError> {
|
||||
match chars.next() {
|
||||
Some(ch) => match ch {
|
||||
' ' | '\t' | '\n' => Ok(Some(SQLToken::Whitespace(ch))),
|
||||
'0' ... '9' => {
|
||||
let mut s = String::new();
|
||||
s.push(ch);
|
||||
while let Some(&ch) = self.chars.peek() {
|
||||
while let Some(&ch) = chars.peek() {
|
||||
match ch {
|
||||
'0' ... '9' => {
|
||||
self.chars.next(); // consume
|
||||
chars.next(); // consume
|
||||
s.push(ch);
|
||||
},
|
||||
_ => break
|
||||
|
|
|
@ -110,20 +110,20 @@ pub trait SQLParser<TokenType, ExprType>
|
|||
where TokenType: Debug + PartialEq, ExprType: Debug {
|
||||
|
||||
/// parse the prefix and stop once an infix operator is reached
|
||||
fn parse_prefix(&mut self) -> Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>> ;
|
||||
fn parse_prefix(&mut self, chars: &mut CharSeq) -> Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>> ;
|
||||
/// parse the next infix expression, returning None if the precedence has changed
|
||||
fn parse_infix(&mut self, left: &SQLExpr<ExprType>, precedence: usize) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>>;
|
||||
fn parse_infix(&mut self, chars: &mut CharSeq, left: &SQLExpr<ExprType>, precedence: usize) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>>;
|
||||
}
|
||||
|
||||
|
||||
pub fn parse_expr<'a, TokenType, ExprType>(parser: Arc<Mutex<SQLParser<TokenType, ExprType>>>)
|
||||
-> Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>> where TokenType: Debug + PartialEq, ExprType: Debug {
|
||||
let mut guard = parser.lock().unwrap();
|
||||
|
||||
//Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>>
|
||||
let x = guard.parse_prefix();
|
||||
x
|
||||
}
|
||||
//
|
||||
//pub fn parse_expr<'a, TokenType, ExprType>(parser: Arc<Mutex<SQLParser<TokenType, ExprType>>>)
|
||||
// -> Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>> where TokenType: Debug + PartialEq, ExprType: Debug {
|
||||
// let mut guard = parser.lock().unwrap();
|
||||
//
|
||||
// //Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>>
|
||||
// let x = guard.parse_prefix();
|
||||
// x
|
||||
//}
|
||||
|
||||
|
||||
//pub struct PrattParser<'a, TokenType, ExprType> {
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
use std::cmp::PartialEq;
|
||||
use std::fmt::Debug;
|
||||
//use std::iter::Peekable;
|
||||
//use std::str::Chars;
|
||||
|
||||
|
||||
/// Simple holder for a sequence of characters that supports iteration and mark/reset methods
|
||||
pub struct CharSeq {
|
||||
chars: Vec<char>,
|
||||
i: usize,
|
||||
|
@ -12,6 +10,7 @@ pub struct CharSeq {
|
|||
|
||||
impl CharSeq {
|
||||
|
||||
/// Create a CharSeq from a string
|
||||
pub fn new(sql: &str) -> Self {
|
||||
CharSeq {
|
||||
chars: sql.chars().collect(),
|
||||
|
@ -20,14 +19,26 @@ impl CharSeq {
|
|||
}
|
||||
}
|
||||
|
||||
/// Mark the current index.
///
/// Copies the current position `i` into the mark slot `m` so that a later
/// call to `reset` can rewind to this point. Calling `mark` again overwrites
/// the previously stored mark.
|
||||
pub fn mark(&mut self) {
|
||||
self.m = self.i;
|
||||
}
|
||||
|
||||
/// Reset the index.
///
/// Rewinds the current position `i` to the value stored in the mark slot `m`.
/// The mark itself is left untouched, so `reset` may be called repeatedly.
|
||||
pub fn reset(&mut self) {
|
||||
self.i = self.m;
|
||||
}
|
||||
|
||||
/// Peek the next char
|
||||
pub fn peek(&mut self) -> Option<&char> {
|
||||
if self.i < self.chars.len() {
|
||||
Some(&self.chars[self.i])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the next char
|
||||
pub fn next(&mut self) -> Option<char> {
|
||||
if self.i < self.chars.len() {
|
||||
self.i += 1;
|
||||
|
@ -61,8 +72,8 @@ pub enum TokenizerError {
|
|||
#[derive(Debug,PartialEq)]
|
||||
pub enum SQLToken<T: Debug + PartialEq> {
|
||||
Whitespace(char),
|
||||
Keyword(String), //TODO: &str ?
|
||||
Identifier(String), //TODO: &str ?
|
||||
Keyword(String),
|
||||
Identifier(String),
|
||||
Literal(String), //TODO: need to model different types of literal
|
||||
Plus,
|
||||
Minus,
|
||||
|
@ -89,28 +100,28 @@ pub trait SQLTokenizer<TokenType>
|
|||
fn precedence(&self, token: &SQLToken<TokenType>) -> usize;
|
||||
|
||||
/// return a reference to the next token and advance the index
|
||||
fn next_token(&mut self) -> Result<Option<SQLToken<TokenType>>, TokenizerError>;
|
||||
fn next_token(&mut self, chars: &mut CharSeq) -> Result<Option<SQLToken<TokenType>>, TokenizerError>;
|
||||
}
|
||||
|
||||
//
|
||||
//pub fn tokenize<TokenType>(sql: &str, tokenizer: &mut SQLTokenizer<TokenType>) -> Result<Vec<SQLToken<TokenType>>, TokenizerError<TokenType>>
|
||||
// where TokenType: Debug + PartialEq
|
||||
// {
|
||||
//
|
||||
// let mut peekable = sql.chars().peekable();
|
||||
//
|
||||
// let mut tokens : Vec<SQLToken<TokenType>> = vec![];
|
||||
//
|
||||
// loop {
|
||||
// match tokenizer.next_token(&mut peekable)? {
|
||||
// Some(SQLToken::Whitespace(_)) => { /* ignore */ },
|
||||
// Some(token) => {
|
||||
// println!("Token: {:?}", token);
|
||||
// tokens.push(token)
|
||||
// },
|
||||
// None => break
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// Ok(tokens)
|
||||
//}
|
||||
|
||||
pub fn tokenize<TokenType>(sql: &str, tokenizer: &mut SQLTokenizer<TokenType>) -> Result<Vec<SQLToken<TokenType>>, TokenizerError>
|
||||
where TokenType: Debug + PartialEq
|
||||
{
|
||||
|
||||
let mut chars = CharSeq::new(sql);
|
||||
|
||||
let mut tokens : Vec<SQLToken<TokenType>> = vec![];
|
||||
|
||||
loop {
|
||||
match tokenizer.next_token(&mut chars)? {
|
||||
Some(SQLToken::Whitespace(_)) => { /* ignore */ },
|
||||
Some(token) => {
|
||||
println!("Token: {:?}", token);
|
||||
tokens.push(token)
|
||||
},
|
||||
None => break
|
||||
}
|
||||
}
|
||||
|
||||
Ok(tokens)
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue