roughing out classic pratt parser

Andy Grove 2018-02-08 07:49:24 -07:00
parent 1e5686c793
commit b16396aef3
3 changed files with 168 additions and 56 deletions

.gitignore

@@ -10,4 +10,4 @@ Cargo.lock
**/*.rs.bk
# IDEs
-.gitignore
+.idea


@@ -6,4 +6,5 @@ Goals:
- Support for custom SQL dialects, so other projects can implement their own parsers easily
- Zero-copy of tokens when parsing
- Good error reporting (e.g. show line / column numbers and descriptive messages)
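A minimal sketch (not part of this commit) of what the "Zero-copy of tokens" goal could look like in practice: token variants that borrow string slices from the SQL text instead of allocating owned Strings. The BorrowedToken name is illustrative only.

enum BorrowedToken<'a> {
    Keyword(&'a str),
    Identifier(&'a str),
    Literal(&'a str),
}

fn first_token(sql: &str) -> BorrowedToken<'_> {
    // borrows from `sql`; the token text is never copied
    let end = sql.find(' ').unwrap_or(sql.len());
    BorrowedToken::Keyword(&sql[..end])
}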


@@ -1,8 +1,11 @@
///
/// This is a mock up of some key data structures and traits for a SQL parser
/// that can be used with custom dialects
///
/* --- TOKENIZER API --- */
enum TokenizerError {
WrongToken { expected: SQLToken, actual: SQLToken, line: usize, col: usize },
TBD
}
@@ -10,7 +13,9 @@ enum TokenizerError {
enum SQLToken {
Keyword(String),
Identifier(String),
Literal(String), //TODO: need to model different types of literal
Eq,
NotEq,
Gt,
GtEq,
Lt,
@@ -25,11 +30,11 @@ trait CustomToken {
//TODO: ???
}
-trait SQLTokenizer<'a> {
+trait SQLTokenizer {
// return a reference to the next token without consuming it (look ahead)
-fn peek_token(&'a mut self) -> Result<Option<&'a SQLToken>, Box<TokenizerError>>;
+fn peek_token(&mut self) -> Result<Option<SQLToken>, TokenizerError>;
// return a reference to the next token and advance the index
-fn next_token(&'a mut self) -> Result<Option<&'a SQLToken>, Box<TokenizerError>>;
+fn next_token(&mut self) -> Result<Option<SQLToken>, TokenizerError>;
}
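// Illustrative only (not part of this commit): a trivial implementation of the
// SQLTokenizer trait above, serving tokens from a pre-built Vec. It assumes
// SQLToken derives Clone, since peek_token/next_token return tokens by value.
struct VecTokenizer {
    tokens: Vec<SQLToken>,
    index: usize,
}

impl SQLTokenizer for VecTokenizer {
    fn peek_token(&mut self) -> Result<Option<SQLToken>, TokenizerError> {
        // look at the current token without consuming it
        Ok(self.tokens.get(self.index).cloned())
    }

    fn next_token(&mut self) -> Result<Option<SQLToken>, TokenizerError> {
        // return the current token and advance the index
        let token = self.tokens.get(self.index).cloned();
        if token.is_some() {
            self.index += 1;
        }
        Ok(token)
    }
}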
/* --- PARSER API --- */
@@ -62,6 +67,11 @@ enum SQLExpr {
Binary(Box<SQLExpr>, SQLOperator, Box<SQLExpr>),
/// Function invocation with function name and list of argument expressions
FunctionCall(String, Vec<SQLExpr>),
Insert,
Update,
Delete,
Select,
CreateTable,
/// Custom expression (vendor-specific)
Custom(Box<CustomExpr>)
}
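// Illustrative only (not part of this commit): building the expression
// NOW() > CURRENT_TIMESTAMP() by hand from the variants above. It assumes
// SQLOperator has a Gt variant; the operator enum is not shown in this hunk.
fn example_binary_expr() -> SQLExpr {
    SQLExpr::Binary(
        Box::new(SQLExpr::FunctionCall("NOW".to_string(), vec![])),
        SQLOperator::Gt,
        Box::new(SQLExpr::FunctionCall("CURRENT_TIMESTAMP".to_string(), vec![])),
    )
}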
@@ -71,71 +81,172 @@ trait CustomExpr {
}
enum ParserError {
WrongToken { expected: Vec<SQLToken>, actual: SQLToken, line: usize, col: usize },
TBD
}
-trait Parser<'a> {
-fn parse_expr(&mut self) -> Result<Box<SQLExpr>, Box<ParserError>>;
-fn parse_expr_list(&mut self) -> Result<Vec<SQLExpr>, Box<ParserError>>;
-fn parse_identifier(&mut self) -> Result<String, Box<ParserError>>;
-fn parse_keywords(&mut self, keywords: Vec<&str>) -> Result<bool, Box<ParserError>>;
-}
-/* --- KUDU PARSER IMPL --- */
-struct KuduParser<'a> {
-generic_parser: Box<Parser<'a>>
-}
-impl<'a> Parser<'a> for KuduParser<'a> {
-fn parse_expr(&mut self) -> Result<Box<SQLExpr>, Box<ParserError>> {
-self.generic_parser.parse_expr()
-}
-fn parse_expr_list(&mut self) -> Result<Vec<SQLExpr>, Box<ParserError>> {
-self.generic_parser.parse_expr_list()
-}
-fn parse_identifier(&mut self) -> Result<String, Box<ParserError>> {
-self.generic_parser.parse_identifier()
-}
-fn parse_keywords(&mut self, keywords: Vec<&str>) -> Result<bool, Box<ParserError>> {
-self.parse_keywords(keywords)
+impl From<TokenizerError> for ParserError {
+fn from(_: TokenizerError) -> Self {
+unimplemented!()
}
}
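// Illustrative only (not part of this commit): the From impl above is what lets
// tokenizer calls be used with `?` inside parser methods returning
// Result<_, ParserError>. A concrete conversion might simply carry the fields
// across, e.g.:
fn example_convert(e: TokenizerError) -> ParserError {
    match e {
        TokenizerError::WrongToken { expected, actual, line, col } => {
            ParserError::WrongToken { expected: vec![expected], actual, line, col }
        }
        TokenizerError::TBD => ParserError::TBD,
    }
}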
-/* --- PRATT PARSER IMPL --- */
-struct PrattParser<'a> {
-parser: Box<Parser<'a>>
+trait Parser {
+fn parse_prefix(&mut self) -> Result<Box<SQLExpr>, ParserError> ;
+fn parse_infix(&mut self, left: SQLExpr) -> Result<Box<SQLExpr>, ParserError> ;
}
-impl<'a> PrattParser<'a> {
+/* -- GENERIC (ANSI SQL) PARSER -- */
-fn parse_expr(&'a mut self, precedence: u8) -> SQLExpr {
+struct GenericParser {
+tokenizer: SQLTokenizer
}
impl GenericParser {
fn parse_expr(&mut self, precedence: u8) -> Result<Box<SQLExpr>, ParserError> {
let mut expr = self.parse_prefix()?;
// loop while there are more tokens and until the precedence changes
while let Some(token) = self.tokenizer.peek_token()? {
let next_precedence = self.get_precedence(&token);
if precedence >= next_precedence {
break;
}
expr = self.parse_infix(expr, next_precedence)?;
}
Ok(expr)
}
fn parse_prefix(&mut self) -> Result<Box<SQLExpr>, ParserError> {
match self.tokenizer.peek_token()? {
Some(SQLToken::Keyword(ref k)) => match k.to_uppercase().as_ref() {
"INSERT" => unimplemented!(),
"UPDATE" => unimplemented!(),
"DELETE" => unimplemented!(),
"SELECT" => unimplemented!(),
"CREATE" => unimplemented!(),
_ => unimplemented!()
},
_ => unimplemented!()
}
unimplemented!()
}
fn parse_infix(&mut self, expr: Box<SQLExpr>, precedence: u8) -> Result<Box<SQLExpr>, ParserError> {
match self.tokenizer.next_token()? {
Some(tok) => {
match tok {
SQLToken::Eq | SQLToken::Gt | SQLToken::GtEq |
SQLToken::Lt | SQLToken::LtEq => Ok(Box::new(SQLExpr::Binary(
expr,
self.to_sql_operator(&tok),
self.parse_expr(precedence)?
))),
_ => Err(ParserError::WrongToken {
expected: vec![SQLToken::Eq, SQLToken::Gt], //TODO: complete
actual: tok,
line: 0,
col: 0
})
}
},
None => Err(ParserError::TBD)
}
}
fn to_sql_operator(&self, token: &SQLToken) -> SQLOperator {
unimplemented!()
}
fn get_precedence(&self, token: &SQLToken) -> u8 {
unimplemented!()
}
/// parse a list of SQL expressions separated by a comma
fn parse_expr_list(&mut self, precedence: u8) -> Result<Vec<SQLExpr>, ParserError> {
unimplemented!()
}
//
// // Not complete/accurate, but enough to demonstrate the concept that the pratt parser
// // does not need knowledge of the specific tokenizer or parser to operate
//
// loop {
// match self.tokenizer.peek_token() {
// Ok(Some(token)) => {
// let next_precedence = self.parser.get_precedence(&token);
// unimplemented!()
// },
// _ => {
// }
// }
// }
//
//
}
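// Illustrative only (not part of this commit): one possible precedence table for
// get_precedence above. Higher numbers bind tighter, and returning 0 for
// unrecognised tokens makes the precedence-climbing loop in parse_expr stop.
// The exact values are an assumption.
fn example_precedence(token: &SQLToken) -> u8 {
    match token {
        SQLToken::Eq | SQLToken::NotEq
        | SQLToken::Gt | SQLToken::GtEq
        | SQLToken::Lt | SQLToken::LtEq => 20,
        _ => 0,
    }
}
// With a table like this, parse_expr(0) would parse `a = b` into a single
// Binary expression, while parse_expr(20) would return before consuming the `=`.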
//impl GenericParser {
//
// fn tokenizer(&mut self) -> &mut SQLTokenizer {
// &mut self.tokenizer
// }
//
// fn parse_keywords(&mut self, keywords: Vec<&str>) -> Result<bool, ParserError> {
// unimplemented!()
// }
//
//// fn parse_identifier(&mut self) -> Result<String, ParserError>;
//
//}
/* --- KUDU PARSER IMPL --- */
///// KuduParser is a wrapper around GenericParser
//struct KuduParser {
// generic: GenericParser
//}
//
//impl Parser for KuduParser {
//
// fn parse_prefix(&mut self) -> Result<Box<SQLExpr>, ParserError> {
//
// // just take over the statements we need to and delegate everything else
// // to the generic parser
// if self.generic.parse_keywords(vec!["CREATE", "TABLE"])? {
//
// //TODO: insert kudu CREATE TABLE parsing logic here
// // .. we can delegate to the generic parsers for parts of that even
//
// // mock response
// let kudu_create_table = KuduCreateTable {
// partition: vec![KuduPartition::Hash]
// };
//
// Ok(Box::new(SQLExpr::Custom(Box::new(kudu_create_table ))))
// } else {
// _ => self.generic.parse_prefix()
// }
// }
//
// fn parse_infix(&mut self) -> Result<Box<SQLExpr>, ParserError> {
// self.generic.parse_infix()
// }
//}
//
//impl KuduParser {
//
// fn tokenizer(&mut self) -> &mut SQLTokenizer {
// &mut self.generic.tokenizer
// }
//
//}
//
//enum KuduPartition {
// Hash,
// Range,
//}
//
//struct KuduCreateTable {
// partition: Vec<KuduPartition>
//}
//
//impl CustomExpr for KuduCreateTable {
//
//}
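// Illustrative only (not part of this commit): the delegation idea the
// commented-out KuduParser above is sketching: a dialect parser takes over the
// statements it cares about and falls back to the generic parser for everything
// else. `expect_keywords` is a hypothetical stand-in for the commented-out
// parse_keywords helper.
struct ExampleDialectParser {
    generic: GenericParser,
}

impl ExampleDialectParser {
    fn parse_prefix(&mut self) -> Result<Box<SQLExpr>, ParserError> {
        if self.expect_keywords(&["CREATE", "TABLE"])? {
            // dialect-specific CREATE TABLE parsing would go here, returning
            // SQLExpr::Custom(..) wrapping a dialect-specific structure
            unimplemented!()
        } else {
            // everything else is delegated to the generic ANSI SQL parser
            self.generic.parse_prefix()
        }
    }

    fn expect_keywords(&mut self, _keywords: &[&str]) -> Result<bool, ParserError> {
        // would peek at the tokenizer for the given keyword sequence
        unimplemented!()
    }
}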
#[cfg(test)]
mod tests {