mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
roughing out new version
This commit is contained in:
parent
4a30441f17
commit
3caeb58d22
4 changed files with 168 additions and 1 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -8,3 +8,6 @@ Cargo.lock
|
|||
|
||||
# These are backup files generated by rustfmt
|
||||
**/*.rs.bk
|
||||
|
||||
# IDEs
|
||||
.gitignore
|
||||
|
|
6
Cargo.toml
Normal file
6
Cargo.toml
Normal file
|
@ -0,0 +1,6 @@
|
|||
[package]
|
||||
name = "datafusion-sql"
|
||||
version = "0.1.0"
|
||||
authors = ["Andy Grove <andygrove73@gmail.com>"]
|
||||
|
||||
[dependencies]
|
|
@ -1,2 +1,9 @@
|
|||
# datafusion-sql
|
||||
DataFusion SQL Parser
|
||||
|
||||
DataFusion SQL Parser (v2)
|
||||
|
||||
Goals:
|
||||
|
||||
- Support for custom SQL dialects, so other projects can implement their own parsers easily
|
||||
- Zero-copy of tokens when parsing
|
||||
|
||||
|
|
151
src/lib.rs
Normal file
151
src/lib.rs
Normal file
|
@ -0,0 +1,151 @@
|
|||
|
||||
/* --- TOKENIZER API --- */
|
||||
|
||||
enum TokenizerError {
|
||||
WrongToken { expected: SQLToken, actual: SQLToken, line: usize, col: usize },
|
||||
TBD
|
||||
}
|
||||
|
||||
/// SQL Tokens
|
||||
enum SQLToken {
|
||||
Keyword(String),
|
||||
Identifier(String),
|
||||
Eq,
|
||||
Gt,
|
||||
GtEq,
|
||||
Lt,
|
||||
LtEq,
|
||||
LParen,
|
||||
RParen,
|
||||
Comma,
|
||||
Custom(Box<CustomToken>) // extension point for vendor-specific tokens
|
||||
}
|
||||
|
||||
/// Marker trait for vendor-specific tokens.
///
/// It declares no methods yet; what a custom token must provide is still undecided.
trait CustomToken {
    // TODO: decide which methods a vendor-specific token has to implement.
}
|
||||
|
||||
trait SQLTokenizer<'a> {
|
||||
// return a reference to the next token without consuming it (look ahead)
|
||||
fn peek_token(&'a mut self) -> Result<Option<&'a SQLToken>, Box<TokenizerError>>;
|
||||
// return a reference to the next token and advance the index
|
||||
fn next_token(&'a mut self) -> Result<Option<&'a SQLToken>, Box<TokenizerError>>;
|
||||
}
|
||||
|
||||
/* --- PARSER API --- */
|
||||
|
||||
/// SQL Operators
|
||||
enum SQLOperator {
|
||||
Plus,
|
||||
Minus,
|
||||
Mult,
|
||||
Div,
|
||||
Eq,
|
||||
Gt,
|
||||
GtEq,
|
||||
Lt,
|
||||
LtEq,
|
||||
Custom(Box<CustomOperator>) // extension point for vendor-specific operators
|
||||
}
|
||||
|
||||
/// Marker trait for vendor-specific operators.
///
/// It declares no methods yet; what a custom operator must provide is still undecided.
trait CustomOperator {
    // TODO: decide which methods a vendor-specific operator has to implement.
}
|
||||
|
||||
/// SQL Expressions
|
||||
enum SQLExpr {
|
||||
/// Identifier e.g. table name or column name
|
||||
Identifier(String),
|
||||
/// Literal value
|
||||
Literal(String),
|
||||
/// Binary expression e.g. `1 + 2` or `fname LIKE "A%"`
|
||||
Binary(Box<SQLExpr>, SQLOperator, Box<SQLExpr>),
|
||||
/// Function invocation with function name and list of argument expressions
|
||||
FunctionCall(String, Vec<SQLExpr>),
|
||||
/// Custom expression (vendor-specific)
|
||||
Custom(Box<CustomExpr>)
|
||||
}
|
||||
|
||||
/// Marker trait for vendor-specific expressions.
///
/// It declares no methods yet; what a custom expression must provide is still undecided.
trait CustomExpr {
    // TODO: decide which methods a vendor-specific expression has to implement.
}
|
||||
|
||||
/// Errors reported by the parser API.
///
/// Derives `Debug` so that a `Result<_, Box<ParserError>>` can be formatted with `{:?}`
/// and used with `unwrap`/`expect`, both of which require the error type to be `Debug`.
#[derive(Debug)]
enum ParserError {
    /// Placeholder variant; the real error cases are still to be defined.
    TBD,
}
|
||||
|
||||
trait Parser<'a> {
|
||||
fn parse_expr(&mut self) -> Result<Box<SQLExpr>, Box<ParserError>>;
|
||||
fn parse_expr_list(&mut self) -> Result<Vec<SQLExpr>, Box<ParserError>>;
|
||||
fn parse_identifier(&mut self) -> Result<String, Box<ParserError>>;
|
||||
fn parse_keywords(&mut self, keywords: Vec<&str>) -> Result<bool, Box<ParserError>>;
|
||||
}
|
||||
|
||||
/* --- KUDU PARSER IMPL --- */
|
||||
|
||||
struct KuduParser<'a> {
|
||||
generic_parser: Box<Parser<'a>>
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> for KuduParser<'a> {
|
||||
|
||||
fn parse_expr(&mut self) -> Result<Box<SQLExpr>, Box<ParserError>> {
|
||||
self.generic_parser.parse_expr()
|
||||
}
|
||||
|
||||
fn parse_expr_list(&mut self) -> Result<Vec<SQLExpr>, Box<ParserError>> {
|
||||
self.generic_parser.parse_expr_list()
|
||||
}
|
||||
|
||||
fn parse_identifier(&mut self) -> Result<String, Box<ParserError>> {
|
||||
self.generic_parser.parse_identifier()
|
||||
}
|
||||
|
||||
fn parse_keywords(&mut self, keywords: Vec<&str>) -> Result<bool, Box<ParserError>> {
|
||||
self.parse_keywords(keywords)
|
||||
}
|
||||
}
|
||||
|
||||
/* --- PRATT PARSER IMPL --- */
|
||||
|
||||
struct PrattParser<'a> {
|
||||
parser: Box<Parser<'a>>
|
||||
}
|
||||
|
||||
impl<'a> PrattParser<'a> {
|
||||
|
||||
fn parse_expr(&'a mut self, precedence: u8) -> SQLExpr {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
//
|
||||
// // Not complete/accurate, but enough to demonstrate the concept that the pratt parser
|
||||
// // does not need knowledge of the specific tokenizer or parser to operate
|
||||
//
|
||||
// loop {
|
||||
// match self.tokenizer.peek_token() {
|
||||
// Ok(Some(token)) => {
|
||||
// let next_precedence = self.parser.get_precedence(&token);
|
||||
// unimplemented!()
|
||||
// },
|
||||
// _ => {
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
//
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: hand-builds the token stream for `CREATE TABLE test` and checks its shape.
    #[test]
    fn it_works() {
        let tokens = vec![
            SQLToken::Keyword("CREATE".to_string()),
            SQLToken::Keyword("TABLE".to_string()),
            // `test` is the table name, so it is an identifier rather than a keyword.
            SQLToken::Identifier("test".to_string()),
        ];

        assert_eq!(tokens.len(), 3);
        match tokens[2] {
            SQLToken::Identifier(ref name) => assert_eq!(name, "test"),
            _ => panic!("expected the table name to be an identifier token"),
        }
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue