replace with code from datafusion

Andy Grove 2018-09-03 09:56:39 -06:00
parent a86bd30515
commit 0c23392adb
14 changed files with 1762 additions and 595 deletions


@@ -1,3 +0,0 @@
pub mod tokenizer;
pub mod parser;


@@ -1,70 +0,0 @@
use std::cmp::PartialEq;
use std::fmt::Debug;
//use std::rc::Rc;
//use std::sync::{Arc, Mutex};
use super::tokenizer::ANSISQLTokenizer;
use super::super::tokenizer::*;
use super::super::parser::*;
pub struct ANSISQLParser {
tokenizer: Box<SQLTokenizer>
}
impl ANSISQLParser {
pub fn parse(sql: &str) -> Result<Option<Box<SQLExpr>>, ParserError> {
let mut parser = ANSISQLParser { tokenizer: Box::new(ANSISQLTokenizer::new(sql)) };
parser.parse_expr()
}
}
impl SQLParser for ANSISQLParser {
fn parse_expr(&mut self) -> Result<Option<Box<SQLExpr>>, ParserError> {
let precedence: usize = 0;
let mut e = self.parse_prefix()?;
match e {
Some(mut expr) => {
while let Some(token) = self.tokenizer.peek_token()? {
let next_precedence = self.tokenizer.precedence(&token);
if precedence >= next_precedence {
break;
}
expr = self.parse_infix(&expr, next_precedence)?.unwrap(); //TODO: fix me
}
Ok(Some(expr))
}
_ => {
Ok(None)
}
}
}
fn parse_prefix(&mut self) -> Result<Option<Box<SQLExpr>>, ParserError> {
match self.tokenizer.next_token()? {
Some(SQLToken::Keyword(ref k)) => match k.to_uppercase().as_ref() {
"INSERT" => unimplemented!(),
"UPDATE" => unimplemented!(),
"DELETE" => unimplemented!(),
"SELECT" => unimplemented!(),
"CREATE" => unimplemented!(),
_ => unimplemented!()
},
_ => unimplemented!()
}
}
fn parse_infix(&mut self, _left: &SQLExpr, _precedence: usize) -> Result<Option<Box<SQLExpr>>, ParserError> {
unimplemented!()
}
}


@@ -1,56 +0,0 @@
use std::cmp::PartialEq;
use std::fmt::Debug;
use super::super::tokenizer::*;
pub struct ANSISQLTokenizer {
chars: CharSeq
}
impl ANSISQLTokenizer {
pub fn new(sql: &str) -> Self {
ANSISQLTokenizer { chars: CharSeq::new(sql) }
}
}
impl SQLTokenizer for ANSISQLTokenizer {
fn precedence(&self, _token: &SQLToken) -> usize {
unimplemented!()
}
fn peek_token(&mut self) -> Result<Option<SQLToken>, TokenizerError> {
unimplemented!()
}
fn next_token(&mut self) -> Result<Option<SQLToken>, TokenizerError> {
match self.chars.next() {
Some(ch) => match ch {
' ' | '\t' | '\n' => Ok(Some(SQLToken::Whitespace(ch))),
'0' ... '9' => {
let mut s = String::new();
s.push(ch);
while let Some(&ch) = self.chars.peek() {
match ch {
'0' ... '9' => {
self.chars.next(); // consume
s.push(ch);
},
_ => break
}
}
Ok(Some(SQLToken::Literal(s)))
},
'+' => Ok(Some(SQLToken::Plus)),
'-' => Ok(Some(SQLToken::Minus)),
'*' => Ok(Some(SQLToken::Mult)),
'/' => Ok(Some(SQLToken::Divide)),
_ => Err(TokenizerError::UnexpectedChar(ch, Position::new(0, 0)))
},
None => Ok(None)
}
}
}


@@ -1,3 +1,22 @@
pub mod ansi;
pub mod tokenizer;
pub mod parser;
// Copyright 2018 Grove Enterprises LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
extern crate fnv;
#[macro_use]
extern crate lazy_static;
pub mod sqlast;
pub mod sqlparser;
pub mod sqltokenizer;


@@ -1,106 +0,0 @@
use std::cmp::PartialEq;
use std::fmt::Debug;
use super::tokenizer::*;
// https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html
/// ANSI SQL:2011 Data Types
#[derive(Debug)]
pub enum SQLDataType {
/// BOOLEAN
Boolean,
/// NUMERIC, DECIMAL, DEC
Numeric { precision: usize, scale: Option<usize> },
/// SMALLINT
SmallInt,
/// INT, INTEGER
Int,
/// BIGINT
BigInt,
/// Floating point: `FLOAT(precision)`
Float(usize),
/// REAL
Real,
/// Double: `DOUBLE PRECISION`
Double,
/// Fixed-length character. `CHAR, CHARACTER`
Char(usize),
/// Variable-length character: `VARCHAR, CHARACTER VARYING, CHAR VARYING`
VarChar(usize),
/// Character Large Object: `CHARACTER LARGE OBJECT, CHAR LARGE OBJECT, CLOB`
Clob(usize),
/// Fixed-length character. `NCHAR, NATIONAL CHAR, NATIONAL CHARACTER`
NChar(usize),
/// Variable-length character: `NCHAR VARYING, NATIONAL CHARACTER VARYING, NATIONAL CHAR VARYING`
NVarChar(usize),
/// National Character Large Object: `NATIONAL CHARACTER LARGE OBJECT, NCHAR LARGE OBJECT, NCLOB`
NClob(usize),
/// Fixed-length binary
Binary(usize),
/// Variable-length binary
VarBinary(usize),
/// Binary large object
Blob(usize),
/// Date
Date,
/// Time: `TIME [(precision)] [WITH TIME ZONE | WITHOUT TIME ZONE]`
Time { precision: usize, tz: bool },
/// Time: `TIMESTAMP [(precision)] [WITH TIME ZONE | WITHOUT TIME ZONE]`
Timestamp { precision: usize, tz: bool },
}
#[derive(Debug)]
pub enum SQLOperator {
Plus,
Minus,
Mult,
Div,
Eq,
Gt,
GtEq,
Lt,
LtEq,
}
/// SQL Expressions
#[derive(Debug)]
pub enum SQLExpr{
/// Identifier e.g. table name or column name
Identifier(String),
/// Literal value
Literal(String),
/// Binary expression e.g. `1 + 2` or `fname LIKE "A%"`
Binary(Box<SQLExpr>, SQLOperator, Box<SQLExpr>),
/// Function invocation with function name and list of argument expressions
FunctionCall(String, Vec<SQLExpr>),
Insert,
Update,
Delete,
Select,
CreateTable,
}
#[derive(Debug)]
pub enum ParserError {
WrongToken { expected: Vec<SQLToken>, actual: SQLToken, line: usize, col: usize },
Custom(String)
}
impl From<TokenizerError> for ParserError {
fn from(e: TokenizerError) -> Self {
ParserError::Custom(format!("{:?}", e))
}
}
pub trait SQLParser {
fn parse_expr(&mut self) -> Result<Option<Box<SQLExpr>>, ParserError>;
/// parse the prefix and stop once an infix operator is reached
fn parse_prefix(&mut self) -> Result<Option<Box<SQLExpr>>, ParserError>;
/// parse the next infix expression, returning None if the precedence has changed
fn parse_infix(&mut self, left: &SQLExpr, precedence: usize) -> Result<Option<Box<SQLExpr>>, ParserError>;
}

src/sqlast.rs (new file, 122 lines)

@@ -0,0 +1,122 @@
// Copyright 2018 Grove Enterprises LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! SQL Abstract Syntax Tree (AST) types
/// Supported file types for `CREATE EXTERNAL TABLE`
#[derive(Debug, Clone, PartialEq)]
pub enum FileType {
CSV,
NdJson,
Parquet,
}
/// SQL Abstract Syntax Tree (AST)
#[derive(Debug, Clone, PartialEq)]
pub enum ASTNode {
SQLIdentifier(String),
SQLWildcard,
SQLCompoundIdentifier(Vec<String>),
SQLIsNull(Box<ASTNode>),
SQLIsNotNull(Box<ASTNode>),
SQLBinaryExpr {
left: Box<ASTNode>,
op: SQLOperator,
right: Box<ASTNode>,
},
SQLCast {
expr: Box<ASTNode>,
data_type: SQLType,
},
SQLNested(Box<ASTNode>),
SQLUnary {
operator: SQLOperator,
rex: Box<ASTNode>,
},
SQLLiteralLong(i64),
SQLLiteralDouble(f64),
SQLLiteralString(String),
SQLFunction {
id: String,
args: Vec<ASTNode>,
},
SQLOrderBy {
expr: Box<ASTNode>,
asc: bool,
},
SQLSelect {
projection: Vec<ASTNode>,
relation: Option<Box<ASTNode>>,
selection: Option<Box<ASTNode>>,
order_by: Option<Vec<ASTNode>>,
group_by: Option<Vec<ASTNode>>,
having: Option<Box<ASTNode>>,
limit: Option<Box<ASTNode>>,
},
SQLCreateTable {
/// Table name
name: String,
/// Optional schema
columns: Vec<SQLColumnDef>,
/// File type (CSV or Parquet)
file_type: FileType,
/// For CSV files, indicate whether the file has a header row or not
header_row: bool,
/// Path to file or directory containing files
location: String,
},
}
/// SQL column definition
#[derive(Debug, Clone, PartialEq)]
pub struct SQLColumnDef {
pub name: String,
pub data_type: SQLType,
pub allow_null: bool,
}
/// SQL datatypes for literals in SQL statements
#[derive(Debug, Clone, PartialEq)]
pub enum SQLType {
Boolean,
UInt8,
UInt16,
UInt32,
UInt64,
Int8,
Int16,
Int32,
Int64,
Float32,
Double64,
Utf8(usize),
}
/// SQL Operator
#[derive(Debug, PartialEq, Clone)]
pub enum SQLOperator {
Plus,
Minus,
Multiply,
Divide,
Modulus,
Gt,
Lt,
GtEq,
LtEq,
Eq,
NotEq,
And,
Or,
}

src/sqlparser.rs (new file, 971 lines)

@@ -0,0 +1,971 @@
// Copyright 2018 Grove Enterprises LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! SQL Parser
use super::sqlast::*;
use super::sqltokenizer::*;
#[derive(Debug, Clone)]
pub enum ParserError {
TokenizerError(String),
ParserError(String),
}
macro_rules! parser_err {
($MSG:expr) => {
Err(ParserError::ParserError($MSG.to_string()))
};
}
impl From<TokenizerError> for ParserError {
fn from(e: TokenizerError) -> Self {
ParserError::TokenizerError(format!("{:?}", e))
}
}
/// SQL Parser
pub struct Parser {
tokens: Vec<Token>,
index: usize,
}
impl Parser {
/// Parse the specified tokens
pub fn new(tokens: Vec<Token>) -> Self {
Parser {
tokens,
index: 0,
}
}
/// Parse a SQL statement and produce an Abstract Syntax Tree (AST)
pub fn parse_sql(sql: String) -> Result<ASTNode, ParserError> {
let mut tokenizer = Tokenizer::new(&sql);
let tokens = tokenizer.tokenize()?;
let mut parser = Parser::new(tokens);
parser.parse()
}
/// Parse a new expression
pub fn parse(&mut self) -> Result<ASTNode, ParserError> {
self.parse_expr(0)
}
/// Parse tokens until the precedence changes
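/// This is a standard precedence-climbing (Pratt) loop: parse a prefix
/// expression, then keep folding infix operators into it for as long as the
/// next token binds more tightly than the current precedence level.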
fn parse_expr(&mut self, precedence: u8) -> Result<ASTNode, ParserError> {
// println!("parse_expr() precendence = {}", precedence);
let mut expr = self.parse_prefix()?;
// println!("parsed prefix: {:?}", expr);
loop {
let next_precedence = self.get_next_precedence()?;
if precedence >= next_precedence {
// println!("break on precedence change ({} >= {})", precedence, next_precedence);
break;
}
if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? {
// println!("parsed infix: {:?}", infix_expr);
expr = infix_expr;
}
}
// println!("parse_expr() returning {:?}", expr);
Ok(expr)
}
/// Parse an expression prefix
fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
match self.next_token() {
Some(t) => {
match t {
Token::Keyword(k) => match k.to_uppercase().as_ref() {
"SELECT" => Ok(self.parse_select()?),
"CREATE" => Ok(self.parse_create()?),
_ => return parser_err!(format!("No prefix parser for keyword {}", k)),
},
Token::Mult => Ok(ASTNode::SQLWildcard),
Token::Identifier(id) => {
match self.peek_token() {
Some(Token::LParen) => {
self.next_token(); // skip lparen
match id.to_uppercase().as_ref() {
"CAST" => self.parse_cast_expression(),
_ => {
let args = self.parse_expr_list()?;
self.next_token(); // skip rparen
Ok(ASTNode::SQLFunction { id, args })
}
}
}
Some(Token::Period) => {
let mut id_parts: Vec<String> = vec![id];
while self.peek_token() == Some(Token::Period) {
self.consume_token(&Token::Period)?;
match self.next_token() {
Some(Token::Identifier(id)) => id_parts.push(id),
_ => {
return parser_err!(format!(
"Error parsing compound identifier"
))
}
}
}
Ok(ASTNode::SQLCompoundIdentifier(id_parts))
}
_ => Ok(ASTNode::SQLIdentifier(id)),
}
}
Token::Number(ref n) if n.contains(".") => match n.parse::<f64>() {
Ok(n) => Ok(ASTNode::SQLLiteralDouble(n)),
Err(e) => parser_err!(format!("Could not parse '{}' as f64: {}", n, e)),
},
Token::Number(ref n) => match n.parse::<i64>() {
Ok(n) => Ok(ASTNode::SQLLiteralLong(n)),
Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)),
},
Token::String(ref s) => Ok(ASTNode::SQLLiteralString(s.to_string())),
_ => parser_err!(format!(
"No prefix parser for token {:?}",
t
)),
}
}
None => parser_err!("Prefix parser expected a token but hit EOF"),
}
}
/// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)`
fn parse_cast_expression(&mut self) -> Result<ASTNode, ParserError> {
let expr = self.parse_expr(0)?;
self.consume_token(&Token::Keyword("AS".to_string()))?;
let data_type = self.parse_data_type()?;
self.consume_token(&Token::RParen)?;
Ok(ASTNode::SQLCast {
expr: Box::new(expr),
data_type,
})
}
/// Parse an expression infix (typically an operator)
fn parse_infix(
&mut self,
expr: ASTNode,
precedence: u8,
) -> Result<Option<ASTNode>, ParserError> {
match self.next_token() {
Some(tok) => match tok {
Token::Keyword(ref k) => if k == "IS" {
if self.parse_keywords(vec!["NULL"]) {
Ok(Some(ASTNode::SQLIsNull(Box::new(expr))))
} else if self.parse_keywords(vec!["NOT", "NULL"]) {
Ok(Some(ASTNode::SQLIsNotNull(Box::new(expr))))
} else {
parser_err!("Invalid tokens after IS")
}
} else {
Ok(Some(ASTNode::SQLBinaryExpr {
left: Box::new(expr),
op: self.to_sql_operator(&tok)?,
right: Box::new(self.parse_expr(precedence)?),
}))
},
Token::Eq
| Token::Neq
| Token::Gt
| Token::GtEq
| Token::Lt
| Token::LtEq
| Token::Plus
| Token::Minus
| Token::Mult
| Token::Mod
| Token::Div => Ok(Some(ASTNode::SQLBinaryExpr {
left: Box::new(expr),
op: self.to_sql_operator(&tok)?,
right: Box::new(self.parse_expr(precedence)?),
})),
_ => parser_err!(format!("No infix parser for token {:?}", tok)),
},
None => Ok(None),
}
}
/// Convert a token operator to an AST operator
fn to_sql_operator(&self, tok: &Token) -> Result<SQLOperator, ParserError> {
match tok {
&Token::Eq => Ok(SQLOperator::Eq),
&Token::Neq => Ok(SQLOperator::NotEq),
&Token::Lt => Ok(SQLOperator::Lt),
&Token::LtEq => Ok(SQLOperator::LtEq),
&Token::Gt => Ok(SQLOperator::Gt),
&Token::GtEq => Ok(SQLOperator::GtEq),
&Token::Plus => Ok(SQLOperator::Plus),
&Token::Minus => Ok(SQLOperator::Minus),
&Token::Mult => Ok(SQLOperator::Multiply),
&Token::Div => Ok(SQLOperator::Divide),
&Token::Mod => Ok(SQLOperator::Modulus),
&Token::Keyword(ref k) if k == "AND" => Ok(SQLOperator::And),
&Token::Keyword(ref k) if k == "OR" => Ok(SQLOperator::Or),
_ => parser_err!(format!("Unsupported SQL operator {:?}", tok)),
}
}
/// Get the precedence of the next token
fn get_next_precedence(&self) -> Result<u8, ParserError> {
if self.index < self.tokens.len() {
self.get_precedence(&self.tokens[self.index])
} else {
Ok(0)
}
}
/// Get the precedence of a token
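/// Higher values bind more tightly; returning 0 for any other token is what
/// terminates the parse_expr() loop.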
fn get_precedence(&self, tok: &Token) -> Result<u8, ParserError> {
//println!("get_precedence() {:?}", tok);
match tok {
&Token::Keyword(ref k) if k == "OR" => Ok(5),
&Token::Keyword(ref k) if k == "AND" => Ok(10),
&Token::Keyword(ref k) if k == "IS" => Ok(15),
&Token::Eq | &Token::Lt | &Token::LtEq | &Token::Neq | &Token::Gt | &Token::GtEq => {
Ok(20)
}
&Token::Plus | &Token::Minus => Ok(30),
&Token::Mult | &Token::Div | &Token::Mod => Ok(40),
_ => Ok(0),
}
}
/// Peek at the next token
fn peek_token(&mut self) -> Option<Token> {
if self.index < self.tokens.len() {
Some(self.tokens[self.index].clone())
} else {
None
}
}
/// Get the next token and increment the token index
fn next_token(&mut self) -> Option<Token> {
if self.index < self.tokens.len() {
self.index += 1;
Some(self.tokens[self.index - 1].clone())
} else {
None
}
}
/// Get the previous token and decrement the token index
fn prev_token(&mut self) -> Option<Token> {
if self.index > 0 {
Some(self.tokens[self.index - 1].clone())
} else {
None
}
}
/// Look for an expected keyword and consume it if it exists
fn parse_keyword(&mut self, expected: &'static str) -> bool {
match self.peek_token() {
Some(Token::Keyword(k)) => {
if expected.eq_ignore_ascii_case(k.as_str()) {
self.next_token();
true
} else {
false
}
}
_ => false,
}
}
/// Look for an expected sequence of keywords and consume them if they exist
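/// On failure the token index is restored, so this acts as a backtracking lookahead.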
fn parse_keywords(&mut self, keywords: Vec<&'static str>) -> bool {
let index = self.index;
for keyword in keywords {
//println!("parse_keywords aborting .. expecting {}", keyword);
if !self.parse_keyword(&keyword) {
//println!("parse_keywords aborting .. did not find {}", keyword);
// reset index and return immediately
self.index = index;
return false;
}
}
true
}
// fn parse_identifier(&mut self) -> Result<ASTNode::SQLIdentifier, Err> {
// let expr = self.parse_expr()?;
// match expr {
// Some(ASTNode::SQLIdentifier { .. }) => Ok(expr),
// _ => parser_err!(format!("Expected identifier but found {:?}", expr)))
// }
// }
/// Consume the next token if it matches the expected token, otherwise return an error
fn consume_token(&mut self, expected: &Token) -> Result<bool, ParserError> {
match self.peek_token() {
Some(ref t) => if *t == *expected {
self.next_token();
Ok(true)
} else {
Ok(false)
},
_ => parser_err!(format!(
"expected token {:?} but was {:?}",
expected,
self.prev_token()
)),
}
}
/// Parse a SQL CREATE statement
fn parse_create(&mut self) -> Result<ASTNode, ParserError> {
if self.parse_keywords(vec!["EXTERNAL", "TABLE"]) {
match self.next_token() {
Some(Token::Identifier(id)) => {
// parse optional column list (schema)
let mut columns = vec![];
if self.consume_token(&Token::LParen)? {
loop {
if let Some(Token::Identifier(column_name)) = self.next_token() {
if let Ok(data_type) = self.parse_data_type() {
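// columns are nullable by default; an explicit NULL keyword is consumed but does not change that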
let allow_null = if self.parse_keywords(vec!["NOT", "NULL"]) {
false
} else if self.parse_keyword("NULL") {
true
} else {
true
};
match self.peek_token() {
Some(Token::Comma) => {
self.next_token();
columns.push(SQLColumnDef {
name: column_name,
data_type: data_type,
allow_null,
});
}
Some(Token::RParen) => {
self.next_token();
columns.push(SQLColumnDef {
name: column_name,
data_type: data_type,
allow_null,
});
break;
}
_ => {
return parser_err!(
"Expected ',' or ')' after column definition"
);
}
}
} else {
return parser_err!(
"Error parsing data type in column definition"
);
}
} else {
return parser_err!("Error parsing column name");
}
}
}
//println!("Parsed {} column defs", columns.len());
let mut headers = true;
let file_type: FileType = if self.parse_keywords(vec!["STORED", "AS", "CSV"]) {
if self.parse_keywords(vec!["WITH", "HEADER", "ROW"]) {
headers = true;
} else if self.parse_keywords(vec!["WITHOUT", "HEADER", "ROW"]) {
headers = false;
}
FileType::CSV
} else if self.parse_keywords(vec!["STORED", "AS", "NDJSON"]) {
FileType::NdJson
} else if self.parse_keywords(vec!["STORED", "AS", "PARQUET"]) {
FileType::Parquet
} else {
return parser_err!(format!(
"Expected 'STORED AS' clause, found {:?}",
self.peek_token()
));
};
let location: String = if self.parse_keywords(vec!["LOCATION"]) {
self.parse_literal_string()?
} else {
return parser_err!("Missing 'LOCATION' clause");
};
Ok(ASTNode::SQLCreateTable {
name: id,
columns,
file_type,
header_row: headers,
location,
})
}
_ => parser_err!(format!(
"Unexpected token after CREATE EXTERNAL TABLE: {:?}",
self.peek_token()
)),
}
} else {
parser_err!(format!(
"Unexpected token after CREATE: {:?}",
self.peek_token()
))
}
}
/// Parse a literal integer/long
fn parse_literal_int(&mut self) -> Result<i64, ParserError> {
match self.next_token() {
Some(Token::Number(s)) => s.parse::<i64>().map_err(|e| {
ParserError::ParserError(format!("Could not parse '{}' as i64: {}", s, e))
}),
other => parser_err!(format!("Expected literal int, found {:?}", other)),
}
}
/// Parse a literal string
fn parse_literal_string(&mut self) -> Result<String, ParserError> {
match self.next_token() {
Some(Token::String(ref s)) => Ok(s.clone()),
other => parser_err!(format!("Expected literal string, found {:?}", other)),
}
}
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
fn parse_data_type(&mut self) -> Result<SQLType, ParserError> {
match self.next_token() {
Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() {
"BOOLEAN" => Ok(SQLType::Boolean),
"UINT8" => Ok(SQLType::UInt8),
"UINT16" => Ok(SQLType::UInt16),
"UINT32" => Ok(SQLType::UInt32),
"UINT64" => Ok(SQLType::UInt64),
"INT8" => Ok(SQLType::Int8),
"INT16" => Ok(SQLType::Int16),
"INT32" | "INT" | "INTEGER" => Ok(SQLType::Int32),
"INT64" | "LONG" => Ok(SQLType::Int64),
"FLOAT32" | "FLOAT" => Ok(SQLType::Float32),
"FLOAT64" | "DOUBLE" => Ok(SQLType::Double64),
"UTF8" | "VARCHAR" | "STRING" => {
// optional length
if self.consume_token(&Token::LParen)? {
let n = self.parse_literal_int()?;
self.consume_token(&Token::RParen)?;
Ok(SQLType::Utf8(n as usize))
} else {
Ok(SQLType::Utf8(100))
}
}
_ => parser_err!(format!("Invalid data type '{:?}'", k)),
},
other => parser_err!(format!("Invalid data type: '{:?}'", other)),
}
}
/// Parse a SELECT statement
fn parse_select(&mut self) -> Result<ASTNode, ParserError> {
let projection = self.parse_expr_list()?;
let relation: Option<Box<ASTNode>> = if self.parse_keyword("FROM") {
//TODO: add support for JOIN
Some(Box::new(self.parse_expr(0)?))
} else {
None
};
let selection = if self.parse_keyword("WHERE") {
Some(Box::new(self.parse_expr(0)?))
} else {
None
};
let group_by = if self.parse_keywords(vec!["GROUP", "BY"]) {
Some(self.parse_expr_list()?)
} else {
None
};
let having = if self.parse_keyword("HAVING") {
Some(Box::new(self.parse_expr(0)?))
} else {
None
};
let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) {
Some(self.parse_order_by_expr_list()?)
} else {
None
};
let limit = if self.parse_keyword("LIMIT") {
self.parse_limit()?
} else {
None
};
if let Some(next_token) = self.peek_token() {
parser_err!(format!(
"Unexpected token at end of SELECT: {:?}",
next_token
))
} else {
Ok(ASTNode::SQLSelect {
projection,
selection,
relation,
limit,
order_by,
group_by,
having,
})
}
}
/// Parse a comma-delimited list of SQL expressions
fn parse_expr_list(&mut self) -> Result<Vec<ASTNode>, ParserError> {
let mut expr_list: Vec<ASTNode> = vec![];
loop {
expr_list.push(self.parse_expr(0)?);
if let Some(t) = self.peek_token() {
if t == Token::Comma {
self.next_token();
} else {
break;
}
} else {
//EOF
break;
}
}
Ok(expr_list)
}
/// Parse a comma-delimited list of SQL ORDER BY expressions
fn parse_order_by_expr_list(&mut self) -> Result<Vec<ASTNode>, ParserError> {
let mut expr_list: Vec<ASTNode> = vec![];
loop {
let expr = self.parse_expr(0)?;
// look for optional ASC / DESC specifier
let asc = match self.peek_token() {
Some(Token::Keyword(k)) => {
self.next_token(); // consume it
match k.to_uppercase().as_ref() {
"ASC" => true,
"DESC" => false,
_ => {
return parser_err!(format!(
"Invalid modifier for ORDER BY expression: {:?}",
k
))
}
}
}
Some(Token::Comma) => true,
Some(other) => {
return parser_err!(format!("Unexpected token after ORDER BY expr: {:?}", other))
}
None => true,
};
expr_list.push(ASTNode::SQLOrderBy {
expr: Box::new(expr),
asc,
});
if let Some(t) = self.peek_token() {
if t == Token::Comma {
self.next_token();
} else {
break;
}
} else {
// EOF
break;
}
}
Ok(expr_list)
}
/// Parse a LIMIT clause
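/// `LIMIT ALL` is equivalent to no limit, so it yields `None`.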
fn parse_limit(&mut self) -> Result<Option<Box<ASTNode>>, ParserError> {
if self.parse_keyword("ALL") {
Ok(None)
} else {
self.parse_literal_int()
.map(|n| Some(Box::new(ASTNode::SQLLiteralLong(n))))
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_simple_select() {
let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5");
let ast = parse_sql(&sql);
match ast {
ASTNode::SQLSelect {
projection, limit, ..
} => {
assert_eq!(3, projection.len());
assert_eq!(Some(Box::new(ASTNode::SQLLiteralLong(5))), limit);
}
_ => assert!(false),
}
}
#[test]
fn parse_select_wildcard() {
let sql = String::from("SELECT * FROM customer");
let ast = parse_sql(&sql);
match ast {
ASTNode::SQLSelect { projection, .. } => {
assert_eq!(1, projection.len());
assert_eq!(ASTNode::SQLWildcard, projection[0]);
}
_ => assert!(false),
}
}
#[test]
fn parse_select_count_wildcard() {
let sql = String::from("SELECT COUNT(*) FROM customer");
let ast = parse_sql(&sql);
match ast {
ASTNode::SQLSelect { projection, .. } => {
assert_eq!(1, projection.len());
assert_eq!(
ASTNode::SQLFunction {
id: "COUNT".to_string(),
args: vec![ASTNode::SQLWildcard],
},
projection[0]
);
}
_ => assert!(false),
}
}
#[test]
fn parse_select_string_predicate() {
let sql = String::from(
"SELECT id, fname, lname FROM customer \
WHERE salary != 'Not Provided' AND salary != ''",
);
let _ast = parse_sql(&sql);
//TODO: add assertions
}
#[test]
fn parse_projection_nested_type() {
let sql = String::from("SELECT customer.address.state FROM foo");
let _ast = parse_sql(&sql);
//TODO: add assertions
}
#[test]
fn parse_compound_expr_1() {
use self::ASTNode::*;
use self::SQLOperator::*;
let sql = String::from("a + b * c");
let ast = parse_sql(&sql);
assert_eq!(
SQLBinaryExpr {
left: Box::new(SQLIdentifier("a".to_string())),
op: Plus,
right: Box::new(SQLBinaryExpr {
left: Box::new(SQLIdentifier("b".to_string())),
op: Multiply,
right: Box::new(SQLIdentifier("c".to_string()))
})
},
ast
);
}
#[test]
fn parse_compound_expr_2() {
use self::ASTNode::*;
use self::SQLOperator::*;
let sql = String::from("a * b + c");
let ast = parse_sql(&sql);
assert_eq!(
SQLBinaryExpr {
left: Box::new(SQLBinaryExpr {
left: Box::new(SQLIdentifier("a".to_string())),
op: Multiply,
right: Box::new(SQLIdentifier("b".to_string()))
}),
op: Plus,
right: Box::new(SQLIdentifier("c".to_string()))
},
ast
);
}
#[test]
fn parse_is_null() {
use self::ASTNode::*;
let sql = String::from("a IS NULL");
let ast = parse_sql(&sql);
assert_eq!(SQLIsNull(Box::new(SQLIdentifier("a".to_string()))), ast);
}
#[test]
fn parse_is_not_null() {
use self::ASTNode::*;
let sql = String::from("a IS NOT NULL");
let ast = parse_sql(&sql);
assert_eq!(SQLIsNotNull(Box::new(SQLIdentifier("a".to_string()))), ast);
}
#[test]
fn parse_select_order_by() {
let sql = String::from(
"SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC",
);
let ast = parse_sql(&sql);
match ast {
ASTNode::SQLSelect { order_by, .. } => {
assert_eq!(
Some(vec![
ASTNode::SQLOrderBy {
expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())),
asc: true,
},
ASTNode::SQLOrderBy {
expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())),
asc: false,
},
]),
order_by
);
}
_ => assert!(false),
}
}
#[test]
fn parse_select_group_by() {
let sql = String::from("SELECT id, fname, lname FROM customer GROUP BY lname, fname");
let ast = parse_sql(&sql);
match ast {
ASTNode::SQLSelect { group_by, .. } => {
assert_eq!(
Some(vec![
ASTNode::SQLIdentifier("lname".to_string()),
ASTNode::SQLIdentifier("fname".to_string()),
]),
group_by
);
}
_ => assert!(false),
}
}
#[test]
fn parse_limit_accepts_all() {
let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT ALL");
let ast = parse_sql(&sql);
match ast {
ASTNode::SQLSelect {
projection, limit, ..
} => {
assert_eq!(3, projection.len());
assert_eq!(None, limit);
}
_ => assert!(false),
}
}
#[test]
fn parse_cast() {
let sql = String::from("SELECT CAST(id AS DOUBLE) FROM customer");
let ast = parse_sql(&sql);
match ast {
ASTNode::SQLSelect { projection, .. } => {
assert_eq!(1, projection.len());
assert_eq!(
ASTNode::SQLCast {
expr: Box::new(ASTNode::SQLIdentifier("id".to_string())),
data_type: SQLType::Double64
},
projection[0]
);
}
_ => assert!(false),
}
}
#[test]
fn parse_create_external_table_csv_with_header_row() {
let sql = String::from(
"CREATE EXTERNAL TABLE uk_cities (\
name VARCHAR(100) NOT NULL,\
lat DOUBLE NULL,\
lng DOUBLE NULL) \
STORED AS CSV WITH HEADER ROW \
LOCATION '/mnt/ssd/uk_cities.csv'",
);
let ast = parse_sql(&sql);
match ast {
ASTNode::SQLCreateTable {
name,
columns,
file_type,
header_row,
location,
} => {
assert_eq!("uk_cities", name);
assert_eq!(3, columns.len());
assert_eq!(FileType::CSV, file_type);
assert_eq!(true, header_row);
assert_eq!("/mnt/ssd/uk_cities.csv", location);
let c_name = &columns[0];
assert_eq!("name", c_name.name);
assert_eq!(SQLType::Utf8(100), c_name.data_type);
assert_eq!(false, c_name.allow_null);
let c_lat = &columns[1];
assert_eq!("lat", c_lat.name);
assert_eq!(SQLType::Double64, c_lat.data_type);
assert_eq!(true, c_lat.allow_null);
let c_lng = &columns[2];
assert_eq!("lng", c_lng.name);
assert_eq!(SQLType::Double64, c_lng.data_type);
assert_eq!(true, c_lng.allow_null);
}
_ => assert!(false),
}
}
#[test]
fn parse_create_external_table_csv_without_header_row() {
let sql = String::from(
"CREATE EXTERNAL TABLE uk_cities (\
name VARCHAR(100) NOT NULL,\
lat DOUBLE NOT NULL,\
lng DOUBLE NOT NULL) \
STORED AS CSV WITHOUT HEADER ROW \
LOCATION '/mnt/ssd/uk_cities.csv'",
);
let ast = parse_sql(&sql);
match ast {
ASTNode::SQLCreateTable {
name,
columns,
file_type,
header_row,
location,
} => {
assert_eq!("uk_cities", name);
assert_eq!(3, columns.len());
assert_eq!(FileType::CSV, file_type);
assert_eq!(false, header_row);
assert_eq!("/mnt/ssd/uk_cities.csv", location);
}
_ => assert!(false),
}
}
#[test]
fn parse_create_external_table_parquet() {
let sql = String::from(
"CREATE EXTERNAL TABLE uk_cities \
STORED AS PARQUET \
LOCATION '/mnt/ssd/uk_cities.parquet'",
);
let ast = parse_sql(&sql);
match ast {
ASTNode::SQLCreateTable {
name,
columns,
file_type,
location,
..
} => {
assert_eq!("uk_cities", name);
assert_eq!(0, columns.len());
assert_eq!(FileType::Parquet, file_type);
assert_eq!("/mnt/ssd/uk_cities.parquet", location);
}
_ => assert!(false),
}
}
#[test]
fn parse_scalar_function_in_projection() {
let sql = String::from("SELECT sqrt(id) FROM foo");
let ast = parse_sql(&sql);
if let ASTNode::SQLSelect { projection, .. } = ast {
assert_eq!(
vec![ASTNode::SQLFunction {
id: String::from("sqrt"),
args: vec![ASTNode::SQLIdentifier(String::from("id"))],
}],
projection
);
} else {
assert!(false);
}
}
#[test]
fn parse_aggregate_with_group_by() {
let sql = String::from("SELECT a, COUNT(1), MIN(b), MAX(b) FROM foo GROUP BY a");
let _ast = parse_sql(&sql);
//TODO: assertions
}
#[test]
fn parse_select_version() {
let sql = "SELECT @@version";
match parse_sql(&sql) {
ASTNode::SQLSelect { ref projection, .. } => {
assert_eq!(
projection[0],
ASTNode::SQLIdentifier("@@version".to_string())
);
}
_ => panic!(),
}
}
fn parse_sql(sql: &str) -> ASTNode {
let mut tokenizer = Tokenizer::new(&sql);
let tokens = tokenizer.tokenize().unwrap();
let mut parser = Parser::new(tokens);
parser.parse().unwrap()
}
}
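
For orientation, a minimal usage sketch of how the new public API fits together (the crate name `sqlparser` is an assumption; the module paths follow the lib.rs shown above):

extern crate sqlparser; // crate name assumed

use sqlparser::sqlast::ASTNode;
use sqlparser::sqlparser::Parser;

fn main() {
    // tokenize and parse in one step via the static helper
    let sql = "SELECT fname, lname FROM customer WHERE id = 1".to_string();
    match Parser::parse_sql(sql) {
        Ok(ASTNode::SQLSelect { projection, .. }) => {
            println!("SELECT with {} projected expressions", projection.len())
        }
        Ok(other) => println!("parsed: {:?}", other),
        Err(e) => println!("parse error: {:?}", e),
    }
}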

src/sqltokenizer.rs (new file, 427 lines)

@@ -0,0 +1,427 @@
// Copyright 2018 Grove Enterprises LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! SQL Tokenizer
use std::iter::Peekable;
use std::str::Chars;
use fnv::FnvHashSet;
/// SQL Token enumeration
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
/// SQL identifier e.g. table or column name
Identifier(String),
/// SQL keyword e.g. Keyword("SELECT")
Keyword(String),
/// Numeric literal
Number(String),
/// String literal
String(String),
/// Comma
Comma,
/// Whitespace (space, tab, etc)
Whitespace,
/// Equality operator `=`
Eq,
/// Not Equals operator `!=` or `<>`
Neq,
/// Less Than operator `<`
Lt,
/// Greater Than operator `>`
Gt,
/// Less Than Or Equals operator `<=`
LtEq,
/// Greater Than Or Equals operator `>=`
GtEq,
/// Plus operator `+`
Plus,
/// Minus operator `-`
Minus,
/// Multiplication operator `*`
Mult,
/// Division operator `/`
Div,
/// Modulo Operator `%`
Mod,
/// Left parenthesis `(`
LParen,
/// Right parenthesis `)`
RParen,
/// Period (used for compound identifiers or projections into nested types)
Period,
}
/// Tokenizer error
#[derive(Debug)]
pub struct TokenizerError(String);
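// FNV is a fast, non-cryptographic hash, a good fit for a small set of short, static keyword strings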
lazy_static! {
static ref KEYWORDS: FnvHashSet<&'static str> = {
let mut m = FnvHashSet::default();
m.insert("SELECT");
m.insert("FROM");
m.insert("WHERE");
m.insert("LIMIT");
m.insert("ORDER");
m.insert("GROUP");
m.insert("BY");
m.insert("HAVING");
m.insert("UNION");
m.insert("ALL");
m.insert("INSERT");
m.insert("UPDATE");
m.insert("DELETE");
m.insert("IN");
m.insert("IS");
m.insert("NULL");
m.insert("SET");
m.insert("CREATE");
m.insert("EXTERNAL");
m.insert("TABLE");
m.insert("ASC");
m.insert("DESC");
m.insert("AND");
m.insert("OR");
m.insert("NOT");
m.insert("AS");
m.insert("STORED");
m.insert("CSV");
m.insert("PARQUET");
m.insert("LOCATION");
m.insert("WITH");
m.insert("WITHOUT");
m.insert("HEADER");
m.insert("ROW");
// SQL types
m.insert("STRING");
m.insert("VARCHAR");
m.insert("FLOAT");
m.insert("DOUBLE");
m.insert("INT");
m.insert("INTEGER");
m.insert("LONG");
// Arrow native types
m.insert("BOOLEAN");
m.insert("UINT8");
m.insert("UINT16");
m.insert("UINT32");
m.insert("UINT64");
m.insert("INT8");
m.insert("INT16");
m.insert("INT32");
m.insert("INT64");
m.insert("FLOAT32");
m.insert("FLOAT64");
m.insert("UTF8");
m
};
}
/// SQL Tokenizer
pub struct Tokenizer {
pub query: String,
}
impl Tokenizer {
/// Create a new SQL tokenizer for the specified SQL statement
pub fn new(query: &str) -> Self {
Self {
query: query.to_string(),
}
}
/// Tokenize the statement and produce a vector of tokens
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
let mut peekable = self.query.chars().peekable();
let mut tokens: Vec<Token> = vec![];
while let Some(token) = self.next_token(&mut peekable)? {
tokens.push(token);
}
Ok(tokens
.into_iter()
.filter(|t| match t {
Token::Whitespace => false,
_ => true,
})
.collect())
}
/// Get the next token or return None
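/// The tokenizer uses one character of lookahead: peek at the next char to
/// pick a token class, then consume characters until the token is complete.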
fn next_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<Token>, TokenizerError> {
//println!("next_token: {:?}", chars.peek());
match chars.peek() {
Some(&ch) => match ch {
// whitespace
' ' | '\t' | '\n' => {
chars.next(); // consume
Ok(Some(Token::Whitespace))
}
// identifier or keyword
'a'...'z' | 'A'...'Z' | '_' | '@' => {
let mut s = String::new();
while let Some(&ch) = chars.peek() {
match ch {
'a'...'z' | 'A'...'Z' | '_' | '0'...'9' | '@' => {
chars.next(); // consume
s.push(ch);
}
_ => break,
}
}
let upper_str = s.to_uppercase();
if KEYWORDS.contains(upper_str.as_str()) {
Ok(Some(Token::Keyword(upper_str)))
} else {
Ok(Some(Token::Identifier(s)))
}
}
// string
'\'' => {
//TODO: handle escaped quotes in string
//TODO: handle EOF before terminating quote
let mut s = String::new();
chars.next(); // consume
while let Some(&ch) = chars.peek() {
match ch {
'\'' => {
chars.next(); // consume
break;
}
_ => {
chars.next(); // consume
s.push(ch);
}
}
}
Ok(Some(Token::String(s)))
}
// numbers
'0'...'9' => {
let mut s = String::new();
while let Some(&ch) = chars.peek() {
match ch {
'0'...'9' | '.' => {
chars.next(); // consume
s.push(ch);
}
_ => break,
}
}
Ok(Some(Token::Number(s)))
}
// punctuation
',' => {
chars.next();
Ok(Some(Token::Comma))
}
'(' => {
chars.next();
Ok(Some(Token::LParen))
}
')' => {
chars.next();
Ok(Some(Token::RParen))
}
// operators
'+' => {
chars.next();
Ok(Some(Token::Plus))
}
'-' => {
chars.next();
Ok(Some(Token::Minus))
}
'*' => {
chars.next();
Ok(Some(Token::Mult))
}
'/' => {
chars.next();
Ok(Some(Token::Div))
}
'%' => {
chars.next();
Ok(Some(Token::Mod))
}
'=' => {
chars.next();
Ok(Some(Token::Eq))
}
'.' => {
chars.next();
Ok(Some(Token::Period))
}
'!' => {
chars.next(); // consume
match chars.peek() {
Some(&ch) => match ch {
'=' => {
chars.next();
Ok(Some(Token::Neq))
}
_ => Err(TokenizerError(format!("TBD"))),
},
None => Err(TokenizerError(format!("TBD"))),
}
}
'<' => {
chars.next(); // consume
match chars.peek() {
Some(&ch) => match ch {
'=' => {
chars.next();
Ok(Some(Token::LtEq))
}
'>' => {
chars.next();
Ok(Some(Token::Neq))
}
_ => Ok(Some(Token::Lt)),
},
None => Ok(Some(Token::Lt)),
}
}
'>' => {
chars.next(); // consume
match chars.peek() {
Some(&ch) => match ch {
'=' => {
chars.next();
Ok(Some(Token::GtEq))
}
_ => Ok(Some(Token::Gt)),
},
None => Ok(Some(Token::Gt)),
}
}
_ => Err(TokenizerError(format!(
"unhandled char '{}' in tokenizer",
ch
))),
},
None => Ok(None),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn tokenize_select_1() {
let sql = String::from("SELECT 1");
let mut tokenizer = Tokenizer::new(&sql);
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::Keyword(String::from("SELECT")),
Token::Number(String::from("1")),
];
compare(expected, tokens);
}
#[test]
fn tokenize_scalar_function() {
let sql = String::from("SELECT sqrt(1)");
let mut tokenizer = Tokenizer::new(&sql);
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::Keyword(String::from("SELECT")),
Token::Identifier(String::from("sqrt")),
Token::LParen,
Token::Number(String::from("1")),
Token::RParen,
];
compare(expected, tokens);
}
#[test]
fn tokenize_simple_select() {
let sql = String::from("SELECT * FROM customer WHERE id = 1 LIMIT 5");
let mut tokenizer = Tokenizer::new(&sql);
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::Keyword(String::from("SELECT")),
Token::Mult,
Token::Keyword(String::from("FROM")),
Token::Identifier(String::from("customer")),
Token::Keyword(String::from("WHERE")),
Token::Identifier(String::from("id")),
Token::Eq,
Token::Number(String::from("1")),
Token::Keyword(String::from("LIMIT")),
Token::Number(String::from("5")),
];
compare(expected, tokens);
}
#[test]
fn tokenize_string_predicate() {
let sql = String::from("SELECT * FROM customer WHERE salary != 'Not Provided'");
let mut tokenizer = Tokenizer::new(&sql);
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::Keyword(String::from("SELECT")),
Token::Mult,
Token::Keyword(String::from("FROM")),
Token::Identifier(String::from("customer")),
Token::Keyword(String::from("WHERE")),
Token::Identifier(String::from("salary")),
Token::Neq,
Token::String(String::from("Not Provided")),
];
compare(expected, tokens);
}
#[test]
fn tokenize_is_null() {
let sql = String::from("a IS NULL");
let mut tokenizer = Tokenizer::new(&sql);
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::Identifier(String::from("a")),
Token::Keyword("IS".to_string()),
Token::Keyword("NULL".to_string()),
];
compare(expected, tokens);
}
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
//println!("------------------------------");
//println!("tokens = {:?}", actual);
//println!("expected = {:?}", expected);
//println!("------------------------------");
assert_eq!(expected, actual);
}
}


@@ -1,124 +0,0 @@
use std::cmp::PartialEq;
use std::fmt::Debug;
/// Simple holder for a sequence of characters that supports iteration and mark/reset methods
pub struct CharSeq {
chars: Vec<char>,
i: usize,
m: usize
}
impl CharSeq {
/// Create a CharSeq from a string
pub fn new(sql: &str) -> Self {
CharSeq {
chars: sql.chars().collect(),
i: 0,
m: 0
}
}
/// Mark the current index
pub fn mark(&mut self) {
self.m = self.i;
}
/// Reset the index
pub fn reset(&mut self) {
self.i = self.m;
}
/// Peek the next char
pub fn peek(&mut self) -> Option<&char> {
if self.i < self.chars.len() {
Some(&self.chars[self.i])
} else {
None
}
}
/// Get the next char
pub fn next(&mut self) -> Option<char> {
if self.i < self.chars.len() {
self.i += 1;
Some(self.chars[self.i-1])
} else {
None
}
}
}
#[derive(Debug)]
pub struct Position {
line: usize,
col: usize
}
impl Position {
pub fn new(line: usize, col: usize) -> Self {
Position { line, col }
}
}
#[derive(Debug)]
pub enum TokenizerError {
UnexpectedChar(char, Position),
UnexpectedEof(Position),
UnterminatedStringLiteral(Position),
Custom(String)
}
/// SQL Tokens
#[derive(Debug,PartialEq)]
pub enum SQLToken {
Whitespace(char),
Keyword(String),
Identifier(String),
Literal(String), //TODO: need to model different types of literal
Plus,
Minus,
Mult,
Divide,
Eq,
Not,
NotEq,
Gt,
GtEq,
Lt,
LtEq,
LParen,
RParen,
Comma,
}
pub trait SQLTokenizer {
/// Get the precedence of a token
fn precedence(&self, token: &SQLToken) -> usize;
fn peek_token(&mut self) -> Result<Option<SQLToken>, TokenizerError>;
/// return a reference to the next token and advance the index
fn next_token(&mut self) -> Result<Option<SQLToken>, TokenizerError>;
}
pub fn tokenize(sql: &str, tokenizer: &mut SQLTokenizer) -> Result<Vec<SQLToken>, TokenizerError> {
let mut chars = CharSeq::new(sql);
let mut tokens : Vec<SQLToken> = vec![];
loop {
match tokenizer.next_token()? {
Some(SQLToken::Whitespace(_)) => { /* ignore */ },
Some(token) => {
println!("Token: {:?}", token);
tokens.push(token)
},
None => break
}
}
Ok(tokens)
}