make precision on FLOAT optional for now

This commit is contained in:
Andy Grove 2018-09-03 14:56:26 -06:00
parent 4dbd76984b
commit cfe7ee5613
3 changed files with 52 additions and 40 deletions

View file

@ -0,0 +1,6 @@
# Writing a Custom SQL Parser
I have explored many different ways of building this library so that it is easy to extend for custom SQL dialects. Most of my attempts ended in failure, but I have now found a workable solution. It is not without downsides, but it seems to be the most pragmatic approach.
The concept is simply to write a new parser that delegates to the ANSI parser, so that as much of the core functionality as possible can be reused.

View file

@ -95,8 +95,8 @@ pub enum SQLType {
Int, Int,
/// Big integer /// Big integer
BigInt, BigInt,
/// Floating point with precision e.g. FLOAT(8) /// Floating point with optional precision e.g. FLOAT(8)
Float(usize), Float(Option<usize>),
/// Floating point e.g. REAL /// Floating point e.g. REAL
Real, Real,
/// Double e.g. DOUBLE PRECISION /// Double e.g. DOUBLE PRECISION

View file

@ -64,32 +64,23 @@ impl Parser {
} }
/// Parse tokens until the precedence changes /// Parse tokens until the precedence changes
fn parse_expr(&mut self, precedence: u8) -> Result<ASTNode, ParserError> { pub fn parse_expr(&mut self, precedence: u8) -> Result<ASTNode, ParserError> {
// println!("parse_expr() precendence = {}", precedence);
let mut expr = self.parse_prefix()?; let mut expr = self.parse_prefix()?;
// println!("parsed prefix: {:?}", expr);
loop { loop {
let next_precedence = self.get_next_precedence()?; let next_precedence = self.get_next_precedence()?;
if precedence >= next_precedence { if precedence >= next_precedence {
// println!("break on precedence change ({} >= {})", precedence, next_precedence);
break; break;
} }
if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? { if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? {
// println!("parsed infix: {:?}", infix_expr);
expr = infix_expr; expr = infix_expr;
} }
} }
// println!("parse_expr() returning {:?}", expr);
Ok(expr) Ok(expr)
} }
/// Parse an expression prefix /// Parse an expression prefix
fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> { pub fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
match self.next_token() { match self.next_token() {
Some(t) => { Some(t) => {
match t { match t {
@ -150,7 +141,7 @@ impl Parser {
} }
/// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)`
fn parse_cast_expression(&mut self) -> Result<ASTNode, ParserError> { pub fn parse_cast_expression(&mut self) -> Result<ASTNode, ParserError> {
let expr = self.parse_expr(0)?; let expr = self.parse_expr(0)?;
self.consume_token(&Token::Keyword("AS".to_string()))?; self.consume_token(&Token::Keyword("AS".to_string()))?;
let data_type = self.parse_data_type()?; let data_type = self.parse_data_type()?;
@ -162,7 +153,7 @@ impl Parser {
} }
/// Parse an expression infix (typically an operator) /// Parse an expression infix (typically an operator)
fn parse_infix( pub fn parse_infix(
&mut self, &mut self,
expr: ASTNode, expr: ASTNode,
precedence: u8, precedence: u8,
@ -206,7 +197,7 @@ impl Parser {
} }
/// Convert a token operator to an AST operator /// Convert a token operator to an AST operator
fn to_sql_operator(&self, tok: &Token) -> Result<SQLOperator, ParserError> { pub fn to_sql_operator(&self, tok: &Token) -> Result<SQLOperator, ParserError> {
match tok { match tok {
&Token::Eq => Ok(SQLOperator::Eq), &Token::Eq => Ok(SQLOperator::Eq),
&Token::Neq => Ok(SQLOperator::NotEq), &Token::Neq => Ok(SQLOperator::NotEq),
@ -226,7 +217,7 @@ impl Parser {
} }
/// Get the precedence of the next token /// Get the precedence of the next token
fn get_next_precedence(&self) -> Result<u8, ParserError> { pub fn get_next_precedence(&self) -> Result<u8, ParserError> {
if self.index < self.tokens.len() { if self.index < self.tokens.len() {
self.get_precedence(&self.tokens[self.index]) self.get_precedence(&self.tokens[self.index])
} else { } else {
@ -235,7 +226,7 @@ impl Parser {
} }
/// Get the precedence of a token /// Get the precedence of a token
fn get_precedence(&self, tok: &Token) -> Result<u8, ParserError> { pub fn get_precedence(&self, tok: &Token) -> Result<u8, ParserError> {
//println!("get_precedence() {:?}", tok); //println!("get_precedence() {:?}", tok);
match tok { match tok {
@ -252,7 +243,7 @@ impl Parser {
} }
/// Peek at the next token /// Peek at the next token
fn peek_token(&mut self) -> Option<Token> { pub fn peek_token(&mut self) -> Option<Token> {
if self.index < self.tokens.len() { if self.index < self.tokens.len() {
Some(self.tokens[self.index].clone()) Some(self.tokens[self.index].clone())
} else { } else {
@ -261,7 +252,7 @@ impl Parser {
} }
/// Get the next token and increment the token index /// Get the next token and increment the token index
fn next_token(&mut self) -> Option<Token> { pub fn next_token(&mut self) -> Option<Token> {
if self.index < self.tokens.len() { if self.index < self.tokens.len() {
self.index = self.index + 1; self.index = self.index + 1;
Some(self.tokens[self.index - 1].clone()) Some(self.tokens[self.index - 1].clone())
@ -271,7 +262,7 @@ impl Parser {
} }
/// Get the previous token and decrement the token index /// Get the previous token and decrement the token index
fn prev_token(&mut self) -> Option<Token> { pub fn prev_token(&mut self) -> Option<Token> {
if self.index > 0 { if self.index > 0 {
Some(self.tokens[self.index - 1].clone()) Some(self.tokens[self.index - 1].clone())
} else { } else {
@ -280,7 +271,7 @@ impl Parser {
} }
/// Look for an expected keyword and consume it if it exists /// Look for an expected keyword and consume it if it exists
fn parse_keyword(&mut self, expected: &'static str) -> bool { pub fn parse_keyword(&mut self, expected: &'static str) -> bool {
match self.peek_token() { match self.peek_token() {
Some(Token::Keyword(k)) => { Some(Token::Keyword(k)) => {
if expected.eq_ignore_ascii_case(k.as_str()) { if expected.eq_ignore_ascii_case(k.as_str()) {
@ -295,7 +286,7 @@ impl Parser {
} }
/// Look for an expected sequence of keywords and consume them if they exist /// Look for an expected sequence of keywords and consume them if they exist
fn parse_keywords(&mut self, keywords: Vec<&'static str>) -> bool { pub fn parse_keywords(&mut self, keywords: Vec<&'static str>) -> bool {
let index = self.index; let index = self.index;
for keyword in keywords { for keyword in keywords {
//println!("parse_keywords aborting .. expecting {}", keyword); //println!("parse_keywords aborting .. expecting {}", keyword);
@ -312,7 +303,7 @@ impl Parser {
//TODO: this function is inconsistent and sometimes returns bool and sometimes fails //TODO: this function is inconsistent and sometimes returns bool and sometimes fails
/// Consume the next token if it matches the expected token, otherwise return an error /// Consume the next token if it matches the expected token, otherwise return an error
fn consume_token(&mut self, expected: &Token) -> Result<bool, ParserError> { pub fn consume_token(&mut self, expected: &Token) -> Result<bool, ParserError> {
match self.peek_token() { match self.peek_token() {
Some(ref t) => if *t == *expected { Some(ref t) => if *t == *expected {
self.next_token(); self.next_token();
@ -329,7 +320,7 @@ impl Parser {
} }
/// Parse a SQL CREATE statement /// Parse a SQL CREATE statement
fn parse_create(&mut self) -> Result<ASTNode, ParserError> { pub fn parse_create(&mut self) -> Result<ASTNode, ParserError> {
if self.parse_keywords(vec!["TABLE"]) { if self.parse_keywords(vec!["TABLE"]) {
match self.next_token() { match self.next_token() {
Some(Token::Identifier(id)) => { Some(Token::Identifier(id)) => {
@ -398,7 +389,7 @@ impl Parser {
} }
/// Parse a literal integer/long /// Parse a literal integer/long
fn parse_literal_int(&mut self) -> Result<i64, ParserError> { pub fn parse_literal_int(&mut self) -> Result<i64, ParserError> {
match self.next_token() { match self.next_token() {
Some(Token::Number(s)) => s.parse::<i64>().map_err(|e| { Some(Token::Number(s)) => s.parse::<i64>().map_err(|e| {
ParserError::ParserError(format!("Could not parse '{}' as i64: {}", s, e)) ParserError::ParserError(format!("Could not parse '{}' as i64: {}", s, e))
@ -408,19 +399,19 @@ impl Parser {
} }
/// Parse a literal string /// Parse a literal string
// fn parse_literal_string(&mut self) -> Result<String, ParserError> { pub fn parse_literal_string(&mut self) -> Result<String, ParserError> {
// match self.next_token() { match self.next_token() {
// Some(Token::String(ref s)) => Ok(s.clone()), Some(Token::String(ref s)) => Ok(s.clone()),
// other => parser_err!(format!("Expected literal string, found {:?}", other)), other => parser_err!(format!("Expected literal string, found {:?}", other)),
// } }
// } }
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
fn parse_data_type(&mut self) -> Result<SQLType, ParserError> { pub fn parse_data_type(&mut self) -> Result<SQLType, ParserError> {
match self.next_token() { match self.next_token() {
Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() { Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() {
"BOOLEAN" => Ok(SQLType::Boolean), "BOOLEAN" => Ok(SQLType::Boolean),
"FLOAT" => Ok(SQLType::Float(self.parse_precision()?)), "FLOAT" => Ok(SQLType::Float(self.parse_optional_precision()?)),
"REAL" => Ok(SQLType::Real), "REAL" => Ok(SQLType::Real),
"DOUBLE" => Ok(SQLType::Double), "DOUBLE" => Ok(SQLType::Double),
"SMALLINT" => Ok(SQLType::SmallInt), "SMALLINT" => Ok(SQLType::SmallInt),
@ -433,12 +424,12 @@ impl Parser {
} }
} }
fn parse_precision(&mut self) -> Result<usize, ParserError> { pub fn parse_precision(&mut self) -> Result<usize, ParserError> {
//TODO: error handling //TODO: error handling
Ok(self.parse_optional_precision()?.unwrap()) Ok(self.parse_optional_precision()?.unwrap())
} }
fn parse_optional_precision(&mut self) -> Result<Option<usize>, ParserError> { pub fn parse_optional_precision(&mut self) -> Result<Option<usize>, ParserError> {
if self.consume_token(&Token::LParen)? { if self.consume_token(&Token::LParen)? {
let n = self.parse_literal_int()?; let n = self.parse_literal_int()?;
//TODO: check return value of reading rparen //TODO: check return value of reading rparen
@ -450,7 +441,7 @@ impl Parser {
} }
/// Parse a SELECT statement /// Parse a SELECT statement
fn parse_select(&mut self) -> Result<ASTNode, ParserError> { pub fn parse_select(&mut self) -> Result<ASTNode, ParserError> {
let projection = self.parse_expr_list()?; let projection = self.parse_expr_list()?;
let relation: Option<Box<ASTNode>> = if self.parse_keyword("FROM") { let relation: Option<Box<ASTNode>> = if self.parse_keyword("FROM") {
@ -509,7 +500,7 @@ impl Parser {
} }
/// Parse a comma-delimited list of SQL expressions /// Parse a comma-delimited list of SQL expressions
fn parse_expr_list(&mut self) -> Result<Vec<ASTNode>, ParserError> { pub fn parse_expr_list(&mut self) -> Result<Vec<ASTNode>, ParserError> {
let mut expr_list: Vec<ASTNode> = vec![]; let mut expr_list: Vec<ASTNode> = vec![];
loop { loop {
expr_list.push(self.parse_expr(0)?); expr_list.push(self.parse_expr(0)?);
@ -528,7 +519,7 @@ impl Parser {
} }
/// Parse a comma-delimited list of SQL ORDER BY expressions /// Parse a comma-delimited list of SQL ORDER BY expressions
fn parse_order_by_expr_list(&mut self) -> Result<Vec<ASTNode>, ParserError> { pub fn parse_order_by_expr_list(&mut self) -> Result<Vec<ASTNode>, ParserError> {
let mut expr_list: Vec<ASTNode> = vec![]; let mut expr_list: Vec<ASTNode> = vec![];
loop { loop {
let expr = self.parse_expr(0)?; let expr = self.parse_expr(0)?;
@ -575,7 +566,7 @@ impl Parser {
} }
/// Parse a LIMIT clause /// Parse a LIMIT clause
fn parse_limit(&mut self) -> Result<Option<Box<ASTNode>>, ParserError> { pub fn parse_limit(&mut self) -> Result<Option<Box<ASTNode>>, ParserError> {
if self.parse_keyword("ALL") { if self.parse_keyword("ALL") {
Ok(None) Ok(None)
} else { } else {
@ -845,6 +836,21 @@ mod tests {
//TODO: assertions //TODO: assertions
} }
// Verifies that a single-quoted literal in a SELECT projection is parsed
// into an `ASTNode::SQLLiteralString` carrying the unquoted text.
#[test]
fn parse_literal_string() {
    let sql = "SELECT 'one'";
    let ast = parse_sql(&sql);
    // Anything other than a SELECT node means the statement was mis-parsed.
    if let ASTNode::SQLSelect { ref projection, .. } = ast {
        let expected = ASTNode::SQLLiteralString("one".to_string());
        assert_eq!(projection[0], expected);
    } else {
        panic!();
    }
}
#[test] #[test]
fn parse_select_version() { fn parse_select_version() {
let sql = "SELECT @@version"; let sql = "SELECT @@version";