Revert "Rework keyword/identifier parsing"

This commit is contained in:
Andy Grove 2019-03-08 07:27:27 -07:00 committed by GitHub
parent 391c53d5c9
commit 43f4c6875f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 695 additions and 791 deletions

View file

@ -14,7 +14,6 @@
//! SQL Parser
use super::dialect::keywords;
use super::dialect::Dialect;
use super::sqlast::*;
use super::sqltokenizer::*;
@ -78,7 +77,9 @@ impl Parser {
break;
}
expr = self.parse_infix(expr, next_precedence)?;
if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? {
expr = infix_expr;
}
}
Ok(expr)
}
@ -91,7 +92,7 @@ impl Parser {
loop {
// stop parsing on `NULL` | `NOT NULL`
match self.peek_token() {
Some(Token::SQLWord(ref k)) if k.keyword == "NOT" || k.keyword == "NULL" => break,
Some(Token::Keyword(ref k)) if k == "NOT" || k == "NULL" => break,
_ => {}
}
@ -101,7 +102,9 @@ impl Parser {
break;
}
expr = self.parse_infix(expr, next_precedence)?;
if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? {
expr = infix_expr;
}
}
Ok(expr)
}
@ -110,7 +113,7 @@ impl Parser {
pub fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
match self.next_token() {
Some(t) => match t {
Token::SQLWord(w) => match w.keyword.as_ref() {
Token::Keyword(k) => match k.to_uppercase().as_ref() {
"SELECT" => Ok(self.parse_select()?),
"CREATE" => Ok(self.parse_create()?),
"DELETE" => Ok(self.parse_delete()?),
@ -122,31 +125,38 @@ impl Parser {
self.parse_sql_value()
}
"CASE" => self.parse_case_expression(),
"CAST" => self.parse_cast_expression(),
"NOT" => Ok(ASTNode::SQLUnary {
operator: SQLOperator::Not,
expr: Box::new(self.parse_expr(0)?),
}),
_ => match self.peek_token() {
Some(Token::LParen) => self.parse_function(&w.value),
Some(Token::Period) => {
let mut id_parts: Vec<String> = vec![w.value];
while self.consume_token(&Token::Period) {
match self.next_token() {
Some(Token::SQLWord(w)) => id_parts.push(w.value),
_ => {
return parser_err!(format!(
"Error parsing compound identifier"
));
}
}
}
Ok(ASTNode::SQLCompoundIdentifier(id_parts))
}
_ => Ok(ASTNode::SQLIdentifier(w.value)),
},
_ => return parser_err!(format!("No prefix parser for keyword {}", k)),
},
Token::Mult => Ok(ASTNode::SQLWildcard),
Token::Identifier(id) => {
if "CAST" == id.to_uppercase() {
self.parse_cast_expression()
} else {
match self.peek_token() {
Some(Token::LParen) => self.parse_function(&id),
Some(Token::Period) => {
let mut id_parts: Vec<String> = vec![id];
while self.peek_token() == Some(Token::Period) {
self.expect_token(&Token::Period)?;
match self.next_token() {
Some(Token::Identifier(id)) => id_parts.push(id),
_ => {
return parser_err!(format!(
"Error parsing compound identifier"
))
}
}
}
Ok(ASTNode::SQLCompoundIdentifier(id_parts))
}
_ => Ok(ASTNode::SQLIdentifier(id)),
}
}
}
Token::Number(_) | Token::SingleQuotedString(_) => {
self.prev_token();
self.parse_sql_value()
@ -238,36 +248,40 @@ impl Parser {
}
/// Parse an expression infix (typically an operator)
pub fn parse_infix(&mut self, expr: ASTNode, precedence: u8) -> Result<ASTNode, ParserError> {
pub fn parse_infix(
&mut self,
expr: ASTNode,
precedence: u8,
) -> Result<Option<ASTNode>, ParserError> {
debug!("parsing infix");
match self.next_token() {
Some(tok) => match tok {
Token::SQLWord(ref k) if k.keyword == "IS" => {
Token::Keyword(ref k) if k == "IS" => {
if self.parse_keywords(vec!["NULL"]) {
Ok(ASTNode::SQLIsNull(Box::new(expr)))
Ok(Some(ASTNode::SQLIsNull(Box::new(expr))))
} else if self.parse_keywords(vec!["NOT", "NULL"]) {
Ok(ASTNode::SQLIsNotNull(Box::new(expr)))
Ok(Some(ASTNode::SQLIsNotNull(Box::new(expr))))
} else {
parser_err!("Invalid tokens after IS")
}
}
Token::SQLWord(ref k) if k.keyword == "NOT" => {
Token::Keyword(ref k) if k == "NOT" => {
if self.parse_keywords(vec!["LIKE"]) {
Ok(ASTNode::SQLBinaryExpr {
Ok(Some(ASTNode::SQLBinaryExpr {
left: Box::new(expr),
op: SQLOperator::NotLike,
right: Box::new(self.parse_expr(precedence)?),
})
}))
} else {
parser_err!("Invalid tokens after NOT")
}
}
Token::DoubleColon => {
let pg_cast = self.parse_pg_cast(expr)?;
Ok(pg_cast)
}
Token::SQLWord(_)
| Token::Eq
Token::Keyword(_) => Ok(Some(ASTNode::SQLBinaryExpr {
left: Box::new(expr),
op: self.to_sql_operator(&tok)?,
right: Box::new(self.parse_expr(precedence)?),
})),
Token::Eq
| Token::Neq
| Token::Gt
| Token::GtEq
@ -277,16 +291,18 @@ impl Parser {
| Token::Minus
| Token::Mult
| Token::Mod
| Token::Div => Ok(ASTNode::SQLBinaryExpr {
| Token::Div => Ok(Some(ASTNode::SQLBinaryExpr {
left: Box::new(expr),
op: self.to_sql_operator(&tok)?,
right: Box::new(self.parse_expr(precedence)?),
}),
})),
Token::DoubleColon => {
let pg_cast = self.parse_pg_cast(expr)?;
Ok(Some(pg_cast))
}
_ => parser_err!(format!("No infix parser for token {:?}", tok)),
},
// This is not supposed to happen, because of the precedence check
// in parse_expr.
None => parser_err!("Unexpected EOF in parse_infix"),
None => Ok(None),
}
}
@ -304,10 +320,10 @@ impl Parser {
&Token::Mult => Ok(SQLOperator::Multiply),
&Token::Div => Ok(SQLOperator::Divide),
&Token::Mod => Ok(SQLOperator::Modulus),
&Token::SQLWord(ref k) if k.keyword == "AND" => Ok(SQLOperator::And),
&Token::SQLWord(ref k) if k.keyword == "OR" => Ok(SQLOperator::Or),
//&Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(SQLOperator::Not),
&Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(SQLOperator::Like),
&Token::Keyword(ref k) if k == "AND" => Ok(SQLOperator::And),
&Token::Keyword(ref k) if k == "OR" => Ok(SQLOperator::Or),
//&Token::Keyword(ref k) if k == "NOT" => Ok(SQLOperator::Not),
&Token::Keyword(ref k) if k == "LIKE" => Ok(SQLOperator::Like),
_ => parser_err!(format!("Unsupported SQL operator {:?}", tok)),
}
}
@ -326,11 +342,11 @@ impl Parser {
debug!("get_precedence() {:?}", tok);
match tok {
&Token::SQLWord(ref k) if k.keyword == "OR" => Ok(5),
&Token::SQLWord(ref k) if k.keyword == "AND" => Ok(10),
&Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(15),
&Token::SQLWord(ref k) if k.keyword == "IS" => Ok(15),
&Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(20),
&Token::Keyword(ref k) if k == "OR" => Ok(5),
&Token::Keyword(ref k) if k == "AND" => Ok(10),
&Token::Keyword(ref k) if k == "NOT" => Ok(15),
&Token::Keyword(ref k) if k == "IS" => Ok(15),
&Token::Keyword(ref k) if k == "LIKE" => Ok(20),
&Token::Eq | &Token::Lt | &Token::LtEq | &Token::Neq | &Token::Gt | &Token::GtEq => {
Ok(20)
}
@ -429,9 +445,13 @@ impl Parser {
#[must_use]
pub fn parse_keyword(&mut self, expected: &'static str) -> bool {
match self.peek_token() {
Some(Token::SQLWord(ref k)) if expected.eq_ignore_ascii_case(&k.keyword) => {
self.next_token();
true
Some(Token::Keyword(k)) => {
if expected.eq_ignore_ascii_case(k.as_str()) {
self.next_token();
true
} else {
false
}
}
_ => false,
}
@ -502,7 +522,7 @@ impl Parser {
let mut columns = vec![];
if self.consume_token(&Token::LParen) {
loop {
if let Some(Token::SQLWord(column_name)) = self.next_token() {
if let Some(Token::Identifier(column_name)) = self.next_token() {
if let Ok(data_type) = self.parse_data_type() {
let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]);
let is_unique = self.parse_keyword("UNIQUE");
@ -525,7 +545,7 @@ impl Parser {
Some(Token::Comma) => {
self.next_token();
columns.push(SQLColumnDef {
name: column_name.value,
name: column_name,
data_type: data_type,
allow_null,
is_primary,
@ -536,7 +556,7 @@ impl Parser {
Some(Token::RParen) => {
self.next_token();
columns.push(SQLColumnDef {
name: column_name.value,
name: column_name,
data_type: data_type,
allow_null,
is_primary,
@ -590,16 +610,19 @@ impl Parser {
} else if is_unique_key {
Ok(TableKey::UniqueKey(key))
} else if is_foreign_key {
self.expect_keyword("REFERENCES")?;
let foreign_table = self.parse_tablename()?;
self.expect_token(&Token::LParen)?;
let referred_columns = self.parse_column_names()?;
self.expect_token(&Token::RParen)?;
Ok(TableKey::ForeignKey {
key,
foreign_table,
referred_columns,
})
if self.parse_keyword("REFERENCES") {
let foreign_table = self.parse_tablename()?;
self.expect_token(&Token::LParen)?;
let referred_columns = self.parse_column_names()?;
self.expect_token(&Token::RParen)?;
Ok(TableKey::ForeignKey {
key,
foreign_table,
referred_columns,
})
} else {
parser_err!("Expecting references")
}
} else {
parser_err!(format!(
"Expecting primary key, unique key, or foreign key, found: {:?}",
@ -609,33 +632,39 @@ impl Parser {
}
pub fn parse_alter(&mut self) -> Result<ASTNode, ParserError> {
self.expect_keyword("TABLE")?;
let _ = self.parse_keyword("ONLY");
let table_name = self.parse_tablename()?;
let operation: Result<AlterOperation, ParserError> =
if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) {
match self.next_token() {
Some(Token::SQLWord(ref id)) => {
let table_key = self.parse_table_key(&id.value)?;
Ok(AlterOperation::AddConstraint(table_key))
if self.parse_keyword("TABLE") {
let _ = self.parse_keyword("ONLY");
let table_name = self.parse_tablename()?;
let operation: Result<AlterOperation, ParserError> =
if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) {
match self.next_token() {
Some(Token::Identifier(ref id)) => {
let table_key = self.parse_table_key(id)?;
Ok(AlterOperation::AddConstraint(table_key))
}
_ => {
return parser_err!(format!(
"Expecting identifier, found : {:?}",
self.peek_token()
));
}
}
_ => {
return parser_err!(format!(
"Expecting identifier, found : {:?}",
self.peek_token()
));
}
}
} else {
return parser_err!(format!(
"Expecting ADD CONSTRAINT, found :{:?}",
self.peek_token()
));
};
Ok(ASTNode::SQLAlterTable {
name: table_name,
operation: operation?,
})
} else {
return parser_err!(format!(
"Expecting ADD CONSTRAINT, found :{:?}",
self.peek_token()
));
};
Ok(ASTNode::SQLAlterTable {
name: table_name,
operation: operation?,
})
} else {
parser_err!(format!(
"Expecting TABLE after ALTER, found {:?}",
self.peek_token()
))
}
}
/// Parse a copy statement
@ -688,10 +717,8 @@ impl Parser {
return Ok(values);
}
if let Some(token) = self.next_token() {
if let Token::SQLWord(SQLWord { value: v, .. }) = token {
if v == "N" {
values.push(None);
}
if token == Token::Identifier("N".to_string()) {
values.push(None);
}
} else {
continue;
@ -710,16 +737,11 @@ impl Parser {
match self.next_token() {
Some(t) => {
match t {
Token::SQLWord(k) => match k.keyword.as_ref() {
Token::Keyword(k) => match k.to_uppercase().as_ref() {
"TRUE" => Ok(Value::Boolean(true)),
"FALSE" => Ok(Value::Boolean(false)),
"NULL" => Ok(Value::Null),
_ => {
return parser_err!(format!(
"No value parser for keyword {}",
k.keyword
));
}
_ => return parser_err!(format!("No value parser for keyword {}", k)),
},
//TODO: parse the timestamp here (see parse_timestamp_value())
Token::Number(ref n) if n.contains(".") => match n.parse::<f64>() {
@ -851,7 +873,7 @@ impl Parser {
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
pub fn parse_data_type(&mut self) -> Result<SQLType, ParserError> {
match self.next_token() {
Some(Token::SQLWord(k)) => match k.keyword.as_ref() {
Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() {
"BOOLEAN" => Ok(SQLType::Boolean),
"FLOAT" => Ok(SQLType::Float(self.parse_optional_precision()?)),
"REAL" => Ok(SQLType::Real),
@ -936,92 +958,64 @@ impl Parser {
let (precision, scale) = self.parse_optional_precision_scale()?;
Ok(SQLType::Decimal(precision, scale))
}
_ => {
self.prev_token();
let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type
Ok(SQLType::Custom(type_name))
}
_ => parser_err!(format!("Invalid data type '{:?}'", k)),
},
Some(Token::Identifier(_)) => {
self.prev_token();
let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type
Ok(SQLType::Custom(type_name))
}
other => parser_err!(format!("Invalid data type: '{:?}'", other)),
}
}
/// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword)
/// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`,
/// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar`
pub fn parse_optional_alias(
&mut self,
reserved_kwds: &[&str],
) -> Result<Option<SQLIdent>, ParserError> {
let after_as = self.parse_keyword("AS");
let maybe_alias = self.next_token();
match maybe_alias {
// Accept any identifier after `AS` (though many dialects have restrictions on
// keywords that may appear here). If there's no `AS`: don't parse keywords,
// which may start a construct allowed in this position, to be parsed as aliases.
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
// not an alias.)
Some(Token::SQLWord(ref w))
if after_as || !reserved_kwds.contains(&w.keyword.as_str()) =>
{
// have to clone here until #![feature(bind_by_move_pattern_guards)] is enabled by default
Ok(Some(w.value.clone()))
}
ref not_an_ident if after_as => parser_err!(format!(
"Expected an identifier after AS, got {:?}",
not_an_ident
)),
Some(_not_an_ident) => {
self.prev_token();
Ok(None) // no alias found
}
None => Ok(None),
}
}
/// Parse one or more identifiers with the specified separator between them
pub fn parse_compound_identifier(&mut self, separator: &Token) -> Result<ASTNode, ParserError> {
let mut idents = vec![];
let mut expect_identifier = true;
loop {
let token = &self.next_token();
match token {
Some(Token::SQLWord(s)) if expect_identifier => {
expect_identifier = false;
idents.push(s.to_string());
}
Some(token) if token == separator && !expect_identifier => {
expect_identifier = true;
continue;
}
_ => {
if token.is_some() {
self.prev_token();
Some(token) => match token {
Token::Identifier(s) => {
if expect_identifier {
expect_identifier = false;
idents.push(s.to_string());
} else {
self.prev_token();
break;
}
}
token if token == separator => {
if expect_identifier {
return parser_err!(format!("Expecting identifier, found {:?}", token));
} else {
expect_identifier = true;
continue;
}
}
_ => {
self.prev_token();
break;
}
},
None => {
self.prev_token();
break;
}
}
}
if expect_identifier {
parser_err!(format!(
"Expecting identifier, found {:?}",
self.peek_token()
))
} else {
Ok(ASTNode::SQLCompoundIdentifier(idents))
}
Ok(ASTNode::SQLCompoundIdentifier(idents))
}
pub fn parse_tablename(&mut self) -> Result<String, ParserError> {
let identifier = self.parse_compound_identifier(&Token::Period)?;
match identifier {
// TODO: should store the compound identifier itself
ASTNode::SQLCompoundIdentifier(idents) => Ok(idents.join(".")),
other => parser_err!(format!("Expecting compound identifier, found: {:?}", other)),
}
}
pub fn parse_column_names(&mut self) -> Result<Vec<SQLIdent>, ParserError> {
pub fn parse_column_names(&mut self) -> Result<Vec<String>, ParserError> {
let identifier = self.parse_compound_identifier(&Token::Comma)?;
match identifier {
ASTNode::SQLCompoundIdentifier(idents) => Ok(idents),
@ -1096,7 +1090,7 @@ impl Parser {
let projection = self.parse_expr_list()?;
let (relation, joins): (Option<Box<ASTNode>>, Vec<Join>) = if self.parse_keyword("FROM") {
let relation = Some(Box::new(self.parse_table_factor()?));
let relation = Some(Box::new(self.parse_expr(0)?));
let joins = self.parse_joins()?;
(relation, joins)
} else {
@ -1155,21 +1149,6 @@ impl Parser {
}
}
/// A table name or a parenthesized subquery, followed by optional `[AS] alias`
pub fn parse_table_factor(&mut self) -> Result<ASTNode, ParserError> {
let relation = if self.consume_token(&Token::LParen) {
self.prev_token();
self.parse_expr(0)?
} else {
self.parse_compound_identifier(&Token::Period)?
};
let alias = self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
Ok(ASTNode::TableFactor {
relation: Box::new(relation),
alias,
})
}
fn parse_join_constraint(&mut self, natural: bool) -> Result<JoinConstraint, ParserError> {
if natural {
Ok(JoinConstraint::Natural)
@ -1177,20 +1156,26 @@ impl Parser {
let constraint = self.parse_expr(0)?;
Ok(JoinConstraint::On(constraint))
} else if self.parse_keyword("USING") {
self.expect_token(&Token::LParen)?;
let attributes = self
.parse_expr_list()?
.into_iter()
.map(|ast_node| match ast_node {
ASTNode::SQLIdentifier(ident) => Ok(ident),
unexpected => {
parser_err!(format!("Expected identifier, found {:?}", unexpected))
}
})
.collect::<Result<Vec<String>, ParserError>>()?;
if self.consume_token(&Token::LParen) {
let attributes = self
.parse_expr_list()?
.into_iter()
.map(|ast_node| match ast_node {
ASTNode::SQLIdentifier(ident) => Ok(ident),
unexpected => {
parser_err!(format!("Expected identifier, found {:?}", unexpected))
}
})
.collect::<Result<Vec<String>, ParserError>>()?;
self.expect_token(&Token::RParen)?;
Ok(JoinConstraint::Using(attributes))
if self.consume_token(&Token::RParen) {
Ok(JoinConstraint::Using(attributes))
} else {
parser_err!(format!("Expected token ')', found {:?}", self.peek_token()))
}
} else {
parser_err!(format!("Expected token '(', found {:?}", self.peek_token()))
}
} else {
parser_err!(format!(
"Unexpected token after JOIN: {:?}",
@ -1205,7 +1190,7 @@ impl Parser {
let natural = match &self.peek_token() {
Some(Token::Comma) => {
self.next_token();
let relation = self.parse_table_factor()?;
let relation = self.parse_expr(0)?;
let join = Join {
relation,
join_operator: JoinOperator::Implicit,
@ -1213,10 +1198,10 @@ impl Parser {
joins.push(join);
continue;
}
Some(Token::SQLWord(kw)) if kw.keyword == "CROSS" => {
Some(Token::Keyword(kw)) if kw == "CROSS" => {
self.next_token();
self.expect_keyword("JOIN")?;
let relation = self.parse_table_factor()?;
let relation = self.parse_expr(0)?;
let join = Join {
relation,
join_operator: JoinOperator::Cross,
@ -1224,7 +1209,7 @@ impl Parser {
joins.push(join);
continue;
}
Some(Token::SQLWord(kw)) if kw.keyword == "NATURAL" => {
Some(Token::Keyword(kw)) if kw == "NATURAL" => {
self.next_token();
true
}
@ -1233,49 +1218,49 @@ impl Parser {
};
let join = match &self.peek_token() {
Some(Token::SQLWord(kw)) if kw.keyword == "INNER" => {
Some(Token::Keyword(kw)) if kw == "INNER" => {
self.next_token();
self.expect_keyword("JOIN")?;
Join {
relation: self.parse_table_factor()?,
relation: self.parse_expr(0)?,
join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?),
}
}
Some(Token::SQLWord(kw)) if kw.keyword == "JOIN" => {
Some(Token::Keyword(kw)) if kw == "JOIN" => {
self.next_token();
Join {
relation: self.parse_table_factor()?,
relation: self.parse_expr(0)?,
join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?),
}
}
Some(Token::SQLWord(kw)) if kw.keyword == "LEFT" => {
Some(Token::Keyword(kw)) if kw == "LEFT" => {
self.next_token();
let _ = self.parse_keyword("OUTER");
self.expect_keyword("JOIN")?;
Join {
relation: self.parse_table_factor()?,
relation: self.parse_expr(0)?,
join_operator: JoinOperator::LeftOuter(
self.parse_join_constraint(natural)?,
),
}
}
Some(Token::SQLWord(kw)) if kw.keyword == "RIGHT" => {
Some(Token::Keyword(kw)) if kw == "RIGHT" => {
self.next_token();
let _ = self.parse_keyword("OUTER");
self.expect_keyword("JOIN")?;
Join {
relation: self.parse_table_factor()?,
relation: self.parse_expr(0)?,
join_operator: JoinOperator::RightOuter(
self.parse_join_constraint(natural)?,
),
}
}
Some(Token::SQLWord(kw)) if kw.keyword == "FULL" => {
Some(Token::Keyword(kw)) if kw == "FULL" => {
self.next_token();
let _ = self.parse_keyword("OUTER");
self.expect_keyword("JOIN")?;
Join {
relation: self.parse_table_factor()?,
relation: self.parse_expr(0)?,
join_operator: JoinOperator::FullOuter(
self.parse_join_constraint(natural)?,
),
@ -1336,19 +1321,33 @@ impl Parser {
loop {
let expr = self.parse_expr(0)?;
let asc = if self.parse_keyword("ASC") {
Some(true)
} else if self.parse_keyword("DESC") {
Some(false)
} else {
None
// look for optional ASC / DESC specifier
let asc = match self.peek_token() {
Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() {
"ASC" => {
self.next_token();
true
}
"DESC" => {
self.next_token();
false
}
_ => true,
},
Some(Token::Comma) => true,
_ => true,
};
expr_list.push(SQLOrderByExpr::new(Box::new(expr), asc));
if let Some(Token::Comma) = self.peek_token() {
self.next_token();
if let Some(t) = self.peek_token() {
if t == Token::Comma {
self.next_token();
} else {
break;
}
} else {
// EOF
break;
}
}