Rework keyword/identifier parsing (1/8)

Fold Token::{Keyword, Identifier, DoubleQuotedString} into one
Token::SQLWord, which has the necessary information (was it a
known keyword and/or was it quoted).

This lets the parser easily accept DoubleQuotedString (a quoted
identifier) everywhere it expects an Identifier in the same match
arm. (To complete support of quoted identifiers, or "delimited
identifiers" as the spec calls them, a TODO in parse_tablename()
ought to be addressed.)

    As an aside, per <https://en.wikibooks.org/wiki/SQL_Dialects_Reference/Data_structure_definition/Delimited_identifiers>
    SQLite seems to be the only one supporting single-quoted 'identifier'
    (which is rather hairy, since it can also be a string
    literal), and backtick-quoted `identifier` seems to be supported
    only by MySQL. I didn't implement either one.

This also allows the use of `parse`/`expect_keyword` machinery
for non-reserved keywords: previously they relied on the keyword
being a Token::Keyword, which wasn't a Token::Identifier, and so
wasn't accepted as one.

Now whether a keyword can be used as an identifier can be decided
by the parser. (I didn't add a blacklist of "reserved" keywords,
so that any keyword which doesn't have a special meaning in the
parser could be used as an identifier. The list of keywords in
the dialect could be re-used for that purpose at a later stage.)
This commit is contained in:
Nickolay Ponomarev 2019-01-12 19:45:08 +03:00
parent eb4b5bc686
commit 9a8b6a8e64
7 changed files with 164 additions and 131 deletions

View file

@ -12,7 +12,7 @@ impl Dialect for GenericSqlDialect {
CHAR, CHARACTER, VARYING, LARGE, OBJECT, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT, CHAR, CHARACTER, VARYING, LARGE, OBJECT, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT,
REAL, DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, REAL, DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC,
BOOLEAN, DATE, TIME, TIMESTAMP, CASE, WHEN, THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, BOOLEAN, DATE, TIME, TIMESTAMP, CASE, WHEN, THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL,
CROSS, OUTER, INNER, NATURAL, ON, USING, LIKE, CROSS, OUTER, INNER, NATURAL, ON, USING, LIKE, CAST,
]; ];
} }

View file

@ -15,7 +15,7 @@ impl Dialect for PostgreSqlDialect {
DATE, TIME, TIMESTAMP, VALUES, DEFAULT, ZONE, REGCLASS, TEXT, BYTEA, TRUE, FALSE, COPY, DATE, TIME, TIMESTAMP, VALUES, DEFAULT, ZONE, REGCLASS, TEXT, BYTEA, TRUE, FALSE, COPY,
STDIN, PRIMARY, KEY, UNIQUE, UUID, ADD, CONSTRAINT, FOREIGN, REFERENCES, CASE, WHEN, STDIN, PRIMARY, KEY, UNIQUE, UUID, ADD, CONSTRAINT, FOREIGN, REFERENCES, CASE, WHEN,
THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, CROSS, OUTER, INNER, NATURAL, ON, USING, THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, CROSS, OUTER, INNER, NATURAL, ON, USING,
LIKE, LIKE, CAST,
]; ];
} }

View file

@ -25,15 +25,18 @@ pub use self::value::Value;
pub use self::sql_operator::SQLOperator; pub use self::sql_operator::SQLOperator;
// This could be enhanced to remember the way the identifier was quoted
/// Name of an SQL identifier as stored in the AST; currently a plain `String`.
pub type SQLIdent = String;
/// SQL Abstract Syntax Tree (AST) /// SQL Abstract Syntax Tree (AST)
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum ASTNode { pub enum ASTNode {
/// Identifier e.g. table name or column name /// Identifier e.g. table name or column name
SQLIdentifier(String), SQLIdentifier(SQLIdent),
/// Wildcard e.g. `*` /// Wildcard e.g. `*`
SQLWildcard, SQLWildcard,
/// Multi part identifier e.g. `myschema.dbo.mytable` /// Multi part identifier e.g. `myschema.dbo.mytable`
SQLCompoundIdentifier(Vec<String>), SQLCompoundIdentifier(Vec<SQLIdent>),
/// Assignment e.g. `name = 'Fred'` in an UPDATE statement /// Assignment e.g. `name = 'Fred'` in an UPDATE statement
SQLAssignment(SQLAssignment), SQLAssignment(SQLAssignment),
/// `IS NULL` expression /// `IS NULL` expression
@ -93,7 +96,7 @@ pub enum ASTNode {
/// TABLE /// TABLE
table_name: String, table_name: String,
/// COLUMNS /// COLUMNS
columns: Vec<String>, columns: Vec<SQLIdent>,
/// VALUES (vector of rows to insert) /// VALUES (vector of rows to insert)
values: Vec<Vec<ASTNode>>, values: Vec<Vec<ASTNode>>,
}, },
@ -101,7 +104,7 @@ pub enum ASTNode {
/// TABLE /// TABLE
table_name: String, table_name: String,
/// COLUMNS /// COLUMNS
columns: Vec<String>, columns: Vec<SQLIdent>,
/// VALUES a vector of values to be copied /// VALUES a vector of values to be copied
values: Vec<Option<String>>, values: Vec<Option<String>>,
}, },
@ -388,7 +391,7 @@ impl ToString for SQLOrderByExpr {
/// SQL column definition /// SQL column definition
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub struct SQLColumnDef { pub struct SQLColumnDef {
pub name: String, pub name: SQLIdent,
pub data_type: SQLType, pub data_type: SQLType,
pub is_primary: bool, pub is_primary: bool,
pub is_unique: bool, pub is_unique: bool,

View file

@ -1,3 +1,5 @@
use super::SQLIdent;
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub enum AlterOperation { pub enum AlterOperation {
AddConstraint(TableKey), AddConstraint(TableKey),
@ -17,8 +19,8 @@ impl ToString for AlterOperation {
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub struct Key { pub struct Key {
pub name: String, pub name: SQLIdent,
pub columns: Vec<String>, pub columns: Vec<SQLIdent>,
} }
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
@ -29,7 +31,7 @@ pub enum TableKey {
ForeignKey { ForeignKey {
key: Key, key: Key,
foreign_table: String, foreign_table: String,
referred_columns: Vec<String>, referred_columns: Vec<SQLIdent>,
}, },
} }

View file

@ -90,7 +90,7 @@ impl Parser {
loop { loop {
// stop parsing on `NULL` | `NOT NULL` // stop parsing on `NULL` | `NOT NULL`
match self.peek_token() { match self.peek_token() {
Some(Token::Keyword(ref k)) if k == "NOT" || k == "NULL" => break, Some(Token::SQLWord(ref k)) if k.keyword == "NOT" || k.keyword == "NULL" => break,
_ => {} _ => {}
} }
@ -109,7 +109,7 @@ impl Parser {
pub fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> { pub fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
match self.next_token() { match self.next_token() {
Some(t) => match t { Some(t) => match t {
Token::Keyword(k) => match k.to_uppercase().as_ref() { Token::SQLWord(w) => match w.keyword.as_ref() {
"SELECT" => Ok(self.parse_select()?), "SELECT" => Ok(self.parse_select()?),
"CREATE" => Ok(self.parse_create()?), "CREATE" => Ok(self.parse_create()?),
"DELETE" => Ok(self.parse_delete()?), "DELETE" => Ok(self.parse_delete()?),
@ -121,38 +121,31 @@ impl Parser {
self.parse_sql_value() self.parse_sql_value()
} }
"CASE" => self.parse_case_expression(), "CASE" => self.parse_case_expression(),
"CAST" => self.parse_cast_expression(),
"NOT" => Ok(ASTNode::SQLUnary { "NOT" => Ok(ASTNode::SQLUnary {
operator: SQLOperator::Not, operator: SQLOperator::Not,
expr: Box::new(self.parse_expr(0)?), expr: Box::new(self.parse_expr(0)?),
}), }),
_ => return parser_err!(format!("No prefix parser for keyword {}", k)), _ => match self.peek_token() {
}, Some(Token::LParen) => self.parse_function(&w.value),
Token::Mult => Ok(ASTNode::SQLWildcard), Some(Token::Period) => {
Token::Identifier(id) => { let mut id_parts: Vec<String> = vec![w.value];
if "CAST" == id.to_uppercase() { while self.consume_token(&Token::Period) {
self.parse_cast_expression() match self.next_token() {
} else { Some(Token::SQLWord(w)) => id_parts.push(w.value),
match self.peek_token() { _ => {
Some(Token::LParen) => self.parse_function(&id), return parser_err!(format!(
Some(Token::Period) => { "Error parsing compound identifier"
let mut id_parts: Vec<String> = vec![id]; ));
while self.peek_token() == Some(Token::Period) {
self.expect_token(&Token::Period)?;
match self.next_token() {
Some(Token::Identifier(id)) => id_parts.push(id),
_ => {
return parser_err!(format!(
"Error parsing compound identifier"
))
}
} }
} }
Ok(ASTNode::SQLCompoundIdentifier(id_parts))
} }
_ => Ok(ASTNode::SQLIdentifier(id)), Ok(ASTNode::SQLCompoundIdentifier(id_parts))
} }
} _ => Ok(ASTNode::SQLIdentifier(w.value)),
} },
},
Token::Mult => Ok(ASTNode::SQLWildcard),
Token::Number(_) | Token::SingleQuotedString(_) => { Token::Number(_) | Token::SingleQuotedString(_) => {
self.prev_token(); self.prev_token();
self.parse_sql_value() self.parse_sql_value()
@ -248,7 +241,7 @@ impl Parser {
debug!("parsing infix"); debug!("parsing infix");
match self.next_token() { match self.next_token() {
Some(tok) => match tok { Some(tok) => match tok {
Token::Keyword(ref k) if k == "IS" => { Token::SQLWord(ref k) if k.keyword == "IS" => {
if self.parse_keywords(vec!["NULL"]) { if self.parse_keywords(vec!["NULL"]) {
Ok(ASTNode::SQLIsNull(Box::new(expr))) Ok(ASTNode::SQLIsNull(Box::new(expr)))
} else if self.parse_keywords(vec!["NOT", "NULL"]) { } else if self.parse_keywords(vec!["NOT", "NULL"]) {
@ -257,7 +250,7 @@ impl Parser {
parser_err!("Invalid tokens after IS") parser_err!("Invalid tokens after IS")
} }
} }
Token::Keyword(ref k) if k == "NOT" => { Token::SQLWord(ref k) if k.keyword == "NOT" => {
if self.parse_keywords(vec!["LIKE"]) { if self.parse_keywords(vec!["LIKE"]) {
Ok(ASTNode::SQLBinaryExpr { Ok(ASTNode::SQLBinaryExpr {
left: Box::new(expr), left: Box::new(expr),
@ -272,7 +265,7 @@ impl Parser {
let pg_cast = self.parse_pg_cast(expr)?; let pg_cast = self.parse_pg_cast(expr)?;
Ok(pg_cast) Ok(pg_cast)
} }
Token::Keyword(_) Token::SQLWord(_)
| Token::Eq | Token::Eq
| Token::Neq | Token::Neq
| Token::Gt | Token::Gt
@ -310,10 +303,10 @@ impl Parser {
&Token::Mult => Ok(SQLOperator::Multiply), &Token::Mult => Ok(SQLOperator::Multiply),
&Token::Div => Ok(SQLOperator::Divide), &Token::Div => Ok(SQLOperator::Divide),
&Token::Mod => Ok(SQLOperator::Modulus), &Token::Mod => Ok(SQLOperator::Modulus),
&Token::Keyword(ref k) if k == "AND" => Ok(SQLOperator::And), &Token::SQLWord(ref k) if k.keyword == "AND" => Ok(SQLOperator::And),
&Token::Keyword(ref k) if k == "OR" => Ok(SQLOperator::Or), &Token::SQLWord(ref k) if k.keyword == "OR" => Ok(SQLOperator::Or),
//&Token::Keyword(ref k) if k == "NOT" => Ok(SQLOperator::Not), //&Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(SQLOperator::Not),
&Token::Keyword(ref k) if k == "LIKE" => Ok(SQLOperator::Like), &Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(SQLOperator::Like),
_ => parser_err!(format!("Unsupported SQL operator {:?}", tok)), _ => parser_err!(format!("Unsupported SQL operator {:?}", tok)),
} }
} }
@ -332,11 +325,11 @@ impl Parser {
debug!("get_precedence() {:?}", tok); debug!("get_precedence() {:?}", tok);
match tok { match tok {
&Token::Keyword(ref k) if k == "OR" => Ok(5), &Token::SQLWord(ref k) if k.keyword == "OR" => Ok(5),
&Token::Keyword(ref k) if k == "AND" => Ok(10), &Token::SQLWord(ref k) if k.keyword == "AND" => Ok(10),
&Token::Keyword(ref k) if k == "NOT" => Ok(15), &Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(15),
&Token::Keyword(ref k) if k == "IS" => Ok(15), &Token::SQLWord(ref k) if k.keyword == "IS" => Ok(15),
&Token::Keyword(ref k) if k == "LIKE" => Ok(20), &Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(20),
&Token::Eq | &Token::Lt | &Token::LtEq | &Token::Neq | &Token::Gt | &Token::GtEq => { &Token::Eq | &Token::Lt | &Token::LtEq | &Token::Neq | &Token::Gt | &Token::GtEq => {
Ok(20) Ok(20)
} }
@ -435,13 +428,9 @@ impl Parser {
#[must_use] #[must_use]
pub fn parse_keyword(&mut self, expected: &'static str) -> bool { pub fn parse_keyword(&mut self, expected: &'static str) -> bool {
match self.peek_token() { match self.peek_token() {
Some(Token::Keyword(k)) => { Some(Token::SQLWord(ref k)) if expected.eq_ignore_ascii_case(&k.keyword) => {
if expected.eq_ignore_ascii_case(k.as_str()) { self.next_token();
self.next_token(); true
true
} else {
false
}
} }
_ => false, _ => false,
} }
@ -512,7 +501,7 @@ impl Parser {
let mut columns = vec![]; let mut columns = vec![];
if self.consume_token(&Token::LParen) { if self.consume_token(&Token::LParen) {
loop { loop {
if let Some(Token::Identifier(column_name)) = self.next_token() { if let Some(Token::SQLWord(column_name)) = self.next_token() {
if let Ok(data_type) = self.parse_data_type() { if let Ok(data_type) = self.parse_data_type() {
let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]); let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]);
let is_unique = self.parse_keyword("UNIQUE"); let is_unique = self.parse_keyword("UNIQUE");
@ -535,7 +524,7 @@ impl Parser {
Some(Token::Comma) => { Some(Token::Comma) => {
self.next_token(); self.next_token();
columns.push(SQLColumnDef { columns.push(SQLColumnDef {
name: column_name, name: column_name.value,
data_type: data_type, data_type: data_type,
allow_null, allow_null,
is_primary, is_primary,
@ -546,7 +535,7 @@ impl Parser {
Some(Token::RParen) => { Some(Token::RParen) => {
self.next_token(); self.next_token();
columns.push(SQLColumnDef { columns.push(SQLColumnDef {
name: column_name, name: column_name.value,
data_type: data_type, data_type: data_type,
allow_null, allow_null,
is_primary, is_primary,
@ -628,8 +617,8 @@ impl Parser {
let operation: Result<AlterOperation, ParserError> = let operation: Result<AlterOperation, ParserError> =
if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) { if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) {
match self.next_token() { match self.next_token() {
Some(Token::Identifier(ref id)) => { Some(Token::SQLWord(ref id)) => {
let table_key = self.parse_table_key(id)?; let table_key = self.parse_table_key(&id.value)?;
Ok(AlterOperation::AddConstraint(table_key)) Ok(AlterOperation::AddConstraint(table_key))
} }
_ => { _ => {
@ -707,8 +696,10 @@ impl Parser {
return Ok(values); return Ok(values);
} }
if let Some(token) = self.next_token() { if let Some(token) = self.next_token() {
if token == Token::Identifier("N".to_string()) { if let Token::SQLWord(SQLWord { value: v, .. }) = token {
values.push(None); if v == "N" {
values.push(None);
}
} }
} else { } else {
continue; continue;
@ -727,11 +718,16 @@ impl Parser {
match self.next_token() { match self.next_token() {
Some(t) => { Some(t) => {
match t { match t {
Token::Keyword(k) => match k.to_uppercase().as_ref() { Token::SQLWord(k) => match k.keyword.as_ref() {
"TRUE" => Ok(Value::Boolean(true)), "TRUE" => Ok(Value::Boolean(true)),
"FALSE" => Ok(Value::Boolean(false)), "FALSE" => Ok(Value::Boolean(false)),
"NULL" => Ok(Value::Null), "NULL" => Ok(Value::Null),
_ => return parser_err!(format!("No value parser for keyword {}", k)), _ => {
return parser_err!(format!(
"No value parser for keyword {}",
k.keyword
));
}
}, },
//TODO: parse the timestamp here (see parse_timestamp_value()) //TODO: parse the timestamp here (see parse_timestamp_value())
Token::Number(ref n) if n.contains(".") => match n.parse::<f64>() { Token::Number(ref n) if n.contains(".") => match n.parse::<f64>() {
@ -863,7 +859,7 @@ impl Parser {
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
pub fn parse_data_type(&mut self) -> Result<SQLType, ParserError> { pub fn parse_data_type(&mut self) -> Result<SQLType, ParserError> {
match self.next_token() { match self.next_token() {
Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() { Some(Token::SQLWord(k)) => match k.keyword.as_ref() {
"BOOLEAN" => Ok(SQLType::Boolean), "BOOLEAN" => Ok(SQLType::Boolean),
"FLOAT" => Ok(SQLType::Float(self.parse_optional_precision()?)), "FLOAT" => Ok(SQLType::Float(self.parse_optional_precision()?)),
"REAL" => Ok(SQLType::Real), "REAL" => Ok(SQLType::Real),
@ -948,13 +944,12 @@ impl Parser {
let (precision, scale) = self.parse_optional_precision_scale()?; let (precision, scale) = self.parse_optional_precision_scale()?;
Ok(SQLType::Decimal(precision, scale)) Ok(SQLType::Decimal(precision, scale))
} }
_ => parser_err!(format!("Invalid data type '{:?}'", k)), _ => {
self.prev_token();
let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type
Ok(SQLType::Custom(type_name))
}
}, },
Some(Token::Identifier(_)) => {
self.prev_token();
let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type
Ok(SQLType::Custom(type_name))
}
other => parser_err!(format!("Invalid data type: '{:?}'", other)), other => parser_err!(format!("Invalid data type: '{:?}'", other)),
} }
} }
@ -966,7 +961,7 @@ impl Parser {
let token = &self.next_token(); let token = &self.next_token();
match token { match token {
Some(token) => match token { Some(token) => match token {
Token::Identifier(s) => { Token::SQLWord(s) => {
if expect_identifier { if expect_identifier {
expect_identifier = false; expect_identifier = false;
idents.push(s.to_string()); idents.push(s.to_string());
@ -1000,12 +995,13 @@ impl Parser {
pub fn parse_tablename(&mut self) -> Result<String, ParserError> { pub fn parse_tablename(&mut self) -> Result<String, ParserError> {
let identifier = self.parse_compound_identifier(&Token::Period)?; let identifier = self.parse_compound_identifier(&Token::Period)?;
match identifier { match identifier {
// TODO: should store the compound identifier itself
ASTNode::SQLCompoundIdentifier(idents) => Ok(idents.join(".")), ASTNode::SQLCompoundIdentifier(idents) => Ok(idents.join(".")),
other => parser_err!(format!("Expecting compound identifier, found: {:?}", other)), other => parser_err!(format!("Expecting compound identifier, found: {:?}", other)),
} }
} }
pub fn parse_column_names(&mut self) -> Result<Vec<String>, ParserError> { pub fn parse_column_names(&mut self) -> Result<Vec<SQLIdent>, ParserError> {
let identifier = self.parse_compound_identifier(&Token::Comma)?; let identifier = self.parse_compound_identifier(&Token::Comma)?;
match identifier { match identifier {
ASTNode::SQLCompoundIdentifier(idents) => Ok(idents), ASTNode::SQLCompoundIdentifier(idents) => Ok(idents),
@ -1188,7 +1184,7 @@ impl Parser {
joins.push(join); joins.push(join);
continue; continue;
} }
Some(Token::Keyword(kw)) if kw == "CROSS" => { Some(Token::SQLWord(kw)) if kw.keyword == "CROSS" => {
self.next_token(); self.next_token();
self.expect_keyword("JOIN")?; self.expect_keyword("JOIN")?;
let relation = self.parse_expr(0)?; let relation = self.parse_expr(0)?;
@ -1199,7 +1195,7 @@ impl Parser {
joins.push(join); joins.push(join);
continue; continue;
} }
Some(Token::Keyword(kw)) if kw == "NATURAL" => { Some(Token::SQLWord(kw)) if kw.keyword == "NATURAL" => {
self.next_token(); self.next_token();
true true
} }
@ -1208,7 +1204,7 @@ impl Parser {
}; };
let join = match &self.peek_token() { let join = match &self.peek_token() {
Some(Token::Keyword(kw)) if kw == "INNER" => { Some(Token::SQLWord(kw)) if kw.keyword == "INNER" => {
self.next_token(); self.next_token();
self.expect_keyword("JOIN")?; self.expect_keyword("JOIN")?;
Join { Join {
@ -1216,14 +1212,14 @@ impl Parser {
join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?), join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?),
} }
} }
Some(Token::Keyword(kw)) if kw == "JOIN" => { Some(Token::SQLWord(kw)) if kw.keyword == "JOIN" => {
self.next_token(); self.next_token();
Join { Join {
relation: self.parse_expr(0)?, relation: self.parse_expr(0)?,
join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?), join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?),
} }
} }
Some(Token::Keyword(kw)) if kw == "LEFT" => { Some(Token::SQLWord(kw)) if kw.keyword == "LEFT" => {
self.next_token(); self.next_token();
let _ = self.parse_keyword("OUTER"); let _ = self.parse_keyword("OUTER");
self.expect_keyword("JOIN")?; self.expect_keyword("JOIN")?;
@ -1234,7 +1230,7 @@ impl Parser {
), ),
} }
} }
Some(Token::Keyword(kw)) if kw == "RIGHT" => { Some(Token::SQLWord(kw)) if kw.keyword == "RIGHT" => {
self.next_token(); self.next_token();
let _ = self.parse_keyword("OUTER"); let _ = self.parse_keyword("OUTER");
self.expect_keyword("JOIN")?; self.expect_keyword("JOIN")?;
@ -1245,7 +1241,7 @@ impl Parser {
), ),
} }
} }
Some(Token::Keyword(kw)) if kw == "FULL" => { Some(Token::SQLWord(kw)) if kw.keyword == "FULL" => {
self.next_token(); self.next_token();
let _ = self.parse_keyword("OUTER"); let _ = self.parse_keyword("OUTER");
self.expect_keyword("JOIN")?; self.expect_keyword("JOIN")?;

View file

@ -26,18 +26,14 @@ use super::dialect::Dialect;
/// SQL Token enumeration /// SQL Token enumeration
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Token { pub enum Token {
/// SQL identifier e.g. table or column name /// A keyword (like SELECT) or an optionally quoted SQL identifier
Identifier(String), SQLWord(SQLWord),
/// SQL keyword e.g. Keyword("SELECT")
Keyword(String),
/// Numeric literal /// Numeric literal
Number(String), Number(String),
/// A character that could not be tokenized /// A character that could not be tokenized
Char(char), Char(char),
/// Single quoted string: i.e: 'string' /// Single quoted string: i.e: 'string'
SingleQuotedString(String), SingleQuotedString(String),
/// Double quoted string: i.e: "string"
DoubleQuotedString(String),
/// Comma /// Comma
Comma, Comma,
/// Whitespace (space, tab, etc) /// Whitespace (space, tab, etc)
@ -93,12 +89,10 @@ pub enum Token {
impl ToString for Token { impl ToString for Token {
fn to_string(&self) -> String { fn to_string(&self) -> String {
match self { match self {
Token::Identifier(ref id) => id.to_string(), Token::SQLWord(ref w) => w.to_string(),
Token::Keyword(ref k) => k.to_string(),
Token::Number(ref n) => n.to_string(), Token::Number(ref n) => n.to_string(),
Token::Char(ref c) => c.to_string(), Token::Char(ref c) => c.to_string(),
Token::SingleQuotedString(ref s) => format!("'{}'", s), Token::SingleQuotedString(ref s) => format!("'{}'", s),
Token::DoubleQuotedString(ref s) => format!("\"{}\"", s),
Token::Comma => ",".to_string(), Token::Comma => ",".to_string(),
Token::Whitespace(ws) => ws.to_string(), Token::Whitespace(ws) => ws.to_string(),
Token::Eq => "=".to_string(), Token::Eq => "=".to_string(),
@ -128,6 +122,49 @@ impl ToString for Token {
} }
} }
impl Token {
    /// Builds an unquoted `Token::SQLWord` for `keyword`.
    ///
    /// `value` keeps the spelling that was passed in, while the `keyword`
    /// field holds its upper-cased form. No check is made that the word is
    /// actually one of the dialect's keywords.
    pub fn make_keyword(keyword: &str) -> Self {
        let word = SQLWord {
            keyword: keyword.to_uppercase(),
            quote_style: None,
            value: keyword.to_owned(),
        };
        Token::SQLWord(word)
    }

    /// Builds a `Token::SQLWord` for a plain (non-keyword) word.
    ///
    /// `quote_style` is stored as given (`None` for an unquoted word) and
    /// the `keyword` field is left empty.
    pub fn make_word(word: &str, quote_style: Option<char>) -> Self {
        let plain = SQLWord {
            keyword: String::new(),
            quote_style,
            value: String::from(word),
        };
        Token::SQLWord(plain)
    }
}
/// A keyword (like SELECT) or an optionally quoted SQL identifier
#[derive(Debug, Clone, PartialEq)]
pub struct SQLWord {
    /// The value of the token, without the enclosing quotes, and with the
    /// escape sequences (if any) processed (TODO: escapes are not handled)
    pub value: String,
    /// An identifier can be "quoted" (&lt;delimited identifier> in ANSI parlance).
    /// The standard and most implementations allow using double quotes for this,
    /// but some implementations support other quoting styles as well (e.g. \[MS SQL])
    pub quote_style: Option<char>,
    /// If the word was not quoted and it matched one of the known keywords,
    /// this will have one of the values from dialect::keywords, otherwise empty
    pub keyword: String,
}

impl ToString for SQLWord {
    /// Renders the word as SQL text, re-adding the delimiters recorded in
    /// `quote_style` around `value`.
    ///
    /// An opening `[` is closed with `]`; every other delimiter is treated as
    /// symmetric and repeated on both sides, so `"` yields `"value"`.
    /// `value` is emitted verbatim: no escaping of embedded delimiters is done.
    fn to_string(&self) -> String {
        match self.quote_style {
            None => self.value.clone(),
            Some(open) => {
                // `quote_style` is a public field, so any char may show up here;
                // derive the closing delimiter instead of panicking on it.
                let close = if open == '[' { ']' } else { open };
                format!("{}{}{}", open, self.value, close)
            }
        }
    }
}
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Whitespace { pub enum Whitespace {
Space, Space,
@ -189,11 +226,10 @@ impl<'a> Tokenizer<'a> {
} }
Token::Whitespace(Whitespace::Tab) => self.col += 4, Token::Whitespace(Whitespace::Tab) => self.col += 4,
Token::Identifier(s) => self.col += s.len() as u64, Token::SQLWord(w) if w.quote_style == None => self.col += w.value.len() as u64,
Token::Keyword(s) => self.col += s.len() as u64, Token::SQLWord(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2,
Token::Number(s) => self.col += s.len() as u64, Token::Number(s) => self.col += s.len() as u64,
Token::SingleQuotedString(s) => self.col += s.len() as u64, Token::SingleQuotedString(s) => self.col += s.len() as u64,
Token::DoubleQuotedString(s) => self.col += s.len() as u64,
_ => self.col += 1, _ => self.col += 1,
} }
@ -234,14 +270,19 @@ impl<'a> Tokenizer<'a> {
} }
let upper_str = s.to_uppercase(); let upper_str = s.to_uppercase();
if self.is_keyword(upper_str.as_str()) { if self.is_keyword(upper_str.as_str()) {
Ok(Some(Token::Keyword(upper_str))) Ok(Some(Token::SQLWord(SQLWord {
value: s,
quote_style: None,
keyword: upper_str,
})))
} else { } else {
Ok(Some(Token::Identifier(s))) Ok(Some(Token::make_word(&s, None)))
} }
} }
// string // string
'\'' => { '\'' => {
//TODO: handle escaped quotes in string //TODO: handle escaped quotes in string
//TODO: handle newlines in string
//TODO: handle EOF before terminating quote //TODO: handle EOF before terminating quote
let mut s = String::new(); let mut s = String::new();
chars.next(); // consume chars.next(); // consume
@ -275,7 +316,7 @@ impl<'a> Tokenizer<'a> {
} }
} }
} }
Ok(Some(Token::DoubleQuotedString(s))) Ok(Some(Token::make_word(&s, Some('"'))))
} }
// numbers // numbers
'0'...'9' => { '0'...'9' => {
@ -389,7 +430,7 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap(); let tokens = tokenizer.tokenize().unwrap();
let expected = vec![ let expected = vec![
Token::Keyword(String::from("SELECT")), Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1")), Token::Number(String::from("1")),
]; ];
@ -405,9 +446,9 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap(); let tokens = tokenizer.tokenize().unwrap();
let expected = vec![ let expected = vec![
Token::Keyword(String::from("SELECT")), Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Identifier(String::from("sqrt")), Token::make_word("sqrt", None),
Token::LParen, Token::LParen,
Token::Number(String::from("1")), Token::Number(String::from("1")),
Token::RParen, Token::RParen,
@ -424,23 +465,23 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap(); let tokens = tokenizer.tokenize().unwrap();
let expected = vec![ let expected = vec![
Token::Keyword(String::from("SELECT")), Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Mult, Token::Mult,
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword(String::from("FROM")), Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Identifier(String::from("customer")), Token::make_word("customer", None),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword(String::from("WHERE")), Token::make_keyword("WHERE"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Identifier(String::from("id")), Token::make_word("id", None),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Eq, Token::Eq,
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1")), Token::Number(String::from("1")),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword(String::from("LIMIT")), Token::make_keyword("LIMIT"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Number(String::from("5")), Token::Number(String::from("5")),
]; ];
@ -456,17 +497,17 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap(); let tokens = tokenizer.tokenize().unwrap();
let expected = vec![ let expected = vec![
Token::Keyword(String::from("SELECT")), Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Mult, Token::Mult,
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword(String::from("FROM")), Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Identifier(String::from("customer")), Token::make_word("customer", None),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword(String::from("WHERE")), Token::make_keyword("WHERE"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Identifier(String::from("salary")), Token::make_word("salary", None),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Neq, Token::Neq,
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
@ -491,7 +532,7 @@ mod tests {
Token::Char('ط'), Token::Char('ط'),
Token::Char('ف'), Token::Char('ف'),
Token::Char('ى'), Token::Char('ى'),
Token::Identifier("h".to_string()), Token::make_word("h", None),
]; ];
compare(expected, tokens); compare(expected, tokens);
} }
@ -507,20 +548,20 @@ mod tests {
let expected = vec![ let expected = vec![
Token::Whitespace(Whitespace::Newline), Token::Whitespace(Whitespace::Newline),
Token::Whitespace(Whitespace::Newline), Token::Whitespace(Whitespace::Newline),
Token::Keyword("SELECT".into()), Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Mult, Token::Mult,
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword("FROM".into()), Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword("TABLE".into()), Token::make_keyword("table"),
Token::Whitespace(Whitespace::Tab), Token::Whitespace(Whitespace::Tab),
Token::Char('م'), Token::Char('م'),
Token::Char('ص'), Token::Char('ص'),
Token::Char('ط'), Token::Char('ط'),
Token::Char('ف'), Token::Char('ف'),
Token::Char('ى'), Token::Char('ى'),
Token::Identifier("h".to_string()), Token::make_word("h", None),
]; ];
compare(expected, tokens); compare(expected, tokens);
} }
@ -533,11 +574,11 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap(); let tokens = tokenizer.tokenize().unwrap();
let expected = vec![ let expected = vec![
Token::Identifier(String::from("a")), Token::make_word("a", None),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword("IS".to_string()), Token::make_keyword("IS"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword("NULL".to_string()), Token::make_keyword("NULL"),
]; ];
compare(expected, tokens); compare(expected, tokens);

View file

@ -13,20 +13,11 @@ fn test_prev_index() {
let sql: &str = "SELECT version()"; let sql: &str = "SELECT version()";
let mut parser = parser(sql); let mut parser = parser(sql);
assert_eq!(parser.prev_token(), None); assert_eq!(parser.prev_token(), None);
assert_eq!(parser.next_token(), Some(Token::Keyword("SELECT".into()))); assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT")));
assert_eq!( assert_eq!(parser.next_token(), Some(Token::make_word("version", None)));
parser.next_token(), assert_eq!(parser.prev_token(), Some(Token::make_word("version", None)));
Some(Token::Identifier("version".into())) assert_eq!(parser.peek_token(), Some(Token::make_word("version", None)));
); assert_eq!(parser.prev_token(), Some(Token::make_keyword("SELECT")));
assert_eq!(
parser.prev_token(),
Some(Token::Identifier("version".into()))
);
assert_eq!(
parser.peek_token(),
Some(Token::Identifier("version".into()))
);
assert_eq!(parser.prev_token(), Some(Token::Keyword("SELECT".into())));
assert_eq!(parser.prev_token(), None); assert_eq!(parser.prev_token(), None);
} }