Merge pull request #42 from andygrove/revert-37-keywords

Revert "Rework keyword/identifier parsing"
This commit is contained in:
Andy Grove 2019-03-08 07:28:07 -07:00 committed by GitHub
commit 4a355e6ddc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 695 additions and 791 deletions

View file

@ -1,8 +1,339 @@
use dialect::Dialect;
use dialect::keywords::*;
pub struct AnsiSqlDialect {}
impl Dialect for AnsiSqlDialect {
/// Keywords recognised by the ANSI SQL dialect.
///
/// The entries are returned in the order listed below; rows are grouped by
/// initial letter purely for readability.
fn keywords(&self) -> Vec<&'static str> {
    vec![
        // A
        ABS, ALL, ALLOCATE, ALTER, AND, ANY, ARE, ARRAY, ARRAY_AGG, ARRAY_MAX_CARDINALITY, AS,
        ASENSITIVE, ASYMMETRIC, AT, ATOMIC, AUTHORIZATION, AVG,
        // B
        BEGIN, BEGIN_FRAME, BEGIN_PARTITION, BETWEEN, BIGINT, BINARY, BLOB, BOOLEAN, BOTH, BY,
        // C
        CALL, CALLED, CARDINALITY, CASCADED, CASE, CAST, CEIL, CEILING, CHAR, CHAR_LENGTH,
        CHARACTER, CHARACTER_LENGTH, CHECK, CLOB, CLOSE, COALESCE, COLLATE, COLLECT, COLUMN,
        COMMIT, CONDITION, CONNECT, CONSTRAINT, CONTAINS, CONVERT, CORR, CORRESPONDING, COUNT,
        COVAR_POP, COVAR_SAMP, CREATE, CROSS, CUBE, CUME_DIST, CURRENT, CURRENT_CATALOG,
        CURRENT_DATE, CURRENT_DEFAULT_TRANSFORM_GROUP, CURRENT_PATH, CURRENT_ROLE, CURRENT_ROW,
        CURRENT_SCHEMA, CURRENT_TIME, CURRENT_TIMESTAMP, CURRENT_TRANSFORM_GROUP_FOR_TYPE,
        CURRENT_USER, CURSOR, CYCLE,
        // D
        DATE, DAY, DEALLOCATE, DEC, DECIMAL, DECLARE, DEFAULT, DELETE, DENSE_RANK, DEREF,
        DESCRIBE, DETERMINISTIC, DISCONNECT, DISTINCT, DOUBLE, DROP, DYNAMIC,
        // E
        EACH, ELEMENT, ELSE, END, END_FRAME, END_PARTITION, END_EXEC, EQUALS, ESCAPE, EVERY,
        EXCEPT, EXEC, EXECUTE, EXISTS, EXP, EXTERNAL, EXTRACT,
        // F
        FALSE, FETCH, FILTER, FIRST_VALUE, FLOAT, FLOOR, FOR, FOREIGN, FRAME_ROW, FREE, FROM,
        FULL, FUNCTION, FUSION,
        // G
        GET, GLOBAL, GRANT, GROUP, GROUPING, GROUPS,
        // H
        HAVING, HOLD, HOUR,
        // I
        IDENTITY, IN, INDICATOR, INNER, INOUT, INSENSITIVE, INSERT, INT, INTEGER, INTERSECT,
        INTERSECTION, INTERVAL, INTO, IS,
        // J
        JOIN,
        // L
        LAG, LANGUAGE, LARGE, LAST_VALUE, LATERAL, LEAD, LEADING, LEFT, LIKE, LIKE_REGEX, LN,
        LOCAL, LOCALTIME, LOCALTIMESTAMP, LOWER,
        // M
        MATCH, MAX, MEMBER, MERGE, METHOD, MIN, MINUTE, MOD, MODIFIES, MODULE, MONTH, MULTISET,
        // N
        NATIONAL, NATURAL, NCHAR, NCLOB, NEW, NO, NONE, NORMALIZE, NOT, NTH_VALUE, NTILE, NULL,
        NULLIF, NUMERIC,
        // O
        OCTET_LENGTH, OCCURRENCES_REGEX, OF, OFFSET, OLD, ON, ONLY, OPEN, OR, ORDER, OUT, OUTER,
        OVER, OVERLAPS, OVERLAY,
        // P
        PARAMETER, PARTITION, PERCENT, PERCENT_RANK, PERCENTILE_CONT, PERCENTILE_DISC, PERIOD,
        PORTION, POSITION, POSITION_REGEX, POWER, PRECEDES, PRECISION, PREPARE, PRIMARY,
        PROCEDURE,
        // R
        RANGE, RANK, READS, REAL, RECURSIVE, REF, REFERENCES, REFERENCING, REGR_AVGX, REGR_AVGY,
        REGR_COUNT, REGR_INTERCEPT, REGR_R2, REGR_SLOPE, REGR_SXX, REGR_SXY, REGR_SYY, RELEASE,
        RESULT, RETURN, RETURNS, REVOKE, RIGHT, ROLLBACK, ROLLUP, ROW, ROW_NUMBER, ROWS,
        // S
        SAVEPOINT, SCOPE, SCROLL, SEARCH, SECOND, SELECT, SENSITIVE, SESSION_USER, SET, SIMILAR,
        SMALLINT, SOME, SPECIFIC, SPECIFICTYPE, SQL, SQLEXCEPTION, SQLSTATE, SQLWARNING, SQRT,
        START, STATIC, STDDEV_POP, STDDEV_SAMP, SUBMULTISET, SUBSTRING, SUBSTRING_REGEX,
        SUCCEEDS, SUM, SYMMETRIC, SYSTEM, SYSTEM_TIME, SYSTEM_USER,
        // T
        TABLE, TABLESAMPLE, THEN, TIME, TIMESTAMP, TIMEZONE_HOUR, TIMEZONE_MINUTE, TO, TRAILING,
        TRANSLATE, TRANSLATE_REGEX, TRANSLATION, TREAT, TRIGGER, TRUNCATE, TRIM, TRIM_ARRAY,
        TRUE,
        // U
        UESCAPE, UNION, UNIQUE, UNKNOWN, UNNEST, UPDATE, UPPER, USER, USING,
        // V
        VALUE, VALUES, VALUE_OF, VAR_POP, VAR_SAMP, VARBINARY, VARCHAR, VARYING, VERSIONING,
        // W
        WHEN, WHENEVER, WHERE, WIDTH_BUCKET, WINDOW, WITH, WITHIN, WITHOUT,
        // Y
        YEAR,
    ]
}
/// Returns `true` when `ch` may begin an identifier in this dialect,
/// i.e. when it is an ASCII letter (`a`-`z` or `A`-`Z`).
fn is_identifier_start(&self, ch: char) -> bool {
    ch.is_ascii_alphabetic()
}

View file

@ -1,7 +1,21 @@
use dialect::Dialect;
use dialect::keywords::*;
pub struct GenericSqlDialect {}
impl Dialect for GenericSqlDialect {
fn keywords(&self) -> Vec<&'static str> {
return vec![
SELECT, FROM, WHERE, LIMIT, ORDER, GROUP, BY, HAVING, UNION, ALL, INSERT, INTO, UPDATE,
DELETE, IN, IS, NULL, SET, CREATE, EXTERNAL, TABLE, ASC, DESC, AND, OR, NOT, AS,
STORED, CSV, PARQUET, LOCATION, WITH, WITHOUT, HEADER, ROW, // SQL types
CHAR, CHARACTER, VARYING, LARGE, OBJECT, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT,
REAL, DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC,
BOOLEAN, DATE, TIME, TIMESTAMP, CASE, WHEN, THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL,
CROSS, OUTER, INNER, NATURAL, ON, USING, LIKE,
];
}
/// Returns `true` when `ch` may begin an identifier in this dialect:
/// an ASCII letter (`a`-`z`, `A`-`Z`) or the `@` sign.
fn is_identifier_start(&self, ch: char) -> bool {
    ch == '@' || ch.is_ascii_alphabetic()
}

View file

@ -1,23 +1,12 @@
/// This module defines
/// 1) a list of constants for every keyword that
/// can appear in SQLWord::keyword:
/// pub const KEYWORD = "KEYWORD"
/// 2) an `ALL_KEYWORDS` array with every keyword in it
/// This is not a list of *reserved* keywords: some of these can be
/// parsed as identifiers if the parser decides so. This means that
/// new keywords can be added here without affecting the parse result.
///
/// As a matter of fact, most of these keywords are not used at all
/// and could be removed.
/// 3) a `RESERVED_FOR_TABLE_ALIAS` array with keywords reserved in a
/// "table alias" context.
/// make a listing of keywords
/// with static str and their stringified value
macro_rules! keyword {
($($ident:ident),*) => {
$(pub const $ident: &'static str = stringify!($ident);)*
$(pub static $ident: &'static str = stringify!($ident);)*
}
}
/// enumerate all the keywords here for all dialects to support in this project
keyword!(
ABS,
ADD,
@ -363,358 +352,4 @@ keyword!(
);
/// Special case: a keyword whose text ("END-EXEC") is not a valid Rust identifier
pub const END_EXEC: &'static str = "END-EXEC";
pub const ALL_KEYWORDS: &'static [&'static str] = &[
ABS,
ADD,
ASC,
ALL,
ALLOCATE,
ALTER,
AND,
ANY,
ARE,
ARRAY,
ARRAY_AGG,
ARRAY_MAX_CARDINALITY,
AS,
ASENSITIVE,
ASYMMETRIC,
AT,
ATOMIC,
AUTHORIZATION,
AVG,
BEGIN,
BEGIN_FRAME,
BEGIN_PARTITION,
BETWEEN,
BIGINT,
BINARY,
BLOB,
BOOLEAN,
BOTH,
BY,
BYTEA,
CALL,
CALLED,
CARDINALITY,
CASCADED,
CASE,
CAST,
CEIL,
CEILING,
CHAR,
CHAR_LENGTH,
CHARACTER,
CHARACTER_LENGTH,
CHECK,
CLOB,
CLOSE,
COALESCE,
COLLATE,
COLLECT,
COLUMN,
COMMIT,
CONDITION,
CONNECT,
CONSTRAINT,
CONTAINS,
CONVERT,
COPY,
CORR,
CORRESPONDING,
COUNT,
COVAR_POP,
COVAR_SAMP,
CREATE,
CROSS,
CSV,
CUBE,
CUME_DIST,
CURRENT,
CURRENT_CATALOG,
CURRENT_DATE,
CURRENT_DEFAULT_TRANSFORM_GROUP,
CURRENT_PATH,
CURRENT_ROLE,
CURRENT_ROW,
CURRENT_SCHEMA,
CURRENT_TIME,
CURRENT_TIMESTAMP,
CURRENT_TRANSFORM_GROUP_FOR_TYPE,
CURRENT_USER,
CURSOR,
CYCLE,
DATE,
DAY,
DEALLOCATE,
DEC,
DECIMAL,
DECLARE,
DEFAULT,
DELETE,
DENSE_RANK,
DEREF,
DESC,
DESCRIBE,
DETERMINISTIC,
DISCONNECT,
DISTINCT,
DOUBLE,
DROP,
DYNAMIC,
EACH,
ELEMENT,
ELSE,
END,
END_FRAME,
END_PARTITION,
EQUALS,
ESCAPE,
EVERY,
EXCEPT,
EXEC,
EXECUTE,
EXISTS,
EXP,
EXTERNAL,
EXTRACT,
FALSE,
FETCH,
FILTER,
FIRST_VALUE,
FLOAT,
FLOOR,
FOR,
FOREIGN,
FRAME_ROW,
FREE,
FROM,
FULL,
FUNCTION,
FUSION,
GET,
GLOBAL,
GRANT,
GROUP,
GROUPING,
GROUPS,
HAVING,
HEADER,
HOLD,
HOUR,
IDENTITY,
IN,
INDICATOR,
INNER,
INOUT,
INSENSITIVE,
INSERT,
INT,
INTEGER,
INTERSECT,
INTERSECTION,
INTERVAL,
INTO,
IS,
JOIN,
KEY,
LAG,
LANGUAGE,
LARGE,
LAST_VALUE,
LATERAL,
LEAD,
LEADING,
LEFT,
LIKE,
LIKE_REGEX,
LIMIT,
LN,
LOCAL,
LOCALTIME,
LOCALTIMESTAMP,
LOCATION,
LOWER,
MATCH,
MAX,
MEMBER,
MERGE,
METHOD,
MIN,
MINUTE,
MOD,
MODIFIES,
MODULE,
MONTH,
MULTISET,
NATIONAL,
NATURAL,
NCHAR,
NCLOB,
NEW,
NO,
NONE,
NORMALIZE,
NOT,
NTH_VALUE,
NTILE,
NULL,
NULLIF,
NUMERIC,
OBJECT,
OCTET_LENGTH,
OCCURRENCES_REGEX,
OF,
OFFSET,
OLD,
ON,
ONLY,
OPEN,
OR,
ORDER,
OUT,
OUTER,
OVER,
OVERLAPS,
OVERLAY,
PARAMETER,
PARTITION,
PARQUET,
PERCENT,
PERCENT_RANK,
PERCENTILE_CONT,
PERCENTILE_DISC,
PERIOD,
PORTION,
POSITION,
POSITION_REGEX,
POWER,
PRECEDES,
PRECISION,
PREPARE,
PRIMARY,
PROCEDURE,
RANGE,
RANK,
READS,
REAL,
RECURSIVE,
REF,
REFERENCES,
REFERENCING,
REGCLASS,
REGR_AVGX,
REGR_AVGY,
REGR_COUNT,
REGR_INTERCEPT,
REGR_R2,
REGR_SLOPE,
REGR_SXX,
REGR_SXY,
REGR_SYY,
RELEASE,
RESULT,
RETURN,
RETURNS,
REVOKE,
RIGHT,
ROLLBACK,
ROLLUP,
ROW,
ROW_NUMBER,
ROWS,
SAVEPOINT,
SCOPE,
SCROLL,
SEARCH,
SECOND,
SELECT,
SENSITIVE,
SESSION_USER,
SET,
SIMILAR,
SMALLINT,
SOME,
SPECIFIC,
SPECIFICTYPE,
SQL,
SQLEXCEPTION,
SQLSTATE,
SQLWARNING,
SQRT,
START,
STATIC,
STDDEV_POP,
STDDEV_SAMP,
STDIN,
STORED,
SUBMULTISET,
SUBSTRING,
SUBSTRING_REGEX,
SUCCEEDS,
SUM,
SYMMETRIC,
SYSTEM,
SYSTEM_TIME,
SYSTEM_USER,
TABLE,
TABLESAMPLE,
TEXT,
THEN,
TIME,
TIMESTAMP,
TIMEZONE_HOUR,
TIMEZONE_MINUTE,
TO,
TRAILING,
TRANSLATE,
TRANSLATE_REGEX,
TRANSLATION,
TREAT,
TRIGGER,
TRUNCATE,
TRIM,
TRIM_ARRAY,
TRUE,
UESCAPE,
UNION,
UNIQUE,
UNKNOWN,
UNNEST,
UPDATE,
UPPER,
USER,
USING,
UUID,
VALUE,
VALUES,
VALUE_OF,
VAR_POP,
VAR_SAMP,
VARBINARY,
VARCHAR,
VARYING,
VERSIONING,
WHEN,
WHENEVER,
WHERE,
WIDTH_BUCKET,
WINDOW,
WITH,
WITHIN,
WITHOUT,
YEAR,
ZONE,
END_EXEC,
];
/// Keywords that are rejected as a table alias, so that `FROM table_name alias`
/// can be parsed unambiguously without looking ahead.
pub const RESERVED_FOR_TABLE_ALIAS: &[&str] = &[
    // keyword is 'reserved' in most dialects
    WHERE,
    GROUP,
    ON,
    // not reserved in Oracle
    JOIN,
    INNER,
    CROSS,
    FULL,
    LEFT,
    RIGHT,
    // not reserved in Oracle & MSSQL
    NATURAL,
    USING,
    ORDER,
    // UNION, EXCEPT, INTERSECT, // TODO add these with tests.
];
pub static END_EXEC: &'static str = "END-EXEC";

View file

@ -8,6 +8,8 @@ pub use self::generic_sql::GenericSqlDialect;
pub use self::postgresql::PostgreSqlDialect;
pub trait Dialect {
/// Get a list of keywords for this dialect
fn keywords(&self) -> Vec<&'static str>;
/// Determine if a character is a valid identifier start character
fn is_identifier_start(&self, ch: char) -> bool;
/// Determine if a character is a valid identifier character

View file

@ -1,8 +1,24 @@
use dialect::Dialect;
use dialect::keywords::*;
pub struct PostgreSqlDialect {}
impl Dialect for PostgreSqlDialect {
fn keywords(&self) -> Vec<&'static str> {
return vec![
ALTER, ONLY, SELECT, FROM, WHERE, LIMIT, ORDER, GROUP, BY, HAVING, UNION, ALL, INSERT,
INTO, UPDATE, DELETE, IN, IS, NULL, SET, CREATE, EXTERNAL, TABLE, ASC, DESC, AND, OR,
NOT, AS, STORED, CSV, WITH, WITHOUT, ROW, // SQL types
CHAR, CHARACTER, VARYING, LARGE, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT, REAL,
DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, BOOLEAN,
DATE, TIME, TIMESTAMP, VALUES, DEFAULT, ZONE, REGCLASS, TEXT, BYTEA, TRUE, FALSE, COPY,
STDIN, PRIMARY, KEY, UNIQUE, UUID, ADD, CONSTRAINT, FOREIGN, REFERENCES, CASE, WHEN,
THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, CROSS, OUTER, INNER, NATURAL, ON, USING,
LIKE,
];
}
/// Returns `true` when `ch` may begin an identifier in this dialect:
/// an ASCII letter (`a`-`z`, `A`-`Z`) or the `@` sign.
fn is_identifier_start(&self, ch: char) -> bool {
    ch == '@' || ch.is_ascii_alphabetic()
}

View file

@ -25,18 +25,15 @@ pub use self::value::Value;
pub use self::sql_operator::SQLOperator;
// This could be enhanced to remember the way the identifier was quoted
pub type SQLIdent = String;
/// SQL Abstract Syntax Tree (AST)
#[derive(Debug, Clone, PartialEq)]
pub enum ASTNode {
/// Identifier e.g. table name or column name
SQLIdentifier(SQLIdent),
SQLIdentifier(String),
/// Wildcard e.g. `*`
SQLWildcard,
/// Multi part identifier e.g. `myschema.dbo.mytable`
SQLCompoundIdentifier(Vec<SQLIdent>),
SQLCompoundIdentifier(Vec<String>),
/// Assignment e.g. `name = 'Fred'` in an UPDATE statement
SQLAssignment(SQLAssignment),
/// `IS NULL` expression
@ -72,17 +69,12 @@ pub enum ASTNode {
results: Vec<ASTNode>,
else_result: Option<Box<ASTNode>>,
},
/// A table name or a parenthesized subquery with an optional alias
TableFactor {
relation: Box<ASTNode>, // SQLNested or SQLCompoundIdentifier
alias: Option<SQLIdent>,
},
/// SELECT
SQLSelect {
/// projection expressions
projection: Vec<ASTNode>,
/// FROM
relation: Option<Box<ASTNode>>, // TableFactor
relation: Option<Box<ASTNode>>,
// JOIN
joins: Vec<Join>,
/// WHERE
@ -101,7 +93,7 @@ pub enum ASTNode {
/// TABLE
table_name: String,
/// COLUMNS
columns: Vec<SQLIdent>,
columns: Vec<String>,
/// VALUES (vector of rows to insert)
values: Vec<Vec<ASTNode>>,
},
@ -109,7 +101,7 @@ pub enum ASTNode {
/// TABLE
table_name: String,
/// COLUMNS
columns: Vec<SQLIdent>,
columns: Vec<String>,
/// VALUES a vector of values to be copied
values: Vec<Option<String>>,
},
@ -196,13 +188,6 @@ impl ToString for ASTNode {
}
s + " END"
}
ASTNode::TableFactor { relation, alias } => {
if let Some(alias) = alias {
format!("{} AS {}", relation.to_string(), alias)
} else {
relation.to_string()
}
}
ASTNode::SQLSelect {
projection,
relation,
@ -381,21 +366,21 @@ impl ToString for SQLAssignment {
#[derive(Debug, Clone, PartialEq)]
pub struct SQLOrderByExpr {
pub expr: Box<ASTNode>,
pub asc: Option<bool>,
pub asc: bool,
}
impl SQLOrderByExpr {
pub fn new(expr: Box<ASTNode>, asc: Option<bool>) -> Self {
pub fn new(expr: Box<ASTNode>, asc: bool) -> Self {
SQLOrderByExpr { expr, asc }
}
}
impl ToString for SQLOrderByExpr {
fn to_string(&self) -> String {
match self.asc {
Some(true) => format!("{} ASC", self.expr.to_string()),
Some(false) => format!("{} DESC", self.expr.to_string()),
None => self.expr.to_string(),
if self.asc {
format!("{} ASC", self.expr.as_ref().to_string())
} else {
format!("{} DESC", self.expr.as_ref().to_string())
}
}
}
@ -403,7 +388,7 @@ impl ToString for SQLOrderByExpr {
/// SQL column definition
#[derive(Debug, Clone, PartialEq)]
pub struct SQLColumnDef {
pub name: SQLIdent,
pub name: String,
pub data_type: SQLType,
pub is_primary: bool,
pub is_unique: bool,
@ -432,7 +417,7 @@ impl ToString for SQLColumnDef {
#[derive(Debug, Clone, PartialEq)]
pub struct Join {
pub relation: ASTNode, // TableFactor
pub relation: ASTNode,
pub join_operator: JoinOperator,
}

View file

@ -1,5 +1,3 @@
use super::SQLIdent;
#[derive(Debug, PartialEq, Clone)]
pub enum AlterOperation {
AddConstraint(TableKey),
@ -19,8 +17,8 @@ impl ToString for AlterOperation {
#[derive(Debug, PartialEq, Clone)]
pub struct Key {
pub name: SQLIdent,
pub columns: Vec<SQLIdent>,
pub name: String,
pub columns: Vec<String>,
}
#[derive(Debug, PartialEq, Clone)]
@ -31,7 +29,7 @@ pub enum TableKey {
ForeignKey {
key: Key,
foreign_table: String,
referred_columns: Vec<SQLIdent>,
referred_columns: Vec<String>,
},
}

View file

@ -14,7 +14,6 @@
//! SQL Parser
use super::dialect::keywords;
use super::dialect::Dialect;
use super::sqlast::*;
use super::sqltokenizer::*;
@ -78,7 +77,9 @@ impl Parser {
break;
}
expr = self.parse_infix(expr, next_precedence)?;
if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? {
expr = infix_expr;
}
}
Ok(expr)
}
@ -91,7 +92,7 @@ impl Parser {
loop {
// stop parsing on `NULL` | `NOT NULL`
match self.peek_token() {
Some(Token::SQLWord(ref k)) if k.keyword == "NOT" || k.keyword == "NULL" => break,
Some(Token::Keyword(ref k)) if k == "NOT" || k == "NULL" => break,
_ => {}
}
@ -101,7 +102,9 @@ impl Parser {
break;
}
expr = self.parse_infix(expr, next_precedence)?;
if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? {
expr = infix_expr;
}
}
Ok(expr)
}
@ -110,7 +113,7 @@ impl Parser {
pub fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
match self.next_token() {
Some(t) => match t {
Token::SQLWord(w) => match w.keyword.as_ref() {
Token::Keyword(k) => match k.to_uppercase().as_ref() {
"SELECT" => Ok(self.parse_select()?),
"CREATE" => Ok(self.parse_create()?),
"DELETE" => Ok(self.parse_delete()?),
@ -122,31 +125,38 @@ impl Parser {
self.parse_sql_value()
}
"CASE" => self.parse_case_expression(),
"CAST" => self.parse_cast_expression(),
"NOT" => Ok(ASTNode::SQLUnary {
operator: SQLOperator::Not,
expr: Box::new(self.parse_expr(0)?),
}),
_ => match self.peek_token() {
Some(Token::LParen) => self.parse_function(&w.value),
Some(Token::Period) => {
let mut id_parts: Vec<String> = vec![w.value];
while self.consume_token(&Token::Period) {
match self.next_token() {
Some(Token::SQLWord(w)) => id_parts.push(w.value),
_ => {
return parser_err!(format!(
"Error parsing compound identifier"
));
}
}
}
Ok(ASTNode::SQLCompoundIdentifier(id_parts))
}
_ => Ok(ASTNode::SQLIdentifier(w.value)),
},
_ => return parser_err!(format!("No prefix parser for keyword {}", k)),
},
Token::Mult => Ok(ASTNode::SQLWildcard),
Token::Identifier(id) => {
if "CAST" == id.to_uppercase() {
self.parse_cast_expression()
} else {
match self.peek_token() {
Some(Token::LParen) => self.parse_function(&id),
Some(Token::Period) => {
let mut id_parts: Vec<String> = vec![id];
while self.peek_token() == Some(Token::Period) {
self.expect_token(&Token::Period)?;
match self.next_token() {
Some(Token::Identifier(id)) => id_parts.push(id),
_ => {
return parser_err!(format!(
"Error parsing compound identifier"
))
}
}
}
Ok(ASTNode::SQLCompoundIdentifier(id_parts))
}
_ => Ok(ASTNode::SQLIdentifier(id)),
}
}
}
Token::Number(_) | Token::SingleQuotedString(_) => {
self.prev_token();
self.parse_sql_value()
@ -238,36 +248,40 @@ impl Parser {
}
/// Parse an expression infix (typically an operator)
pub fn parse_infix(&mut self, expr: ASTNode, precedence: u8) -> Result<ASTNode, ParserError> {
pub fn parse_infix(
&mut self,
expr: ASTNode,
precedence: u8,
) -> Result<Option<ASTNode>, ParserError> {
debug!("parsing infix");
match self.next_token() {
Some(tok) => match tok {
Token::SQLWord(ref k) if k.keyword == "IS" => {
Token::Keyword(ref k) if k == "IS" => {
if self.parse_keywords(vec!["NULL"]) {
Ok(ASTNode::SQLIsNull(Box::new(expr)))
Ok(Some(ASTNode::SQLIsNull(Box::new(expr))))
} else if self.parse_keywords(vec!["NOT", "NULL"]) {
Ok(ASTNode::SQLIsNotNull(Box::new(expr)))
Ok(Some(ASTNode::SQLIsNotNull(Box::new(expr))))
} else {
parser_err!("Invalid tokens after IS")
}
}
Token::SQLWord(ref k) if k.keyword == "NOT" => {
Token::Keyword(ref k) if k == "NOT" => {
if self.parse_keywords(vec!["LIKE"]) {
Ok(ASTNode::SQLBinaryExpr {
Ok(Some(ASTNode::SQLBinaryExpr {
left: Box::new(expr),
op: SQLOperator::NotLike,
right: Box::new(self.parse_expr(precedence)?),
})
}))
} else {
parser_err!("Invalid tokens after NOT")
}
}
Token::DoubleColon => {
let pg_cast = self.parse_pg_cast(expr)?;
Ok(pg_cast)
}
Token::SQLWord(_)
| Token::Eq
Token::Keyword(_) => Ok(Some(ASTNode::SQLBinaryExpr {
left: Box::new(expr),
op: self.to_sql_operator(&tok)?,
right: Box::new(self.parse_expr(precedence)?),
})),
Token::Eq
| Token::Neq
| Token::Gt
| Token::GtEq
@ -277,16 +291,18 @@ impl Parser {
| Token::Minus
| Token::Mult
| Token::Mod
| Token::Div => Ok(ASTNode::SQLBinaryExpr {
| Token::Div => Ok(Some(ASTNode::SQLBinaryExpr {
left: Box::new(expr),
op: self.to_sql_operator(&tok)?,
right: Box::new(self.parse_expr(precedence)?),
}),
})),
Token::DoubleColon => {
let pg_cast = self.parse_pg_cast(expr)?;
Ok(Some(pg_cast))
}
_ => parser_err!(format!("No infix parser for token {:?}", tok)),
},
// This is not supposed to happen, because of the precedence check
// in parse_expr.
None => parser_err!("Unexpected EOF in parse_infix"),
None => Ok(None),
}
}
@ -304,10 +320,10 @@ impl Parser {
&Token::Mult => Ok(SQLOperator::Multiply),
&Token::Div => Ok(SQLOperator::Divide),
&Token::Mod => Ok(SQLOperator::Modulus),
&Token::SQLWord(ref k) if k.keyword == "AND" => Ok(SQLOperator::And),
&Token::SQLWord(ref k) if k.keyword == "OR" => Ok(SQLOperator::Or),
//&Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(SQLOperator::Not),
&Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(SQLOperator::Like),
&Token::Keyword(ref k) if k == "AND" => Ok(SQLOperator::And),
&Token::Keyword(ref k) if k == "OR" => Ok(SQLOperator::Or),
//&Token::Keyword(ref k) if k == "NOT" => Ok(SQLOperator::Not),
&Token::Keyword(ref k) if k == "LIKE" => Ok(SQLOperator::Like),
_ => parser_err!(format!("Unsupported SQL operator {:?}", tok)),
}
}
@ -326,11 +342,11 @@ impl Parser {
debug!("get_precedence() {:?}", tok);
match tok {
&Token::SQLWord(ref k) if k.keyword == "OR" => Ok(5),
&Token::SQLWord(ref k) if k.keyword == "AND" => Ok(10),
&Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(15),
&Token::SQLWord(ref k) if k.keyword == "IS" => Ok(15),
&Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(20),
&Token::Keyword(ref k) if k == "OR" => Ok(5),
&Token::Keyword(ref k) if k == "AND" => Ok(10),
&Token::Keyword(ref k) if k == "NOT" => Ok(15),
&Token::Keyword(ref k) if k == "IS" => Ok(15),
&Token::Keyword(ref k) if k == "LIKE" => Ok(20),
&Token::Eq | &Token::Lt | &Token::LtEq | &Token::Neq | &Token::Gt | &Token::GtEq => {
Ok(20)
}
@ -429,9 +445,13 @@ impl Parser {
#[must_use]
pub fn parse_keyword(&mut self, expected: &'static str) -> bool {
match self.peek_token() {
Some(Token::SQLWord(ref k)) if expected.eq_ignore_ascii_case(&k.keyword) => {
self.next_token();
true
Some(Token::Keyword(k)) => {
if expected.eq_ignore_ascii_case(k.as_str()) {
self.next_token();
true
} else {
false
}
}
_ => false,
}
@ -502,7 +522,7 @@ impl Parser {
let mut columns = vec![];
if self.consume_token(&Token::LParen) {
loop {
if let Some(Token::SQLWord(column_name)) = self.next_token() {
if let Some(Token::Identifier(column_name)) = self.next_token() {
if let Ok(data_type) = self.parse_data_type() {
let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]);
let is_unique = self.parse_keyword("UNIQUE");
@ -525,7 +545,7 @@ impl Parser {
Some(Token::Comma) => {
self.next_token();
columns.push(SQLColumnDef {
name: column_name.value,
name: column_name,
data_type: data_type,
allow_null,
is_primary,
@ -536,7 +556,7 @@ impl Parser {
Some(Token::RParen) => {
self.next_token();
columns.push(SQLColumnDef {
name: column_name.value,
name: column_name,
data_type: data_type,
allow_null,
is_primary,
@ -590,16 +610,19 @@ impl Parser {
} else if is_unique_key {
Ok(TableKey::UniqueKey(key))
} else if is_foreign_key {
self.expect_keyword("REFERENCES")?;
let foreign_table = self.parse_tablename()?;
self.expect_token(&Token::LParen)?;
let referred_columns = self.parse_column_names()?;
self.expect_token(&Token::RParen)?;
Ok(TableKey::ForeignKey {
key,
foreign_table,
referred_columns,
})
if self.parse_keyword("REFERENCES") {
let foreign_table = self.parse_tablename()?;
self.expect_token(&Token::LParen)?;
let referred_columns = self.parse_column_names()?;
self.expect_token(&Token::RParen)?;
Ok(TableKey::ForeignKey {
key,
foreign_table,
referred_columns,
})
} else {
parser_err!("Expecting references")
}
} else {
parser_err!(format!(
"Expecting primary key, unique key, or foreign key, found: {:?}",
@ -609,33 +632,39 @@ impl Parser {
}
pub fn parse_alter(&mut self) -> Result<ASTNode, ParserError> {
self.expect_keyword("TABLE")?;
let _ = self.parse_keyword("ONLY");
let table_name = self.parse_tablename()?;
let operation: Result<AlterOperation, ParserError> =
if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) {
match self.next_token() {
Some(Token::SQLWord(ref id)) => {
let table_key = self.parse_table_key(&id.value)?;
Ok(AlterOperation::AddConstraint(table_key))
if self.parse_keyword("TABLE") {
let _ = self.parse_keyword("ONLY");
let table_name = self.parse_tablename()?;
let operation: Result<AlterOperation, ParserError> =
if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) {
match self.next_token() {
Some(Token::Identifier(ref id)) => {
let table_key = self.parse_table_key(id)?;
Ok(AlterOperation::AddConstraint(table_key))
}
_ => {
return parser_err!(format!(
"Expecting identifier, found : {:?}",
self.peek_token()
));
}
}
_ => {
return parser_err!(format!(
"Expecting identifier, found : {:?}",
self.peek_token()
));
}
}
} else {
return parser_err!(format!(
"Expecting ADD CONSTRAINT, found :{:?}",
self.peek_token()
));
};
Ok(ASTNode::SQLAlterTable {
name: table_name,
operation: operation?,
})
} else {
return parser_err!(format!(
"Expecting ADD CONSTRAINT, found :{:?}",
self.peek_token()
));
};
Ok(ASTNode::SQLAlterTable {
name: table_name,
operation: operation?,
})
} else {
parser_err!(format!(
"Expecting TABLE after ALTER, found {:?}",
self.peek_token()
))
}
}
/// Parse a copy statement
@ -688,10 +717,8 @@ impl Parser {
return Ok(values);
}
if let Some(token) = self.next_token() {
if let Token::SQLWord(SQLWord { value: v, .. }) = token {
if v == "N" {
values.push(None);
}
if token == Token::Identifier("N".to_string()) {
values.push(None);
}
} else {
continue;
@ -710,16 +737,11 @@ impl Parser {
match self.next_token() {
Some(t) => {
match t {
Token::SQLWord(k) => match k.keyword.as_ref() {
Token::Keyword(k) => match k.to_uppercase().as_ref() {
"TRUE" => Ok(Value::Boolean(true)),
"FALSE" => Ok(Value::Boolean(false)),
"NULL" => Ok(Value::Null),
_ => {
return parser_err!(format!(
"No value parser for keyword {}",
k.keyword
));
}
_ => return parser_err!(format!("No value parser for keyword {}", k)),
},
//TODO: parse the timestamp here (see parse_timestamp_value())
Token::Number(ref n) if n.contains(".") => match n.parse::<f64>() {
@ -851,7 +873,7 @@ impl Parser {
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
pub fn parse_data_type(&mut self) -> Result<SQLType, ParserError> {
match self.next_token() {
Some(Token::SQLWord(k)) => match k.keyword.as_ref() {
Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() {
"BOOLEAN" => Ok(SQLType::Boolean),
"FLOAT" => Ok(SQLType::Float(self.parse_optional_precision()?)),
"REAL" => Ok(SQLType::Real),
@ -936,92 +958,64 @@ impl Parser {
let (precision, scale) = self.parse_optional_precision_scale()?;
Ok(SQLType::Decimal(precision, scale))
}
_ => {
self.prev_token();
let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type
Ok(SQLType::Custom(type_name))
}
_ => parser_err!(format!("Invalid data type '{:?}'", k)),
},
Some(Token::Identifier(_)) => {
self.prev_token();
let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type
Ok(SQLType::Custom(type_name))
}
other => parser_err!(format!("Invalid data type: '{:?}'", other)),
}
}
/// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword)
/// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`,
/// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar`
///
/// `reserved_kwds` lists the keywords that must not be taken as an alias when
/// no `AS` precedes them; each entry is compared against the word's `keyword`
/// field.
///
/// Returns `Ok(Some(alias))` when an alias was consumed and `Ok(None)` when
/// there is none (a non-alias token is pushed back first). Returns an error
/// when `AS` was consumed but is not followed by a word token, which includes
/// reaching the end of input.
pub fn parse_optional_alias(
&mut self,
reserved_kwds: &[&str],
) -> Result<Option<SQLIdent>, ParserError> {
// Remember whether an explicit `AS` was consumed; it changes how the next token is judged.
let after_as = self.parse_keyword("AS");
// Consume the candidate token; it is pushed back below if it is not an alias.
let maybe_alias = self.next_token();
match maybe_alias {
// Accept any identifier after `AS` (though many dialects have restrictions on
// keywords that may appear here). If there's no `AS`: don't parse keywords,
// which may start a construct allowed in this position, to be parsed as aliases.
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
// not an alias.)
Some(Token::SQLWord(ref w))
if after_as || !reserved_kwds.contains(&w.keyword.as_str()) =>
{
// have to clone here until #![feature(bind_by_move_pattern_guards)] is enabled by default
Ok(Some(w.value.clone()))
}
// Reached only when the arm above did not match: with `AS` consumed, anything
// that is not a word (including `None`, i.e. end of input) is an error.
ref not_an_ident if after_as => parser_err!(format!(
"Expected an identifier after AS, got {:?}",
not_an_ident
)),
// No `AS` and the token is not usable as an alias: un-consume it for the caller.
Some(_not_an_ident) => {
self.prev_token();
Ok(None) // no alias found
}
// No `AS` and no more tokens: nothing to push back.
None => Ok(None),
}
}
/// Parse one or more identifiers with the specified separator between them
pub fn parse_compound_identifier(&mut self, separator: &Token) -> Result<ASTNode, ParserError> {
let mut idents = vec![];
let mut expect_identifier = true;
loop {
let token = &self.next_token();
match token {
Some(Token::SQLWord(s)) if expect_identifier => {
expect_identifier = false;
idents.push(s.to_string());
}
Some(token) if token == separator && !expect_identifier => {
expect_identifier = true;
continue;
}
_ => {
if token.is_some() {
self.prev_token();
Some(token) => match token {
Token::Identifier(s) => {
if expect_identifier {
expect_identifier = false;
idents.push(s.to_string());
} else {
self.prev_token();
break;
}
}
token if token == separator => {
if expect_identifier {
return parser_err!(format!("Expecting identifier, found {:?}", token));
} else {
expect_identifier = true;
continue;
}
}
_ => {
self.prev_token();
break;
}
},
None => {
self.prev_token();
break;
}
}
}
if expect_identifier {
parser_err!(format!(
"Expecting identifier, found {:?}",
self.peek_token()
))
} else {
Ok(ASTNode::SQLCompoundIdentifier(idents))
}
Ok(ASTNode::SQLCompoundIdentifier(idents))
}
/// Parse a `.`-separated compound identifier and return its parts joined
/// with `.` as a single string.
pub fn parse_tablename(&mut self) -> Result<String, ParserError> {
    match self.parse_compound_identifier(&Token::Period)? {
        // TODO: should store the compound identifier itself
        ASTNode::SQLCompoundIdentifier(parts) => Ok(parts.join(".")),
        unexpected => parser_err!(format!(
            "Expecting compound identifier, found: {:?}",
            unexpected
        )),
    }
}
pub fn parse_column_names(&mut self) -> Result<Vec<SQLIdent>, ParserError> {
pub fn parse_column_names(&mut self) -> Result<Vec<String>, ParserError> {
let identifier = self.parse_compound_identifier(&Token::Comma)?;
match identifier {
ASTNode::SQLCompoundIdentifier(idents) => Ok(idents),
@ -1096,7 +1090,7 @@ impl Parser {
let projection = self.parse_expr_list()?;
let (relation, joins): (Option<Box<ASTNode>>, Vec<Join>) = if self.parse_keyword("FROM") {
let relation = Some(Box::new(self.parse_table_factor()?));
let relation = Some(Box::new(self.parse_expr(0)?));
let joins = self.parse_joins()?;
(relation, joins)
} else {
@ -1155,21 +1149,6 @@ impl Parser {
}
}
/// Parse a table factor: a table name or a parenthesized subquery, followed
/// by an optional `[AS] alias`.
pub fn parse_table_factor(&mut self) -> Result<ASTNode, ParserError> {
    // Use consume + un-consume as a lookahead for an opening parenthesis.
    let starts_with_paren = self.consume_token(&Token::LParen);
    let relation = if starts_with_paren {
        // Hand the `(` back so the expression parser sees the whole parenthesized form.
        self.prev_token();
        self.parse_expr(0)?
    } else {
        self.parse_compound_identifier(&Token::Period)?
    };

    let alias = self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
    let relation = Box::new(relation);
    Ok(ASTNode::TableFactor { relation, alias })
}
fn parse_join_constraint(&mut self, natural: bool) -> Result<JoinConstraint, ParserError> {
if natural {
Ok(JoinConstraint::Natural)
@ -1177,20 +1156,26 @@ impl Parser {
let constraint = self.parse_expr(0)?;
Ok(JoinConstraint::On(constraint))
} else if self.parse_keyword("USING") {
self.expect_token(&Token::LParen)?;
let attributes = self
.parse_expr_list()?
.into_iter()
.map(|ast_node| match ast_node {
ASTNode::SQLIdentifier(ident) => Ok(ident),
unexpected => {
parser_err!(format!("Expected identifier, found {:?}", unexpected))
}
})
.collect::<Result<Vec<String>, ParserError>>()?;
if self.consume_token(&Token::LParen) {
let attributes = self
.parse_expr_list()?
.into_iter()
.map(|ast_node| match ast_node {
ASTNode::SQLIdentifier(ident) => Ok(ident),
unexpected => {
parser_err!(format!("Expected identifier, found {:?}", unexpected))
}
})
.collect::<Result<Vec<String>, ParserError>>()?;
self.expect_token(&Token::RParen)?;
Ok(JoinConstraint::Using(attributes))
if self.consume_token(&Token::RParen) {
Ok(JoinConstraint::Using(attributes))
} else {
parser_err!(format!("Expected token ')', found {:?}", self.peek_token()))
}
} else {
parser_err!(format!("Expected token '(', found {:?}", self.peek_token()))
}
} else {
parser_err!(format!(
"Unexpected token after JOIN: {:?}",
@ -1205,7 +1190,7 @@ impl Parser {
let natural = match &self.peek_token() {
Some(Token::Comma) => {
self.next_token();
let relation = self.parse_table_factor()?;
let relation = self.parse_expr(0)?;
let join = Join {
relation,
join_operator: JoinOperator::Implicit,
@ -1213,10 +1198,10 @@ impl Parser {
joins.push(join);
continue;
}
Some(Token::SQLWord(kw)) if kw.keyword == "CROSS" => {
Some(Token::Keyword(kw)) if kw == "CROSS" => {
self.next_token();
self.expect_keyword("JOIN")?;
let relation = self.parse_table_factor()?;
let relation = self.parse_expr(0)?;
let join = Join {
relation,
join_operator: JoinOperator::Cross,
@ -1224,7 +1209,7 @@ impl Parser {
joins.push(join);
continue;
}
Some(Token::SQLWord(kw)) if kw.keyword == "NATURAL" => {
Some(Token::Keyword(kw)) if kw == "NATURAL" => {
self.next_token();
true
}
@ -1233,49 +1218,49 @@ impl Parser {
};
let join = match &self.peek_token() {
Some(Token::SQLWord(kw)) if kw.keyword == "INNER" => {
Some(Token::Keyword(kw)) if kw == "INNER" => {
self.next_token();
self.expect_keyword("JOIN")?;
Join {
relation: self.parse_table_factor()?,
relation: self.parse_expr(0)?,
join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?),
}
}
Some(Token::SQLWord(kw)) if kw.keyword == "JOIN" => {
Some(Token::Keyword(kw)) if kw == "JOIN" => {
self.next_token();
Join {
relation: self.parse_table_factor()?,
relation: self.parse_expr(0)?,
join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?),
}
}
Some(Token::SQLWord(kw)) if kw.keyword == "LEFT" => {
Some(Token::Keyword(kw)) if kw == "LEFT" => {
self.next_token();
let _ = self.parse_keyword("OUTER");
self.expect_keyword("JOIN")?;
Join {
relation: self.parse_table_factor()?,
relation: self.parse_expr(0)?,
join_operator: JoinOperator::LeftOuter(
self.parse_join_constraint(natural)?,
),
}
}
Some(Token::SQLWord(kw)) if kw.keyword == "RIGHT" => {
Some(Token::Keyword(kw)) if kw == "RIGHT" => {
self.next_token();
let _ = self.parse_keyword("OUTER");
self.expect_keyword("JOIN")?;
Join {
relation: self.parse_table_factor()?,
relation: self.parse_expr(0)?,
join_operator: JoinOperator::RightOuter(
self.parse_join_constraint(natural)?,
),
}
}
Some(Token::SQLWord(kw)) if kw.keyword == "FULL" => {
Some(Token::Keyword(kw)) if kw == "FULL" => {
self.next_token();
let _ = self.parse_keyword("OUTER");
self.expect_keyword("JOIN")?;
Join {
relation: self.parse_table_factor()?,
relation: self.parse_expr(0)?,
join_operator: JoinOperator::FullOuter(
self.parse_join_constraint(natural)?,
),
@ -1336,19 +1321,33 @@ impl Parser {
loop {
let expr = self.parse_expr(0)?;
let asc = if self.parse_keyword("ASC") {
Some(true)
} else if self.parse_keyword("DESC") {
Some(false)
} else {
None
// look for optional ASC / DESC specifier
let asc = match self.peek_token() {
Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() {
"ASC" => {
self.next_token();
true
}
"DESC" => {
self.next_token();
false
}
_ => true,
},
Some(Token::Comma) => true,
_ => true,
};
expr_list.push(SQLOrderByExpr::new(Box::new(expr), asc));
if let Some(Token::Comma) = self.peek_token() {
self.next_token();
if let Some(t) = self.peek_token() {
if t == Token::Comma {
self.next_token();
} else {
break;
}
} else {
// EOF
break;
}
}

View file

@ -21,20 +21,23 @@
use std::iter::Peekable;
use std::str::Chars;
use super::dialect::keywords::ALL_KEYWORDS;
use super::dialect::Dialect;
/// SQL Token enumeration
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
/// A keyword (like SELECT) or an optionally quoted SQL identifier
SQLWord(SQLWord),
/// SQL identifier e.g. table or column name
Identifier(String),
/// SQL keyword e.g. Keyword("SELECT")
Keyword(String),
/// Numeric literal
Number(String),
/// A character that could not be tokenized
Char(char),
/// Single quoted string: i.e: 'string'
SingleQuotedString(String),
/// Double quoted string: i.e: "string"
DoubleQuotedString(String),
/// Comma
Comma,
/// Whitespace (space, tab, etc)
@ -90,10 +93,12 @@ pub enum Token {
impl ToString for Token {
fn to_string(&self) -> String {
match self {
Token::SQLWord(ref w) => w.to_string(),
Token::Identifier(ref id) => id.to_string(),
Token::Keyword(ref k) => k.to_string(),
Token::Number(ref n) => n.to_string(),
Token::Char(ref c) => c.to_string(),
Token::SingleQuotedString(ref s) => format!("'{}'", s),
Token::DoubleQuotedString(ref s) => format!("\"{}\"", s),
Token::Comma => ",".to_string(),
Token::Whitespace(ws) => ws.to_string(),
Token::Eq => "=".to_string(),
@ -123,54 +128,6 @@ impl ToString for Token {
}
}
impl Token {
    /// Builds an unquoted word token from `keyword`.
    ///
    /// Shorthand for `Token::make_word(keyword, None)`.
    pub fn make_keyword(keyword: &str) -> Self {
        Self::make_word(keyword, None)
    }

    /// Builds a `Token::SQLWord` from `word` and its optional quote character.
    ///
    /// `value` keeps the word exactly as given. `keyword` receives the uppercased
    /// word when the word is unquoted and listed in `ALL_KEYWORDS`; in every other
    /// case it is the empty string.
    pub fn make_word(word: &str, quote_style: Option<char>) -> Self {
        let upper = word.to_uppercase();
        // TODO: need to reintroduce FnvHashSet at some point .. iterating over keywords is
        // not fast but I want the simplicity for now while I experiment with pluggable
        // dialects
        let keyword = if quote_style.is_none() && ALL_KEYWORDS.contains(&upper.as_str()) {
            upper
        } else {
            String::new()
        };
        Token::SQLWord(SQLWord {
            value: word.to_owned(),
            quote_style,
            keyword,
        })
    }
}
/// A keyword (like SELECT) or an optionally quoted SQL identifier
#[derive(Debug, Clone, PartialEq)]
pub struct SQLWord {
/// The value of the token, without the enclosing quotes, and with the
/// escape sequences (if any) processed (TODO: escapes are not handled)
pub value: String,
/// An identifier can be "quoted" (<delimited identifier> in ANSI parlance).
/// The standard and most implementations allow using double quotes for this,
/// but some implementations support other quoting styles as well (e.g. \[MS SQL]).
/// `None` means the word was not quoted; otherwise this holds the opening
/// quote character.
pub quote_style: Option<char>,
/// If the word was not quoted and it matched one of the known keywords,
/// this will have one of the values from dialect::keywords, otherwise empty
pub keyword: String,
}
impl ToString for SQLWord {
    /// Renders the word as SQL text, re-adding the delimiters recorded in `quote_style`.
    ///
    /// An unquoted word is returned as-is; `"` wraps the value in double quotes and
    /// `[` wraps it in square brackets.
    ///
    /// # Panics
    ///
    /// Panics if `quote_style` holds any character other than `"` or `[`.
    fn to_string(&self) -> String {
        let (open, close) = match self.quote_style {
            None => return self.value.clone(),
            Some('"') => ('"', '"'),
            Some('[') => ('[', ']'),
            Some(_) => panic!("Unexpected quote_style!"),
        };
        format!("{}{}{}", open, self.value, close)
    }
}
#[derive(Debug, Clone, PartialEq)]
pub enum Whitespace {
Space,
@ -211,6 +168,13 @@ impl<'a> Tokenizer<'a> {
}
}
/// Returns `true` when `s` is exactly equal to one of the dialect's keywords.
///
/// The comparison is an exact string match, so the caller is responsible for
/// any case normalisation of `s`.
fn is_keyword(&self, s: &str) -> bool {
    // TODO: need to reintroduce FnvHashSet at some point .. iterating over keywords is
    // not fast but I want the simplicity for now while I experiment with pluggable
    // dialects
    self.dialect.keywords().iter().any(|kw| *kw == s)
}
/// Tokenize the statement and produce a vector of tokens
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
let mut peekable = self.query.chars().peekable();
@ -225,10 +189,11 @@ impl<'a> Tokenizer<'a> {
}
Token::Whitespace(Whitespace::Tab) => self.col += 4,
Token::SQLWord(w) if w.quote_style == None => self.col += w.value.len() as u64,
Token::SQLWord(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2,
Token::Identifier(s) => self.col += s.len() as u64,
Token::Keyword(s) => self.col += s.len() as u64,
Token::Number(s) => self.col += s.len() as u64,
Token::SingleQuotedString(s) => self.col += s.len() as u64,
Token::DoubleQuotedString(s) => self.col += s.len() as u64,
_ => self.col += 1,
}
@ -267,12 +232,16 @@ impl<'a> Tokenizer<'a> {
break;
}
}
Ok(Some(Token::make_word(&s, None)))
let upper_str = s.to_uppercase();
if self.is_keyword(upper_str.as_str()) {
Ok(Some(Token::Keyword(upper_str)))
} else {
Ok(Some(Token::Identifier(s)))
}
}
// string
'\'' => {
//TODO: handle escaped quotes in string
//TODO: handle newlines in string
//TODO: handle EOF before terminating quote
let mut s = String::new();
chars.next(); // consume
@ -306,7 +275,7 @@ impl<'a> Tokenizer<'a> {
}
}
}
Ok(Some(Token::make_word(&s, Some('"'))))
Ok(Some(Token::DoubleQuotedString(s)))
}
// numbers
'0'...'9' => {
@ -420,7 +389,7 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Keyword(String::from("SELECT")),
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1")),
];
@ -436,9 +405,9 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Keyword(String::from("SELECT")),
Token::Whitespace(Whitespace::Space),
Token::make_word("sqrt", None),
Token::Identifier(String::from("sqrt")),
Token::LParen,
Token::Number(String::from("1")),
Token::RParen,
@ -455,23 +424,23 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Keyword(String::from("SELECT")),
Token::Whitespace(Whitespace::Space),
Token::Mult,
Token::Whitespace(Whitespace::Space),
Token::make_keyword("FROM"),
Token::Keyword(String::from("FROM")),
Token::Whitespace(Whitespace::Space),
Token::make_word("customer", None),
Token::Identifier(String::from("customer")),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("WHERE"),
Token::Keyword(String::from("WHERE")),
Token::Whitespace(Whitespace::Space),
Token::make_word("id", None),
Token::Identifier(String::from("id")),
Token::Whitespace(Whitespace::Space),
Token::Eq,
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1")),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("LIMIT"),
Token::Keyword(String::from("LIMIT")),
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("5")),
];
@ -487,17 +456,17 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Keyword(String::from("SELECT")),
Token::Whitespace(Whitespace::Space),
Token::Mult,
Token::Whitespace(Whitespace::Space),
Token::make_keyword("FROM"),
Token::Keyword(String::from("FROM")),
Token::Whitespace(Whitespace::Space),
Token::make_word("customer", None),
Token::Identifier(String::from("customer")),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("WHERE"),
Token::Keyword(String::from("WHERE")),
Token::Whitespace(Whitespace::Space),
Token::make_word("salary", None),
Token::Identifier(String::from("salary")),
Token::Whitespace(Whitespace::Space),
Token::Neq,
Token::Whitespace(Whitespace::Space),
@ -522,7 +491,7 @@ mod tests {
Token::Char('ط'),
Token::Char('ف'),
Token::Char('ى'),
Token::make_word("h", None),
Token::Identifier("h".to_string()),
];
compare(expected, tokens);
}
@ -538,20 +507,20 @@ mod tests {
let expected = vec![
Token::Whitespace(Whitespace::Newline),
Token::Whitespace(Whitespace::Newline),
Token::make_keyword("SELECT"),
Token::Keyword("SELECT".into()),
Token::Whitespace(Whitespace::Space),
Token::Mult,
Token::Whitespace(Whitespace::Space),
Token::make_keyword("FROM"),
Token::Keyword("FROM".into()),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("table"),
Token::Keyword("TABLE".into()),
Token::Whitespace(Whitespace::Tab),
Token::Char('م'),
Token::Char('ص'),
Token::Char('ط'),
Token::Char('ف'),
Token::Char('ى'),
Token::make_word("h", None),
Token::Identifier("h".to_string()),
];
compare(expected, tokens);
}
@ -564,11 +533,11 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::make_word("a", None),
Token::Identifier(String::from("a")),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("IS"),
Token::Keyword("IS".to_string()),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("NULL"),
Token::Keyword("NULL".to_string()),
];
compare(expected, tokens);

View file

@ -229,33 +229,27 @@ fn parse_not_like() {
#[test]
fn parse_select_order_by() {
fn chk(sql: &str) {
match verified(&sql) {
ASTNode::SQLSelect { order_by, .. } => {
assert_eq!(
Some(vec![
SQLOrderByExpr {
expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())),
asc: Some(true),
},
SQLOrderByExpr {
expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())),
asc: Some(false),
},
SQLOrderByExpr {
expr: Box::new(ASTNode::SQLIdentifier("id".to_string())),
asc: None,
},
]),
order_by
);
}
_ => assert!(false),
let sql = String::from(
"SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC",
);
match verified(&sql) {
ASTNode::SQLSelect { order_by, .. } => {
assert_eq!(
Some(vec![
SQLOrderByExpr {
expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())),
asc: true,
},
SQLOrderByExpr {
expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())),
asc: false,
},
]),
order_by
);
}
_ => assert!(false),
}
chk("SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC, id");
// make sure ORDER is not treated as an alias
chk("SELECT id, fname, lname FROM customer ORDER BY lname ASC, fname DESC, id");
}
#[test]
@ -272,11 +266,11 @@ fn parse_select_order_by_limit() {
Some(vec![
SQLOrderByExpr {
expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())),
asc: Some(true),
asc: true,
},
SQLOrderByExpr {
expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())),
asc: Some(false),
asc: false,
},
]),
order_by
@ -541,10 +535,7 @@ fn parse_implicit_join() {
assert_eq!(
joins[0],
Join {
relation: ASTNode::TableFactor {
relation: Box::new(ASTNode::SQLCompoundIdentifier(vec!["t2".to_string()])),
alias: None,
},
relation: ASTNode::SQLIdentifier("t2".to_string()),
join_operator: JoinOperator::Implicit
}
)
@ -563,10 +554,7 @@ fn parse_cross_join() {
assert_eq!(
joins[0],
Join {
relation: ASTNode::TableFactor {
relation: Box::new(ASTNode::SQLCompoundIdentifier(vec!["t2".to_string()])),
alias: None,
},
relation: ASTNode::SQLIdentifier("t2".to_string()),
join_operator: JoinOperator::Cross
}
)
@ -579,14 +567,10 @@ fn parse_cross_join() {
fn parse_joins_on() {
fn join_with_constraint(
relation: impl Into<String>,
alias: Option<SQLIdent>,
f: impl Fn(JoinConstraint) -> JoinOperator,
) -> Join {
Join {
relation: ASTNode::TableFactor {
relation: Box::new(ASTNode::SQLCompoundIdentifier(vec![relation.into()])),
alias,
},
relation: ASTNode::SQLIdentifier(relation.into()),
join_operator: f(JoinConstraint::On(ASTNode::SQLBinaryExpr {
left: Box::new(ASTNode::SQLIdentifier("c1".into())),
op: SQLOperator::Eq,
@ -594,35 +578,21 @@ fn parse_joins_on() {
})),
}
}
// Test parsing of aliases
assert_eq!(
joins_from(verified("SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2")),
vec![join_with_constraint(
"t2",
Some("foo".to_string()),
JoinOperator::Inner
)]
);
parses_to(
"SELECT * FROM t1 JOIN t2 foo ON c1 = c2",
"SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2",
);
// Test parsing of different join operators
assert_eq!(
joins_from(verified("SELECT * FROM t1 JOIN t2 ON c1 = c2")),
vec![join_with_constraint("t2", None, JoinOperator::Inner)]
vec![join_with_constraint("t2", JoinOperator::Inner)]
);
assert_eq!(
joins_from(verified("SELECT * FROM t1 LEFT JOIN t2 ON c1 = c2")),
vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)]
vec![join_with_constraint("t2", JoinOperator::LeftOuter)]
);
assert_eq!(
joins_from(verified("SELECT * FROM t1 RIGHT JOIN t2 ON c1 = c2")),
vec![join_with_constraint("t2", None, JoinOperator::RightOuter)]
vec![join_with_constraint("t2", JoinOperator::RightOuter)]
);
assert_eq!(
joins_from(verified("SELECT * FROM t1 FULL JOIN t2 ON c1 = c2")),
vec![join_with_constraint("t2", None, JoinOperator::FullOuter)]
vec![join_with_constraint("t2", JoinOperator::FullOuter)]
);
}
@ -630,46 +600,29 @@ fn parse_joins_on() {
fn parse_joins_using() {
fn join_with_constraint(
relation: impl Into<String>,
alias: Option<SQLIdent>,
f: impl Fn(JoinConstraint) -> JoinOperator,
) -> Join {
Join {
relation: ASTNode::TableFactor {
relation: Box::new(ASTNode::SQLCompoundIdentifier(vec![relation.into()])),
alias,
},
relation: ASTNode::SQLIdentifier(relation.into()),
join_operator: f(JoinConstraint::Using(vec!["c1".into()])),
}
}
// Test parsing of aliases
assert_eq!(
joins_from(verified("SELECT * FROM t1 JOIN t2 AS foo USING(c1)")),
vec![join_with_constraint(
"t2",
Some("foo".to_string()),
JoinOperator::Inner
)]
);
parses_to(
"SELECT * FROM t1 JOIN t2 foo USING(c1)",
"SELECT * FROM t1 JOIN t2 AS foo USING(c1)",
);
// Test parsing of different join operators
assert_eq!(
joins_from(verified("SELECT * FROM t1 JOIN t2 USING(c1)")),
vec![join_with_constraint("t2", None, JoinOperator::Inner)]
vec![join_with_constraint("t2", JoinOperator::Inner)]
);
assert_eq!(
joins_from(verified("SELECT * FROM t1 LEFT JOIN t2 USING(c1)")),
vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)]
vec![join_with_constraint("t2", JoinOperator::LeftOuter)]
);
assert_eq!(
joins_from(verified("SELECT * FROM t1 RIGHT JOIN t2 USING(c1)")),
vec![join_with_constraint("t2", None, JoinOperator::RightOuter)]
vec![join_with_constraint("t2", JoinOperator::RightOuter)]
);
assert_eq!(
joins_from(verified("SELECT * FROM t1 FULL JOIN t2 USING(c1)")),
vec![join_with_constraint("t2", None, JoinOperator::FullOuter)]
vec![join_with_constraint("t2", JoinOperator::FullOuter)]
);
}

View file

@ -13,11 +13,20 @@ fn test_prev_index() {
let sql: &str = "SELECT version()";
let mut parser = parser(sql);
assert_eq!(parser.prev_token(), None);
assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT")));
assert_eq!(parser.next_token(), Some(Token::make_word("version", None)));
assert_eq!(parser.prev_token(), Some(Token::make_word("version", None)));
assert_eq!(parser.peek_token(), Some(Token::make_word("version", None)));
assert_eq!(parser.prev_token(), Some(Token::make_keyword("SELECT")));
assert_eq!(parser.next_token(), Some(Token::Keyword("SELECT".into())));
assert_eq!(
parser.next_token(),
Some(Token::Identifier("version".into()))
);
assert_eq!(
parser.prev_token(),
Some(Token::Identifier("version".into()))
);
assert_eq!(
parser.peek_token(),
Some(Token::Identifier("version".into()))
);
assert_eq!(parser.prev_token(), Some(Token::Keyword("SELECT".into())));
assert_eq!(parser.prev_token(), None);
}
@ -103,13 +112,6 @@ fn parse_invalid_table_name() {
assert!(ast.is_err());
}
#[test]
fn parse_no_table_name() {
    // Empty SQL text: asking for a table name must yield an error.
    let mut empty_input = parser("");
    assert!(empty_input.parse_tablename().is_err());
}
#[test]
fn parse_insert_with_columns() {
let sql = String::from("INSERT INTO public.customer (id, name, active) VALUES(1, 2, 3)");