Remove dialect-specific keyword lists (2/8)

Now populating SQLWord.keyword based on the list of globally supported
keywords.
This commit is contained in:
Nickolay Ponomarev 2019-01-30 03:59:13 +03:00
parent 9a8b6a8e64
commit f87230553e
6 changed files with 372 additions and 391 deletions

View file

@ -1,339 +1,8 @@
use dialect::Dialect;
use dialect::keywords::*;
/// Field-less dialect type; its behaviour comes from the `Dialect` implementation that follows.
pub struct AnsiSqlDialect {}
impl Dialect for AnsiSqlDialect {
/// Returns the keyword constants recognised by this dialect.
///
/// A new `Vec` is built on every call; the entries are the `&'static str`
/// constants brought in from `dialect::keywords`.
fn keywords(&self) -> Vec<&'static str> {
    vec![
        ABS,
        ALL,
        ALLOCATE,
        ALTER,
        AND,
        ANY,
        ARE,
        ARRAY,
        ARRAY_AGG,
        ARRAY_MAX_CARDINALITY,
        AS,
        ASENSITIVE,
        ASYMMETRIC,
        AT,
        ATOMIC,
        AUTHORIZATION,
        AVG,
        BEGIN,
        BEGIN_FRAME,
        BEGIN_PARTITION,
        BETWEEN,
        BIGINT,
        BINARY,
        BLOB,
        BOOLEAN,
        BOTH,
        BY,
        CALL,
        CALLED,
        CARDINALITY,
        CASCADED,
        CASE,
        CAST,
        CEIL,
        CEILING,
        CHAR,
        CHAR_LENGTH,
        CHARACTER,
        CHARACTER_LENGTH,
        CHECK,
        CLOB,
        CLOSE,
        COALESCE,
        COLLATE,
        COLLECT,
        COLUMN,
        COMMIT,
        CONDITION,
        CONNECT,
        CONSTRAINT,
        CONTAINS,
        CONVERT,
        CORR,
        CORRESPONDING,
        COUNT,
        COVAR_POP,
        COVAR_SAMP,
        CREATE,
        CROSS,
        CUBE,
        CUME_DIST,
        CURRENT,
        CURRENT_CATALOG,
        CURRENT_DATE,
        CURRENT_DEFAULT_TRANSFORM_GROUP,
        CURRENT_PATH,
        CURRENT_ROLE,
        CURRENT_ROW,
        CURRENT_SCHEMA,
        CURRENT_TIME,
        CURRENT_TIMESTAMP,
        CURRENT_TRANSFORM_GROUP_FOR_TYPE,
        CURRENT_USER,
        CURSOR,
        CYCLE,
        DATE,
        DAY,
        DEALLOCATE,
        DEC,
        DECIMAL,
        DECLARE,
        DEFAULT,
        DELETE,
        DENSE_RANK,
        DEREF,
        DESCRIBE,
        DETERMINISTIC,
        DISCONNECT,
        DISTINCT,
        DOUBLE,
        DROP,
        DYNAMIC,
        EACH,
        ELEMENT,
        ELSE,
        END,
        END_FRAME,
        END_PARTITION,
        END_EXEC,
        EQUALS,
        ESCAPE,
        EVERY,
        EXCEPT,
        EXEC,
        EXECUTE,
        EXISTS,
        EXP,
        EXTERNAL,
        EXTRACT,
        FALSE,
        FETCH,
        FILTER,
        FIRST_VALUE,
        FLOAT,
        FLOOR,
        FOR,
        FOREIGN,
        FRAME_ROW,
        FREE,
        FROM,
        FULL,
        FUNCTION,
        FUSION,
        GET,
        GLOBAL,
        GRANT,
        GROUP,
        GROUPING,
        GROUPS,
        HAVING,
        HOLD,
        HOUR,
        IDENTITY,
        IN,
        INDICATOR,
        INNER,
        INOUT,
        INSENSITIVE,
        INSERT,
        INT,
        INTEGER,
        INTERSECT,
        INTERSECTION,
        INTERVAL,
        INTO,
        IS,
        JOIN,
        LAG,
        LANGUAGE,
        LARGE,
        LAST_VALUE,
        LATERAL,
        LEAD,
        LEADING,
        LEFT,
        LIKE,
        LIKE_REGEX,
        LN,
        LOCAL,
        LOCALTIME,
        LOCALTIMESTAMP,
        LOWER,
        MATCH,
        MAX,
        MEMBER,
        MERGE,
        METHOD,
        MIN,
        MINUTE,
        MOD,
        MODIFIES,
        MODULE,
        MONTH,
        MULTISET,
        NATIONAL,
        NATURAL,
        NCHAR,
        NCLOB,
        NEW,
        NO,
        NONE,
        NORMALIZE,
        NOT,
        NTH_VALUE,
        NTILE,
        NULL,
        NULLIF,
        NUMERIC,
        OCTET_LENGTH,
        OCCURRENCES_REGEX,
        OF,
        OFFSET,
        OLD,
        ON,
        ONLY,
        OPEN,
        OR,
        ORDER,
        OUT,
        OUTER,
        OVER,
        OVERLAPS,
        OVERLAY,
        PARAMETER,
        PARTITION,
        PERCENT,
        PERCENT_RANK,
        PERCENTILE_CONT,
        PERCENTILE_DISC,
        PERIOD,
        PORTION,
        POSITION,
        POSITION_REGEX,
        POWER,
        PRECEDES,
        PRECISION,
        PREPARE,
        PRIMARY,
        PROCEDURE,
        RANGE,
        RANK,
        READS,
        REAL,
        RECURSIVE,
        REF,
        REFERENCES,
        REFERENCING,
        REGR_AVGX,
        REGR_AVGY,
        REGR_COUNT,
        REGR_INTERCEPT,
        REGR_R2,
        REGR_SLOPE,
        REGR_SXX,
        REGR_SXY,
        REGR_SYY,
        RELEASE,
        RESULT,
        RETURN,
        RETURNS,
        REVOKE,
        RIGHT,
        ROLLBACK,
        ROLLUP,
        ROW,
        ROW_NUMBER,
        ROWS,
        SAVEPOINT,
        SCOPE,
        SCROLL,
        SEARCH,
        SECOND,
        SELECT,
        SENSITIVE,
        SESSION_USER,
        SET,
        SIMILAR,
        SMALLINT,
        SOME,
        SPECIFIC,
        SPECIFICTYPE,
        SQL,
        SQLEXCEPTION,
        SQLSTATE,
        SQLWARNING,
        SQRT,
        START,
        STATIC,
        STDDEV_POP,
        STDDEV_SAMP,
        SUBMULTISET,
        SUBSTRING,
        SUBSTRING_REGEX,
        SUCCEEDS,
        SUM,
        SYMMETRIC,
        SYSTEM,
        SYSTEM_TIME,
        SYSTEM_USER,
        TABLE,
        TABLESAMPLE,
        THEN,
        TIME,
        TIMESTAMP,
        TIMEZONE_HOUR,
        TIMEZONE_MINUTE,
        TO,
        TRAILING,
        TRANSLATE,
        TRANSLATE_REGEX,
        TRANSLATION,
        TREAT,
        TRIGGER,
        TRUNCATE,
        TRIM,
        TRIM_ARRAY,
        TRUE,
        UESCAPE,
        UNION,
        UNIQUE,
        UNKNOWN,
        UNNEST,
        UPDATE,
        UPPER,
        USER,
        USING,
        VALUE,
        VALUES,
        VALUE_OF,
        VAR_POP,
        VAR_SAMP,
        VARBINARY,
        VARCHAR,
        VARYING,
        VERSIONING,
        WHEN,
        WHENEVER,
        WHERE,
        WIDTH_BUCKET,
        WINDOW,
        WITH,
        WITHIN,
        WITHOUT,
        YEAR,
    ]
}
/// An identifier may begin with any ASCII letter, `a`-`z` or `A`-`Z`.
fn is_identifier_start(&self, ch: char) -> bool {
    ch.is_ascii_alphabetic()
}

View file

@ -1,21 +1,7 @@
use dialect::Dialect;
use dialect::keywords::*;
/// Field-less dialect type; its behaviour comes from the `Dialect` implementation that follows.
pub struct GenericSqlDialect {}
impl Dialect for GenericSqlDialect {
fn keywords(&self) -> Vec<&'static str> {
return vec![
SELECT, FROM, WHERE, LIMIT, ORDER, GROUP, BY, HAVING, UNION, ALL, INSERT, INTO, UPDATE,
DELETE, IN, IS, NULL, SET, CREATE, EXTERNAL, TABLE, ASC, DESC, AND, OR, NOT, AS,
STORED, CSV, PARQUET, LOCATION, WITH, WITHOUT, HEADER, ROW, // SQL types
CHAR, CHARACTER, VARYING, LARGE, OBJECT, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT,
REAL, DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC,
BOOLEAN, DATE, TIME, TIMESTAMP, CASE, WHEN, THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL,
CROSS, OUTER, INNER, NATURAL, ON, USING, LIKE, CAST,
];
}
/// An identifier may begin with any ASCII letter or with `@`.
fn is_identifier_start(&self, ch: char) -> bool {
    ch == '@' || ch.is_ascii_alphabetic()
}

View file

@ -1,12 +1,21 @@
/// make a listing of keywords
/// with static str and their stringified value
/// This module defines a list of constants for every keyword that
/// can appear in `SQLWord::keyword`:
/// `pub const KEYWORD = "KEYWORD"`
/// and an `ALL_KEYWORDS` array with every keyword in it.
///
/// This is not a list of *reserved* keywords: some of these can be
/// parsed as identifiers if the parser decides so. This means that
/// new keywords can be added here without affecting the parse result.
///
/// As a matter of fact, most of these keywords are not used at all
/// and could be removed.
// Expands every identifier it is given into a public string item named after
// that identifier, whose value is `stringify!($ident)` - the identifier's own
// spelling.
macro_rules! keyword {
($($ident:ident),*) => {
// NOTE(review): a `static` and a `const` expansion line are both present here;
// together they would declare each name twice - confirm which one should stay.
$(pub static $ident: &'static str = stringify!($ident);)*
$(pub const $ident: &'static str = stringify!($ident);)*
}
}
/// Enumerates every keyword that any dialect in this project supports.
keyword!(
ABS,
ADD,
@ -352,4 +361,349 @@ keyword!(
);
/// Special case: the keyword text `END-EXEC` contains a hyphen, so it is not a
/// valid identifier and is spelled out here as a string literal.
// NOTE(review): both a `static` and a `const` named END_EXEC appear below -
// confirm which declaration is meant to remain.
pub static END_EXEC: &'static str = "END-EXEC";
pub const END_EXEC: &'static str = "END-EXEC";
pub const ALL_KEYWORDS: &'static [&'static str] = &[
ABS,
ADD,
ASC,
ALL,
ALLOCATE,
ALTER,
AND,
ANY,
ARE,
ARRAY,
ARRAY_AGG,
ARRAY_MAX_CARDINALITY,
AS,
ASENSITIVE,
ASYMMETRIC,
AT,
ATOMIC,
AUTHORIZATION,
AVG,
BEGIN,
BEGIN_FRAME,
BEGIN_PARTITION,
BETWEEN,
BIGINT,
BINARY,
BLOB,
BOOLEAN,
BOTH,
BY,
BYTEA,
CALL,
CALLED,
CARDINALITY,
CASCADED,
CASE,
CAST,
CEIL,
CEILING,
CHAR,
CHAR_LENGTH,
CHARACTER,
CHARACTER_LENGTH,
CHECK,
CLOB,
CLOSE,
COALESCE,
COLLATE,
COLLECT,
COLUMN,
COMMIT,
CONDITION,
CONNECT,
CONSTRAINT,
CONTAINS,
CONVERT,
COPY,
CORR,
CORRESPONDING,
COUNT,
COVAR_POP,
COVAR_SAMP,
CREATE,
CROSS,
CSV,
CUBE,
CUME_DIST,
CURRENT,
CURRENT_CATALOG,
CURRENT_DATE,
CURRENT_DEFAULT_TRANSFORM_GROUP,
CURRENT_PATH,
CURRENT_ROLE,
CURRENT_ROW,
CURRENT_SCHEMA,
CURRENT_TIME,
CURRENT_TIMESTAMP,
CURRENT_TRANSFORM_GROUP_FOR_TYPE,
CURRENT_USER,
CURSOR,
CYCLE,
DATE,
DAY,
DEALLOCATE,
DEC,
DECIMAL,
DECLARE,
DEFAULT,
DELETE,
DENSE_RANK,
DEREF,
DESC,
DESCRIBE,
DETERMINISTIC,
DISCONNECT,
DISTINCT,
DOUBLE,
DROP,
DYNAMIC,
EACH,
ELEMENT,
ELSE,
END,
END_FRAME,
END_PARTITION,
EQUALS,
ESCAPE,
EVERY,
EXCEPT,
EXEC,
EXECUTE,
EXISTS,
EXP,
EXTERNAL,
EXTRACT,
FALSE,
FETCH,
FILTER,
FIRST_VALUE,
FLOAT,
FLOOR,
FOR,
FOREIGN,
FRAME_ROW,
FREE,
FROM,
FULL,
FUNCTION,
FUSION,
GET,
GLOBAL,
GRANT,
GROUP,
GROUPING,
GROUPS,
HAVING,
HEADER,
HOLD,
HOUR,
IDENTITY,
IN,
INDICATOR,
INNER,
INOUT,
INSENSITIVE,
INSERT,
INT,
INTEGER,
INTERSECT,
INTERSECTION,
INTERVAL,
INTO,
IS,
JOIN,
KEY,
LAG,
LANGUAGE,
LARGE,
LAST_VALUE,
LATERAL,
LEAD,
LEADING,
LEFT,
LIKE,
LIKE_REGEX,
LIMIT,
LN,
LOCAL,
LOCALTIME,
LOCALTIMESTAMP,
LOCATION,
LOWER,
MATCH,
MAX,
MEMBER,
MERGE,
METHOD,
MIN,
MINUTE,
MOD,
MODIFIES,
MODULE,
MONTH,
MULTISET,
NATIONAL,
NATURAL,
NCHAR,
NCLOB,
NEW,
NO,
NONE,
NORMALIZE,
NOT,
NTH_VALUE,
NTILE,
NULL,
NULLIF,
NUMERIC,
OBJECT,
OCTET_LENGTH,
OCCURRENCES_REGEX,
OF,
OFFSET,
OLD,
ON,
ONLY,
OPEN,
OR,
ORDER,
OUT,
OUTER,
OVER,
OVERLAPS,
OVERLAY,
PARAMETER,
PARTITION,
PARQUET,
PERCENT,
PERCENT_RANK,
PERCENTILE_CONT,
PERCENTILE_DISC,
PERIOD,
PORTION,
POSITION,
POSITION_REGEX,
POWER,
PRECEDES,
PRECISION,
PREPARE,
PRIMARY,
PROCEDURE,
RANGE,
RANK,
READS,
REAL,
RECURSIVE,
REF,
REFERENCES,
REFERENCING,
REGCLASS,
REGR_AVGX,
REGR_AVGY,
REGR_COUNT,
REGR_INTERCEPT,
REGR_R2,
REGR_SLOPE,
REGR_SXX,
REGR_SXY,
REGR_SYY,
RELEASE,
RESULT,
RETURN,
RETURNS,
REVOKE,
RIGHT,
ROLLBACK,
ROLLUP,
ROW,
ROW_NUMBER,
ROWS,
SAVEPOINT,
SCOPE,
SCROLL,
SEARCH,
SECOND,
SELECT,
SENSITIVE,
SESSION_USER,
SET,
SIMILAR,
SMALLINT,
SOME,
SPECIFIC,
SPECIFICTYPE,
SQL,
SQLEXCEPTION,
SQLSTATE,
SQLWARNING,
SQRT,
START,
STATIC,
STDDEV_POP,
STDDEV_SAMP,
STDIN,
STORED,
SUBMULTISET,
SUBSTRING,
SUBSTRING_REGEX,
SUCCEEDS,
SUM,
SYMMETRIC,
SYSTEM,
SYSTEM_TIME,
SYSTEM_USER,
TABLE,
TABLESAMPLE,
TEXT,
THEN,
TIME,
TIMESTAMP,
TIMEZONE_HOUR,
TIMEZONE_MINUTE,
TO,
TRAILING,
TRANSLATE,
TRANSLATE_REGEX,
TRANSLATION,
TREAT,
TRIGGER,
TRUNCATE,
TRIM,
TRIM_ARRAY,
TRUE,
UESCAPE,
UNION,
UNIQUE,
UNKNOWN,
UNNEST,
UPDATE,
UPPER,
USER,
USING,
UUID,
VALUE,
VALUES,
VALUE_OF,
VAR_POP,
VAR_SAMP,
VARBINARY,
VARCHAR,
VARYING,
VERSIONING,
WHEN,
WHENEVER,
WHERE,
WIDTH_BUCKET,
WINDOW,
WITH,
WITHIN,
WITHOUT,
YEAR,
ZONE,
END_EXEC,
];

View file

@ -8,8 +8,6 @@ pub use self::generic_sql::GenericSqlDialect;
pub use self::postgresql::PostgreSqlDialect;
pub trait Dialect {
/// Get a list of keywords for this dialect
fn keywords(&self) -> Vec<&'static str>;
/// Determine if a character is a valid identifier start character
fn is_identifier_start(&self, ch: char) -> bool;
/// Determine if a character is a valid identifier character

View file

@ -1,24 +1,8 @@
use dialect::Dialect;
use dialect::keywords::*;
/// Field-less dialect type; its behaviour comes from the `Dialect` implementation that follows.
pub struct PostgreSqlDialect {}
impl Dialect for PostgreSqlDialect {
fn keywords(&self) -> Vec<&'static str> {
return vec![
ALTER, ONLY, SELECT, FROM, WHERE, LIMIT, ORDER, GROUP, BY, HAVING, UNION, ALL, INSERT,
INTO, UPDATE, DELETE, IN, IS, NULL, SET, CREATE, EXTERNAL, TABLE, ASC, DESC, AND, OR,
NOT, AS, STORED, CSV, WITH, WITHOUT, ROW, // SQL types
CHAR, CHARACTER, VARYING, LARGE, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT, REAL,
DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, BOOLEAN,
DATE, TIME, TIMESTAMP, VALUES, DEFAULT, ZONE, REGCLASS, TEXT, BYTEA, TRUE, FALSE, COPY,
STDIN, PRIMARY, KEY, UNIQUE, UUID, ADD, CONSTRAINT, FOREIGN, REFERENCES, CASE, WHEN,
THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, CROSS, OUTER, INNER, NATURAL, ON, USING,
LIKE, CAST,
];
}
/// An identifier may begin with any ASCII letter or with `@`.
fn is_identifier_start(&self, ch: char) -> bool {
    ch == '@' || ch.is_ascii_alphabetic()
}

View file

@ -21,6 +21,7 @@
use std::iter::Peekable;
use std::str::Chars;
use super::dialect::keywords::ALL_KEYWORDS;
use super::dialect::Dialect;
/// SQL Token enumeration
@ -124,17 +125,22 @@ impl ToString for Token {
// Constructors for the `Token::SQLWord` variant.
impl Token {
/// Builds a `Token::SQLWord` from `keyword`, with no quote style.
// NOTE(review): the body below contains two consecutive tail expressions - an
// inline `Token::SQLWord` construction and a delegation to `make_word` - which
// reads like both sides of a change; confirm which one is meant to remain.
pub fn make_keyword(keyword: &str) -> Self {
Token::SQLWord(SQLWord {
value: keyword.to_string(),
quote_style: None,
keyword: keyword.to_uppercase().to_string(),
})
Token::make_word(keyword, None)
}
/// Builds a `Token::SQLWord` whose `value` is `word` unchanged and whose
/// `quote_style` is the one supplied.
///
/// The word is upper-cased and looked up in `ALL_KEYWORDS`; it counts as a
/// keyword only when `quote_style` is `None` and the upper-cased form is found.
pub fn make_word(word: &str, quote_style: Option<char>) -> Self {
let word_uppercase = word.to_uppercase();
//TODO: need to reintroduce FnvHashSet at some point .. iterating over keywords is
// not fast but I want the simplicity for now while I experiment with pluggable
// dialects
let is_keyword = quote_style == None && ALL_KEYWORDS.contains(&word_uppercase.as_str());
Token::SQLWord(SQLWord {
value: word.to_string(),
quote_style: quote_style,
// NOTE(review): `keyword` is initialised twice below (once with an empty string,
// once conditionally); confirm which initialiser is meant to remain.
keyword: "".to_string(),
// Upper-cased word when it is a keyword, empty string otherwise.
keyword: if is_keyword {
word_uppercase.to_string()
} else {
"".to_string()
},
})
}
}
@ -205,13 +211,6 @@ impl<'a> Tokenizer<'a> {
}
}
/// Reports whether `s` occurs in the keyword list returned by the tokenizer's dialect.
fn is_keyword(&self, s: &str) -> bool {
    // TODO: reintroduce FnvHashSet at some point. Scanning the keyword list is
    // not fast, but it keeps things simple while pluggable dialects are being
    // experimented with.
    self.dialect.keywords().contains(&s)
}
/// Tokenize the statement and produce a vector of tokens
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
let mut peekable = self.query.chars().peekable();
@ -268,16 +267,7 @@ impl<'a> Tokenizer<'a> {
break;
}
}
let upper_str = s.to_uppercase();
if self.is_keyword(upper_str.as_str()) {
Ok(Some(Token::SQLWord(SQLWord {
value: s,
quote_style: None,
keyword: upper_str,
})))
} else {
Ok(Some(Token::make_word(&s, None)))
}
Ok(Some(Token::make_word(&s, None)))
}
// string
'\'' => {