Merge pull request #43 from nickolay/master

This commit is contained in:
Andy Grove 2019-04-02 21:27:30 -06:00 committed by GitHub
commit d1b5668fd3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 2665 additions and 1569 deletions

View file

@ -1,7 +1,7 @@
[package] [package]
name = "sqlparser" name = "sqlparser"
description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011"
version = "0.2.5-alpha.0" version = "0.3.0"
authors = ["Andy Grove <andygrove73@gmail.com>"] authors = ["Andy Grove <andygrove73@gmail.com>"]
homepage = "https://github.com/andygrove/sqlparser-rs" homepage = "https://github.com/andygrove/sqlparser-rs"
documentation = "https://docs.rs/sqlparser/" documentation = "https://docs.rs/sqlparser/"
@ -21,3 +21,6 @@ path = "src/lib.rs"
log = "0.4.5" log = "0.4.5"
chrono = "0.4.6" chrono = "0.4.6"
uuid = "0.7.1" uuid = "0.7.1"
[dev-dependencies]
simple_logger = "1.0.1"

View file

@ -30,7 +30,7 @@ println!("AST: {:?}", ast);
This outputs This outputs
```rust ```rust
AST: SQLSelect { projection: [SQLIdentifier("a"), SQLIdentifier("b"), SQLLiteralLong(123), SQLFunction { id: "myfunc", args: [SQLIdentifier("b")] }], relation: Some(SQLIdentifier("table_1")), selection: Some(SQLBinaryExpr { left: SQLBinaryExpr { left: SQLIdentifier("a"), op: Gt, right: SQLIdentifier("b") }, op: And, right: SQLBinaryExpr { left: SQLIdentifier("b"), op: Lt, right: SQLLiteralLong(100) } }), order_by: Some([SQLOrderBy { expr: SQLIdentifier("a"), asc: false }, SQLOrderBy { expr: SQLIdentifier("b"), asc: true }]), group_by: None, having: None, limit: None } AST: [SQLSelect(SQLSelect { projection: [SQLIdentifier("a"), SQLIdentifier("b"), SQLValue(Long(123)), SQLFunction { id: "myfunc", args: [SQLIdentifier("b")] }], relation: Some(Table { name: SQLObjectName(["table_1"]), alias: None }), joins: [], selection: Some(SQLBinaryExpr { left: SQLBinaryExpr { left: SQLIdentifier("a"), op: Gt, right: SQLIdentifier("b") }, op: And, right: SQLBinaryExpr { left: SQLIdentifier("b"), op: Lt, right: SQLValue(Long(100)) } }), order_by: Some([SQLOrderByExpr { expr: SQLIdentifier("a"), asc: Some(false) }, SQLOrderByExpr { expr: SQLIdentifier("b"), asc: None }]), group_by: None, having: None, limit: None })]
``` ```
## Design ## Design

46
examples/cli.rs Normal file
View file

@ -0,0 +1,46 @@
extern crate simple_logger;
extern crate sqlparser;
/// A small command-line app to run the parser.
/// Run with `cargo run --example cli`
use std::fs;
use sqlparser::dialect::GenericSqlDialect;
use sqlparser::sqlparser::Parser;
fn main() {
    simple_logger::init().unwrap();

    // First CLI argument is the SQL file to parse.
    let filename = std::env::args()
        .nth(1)
        .expect("No arguments provided!\n\nUsage: cargo run --example cli FILENAME.sql");

    // `unwrap_or_else` formats the panic message only on the error path;
    // the previous `.expect(&format!(..))` allocated it on every call.
    let contents = fs::read_to_string(&filename)
        .unwrap_or_else(|e| panic!("Unable to read the file {}: {:?}", &filename, e));

    // Skip a leading UTF-8 byte-order mark (U+FEFF) if present. Checking
    // `chars().next()` against `Some(..)` also avoids the panic the old
    // `chars().nth(0).unwrap()` hit on an empty input file.
    let without_bom = if contents.chars().next() == Some('\u{feff}') {
        let mut chars = contents.chars();
        chars.next();
        chars.as_str()
    } else {
        contents.as_str()
    };

    println!("Input:\n'{}'", &without_bom);

    match Parser::parse_sql(&GenericSqlDialect {}, without_bom.to_owned()) {
        Ok(statements) => {
            // Print the statements serialized back to SQL, then the raw AST.
            println!(
                "Round-trip:\n'{}'",
                statements
                    .iter()
                    .map(|s| s.to_string())
                    .collect::<Vec<_>>()
                    .join("\n")
            );
            println!("Parse results:\n{:#?}", statements);
            std::process::exit(0);
        }
        Err(e) => {
            println!("Error during parsing: {:?}", e);
            std::process::exit(1);
        }
    }
}

View file

@ -1,339 +1,8 @@
use dialect::Dialect; use dialect::Dialect;
use dialect::keywords::*;
pub struct AnsiSqlDialect {} pub struct AnsiSqlDialect {}
impl Dialect for AnsiSqlDialect { impl Dialect for AnsiSqlDialect {
fn keywords(&self) -> Vec<&'static str> {
return vec![
ABS,
ALL,
ALLOCATE,
ALTER,
AND,
ANY,
ARE,
ARRAY,
ARRAY_AGG,
ARRAY_MAX_CARDINALITY,
AS,
ASENSITIVE,
ASYMMETRIC,
AT,
ATOMIC,
AUTHORIZATION,
AVG,
BEGIN,
BEGIN_FRAME,
BEGIN_PARTITION,
BETWEEN,
BIGINT,
BINARY,
BLOB,
BOOLEAN,
BOTH,
BY,
CALL,
CALLED,
CARDINALITY,
CASCADED,
CASE,
CAST,
CEIL,
CEILING,
CHAR,
CHAR_LENGTH,
CHARACTER,
CHARACTER_LENGTH,
CHECK,
CLOB,
CLOSE,
COALESCE,
COLLATE,
COLLECT,
COLUMN,
COMMIT,
CONDITION,
CONNECT,
CONSTRAINT,
CONTAINS,
CONVERT,
CORR,
CORRESPONDING,
COUNT,
COVAR_POP,
COVAR_SAMP,
CREATE,
CROSS,
CUBE,
CUME_DIST,
CURRENT,
CURRENT_CATALOG,
CURRENT_DATE,
CURRENT_DEFAULT_TRANSFORM_GROUP,
CURRENT_PATH,
CURRENT_ROLE,
CURRENT_ROW,
CURRENT_SCHEMA,
CURRENT_TIME,
CURRENT_TIMESTAMP,
CURRENT_TRANSFORM_GROUP_FOR_TYPE,
CURRENT_USER,
CURSOR,
CYCLE,
DATE,
DAY,
DEALLOCATE,
DEC,
DECIMAL,
DECLARE,
DEFAULT,
DELETE,
DENSE_RANK,
DEREF,
DESCRIBE,
DETERMINISTIC,
DISCONNECT,
DISTINCT,
DOUBLE,
DROP,
DYNAMIC,
EACH,
ELEMENT,
ELSE,
END,
END_FRAME,
END_PARTITION,
END_EXEC,
EQUALS,
ESCAPE,
EVERY,
EXCEPT,
EXEC,
EXECUTE,
EXISTS,
EXP,
EXTERNAL,
EXTRACT,
FALSE,
FETCH,
FILTER,
FIRST_VALUE,
FLOAT,
FLOOR,
FOR,
FOREIGN,
FRAME_ROW,
FREE,
FROM,
FULL,
FUNCTION,
FUSION,
GET,
GLOBAL,
GRANT,
GROUP,
GROUPING,
GROUPS,
HAVING,
HOLD,
HOUR,
IDENTITY,
IN,
INDICATOR,
INNER,
INOUT,
INSENSITIVE,
INSERT,
INT,
INTEGER,
INTERSECT,
INTERSECTION,
INTERVAL,
INTO,
IS,
JOIN,
LAG,
LANGUAGE,
LARGE,
LAST_VALUE,
LATERAL,
LEAD,
LEADING,
LEFT,
LIKE,
LIKE_REGEX,
LN,
LOCAL,
LOCALTIME,
LOCALTIMESTAMP,
LOWER,
MATCH,
MAX,
MEMBER,
MERGE,
METHOD,
MIN,
MINUTE,
MOD,
MODIFIES,
MODULE,
MONTH,
MULTISET,
NATIONAL,
NATURAL,
NCHAR,
NCLOB,
NEW,
NO,
NONE,
NORMALIZE,
NOT,
NTH_VALUE,
NTILE,
NULL,
NULLIF,
NUMERIC,
OCTET_LENGTH,
OCCURRENCES_REGEX,
OF,
OFFSET,
OLD,
ON,
ONLY,
OPEN,
OR,
ORDER,
OUT,
OUTER,
OVER,
OVERLAPS,
OVERLAY,
PARAMETER,
PARTITION,
PERCENT,
PERCENT_RANK,
PERCENTILE_CONT,
PERCENTILE_DISC,
PERIOD,
PORTION,
POSITION,
POSITION_REGEX,
POWER,
PRECEDES,
PRECISION,
PREPARE,
PRIMARY,
PROCEDURE,
RANGE,
RANK,
READS,
REAL,
RECURSIVE,
REF,
REFERENCES,
REFERENCING,
REGR_AVGX,
REGR_AVGY,
REGR_COUNT,
REGR_INTERCEPT,
REGR_R2,
REGR_SLOPE,
REGR_SXX,
REGR_SXY,
REGR_SYY,
RELEASE,
RESULT,
RETURN,
RETURNS,
REVOKE,
RIGHT,
ROLLBACK,
ROLLUP,
ROW,
ROW_NUMBER,
ROWS,
SAVEPOINT,
SCOPE,
SCROLL,
SEARCH,
SECOND,
SELECT,
SENSITIVE,
SESSION_USER,
SET,
SIMILAR,
SMALLINT,
SOME,
SPECIFIC,
SPECIFICTYPE,
SQL,
SQLEXCEPTION,
SQLSTATE,
SQLWARNING,
SQRT,
START,
STATIC,
STDDEV_POP,
STDDEV_SAMP,
SUBMULTISET,
SUBSTRING,
SUBSTRING_REGEX,
SUCCEEDS,
SUM,
SYMMETRIC,
SYSTEM,
SYSTEM_TIME,
SYSTEM_USER,
TABLE,
TABLESAMPLE,
THEN,
TIME,
TIMESTAMP,
TIMEZONE_HOUR,
TIMEZONE_MINUTE,
TO,
TRAILING,
TRANSLATE,
TRANSLATE_REGEX,
TRANSLATION,
TREAT,
TRIGGER,
TRUNCATE,
TRIM,
TRIM_ARRAY,
TRUE,
UESCAPE,
UNION,
UNIQUE,
UNKNOWN,
UNNEST,
UPDATE,
UPPER,
USER,
USING,
VALUE,
VALUES,
VALUE_OF,
VAR_POP,
VAR_SAMP,
VARBINARY,
VARCHAR,
VARYING,
VERSIONING,
WHEN,
WHENEVER,
WHERE,
WIDTH_BUCKET,
WINDOW,
WITH,
WITHIN,
WITHOUT,
YEAR,
];
}
fn is_identifier_start(&self, ch: char) -> bool { fn is_identifier_start(&self, ch: char) -> bool {
(ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
} }

View file

@ -1,21 +1,7 @@
use dialect::Dialect; use dialect::Dialect;
use dialect::keywords::*;
pub struct GenericSqlDialect {} pub struct GenericSqlDialect {}
impl Dialect for GenericSqlDialect { impl Dialect for GenericSqlDialect {
fn keywords(&self) -> Vec<&'static str> {
return vec![
SELECT, FROM, WHERE, LIMIT, ORDER, GROUP, BY, HAVING, UNION, ALL, INSERT, INTO, UPDATE,
DELETE, IN, IS, NULL, SET, CREATE, EXTERNAL, TABLE, ASC, DESC, AND, OR, NOT, AS,
STORED, CSV, PARQUET, LOCATION, WITH, WITHOUT, HEADER, ROW, // SQL types
CHAR, CHARACTER, VARYING, LARGE, OBJECT, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT,
REAL, DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC,
BOOLEAN, DATE, TIME, TIMESTAMP, CASE, WHEN, THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL,
CROSS, OUTER, INNER, NATURAL, ON, USING, LIKE,
];
}
fn is_identifier_start(&self, ch: char) -> bool { fn is_identifier_start(&self, ch: char) -> bool {
(ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '@' (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '@'
} }

View file

@ -1,12 +1,23 @@
/// make a listing of keywords ///! This module defines
/// with static str and their stringified value /// 1) a list of constants for every keyword that
/// can appear in SQLWord::keyword:
/// pub const KEYWORD = "KEYWORD"
/// 2) an `ALL_KEYWORDS` array with every keyword in it
/// This is not a list of *reserved* keywords: some of these can be
/// parsed as identifiers if the parser decides so. This means that
/// new keywords can be added here without affecting the parse result.
///
/// As a matter of fact, most of these keywords are not used at all
/// and could be removed.
/// 3) a `RESERVED_FOR_TABLE_ALIAS` array with keywords reserved in a
/// "table alias" context.
macro_rules! keyword { macro_rules! keyword {
($($ident:ident),*) => { ($($ident:ident),*) => {
$(pub static $ident: &'static str = stringify!($ident);)* $(pub const $ident: &'static str = stringify!($ident);)*
} }
} }
/// enumerate all the keywords here for all dialects to support in this project
keyword!( keyword!(
ABS, ABS,
ADD, ADD,
@ -180,6 +191,7 @@ keyword!(
LOCATION, LOCATION,
LOWER, LOWER,
MATCH, MATCH,
MATERIALIZED,
MAX, MAX,
MEMBER, MEMBER,
MERGE, MERGE,
@ -339,6 +351,7 @@ keyword!(
VARCHAR, VARCHAR,
VARYING, VARYING,
VERSIONING, VERSIONING,
VIEW,
WHEN, WHEN,
WHENEVER, WHENEVER,
WHERE, WHERE,
@ -352,4 +365,369 @@ keyword!(
); );
/// special case of keyword where the it is an invalid identifier /// special case of keyword where the it is an invalid identifier
pub static END_EXEC: &'static str = "END-EXEC"; pub const END_EXEC: &'static str = "END-EXEC";
/// Every keyword constant defined above, in one array (see item 2 of the
/// module docs). NOTE(review): this list appears to duplicate the arguments
/// of the `keyword!` invocation above and must be kept in sync with it by
/// hand — confirm every constant referenced here is also declared there.
pub const ALL_KEYWORDS: &'static [&'static str] = &[
    ABS,
    ADD,
    ASC,
    ALL,
    ALLOCATE,
    ALTER,
    AND,
    ANY,
    ARE,
    ARRAY,
    ARRAY_AGG,
    ARRAY_MAX_CARDINALITY,
    AS,
    ASENSITIVE,
    ASYMMETRIC,
    AT,
    ATOMIC,
    AUTHORIZATION,
    AVG,
    BEGIN,
    BEGIN_FRAME,
    BEGIN_PARTITION,
    BETWEEN,
    BIGINT,
    BINARY,
    BLOB,
    BOOLEAN,
    BOTH,
    BY,
    BYTEA,
    CALL,
    CALLED,
    CARDINALITY,
    CASCADED,
    CASE,
    CAST,
    CEIL,
    CEILING,
    CHAR,
    CHAR_LENGTH,
    CHARACTER,
    CHARACTER_LENGTH,
    CHECK,
    CLOB,
    CLOSE,
    COALESCE,
    COLLATE,
    COLLECT,
    COLUMN,
    COMMIT,
    CONDITION,
    CONNECT,
    CONSTRAINT,
    CONTAINS,
    CONVERT,
    COPY,
    CORR,
    CORRESPONDING,
    COUNT,
    COVAR_POP,
    COVAR_SAMP,
    CREATE,
    CROSS,
    CSV,
    CUBE,
    CUME_DIST,
    CURRENT,
    CURRENT_CATALOG,
    CURRENT_DATE,
    CURRENT_DEFAULT_TRANSFORM_GROUP,
    CURRENT_PATH,
    CURRENT_ROLE,
    CURRENT_ROW,
    CURRENT_SCHEMA,
    CURRENT_TIME,
    CURRENT_TIMESTAMP,
    CURRENT_TRANSFORM_GROUP_FOR_TYPE,
    CURRENT_USER,
    CURSOR,
    CYCLE,
    DATE,
    DAY,
    DEALLOCATE,
    DEC,
    DECIMAL,
    DECLARE,
    DEFAULT,
    DELETE,
    DENSE_RANK,
    DEREF,
    DESC,
    DESCRIBE,
    DETERMINISTIC,
    DISCONNECT,
    DISTINCT,
    DOUBLE,
    DROP,
    DYNAMIC,
    EACH,
    ELEMENT,
    ELSE,
    END,
    END_FRAME,
    END_PARTITION,
    EQUALS,
    ESCAPE,
    EVERY,
    EXCEPT,
    EXEC,
    EXECUTE,
    EXISTS,
    EXP,
    EXTERNAL,
    EXTRACT,
    FALSE,
    FETCH,
    FILTER,
    FIRST_VALUE,
    FLOAT,
    FLOOR,
    FOR,
    FOREIGN,
    FRAME_ROW,
    FREE,
    FROM,
    FULL,
    FUNCTION,
    FUSION,
    GET,
    GLOBAL,
    GRANT,
    GROUP,
    GROUPING,
    GROUPS,
    HAVING,
    HEADER,
    HOLD,
    HOUR,
    IDENTITY,
    IN,
    INDICATOR,
    INNER,
    INOUT,
    INSENSITIVE,
    INSERT,
    INT,
    INTEGER,
    INTERSECT,
    INTERSECTION,
    INTERVAL,
    INTO,
    IS,
    JOIN,
    KEY,
    LAG,
    LANGUAGE,
    LARGE,
    LAST_VALUE,
    LATERAL,
    LEAD,
    LEADING,
    LEFT,
    LIKE,
    LIKE_REGEX,
    LIMIT,
    LN,
    LOCAL,
    LOCALTIME,
    LOCALTIMESTAMP,
    LOCATION,
    LOWER,
    MATCH,
    MATERIALIZED,
    MAX,
    MEMBER,
    MERGE,
    METHOD,
    MIN,
    MINUTE,
    MOD,
    MODIFIES,
    MODULE,
    MONTH,
    MULTISET,
    NATIONAL,
    NATURAL,
    NCHAR,
    NCLOB,
    NEW,
    NO,
    NONE,
    NORMALIZE,
    NOT,
    NTH_VALUE,
    NTILE,
    NULL,
    NULLIF,
    NUMERIC,
    OBJECT,
    OCTET_LENGTH,
    OCCURRENCES_REGEX,
    OF,
    OFFSET,
    OLD,
    ON,
    ONLY,
    OPEN,
    OR,
    ORDER,
    OUT,
    OUTER,
    OVER,
    OVERLAPS,
    OVERLAY,
    PARAMETER,
    PARTITION,
    PARQUET,
    PERCENT,
    PERCENT_RANK,
    PERCENTILE_CONT,
    PERCENTILE_DISC,
    PERIOD,
    PORTION,
    POSITION,
    POSITION_REGEX,
    POWER,
    PRECEDES,
    PRECISION,
    PREPARE,
    PRIMARY,
    PROCEDURE,
    RANGE,
    RANK,
    READS,
    REAL,
    RECURSIVE,
    REF,
    REFERENCES,
    REFERENCING,
    REGCLASS,
    REGR_AVGX,
    REGR_AVGY,
    REGR_COUNT,
    REGR_INTERCEPT,
    REGR_R2,
    REGR_SLOPE,
    REGR_SXX,
    REGR_SXY,
    REGR_SYY,
    RELEASE,
    RESULT,
    RETURN,
    RETURNS,
    REVOKE,
    RIGHT,
    ROLLBACK,
    ROLLUP,
    ROW,
    ROW_NUMBER,
    ROWS,
    SAVEPOINT,
    SCOPE,
    SCROLL,
    SEARCH,
    SECOND,
    SELECT,
    SENSITIVE,
    SESSION_USER,
    SET,
    SIMILAR,
    SMALLINT,
    SOME,
    SPECIFIC,
    SPECIFICTYPE,
    SQL,
    SQLEXCEPTION,
    SQLSTATE,
    SQLWARNING,
    SQRT,
    START,
    STATIC,
    STDDEV_POP,
    STDDEV_SAMP,
    STDIN,
    STORED,
    SUBMULTISET,
    SUBSTRING,
    SUBSTRING_REGEX,
    SUCCEEDS,
    SUM,
    SYMMETRIC,
    SYSTEM,
    SYSTEM_TIME,
    SYSTEM_USER,
    TABLE,
    TABLESAMPLE,
    TEXT,
    THEN,
    TIME,
    TIMESTAMP,
    TIMEZONE_HOUR,
    TIMEZONE_MINUTE,
    TO,
    TRAILING,
    TRANSLATE,
    TRANSLATE_REGEX,
    TRANSLATION,
    TREAT,
    TRIGGER,
    TRUNCATE,
    TRIM,
    TRIM_ARRAY,
    TRUE,
    UESCAPE,
    UNION,
    UNIQUE,
    UNKNOWN,
    UNNEST,
    UPDATE,
    UPPER,
    USER,
    USING,
    UUID,
    VALUE,
    VALUES,
    VALUE_OF,
    VAR_POP,
    VAR_SAMP,
    VARBINARY,
    VARCHAR,
    VARYING,
    VERSIONING,
    VIEW,
    WHEN,
    WHENEVER,
    WHERE,
    WIDTH_BUCKET,
    WINDOW,
    WITH,
    WITHIN,
    WITHOUT,
    YEAR,
    ZONE,
    END_EXEC,
];
/// Keywords that can't be used as a table alias, so that `FROM table_name alias`
/// can be parsed unambiguously without looking ahead.
pub const RESERVED_FOR_TABLE_ALIAS: &'static [&'static str] = &[
    // Reserved as both a table and a column alias:
    WITH, SELECT, WHERE, GROUP, ORDER, UNION, EXCEPT, INTERSECT,
    // Reserved only as a table alias in the `FROM`/`JOIN` clauses:
    ON, JOIN, INNER, CROSS, FULL, LEFT, RIGHT, NATURAL, USING,
];
/// Keywords that can't be used as a column alias, so that `SELECT <expr> alias`
/// can be parsed unambiguously without looking ahead.
pub const RESERVED_FOR_COLUMN_ALIAS: &'static [&'static str] = &[
    // Reserved as both a table and a column alias:
    WITH, SELECT, WHERE, GROUP, ORDER, UNION, EXCEPT, INTERSECT,
    // Reserved only as a column alias in the `SELECT` clause:
    FROM,
];

View file

@ -8,10 +8,16 @@ pub use self::generic_sql::GenericSqlDialect;
pub use self::postgresql::PostgreSqlDialect; pub use self::postgresql::PostgreSqlDialect;
pub trait Dialect { pub trait Dialect {
/// Get a list of keywords for this dialect /// Determine if a character starts a quoted identifier. The default
fn keywords(&self) -> Vec<&'static str>; /// implementation, accepting "double quoted" ids is both ANSI-compliant
/// Determine if a character is a valid identifier start character /// and appropriate for most dialects (with the notable exception of
/// MySQL, MS SQL, and sqlite). You can accept one of characters listed
/// in `SQLWord::matching_end_quote()` here
fn is_delimited_identifier_start(&self, ch: char) -> bool {
ch == '"'
}
/// Determine if a character is a valid start character for an unquoted identifier
fn is_identifier_start(&self, ch: char) -> bool; fn is_identifier_start(&self, ch: char) -> bool;
/// Determine if a character is a valid identifier character /// Determine if a character is a valid unquoted identifier character
fn is_identifier_part(&self, ch: char) -> bool; fn is_identifier_part(&self, ch: char) -> bool;
} }

View file

@ -1,24 +1,8 @@
use dialect::Dialect; use dialect::Dialect;
use dialect::keywords::*;
pub struct PostgreSqlDialect {} pub struct PostgreSqlDialect {}
impl Dialect for PostgreSqlDialect { impl Dialect for PostgreSqlDialect {
fn keywords(&self) -> Vec<&'static str> {
return vec![
ALTER, ONLY, SELECT, FROM, WHERE, LIMIT, ORDER, GROUP, BY, HAVING, UNION, ALL, INSERT,
INTO, UPDATE, DELETE, IN, IS, NULL, SET, CREATE, EXTERNAL, TABLE, ASC, DESC, AND, OR,
NOT, AS, STORED, CSV, WITH, WITHOUT, ROW, // SQL types
CHAR, CHARACTER, VARYING, LARGE, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT, REAL,
DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, BOOLEAN,
DATE, TIME, TIMESTAMP, VALUES, DEFAULT, ZONE, REGCLASS, TEXT, BYTEA, TRUE, FALSE, COPY,
STDIN, PRIMARY, KEY, UNIQUE, UUID, ADD, CONSTRAINT, FOREIGN, REFERENCES, CASE, WHEN,
THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, CROSS, OUTER, INNER, NATURAL, ON, USING,
LIKE,
];
}
fn is_identifier_start(&self, ch: char) -> bool { fn is_identifier_start(&self, ch: char) -> bool {
(ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '@' (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '@'
} }

View file

@ -14,32 +14,63 @@
//! SQL Abstract Syntax Tree (AST) types //! SQL Abstract Syntax Tree (AST) types
mod query;
mod sql_operator; mod sql_operator;
mod sqltype; mod sqltype;
mod table_key; mod table_key;
mod value; mod value;
pub use self::query::{
Cte, Join, JoinConstraint, JoinOperator, SQLOrderByExpr, SQLQuery, SQLSelect, SQLSelectItem,
SQLSetExpr, SQLSetOperator, TableFactor,
};
pub use self::sqltype::SQLType; pub use self::sqltype::SQLType;
pub use self::table_key::{AlterOperation, Key, TableKey}; pub use self::table_key::{AlterOperation, Key, TableKey};
pub use self::value::Value; pub use self::value::Value;
pub use self::sql_operator::SQLOperator; pub use self::sql_operator::SQLOperator;
/// SQL Abstract Syntax Tree (AST) /// Identifier name, in the originally quoted form (e.g. `"id"`)
pub type SQLIdent = String;
/// Represents a parsed SQL expression, which is a common building
/// block of SQL statements (the part after SELECT, WHERE, etc.)
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum ASTNode { pub enum ASTNode {
/// Identifier e.g. table name or column name /// Identifier e.g. table name or column name
SQLIdentifier(String), SQLIdentifier(SQLIdent),
/// Wildcard e.g. `*` /// Unqualified wildcard (`*`). SQL allows this in limited contexts (such as right
/// after `SELECT` or as part of an aggregate function, e.g. `COUNT(*)`, but we
/// currently accept it in contexts where it doesn't make sense, such as `* + *`
SQLWildcard, SQLWildcard,
/// Qualified wildcard, e.g. `alias.*` or `schema.table.*`.
/// (Same caveats apply to SQLQualifiedWildcard as to SQLWildcard.)
SQLQualifiedWildcard(Vec<SQLIdent>),
/// Multi part identifier e.g. `myschema.dbo.mytable` /// Multi part identifier e.g. `myschema.dbo.mytable`
SQLCompoundIdentifier(Vec<String>), SQLCompoundIdentifier(Vec<SQLIdent>),
/// Assigment e.g. `name = 'Fred'` in an UPDATE statement
SQLAssignment(SQLAssignment),
/// `IS NULL` expression /// `IS NULL` expression
SQLIsNull(Box<ASTNode>), SQLIsNull(Box<ASTNode>),
/// `IS NOT NULL` expression /// `IS NOT NULL` expression
SQLIsNotNull(Box<ASTNode>), SQLIsNotNull(Box<ASTNode>),
/// `[ NOT ] IN (val1, val2, ...)`
SQLInList {
expr: Box<ASTNode>,
list: Vec<ASTNode>,
negated: bool,
},
/// `[ NOT ] IN (SELECT ...)`
SQLInSubquery {
expr: Box<ASTNode>,
subquery: Box<SQLQuery>,
negated: bool,
},
/// <expr> [ NOT ] BETWEEN <low> AND <high>
SQLBetween {
expr: Box<ASTNode>,
negated: bool,
low: Box<ASTNode>,
high: Box<ASTNode>,
},
/// Binary expression e.g. `1 + 1` or `foo > bar` /// Binary expression e.g. `1 + 1` or `foo > bar`
SQLBinaryExpr { SQLBinaryExpr {
left: Box<ASTNode>, left: Box<ASTNode>,
@ -61,7 +92,8 @@ pub enum ASTNode {
/// SQLValue /// SQLValue
SQLValue(Value), SQLValue(Value),
/// Scalar function call e.g. `LEFT(foo, 5)` /// Scalar function call e.g. `LEFT(foo, 5)`
SQLFunction { id: String, args: Vec<ASTNode> }, /// TODO: this can be a compound SQLObjectName as well (for UDFs)
SQLFunction { id: SQLIdent, args: Vec<ASTNode> },
/// CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END /// CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END
SQLCase { SQLCase {
// TODO: support optional operand for "simple case" // TODO: support optional operand for "simple case"
@ -69,71 +101,9 @@ pub enum ASTNode {
results: Vec<ASTNode>, results: Vec<ASTNode>,
else_result: Option<Box<ASTNode>>, else_result: Option<Box<ASTNode>>,
}, },
/// SELECT /// A parenthesized subquery `(SELECT ...)`, used in expression like
SQLSelect { /// `SELECT (subquery) AS x` or `WHERE (subquery) = x`
/// projection expressions SQLSubquery(Box<SQLQuery>),
projection: Vec<ASTNode>,
/// FROM
relation: Option<Box<ASTNode>>,
// JOIN
joins: Vec<Join>,
/// WHERE
selection: Option<Box<ASTNode>>,
/// ORDER BY
order_by: Option<Vec<SQLOrderByExpr>>,
/// GROUP BY
group_by: Option<Vec<ASTNode>>,
/// HAVING
having: Option<Box<ASTNode>>,
/// LIMIT
limit: Option<Box<ASTNode>>,
},
/// INSERT
SQLInsert {
/// TABLE
table_name: String,
/// COLUMNS
columns: Vec<String>,
/// VALUES (vector of rows to insert)
values: Vec<Vec<ASTNode>>,
},
SQLCopy {
/// TABLE
table_name: String,
/// COLUMNS
columns: Vec<String>,
/// VALUES a vector of values to be copied
values: Vec<Option<String>>,
},
/// UPDATE
SQLUpdate {
/// TABLE
table_name: String,
/// Column assignments
assignments: Vec<SQLAssignment>,
/// WHERE
selection: Option<Box<ASTNode>>,
},
/// DELETE
SQLDelete {
/// FROM
relation: Option<Box<ASTNode>>,
/// WHERE
selection: Option<Box<ASTNode>>,
},
/// CREATE TABLE
SQLCreateTable {
/// Table name
name: String,
/// Optional schema
columns: Vec<SQLColumnDef>,
},
/// ALTER TABLE
SQLAlterTable {
/// Table name
name: String,
operation: AlterOperation,
},
} }
impl ToString for ASTNode { impl ToString for ASTNode {
@ -141,10 +111,45 @@ impl ToString for ASTNode {
match self { match self {
ASTNode::SQLIdentifier(s) => s.to_string(), ASTNode::SQLIdentifier(s) => s.to_string(),
ASTNode::SQLWildcard => "*".to_string(), ASTNode::SQLWildcard => "*".to_string(),
ASTNode::SQLQualifiedWildcard(q) => q.join(".") + "*",
ASTNode::SQLCompoundIdentifier(s) => s.join("."), ASTNode::SQLCompoundIdentifier(s) => s.join("."),
ASTNode::SQLAssignment(ass) => ass.to_string(),
ASTNode::SQLIsNull(ast) => format!("{} IS NULL", ast.as_ref().to_string()), ASTNode::SQLIsNull(ast) => format!("{} IS NULL", ast.as_ref().to_string()),
ASTNode::SQLIsNotNull(ast) => format!("{} IS NOT NULL", ast.as_ref().to_string()), ASTNode::SQLIsNotNull(ast) => format!("{} IS NOT NULL", ast.as_ref().to_string()),
ASTNode::SQLInList {
expr,
list,
negated,
} => format!(
"{} {}IN ({})",
expr.as_ref().to_string(),
if *negated { "NOT " } else { "" },
list.iter()
.map(|a| a.to_string())
.collect::<Vec<String>>()
.join(", ")
),
ASTNode::SQLInSubquery {
expr,
subquery,
negated,
} => format!(
"{} {}IN ({})",
expr.as_ref().to_string(),
if *negated { "NOT " } else { "" },
subquery.to_string()
),
ASTNode::SQLBetween {
expr,
negated,
low,
high,
} => format!(
"{} {}BETWEEN {} AND {}",
expr.to_string(),
if *negated { "NOT " } else { "" },
low.to_string(),
high.to_string()
),
ASTNode::SQLBinaryExpr { left, op, right } => format!( ASTNode::SQLBinaryExpr { left, op, right } => format!(
"{} {} {}", "{} {} {}",
left.as_ref().to_string(), left.as_ref().to_string(),
@ -188,67 +193,81 @@ impl ToString for ASTNode {
} }
s + " END" s + " END"
} }
ASTNode::SQLSelect { ASTNode::SQLSubquery(s) => format!("({})", s.to_string()),
projection, }
relation, }
joins, }
selection,
order_by, /// A top-level statement (SELECT, INSERT, CREATE, etc.)
group_by, #[derive(Debug, Clone, PartialEq)]
having, pub enum SQLStatement {
limit, /// SELECT
} => { SQLSelect(SQLQuery),
let mut s = format!( /// INSERT
"SELECT {}", SQLInsert {
projection /// TABLE
.iter() table_name: SQLObjectName,
.map(|p| p.to_string()) /// COLUMNS
.collect::<Vec<String>>() columns: Vec<SQLIdent>,
.join(", ") /// VALUES (vector of rows to insert)
); values: Vec<Vec<ASTNode>>,
if let Some(relation) = relation { },
s += &format!(" FROM {}", relation.as_ref().to_string()); SQLCopy {
} /// TABLE
for join in joins { table_name: SQLObjectName,
s += &join.to_string(); /// COLUMNS
} columns: Vec<SQLIdent>,
if let Some(selection) = selection { /// VALUES a vector of values to be copied
s += &format!(" WHERE {}", selection.as_ref().to_string()); values: Vec<Option<String>>,
} },
if let Some(group_by) = group_by { /// UPDATE
s += &format!( SQLUpdate {
" GROUP BY {}", /// TABLE
group_by table_name: SQLObjectName,
.iter() /// Column assignments
.map(|g| g.to_string()) assignments: Vec<SQLAssignment>,
.collect::<Vec<String>>() /// WHERE
.join(", ") selection: Option<ASTNode>,
); },
} /// DELETE
if let Some(having) = having { SQLDelete {
s += &format!(" HAVING {}", having.as_ref().to_string()); /// FROM
} table_name: SQLObjectName,
if let Some(order_by) = order_by { /// WHERE
s += &format!( selection: Option<ASTNode>,
" ORDER BY {}", },
order_by /// CREATE VIEW
.iter() SQLCreateView {
.map(|o| o.to_string()) /// View name
.collect::<Vec<String>>() name: SQLObjectName,
.join(", ") query: SQLQuery,
); materialized: bool,
} },
if let Some(limit) = limit { /// CREATE TABLE
s += &format!(" LIMIT {}", limit.as_ref().to_string()); SQLCreateTable {
} /// Table name
s name: SQLObjectName,
} /// Optional schema
ASTNode::SQLInsert { columns: Vec<SQLColumnDef>,
},
/// ALTER TABLE
SQLAlterTable {
/// Table name
name: SQLObjectName,
operation: AlterOperation,
},
}
impl ToString for SQLStatement {
fn to_string(&self) -> String {
match self {
SQLStatement::SQLSelect(s) => s.to_string(),
SQLStatement::SQLInsert {
table_name, table_name,
columns, columns,
values, values,
} => { } => {
let mut s = format!("INSERT INTO {}", table_name); let mut s = format!("INSERT INTO {}", table_name.to_string());
if columns.len() > 0 { if columns.len() > 0 {
s += &format!(" ({})", columns.join(", ")); s += &format!(" ({})", columns.join(", "));
} }
@ -268,12 +287,12 @@ impl ToString for ASTNode {
} }
s s
} }
ASTNode::SQLCopy { SQLStatement::SQLCopy {
table_name, table_name,
columns, columns,
values, values,
} => { } => {
let mut s = format!("COPY {}", table_name); let mut s = format!("COPY {}", table_name.to_string());
if columns.len() > 0 { if columns.len() > 0 {
s += &format!( s += &format!(
" ({})", " ({})",
@ -298,12 +317,12 @@ impl ToString for ASTNode {
s += "\n\\."; s += "\n\\.";
s s
} }
ASTNode::SQLUpdate { SQLStatement::SQLUpdate {
table_name, table_name,
assignments, assignments,
selection, selection,
} => { } => {
let mut s = format!("UPDATE {}", table_name); let mut s = format!("UPDATE {}", table_name.to_string());
if assignments.len() > 0 { if assignments.len() > 0 {
s += &format!( s += &format!(
"{}", "{}",
@ -315,84 +334,80 @@ impl ToString for ASTNode {
); );
} }
if let Some(selection) = selection { if let Some(selection) = selection {
s += &format!(" WHERE {}", selection.as_ref().to_string()); s += &format!(" WHERE {}", selection.to_string());
} }
s s
} }
ASTNode::SQLDelete { SQLStatement::SQLDelete {
relation, table_name,
selection, selection,
} => { } => {
let mut s = String::from("DELETE"); let mut s = format!("DELETE FROM {}", table_name.to_string());
if let Some(relation) = relation {
s += &format!(" FROM {}", relation.as_ref().to_string());
}
if let Some(selection) = selection { if let Some(selection) = selection {
s += &format!(" WHERE {}", selection.as_ref().to_string()); s += &format!(" WHERE {}", selection.to_string());
} }
s s
} }
ASTNode::SQLCreateTable { name, columns } => format!( SQLStatement::SQLCreateView {
"CREATE TABLE {} ({})",
name, name,
query,
materialized,
} => {
let modifier = if *materialized { " MATERIALIZED" } else { "" };
format!(
"CREATE{} VIEW {} AS {}",
modifier,
name.to_string(),
query.to_string()
)
}
SQLStatement::SQLCreateTable { name, columns } => format!(
"CREATE TABLE {} ({})",
name.to_string(),
columns columns
.iter() .iter()
.map(|c| c.to_string()) .map(|c| c.to_string())
.collect::<Vec<String>>() .collect::<Vec<String>>()
.join(", ") .join(", ")
), ),
ASTNode::SQLAlterTable { name, operation } => { SQLStatement::SQLAlterTable { name, operation } => {
format!("ALTER TABLE {} {}", name, operation.to_string()) format!("ALTER TABLE {} {}", name.to_string(), operation.to_string())
} }
} }
} }
} }
/// A name of a table, view, custom type, etc., possibly multi-part, i.e. db.schema.obj
#[derive(Debug, Clone, PartialEq)]
pub struct SQLObjectName(pub Vec<SQLIdent>);
impl ToString for SQLObjectName {
fn to_string(&self) -> String {
self.0.join(".")
}
}
/// SQL assignment `foo = expr` as used in SQLUpdate /// SQL assignment `foo = expr` as used in SQLUpdate
/// TODO: unify this with the ASTNode SQLAssignment
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub struct SQLAssignment { pub struct SQLAssignment {
id: String, id: SQLIdent,
value: Box<ASTNode>, value: ASTNode,
} }
impl ToString for SQLAssignment { impl ToString for SQLAssignment {
fn to_string(&self) -> String { fn to_string(&self) -> String {
format!("SET {} = {}", self.id, self.value.as_ref().to_string()) format!("SET {} = {}", self.id, self.value.to_string())
}
}
/// SQL ORDER BY expression
#[derive(Debug, Clone, PartialEq)]
pub struct SQLOrderByExpr {
pub expr: Box<ASTNode>,
pub asc: bool,
}
impl SQLOrderByExpr {
pub fn new(expr: Box<ASTNode>, asc: bool) -> Self {
SQLOrderByExpr { expr, asc }
}
}
impl ToString for SQLOrderByExpr {
fn to_string(&self) -> String {
if self.asc {
format!("{} ASC", self.expr.as_ref().to_string())
} else {
format!("{} DESC", self.expr.as_ref().to_string())
}
} }
} }
/// SQL column definition /// SQL column definition
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub struct SQLColumnDef { pub struct SQLColumnDef {
pub name: String, pub name: SQLIdent,
pub data_type: SQLType, pub data_type: SQLType,
pub is_primary: bool, pub is_primary: bool,
pub is_unique: bool, pub is_unique: bool,
pub default: Option<Box<ASTNode>>, pub default: Option<ASTNode>,
pub allow_null: bool, pub allow_null: bool,
} }
@ -406,7 +421,7 @@ impl ToString for SQLColumnDef {
s += " UNIQUE"; s += " UNIQUE";
} }
if let Some(ref default) = self.default { if let Some(ref default) = self.default {
s += &format!(" DEFAULT {}", default.as_ref().to_string()); s += &format!(" DEFAULT {}", default.to_string());
} }
if !self.allow_null { if !self.allow_null {
s += " NOT NULL"; s += " NOT NULL";
@ -414,72 +429,3 @@ impl ToString for SQLColumnDef {
s s
} }
} }
#[derive(Debug, Clone, PartialEq)]
pub struct Join {
pub relation: ASTNode,
pub join_operator: JoinOperator,
}
impl ToString for Join {
fn to_string(&self) -> String {
fn prefix(constraint: &JoinConstraint) -> String {
match constraint {
JoinConstraint::Natural => "NATURAL ".to_string(),
_ => "".to_string(),
}
}
fn suffix(constraint: &JoinConstraint) -> String {
match constraint {
JoinConstraint::On(expr) => format!("ON {}", expr.to_string()),
JoinConstraint::Using(attrs) => format!("USING({})", attrs.join(", ")),
_ => "".to_string(),
}
}
match &self.join_operator {
JoinOperator::Inner(constraint) => format!(
" {}JOIN {} {}",
prefix(constraint),
self.relation.to_string(),
suffix(constraint)
),
JoinOperator::Cross => format!(" CROSS JOIN {}", self.relation.to_string()),
JoinOperator::Implicit => format!(", {}", self.relation.to_string()),
JoinOperator::LeftOuter(constraint) => format!(
" {}LEFT JOIN {} {}",
prefix(constraint),
self.relation.to_string(),
suffix(constraint)
),
JoinOperator::RightOuter(constraint) => format!(
" {}RIGHT JOIN {} {}",
prefix(constraint),
self.relation.to_string(),
suffix(constraint)
),
JoinOperator::FullOuter(constraint) => format!(
" {}FULL JOIN {} {}",
prefix(constraint),
self.relation.to_string(),
suffix(constraint)
),
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub enum JoinOperator {
Inner(JoinConstraint),
LeftOuter(JoinConstraint),
RightOuter(JoinConstraint),
FullOuter(JoinConstraint),
Implicit,
Cross,
}
#[derive(Debug, Clone, PartialEq)]
pub enum JoinConstraint {
On(ASTNode),
Using(Vec<String>),
Natural,
}

309
src/sqlast/query.rs Normal file
View file

@ -0,0 +1,309 @@
use super::*;
/// The most complete variant of a `SELECT` query expression, optionally
/// including `WITH`, `UNION` / other set operations, and `ORDER BY`.
#[derive(Debug, Clone, PartialEq)]
pub struct SQLQuery {
    /// WITH (common table expressions, or CTEs)
    pub ctes: Vec<Cte>,
    /// SELECT or UNION / EXCEPT / INTERSECT
    pub body: SQLSetExpr,
    /// ORDER BY
    pub order_by: Option<Vec<SQLOrderByExpr>>,
    /// LIMIT
    pub limit: Option<ASTNode>,
}
impl ToString for SQLQuery {
    /// Serialize the full query (CTEs, body, ORDER BY, LIMIT) back to SQL text.
    fn to_string(&self) -> String {
        let mut sql = String::new();
        // Leading `WITH a AS (...), b AS (...) ` prefix when CTEs are present.
        if !self.ctes.is_empty() {
            let cte_list: Vec<String> = self
                .ctes
                .iter()
                .map(|cte| format!("{} AS ({})", cte.alias, cte.query.to_string()))
                .collect();
            sql += &format!("WITH {} ", cte_list.join(", "));
        }
        sql += &self.body.to_string();
        if let Some(ref order_by) = self.order_by {
            let exprs: Vec<String> = order_by.iter().map(|o| o.to_string()).collect();
            sql += &format!(" ORDER BY {}", exprs.join(", "));
        }
        if let Some(ref limit) = self.limit {
            sql += &format!(" LIMIT {}", limit.to_string());
        }
        sql
    }
}
/// A node in a tree, representing a "query body" expression, roughly:
/// `SELECT ... [ {UNION|EXCEPT|INTERSECT} SELECT ...]`
#[derive(Debug, Clone, PartialEq)]
pub enum SQLSetExpr {
    /// Restricted SELECT .. FROM .. HAVING (no ORDER BY or set operations)
    Select(SQLSelect),
    /// Parenthesized SELECT subquery, which may include more set operations
    /// in its body and an optional ORDER BY / LIMIT.
    Query(Box<SQLQuery>),
    /// UNION/EXCEPT/INTERSECT of two queries
    SetOperation {
        /// Which set operation combines the operands
        op: SQLSetOperator,
        /// Whether the `ALL` modifier was present (keep duplicate rows)
        all: bool,
        left: Box<SQLSetExpr>,
        right: Box<SQLSetExpr>,
    },
    // TODO: ANSI SQL supports `TABLE` and `VALUES` here.
}
impl ToString for SQLSetExpr {
    /// Serialize this query body back to SQL text.
    fn to_string(&self) -> String {
        match self {
            SQLSetExpr::Select(select) => select.to_string(),
            SQLSetExpr::Query(subquery) => format!("({})", subquery.to_string()),
            SQLSetExpr::SetOperation {
                left,
                right,
                op,
                all,
            } => format!(
                "{} {}{} {}",
                left.to_string(),
                op.to_string(),
                if *all { " ALL" } else { "" },
                right.to_string()
            ),
        }
    }
}
/// A set operation combining two query bodies.
#[derive(Debug, Clone, PartialEq)]
pub enum SQLSetOperator {
    Union,
    Except,
    Intersect,
}
impl ToString for SQLSetOperator {
    /// The SQL keyword for this set operator.
    fn to_string(&self) -> String {
        let keyword = match self {
            SQLSetOperator::Union => "UNION",
            SQLSetOperator::Except => "EXCEPT",
            SQLSetOperator::Intersect => "INTERSECT",
        };
        keyword.to_string()
    }
}
/// A restricted variant of `SELECT` (without CTEs/`ORDER BY`), which may
/// appear either as the only body item of an `SQLQuery`, or as an operand
/// to a set operation like `UNION`.
#[derive(Debug, Clone, PartialEq)]
pub struct SQLSelect {
    /// projection expressions (the comma-separated list after `SELECT`)
    pub projection: Vec<SQLSelectItem>,
    /// FROM (`None` when the query has no FROM clause)
    pub relation: Option<TableFactor>,
    /// JOIN clauses applied to `relation`, in source order
    pub joins: Vec<Join>,
    /// WHERE predicate
    pub selection: Option<ASTNode>,
    /// GROUP BY expressions
    pub group_by: Option<Vec<ASTNode>>,
    /// HAVING predicate
    pub having: Option<ASTNode>,
}
impl ToString for SQLSelect {
fn to_string(&self) -> String {
let mut s = format!(
"SELECT {}",
self.projection
.iter()
.map(|p| p.to_string())
.collect::<Vec<String>>()
.join(", ")
);
if let Some(ref relation) = self.relation {
s += &format!(" FROM {}", relation.to_string());
}
for join in &self.joins {
s += &join.to_string();
}
if let Some(ref selection) = self.selection {
s += &format!(" WHERE {}", selection.to_string());
}
if let Some(ref group_by) = self.group_by {
s += &format!(
" GROUP BY {}",
group_by
.iter()
.map(|g| g.to_string())
.collect::<Vec<String>>()
.join(", ")
);
}
if let Some(ref having) = self.having {
s += &format!(" HAVING {}", having.to_string());
}
s
}
}
/// A single CTE (used after `WITH`): `alias AS ( query )`
#[derive(Debug, Clone, PartialEq)]
pub struct Cte {
    /// The name the query is bound to within the enclosing statement
    pub alias: SQLIdent,
    /// The parenthesized query body
    pub query: SQLQuery,
}
/// One item of the comma-separated list following `SELECT`
#[derive(Debug, Clone, PartialEq)]
pub enum SQLSelectItem {
    /// Any expression, not followed by `[ AS ] alias`
    UnnamedExpression(ASTNode),
    /// An expression, followed by `[ AS ] alias`
    ExpressionWithAlias(ASTNode, SQLIdent),
    /// `alias.*` or even `schema.table.*`
    QualifiedWildcard(SQLObjectName),
    /// An unqualified `*`
    Wildcard,
}
impl ToString for SQLSelectItem {
    /// Serialize a single projection item back to SQL text.
    fn to_string(&self) -> String {
        match self {
            SQLSelectItem::Wildcard => "*".to_string(),
            SQLSelectItem::QualifiedWildcard(prefix) => format!("{}.*", prefix.to_string()),
            SQLSelectItem::UnnamedExpression(expr) => expr.to_string(),
            SQLSelectItem::ExpressionWithAlias(expr, alias) => {
                format!("{} AS {}", expr.to_string(), alias)
            }
        }
    }
}
/// A table name or a parenthesized subquery with an optional alias
#[derive(Debug, Clone, PartialEq)]
pub enum TableFactor {
    /// A named table (possibly schema-qualified)
    Table {
        name: SQLObjectName,
        alias: Option<SQLIdent>,
    },
    /// A derived table: `( <subquery> ) [AS alias]`
    Derived {
        subquery: Box<SQLQuery>,
        alias: Option<SQLIdent>,
    },
}
impl ToString for TableFactor {
    /// Serialize a relation back to SQL text, appending ` AS alias` when present.
    fn to_string(&self) -> String {
        let (base, alias) = match self {
            TableFactor::Table { name, alias } => (name.to_string(), alias),
            TableFactor::Derived { subquery, alias } => {
                (format!("({})", subquery.to_string()), alias)
            }
        };
        match alias {
            Some(alias) => format!("{} AS {}", base, alias),
            None => base,
        }
    }
}
/// One JOIN clause: the joined relation plus the join kind/constraint.
#[derive(Debug, Clone, PartialEq)]
pub struct Join {
    /// The relation being joined in
    pub relation: TableFactor,
    /// The kind of join and its constraint (ON / USING / NATURAL)
    pub join_operator: JoinOperator,
}
impl ToString for Join {
    /// Serialize the join clause, including its leading separator: a space
    /// before the JOIN keywords, or a comma for an implicit join.
    fn to_string(&self) -> String {
        // `NATURAL ` precedes the JOIN keyword(s); empty otherwise.
        fn prefix(constraint: &JoinConstraint) -> String {
            match constraint {
                JoinConstraint::Natural => "NATURAL ".to_string(),
                _ => "".to_string(),
            }
        }
        // `ON ...` / `USING(...)` follow the joined relation; NATURAL joins
        // have no suffix. The separating space is produced here so that a
        // join without a suffix does not end with a dangling space (the
        // previous `"... {} {}"` format emitted e.g. `" NATURAL JOIN t "`).
        fn suffix(constraint: &JoinConstraint) -> String {
            match constraint {
                JoinConstraint::On(expr) => format!(" ON {}", expr.to_string()),
                JoinConstraint::Using(attrs) => format!(" USING({})", attrs.join(", ")),
                _ => "".to_string(),
            }
        }
        match &self.join_operator {
            JoinOperator::Inner(constraint) => format!(
                " {}JOIN {}{}",
                prefix(constraint),
                self.relation.to_string(),
                suffix(constraint)
            ),
            JoinOperator::Cross => format!(" CROSS JOIN {}", self.relation.to_string()),
            JoinOperator::Implicit => format!(", {}", self.relation.to_string()),
            JoinOperator::LeftOuter(constraint) => format!(
                " {}LEFT JOIN {}{}",
                prefix(constraint),
                self.relation.to_string(),
                suffix(constraint)
            ),
            JoinOperator::RightOuter(constraint) => format!(
                " {}RIGHT JOIN {}{}",
                prefix(constraint),
                self.relation.to_string(),
                suffix(constraint)
            ),
            JoinOperator::FullOuter(constraint) => format!(
                " {}FULL JOIN {}{}",
                prefix(constraint),
                self.relation.to_string(),
                suffix(constraint)
            ),
        }
    }
}
/// The kind of a JOIN clause, carrying its constraint where one applies.
#[derive(Debug, Clone, PartialEq)]
pub enum JoinOperator {
    /// `[INNER] JOIN ... <constraint>`
    Inner(JoinConstraint),
    /// `LEFT [OUTER] JOIN ... <constraint>`
    LeftOuter(JoinConstraint),
    /// `RIGHT [OUTER] JOIN ... <constraint>`
    RightOuter(JoinConstraint),
    /// `FULL [OUTER] JOIN ... <constraint>`
    FullOuter(JoinConstraint),
    /// A comma-separated relation in the FROM list (implicit cross join)
    Implicit,
    /// `CROSS JOIN` (no constraint)
    Cross,
}
/// The predicate attached to a JOIN.
#[derive(Debug, Clone, PartialEq)]
pub enum JoinConstraint {
    /// `ON <expr>`
    On(ASTNode),
    /// `USING(<column list>)`
    Using(Vec<SQLIdent>),
    /// `NATURAL` — rendered as a prefix keyword; no explicit condition
    Natural,
}
/// SQL ORDER BY expression
#[derive(Debug, Clone, PartialEq)]
pub struct SQLOrderByExpr {
    /// The expression to sort by
    pub expr: ASTNode,
    /// `Some(true)` for ASC, `Some(false)` for DESC, `None` when unspecified
    pub asc: Option<bool>,
}
impl ToString for SQLOrderByExpr {
    /// Serialize as `<expr> [ASC|DESC]`; no direction keyword when `asc` is `None`.
    fn to_string(&self) -> String {
        let expr = self.expr.to_string();
        match self.asc {
            Some(true) => format!("{} ASC", expr),
            Some(false) => format!("{} DESC", expr),
            None => expr,
        }
    }
}

View file

@ -1,3 +1,5 @@
use super::SQLObjectName;
/// SQL datatypes for literals in SQL statements /// SQL datatypes for literals in SQL statements
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum SQLType { pub enum SQLType {
@ -15,8 +17,8 @@ pub enum SQLType {
Varbinary(usize), Varbinary(usize),
/// Large binary object e.g. BLOB(1000) /// Large binary object e.g. BLOB(1000)
Blob(usize), Blob(usize),
/// Decimal type with precision and optional scale e.g. DECIMAL(10,2) /// Decimal type with optional precision and scale e.g. DECIMAL(10,2)
Decimal(usize, Option<usize>), Decimal(Option<usize>, Option<usize>),
/// Small integer /// Small integer
SmallInt, SmallInt,
/// Integer /// Integer
@ -44,7 +46,7 @@ pub enum SQLType {
/// Bytea /// Bytea
Bytea, Bytea,
/// Custom type such as enums /// Custom type such as enums
Custom(String), Custom(SQLObjectName),
/// Arrays /// Arrays
Array(Box<SQLType>), Array(Box<SQLType>),
} }
@ -73,9 +75,13 @@ impl ToString for SQLType {
SQLType::Blob(size) => format!("blob({})", size), SQLType::Blob(size) => format!("blob({})", size),
SQLType::Decimal(precision, scale) => { SQLType::Decimal(precision, scale) => {
if let Some(scale) = scale { if let Some(scale) = scale {
format!("numeric({},{})", precision, scale) format!("numeric({},{})", precision.unwrap(), scale)
} else { } else {
format!("numeric({})", precision) if let Some(precision) = precision {
format!("numeric({})", precision)
} else {
format!("numeric")
}
} }
} }
SQLType::SmallInt => "smallint".to_string(), SQLType::SmallInt => "smallint".to_string(),

View file

@ -1,7 +1,9 @@
use super::{SQLIdent, SQLObjectName};
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub enum AlterOperation { pub enum AlterOperation {
AddConstraint(TableKey), AddConstraint(TableKey),
RemoveConstraint { name: String }, RemoveConstraint { name: SQLIdent },
} }
impl ToString for AlterOperation { impl ToString for AlterOperation {
@ -17,8 +19,8 @@ impl ToString for AlterOperation {
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub struct Key { pub struct Key {
pub name: String, pub name: SQLIdent,
pub columns: Vec<String>, pub columns: Vec<SQLIdent>,
} }
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
@ -28,8 +30,8 @@ pub enum TableKey {
Key(Key), Key(Key),
ForeignKey { ForeignKey {
key: Key, key: Key,
foreign_table: String, foreign_table: SQLObjectName,
referred_columns: Vec<String>, referred_columns: Vec<SQLIdent>,
}, },
} }
@ -51,7 +53,7 @@ impl ToString for TableKey {
"{} FOREIGN KEY ({}) REFERENCES {}({})", "{} FOREIGN KEY ({}) REFERENCES {}({})",
key.name, key.name,
key.columns.join(", "), key.columns.join(", "),
foreign_table, foreign_table.to_string(),
referred_columns.join(", ") referred_columns.join(", ")
), ),
} }

View file

@ -13,6 +13,8 @@ pub enum Value {
Uuid(Uuid), Uuid(Uuid),
/// 'string value' /// 'string value'
SingleQuotedString(String), SingleQuotedString(String),
/// N'string value'
NationalStringLiteral(String),
/// Boolean value true or false, /// Boolean value true or false,
Boolean(bool), Boolean(bool),
/// Date value /// Date value
@ -34,6 +36,7 @@ impl ToString for Value {
Value::Double(v) => v.to_string(), Value::Double(v) => v.to_string(),
Value::Uuid(v) => v.to_string(), Value::Uuid(v) => v.to_string(),
Value::SingleQuotedString(v) => format!("'{}'", v), Value::SingleQuotedString(v) => format!("'{}'", v),
Value::NationalStringLiteral(v) => format!("N'{}'", v),
Value::Boolean(v) => v.to_string(), Value::Boolean(v) => v.to_string(),
Value::Date(v) => v.to_string(), Value::Date(v) => v.to_string(),
Value::Time(v) => v.to_string(), Value::Time(v) => v.to_string(),

File diff suppressed because it is too large Load diff

View file

@ -21,23 +21,22 @@
use std::iter::Peekable; use std::iter::Peekable;
use std::str::Chars; use std::str::Chars;
use super::dialect::keywords::ALL_KEYWORDS;
use super::dialect::Dialect; use super::dialect::Dialect;
/// SQL Token enumeration /// SQL Token enumeration
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Token { pub enum Token {
/// SQL identifier e.g. table or column name /// A keyword (like SELECT) or an optionally quoted SQL identifier
Identifier(String), SQLWord(SQLWord),
/// SQL keyword e.g. Keyword("SELECT")
Keyword(String),
/// Numeric literal /// Numeric literal
Number(String), Number(String),
/// A character that could not be tokenized /// A character that could not be tokenized
Char(char), Char(char),
/// Single quoted string: i.e: 'string' /// Single quoted string: i.e: 'string'
SingleQuotedString(String), SingleQuotedString(String),
/// Double quoted string: i.e: "string" /// "National" string literal: i.e: N'string'
DoubleQuotedString(String), NationalStringLiteral(String),
/// Comma /// Comma
Comma, Comma,
/// Whitespace (space, tab, etc) /// Whitespace (space, tab, etc)
@ -93,12 +92,11 @@ pub enum Token {
impl ToString for Token { impl ToString for Token {
fn to_string(&self) -> String { fn to_string(&self) -> String {
match self { match self {
Token::Identifier(ref id) => id.to_string(), Token::SQLWord(ref w) => w.to_string(),
Token::Keyword(ref k) => k.to_string(),
Token::Number(ref n) => n.to_string(), Token::Number(ref n) => n.to_string(),
Token::Char(ref c) => c.to_string(), Token::Char(ref c) => c.to_string(),
Token::SingleQuotedString(ref s) => format!("'{}'", s), Token::SingleQuotedString(ref s) => format!("'{}'", s),
Token::DoubleQuotedString(ref s) => format!("\"{}\"", s), Token::NationalStringLiteral(ref s) => format!("N'{}'", s),
Token::Comma => ",".to_string(), Token::Comma => ",".to_string(),
Token::Whitespace(ws) => ws.to_string(), Token::Whitespace(ws) => ws.to_string(),
Token::Eq => "=".to_string(), Token::Eq => "=".to_string(),
@ -128,11 +126,72 @@ impl ToString for Token {
} }
} }
impl Token {
    /// Shorthand for `make_word` with no quoting; the word is classified
    /// as a keyword if it matches one (case-insensitively).
    pub fn make_keyword(keyword: &str) -> Self {
        Token::make_word(keyword, None)
    }

    /// Build a `SQLWord` token from `word`. An unquoted word whose uppercase
    /// form appears in `ALL_KEYWORDS` has its `keyword` field set; quoted
    /// words are never treated as keywords.
    pub fn make_word(word: &str, quote_style: Option<char>) -> Self {
        let word_uppercase = word.to_uppercase();
        //TODO: need to reintroduce FnvHashSet at some point .. iterating over keywords is
        // not fast but I want the simplicity for now while I experiment with pluggable
        // dialects
        let is_keyword =
            quote_style.is_none() && ALL_KEYWORDS.contains(&word_uppercase.as_str());
        Token::SQLWord(SQLWord {
            value: word.to_string(),
            quote_style,
            // An empty string marks "not a keyword"; reuse the already-owned
            // uppercase String instead of cloning it via `.to_string()`.
            keyword: if is_keyword {
                word_uppercase
            } else {
                String::new()
            },
        })
    }
}
/// A keyword (like SELECT) or an optionally quoted SQL identifier
#[derive(Debug, Clone, PartialEq)]
pub struct SQLWord {
    /// The value of the token, without the enclosing quotes, and with the
    /// escape sequences (if any) processed (TODO: escapes are not handled)
    pub value: String,
    /// An identifier can be "quoted" (&lt;delimited identifier> in ANSI parlance).
    /// The standard and most implementations allow using double quotes for this,
    /// but some implementations support other quoting styles as well (e.g. \[MS SQL])
    pub quote_style: Option<char>,
    /// If the word was not quoted and it matched one of the known keywords,
    /// this will have one of the values from dialect::keywords, otherwise empty.
    /// The keyword, when set, is stored uppercased regardless of input case.
    pub keyword: String,
}
impl ToString for SQLWord {
    /// Render the word back to SQL, re-wrapping quoted identifiers in their
    /// original opening quote and its matching closing character.
    fn to_string(&self) -> String {
        match self.quote_style {
            None => self.value.clone(),
            Some(q) if q == '"' || q == '[' || q == '`' => {
                format!("{}{}{}", q, self.value, SQLWord::matching_end_quote(q))
            }
            // Any other quote character is not a supported quoting style.
            _ => panic!("Unexpected quote_style!"),
        }
    }
}
impl SQLWord {
    /// Returns the closing quote character matching an opening identifier
    /// quote (the two differ only for MS SQL's `[...]` style).
    fn matching_end_quote(ch: char) -> char {
        match ch {
            '"' => '"', // ANSI and most dialects
            '[' => ']', // MS SQL
            '`' => '`', // MySQL
            _ => panic!("unexpected quoting style!"),
        }
    }
}
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Whitespace { pub enum Whitespace {
Space, Space,
Newline, Newline,
Tab, Tab,
SingleLineComment(String),
MultiLineComment(String),
} }
impl ToString for Whitespace { impl ToString for Whitespace {
@ -141,6 +200,8 @@ impl ToString for Whitespace {
Whitespace::Space => " ".to_string(), Whitespace::Space => " ".to_string(),
Whitespace::Newline => "\n".to_string(), Whitespace::Newline => "\n".to_string(),
Whitespace::Tab => "\t".to_string(), Whitespace::Tab => "\t".to_string(),
Whitespace::SingleLineComment(s) => format!("--{}", s),
Whitespace::MultiLineComment(s) => format!("/*{}*/", s),
} }
} }
} }
@ -168,13 +229,6 @@ impl<'a> Tokenizer<'a> {
} }
} }
fn is_keyword(&self, s: &str) -> bool {
//TODO: need to reintroduce FnvHashSet at some point .. iterating over keywords is
// not fast but I want the simplicity for now while I experiment with pluggable
// dialects
return self.dialect.keywords().contains(&s);
}
/// Tokenize the statement and produce a vector of tokens /// Tokenize the statement and produce a vector of tokens
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> { pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
let mut peekable = self.query.chars().peekable(); let mut peekable = self.query.chars().peekable();
@ -189,11 +243,10 @@ impl<'a> Tokenizer<'a> {
} }
Token::Whitespace(Whitespace::Tab) => self.col += 4, Token::Whitespace(Whitespace::Tab) => self.col += 4,
Token::Identifier(s) => self.col += s.len() as u64, Token::SQLWord(w) if w.quote_style == None => self.col += w.value.len() as u64,
Token::Keyword(s) => self.col += s.len() as u64, Token::SQLWord(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2,
Token::Number(s) => self.col += s.len() as u64, Token::Number(s) => self.col += s.len() as u64,
Token::SingleQuotedString(s) => self.col += s.len() as u64, Token::SingleQuotedString(s) => self.col += s.len() as u64,
Token::DoubleQuotedString(s) => self.col += s.len() as u64,
_ => self.col += 1, _ => self.col += 1,
} }
@ -219,63 +272,44 @@ impl<'a> Tokenizer<'a> {
chars.next(); chars.next();
Ok(Some(Token::Whitespace(Whitespace::Newline))) Ok(Some(Token::Whitespace(Whitespace::Newline)))
} }
// identifier or keyword 'N' => {
ch if self.dialect.is_identifier_start(ch) => { chars.next(); // consume, to check the next char
let mut s = String::new(); match chars.peek() {
chars.next(); // consume Some('\'') => {
s.push(ch); // N'...' - a <national character string literal>
while let Some(&ch) = chars.peek() { let s = self.tokenize_single_quoted_string(chars);
if self.dialect.is_identifier_part(ch) { Ok(Some(Token::NationalStringLiteral(s)))
chars.next(); // consume }
s.push(ch); _ => {
} else { // regular identifier starting with an "N"
break; let s = self.tokenize_word('N', chars);
Ok(Some(Token::make_word(&s, None)))
} }
} }
let upper_str = s.to_uppercase(); }
if self.is_keyword(upper_str.as_str()) { // identifier or keyword
Ok(Some(Token::Keyword(upper_str))) ch if self.dialect.is_identifier_start(ch) => {
} else { chars.next(); // consume the first char
Ok(Some(Token::Identifier(s))) let s = self.tokenize_word(ch, chars);
} Ok(Some(Token::make_word(&s, None)))
} }
// string // string
'\'' => { '\'' => {
//TODO: handle escaped quotes in string let s = self.tokenize_single_quoted_string(chars);
//TODO: handle EOF before terminating quote
let mut s = String::new();
chars.next(); // consume
while let Some(&ch) = chars.peek() {
match ch {
'\'' => {
chars.next(); // consume
break;
}
_ => {
chars.next(); // consume
s.push(ch);
}
}
}
Ok(Some(Token::SingleQuotedString(s))) Ok(Some(Token::SingleQuotedString(s)))
} }
// string // delimited (quoted) identifier
'"' => { quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
let mut s = String::new(); let mut s = String::new();
chars.next(); // consume chars.next(); // consume the opening quote
while let Some(&ch) = chars.peek() { let quote_end = SQLWord::matching_end_quote(quote_start);
while let Some(ch) = chars.next() {
match ch { match ch {
'"' => { c if c == quote_end => break,
chars.next(); // consume _ => s.push(ch),
break;
}
_ => {
chars.next(); // consume
s.push(ch);
}
} }
} }
Ok(Some(Token::DoubleQuotedString(s))) Ok(Some(Token::make_word(&s, Some(quote_start))))
} }
// numbers // numbers
'0'...'9' => { '0'...'9' => {
@ -296,10 +330,45 @@ impl<'a> Tokenizer<'a> {
')' => self.consume_and_return(chars, Token::RParen), ')' => self.consume_and_return(chars, Token::RParen),
',' => self.consume_and_return(chars, Token::Comma), ',' => self.consume_and_return(chars, Token::Comma),
// operators // operators
'-' => {
chars.next(); // consume the '-'
match chars.peek() {
Some('-') => {
chars.next(); // consume the second '-', starting a single-line comment
let mut s = String::new();
loop {
match chars.next() {
Some(ch) if ch != '\n' => {
s.push(ch);
}
other => {
if other.is_some() {
s.push('\n');
}
break Ok(Some(Token::Whitespace(
Whitespace::SingleLineComment(s),
)));
}
}
}
}
// a regular '-' operator
_ => Ok(Some(Token::Minus)),
}
}
'/' => {
chars.next(); // consume the '/'
match chars.peek() {
Some('*') => {
chars.next(); // consume the '*', starting a multi-line comment
self.tokenize_multiline_comment(chars)
}
// a regular '/' operator
_ => Ok(Some(Token::Div)),
}
}
'+' => self.consume_and_return(chars, Token::Plus), '+' => self.consume_and_return(chars, Token::Plus),
'-' => self.consume_and_return(chars, Token::Minus),
'*' => self.consume_and_return(chars, Token::Mult), '*' => self.consume_and_return(chars, Token::Mult),
'/' => self.consume_and_return(chars, Token::Div),
'%' => self.consume_and_return(chars, Token::Mod), '%' => self.consume_and_return(chars, Token::Mod),
'=' => self.consume_and_return(chars, Token::Eq), '=' => self.consume_and_return(chars, Token::Eq),
'.' => self.consume_and_return(chars, Token::Period), '.' => self.consume_and_return(chars, Token::Period),
@ -366,6 +435,75 @@ impl<'a> Tokenizer<'a> {
} }
} }
/// Tokenize an identifier or keyword, after the first char is already consumed.
fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars>) -> String {
    let mut word = first_char.to_string();
    // Keep consuming as long as the dialect accepts the char as part of
    // an identifier; stop (without consuming) at the first that isn't.
    while let Some(&ch) = chars.peek() {
        if !self.dialect.is_identifier_part(ch) {
            break;
        }
        chars.next(); // consume
        word.push(ch);
    }
    word
}
/// Read a single quoted string, starting with the opening quote.
fn tokenize_single_quoted_string(&self, chars: &mut Peekable<Chars>) -> String {
    //TODO: handle escaped quotes in string
    //TODO: handle newlines in string
    //TODO: handle EOF before terminating quote
    //TODO: handle 'string' <white space> 'string continuation'
    let mut body = String::new();
    chars.next(); // consume the opening quote
    // Accumulate everything up to (but excluding) the closing quote.
    while let Some(ch) = chars.next() {
        if ch == '\'' {
            break;
        }
        body.push(ch);
    }
    body
}
/// Read the body of a `/* ... */` comment, after the opening `/*` has been
/// consumed, returning it as whitespace. Errors on EOF before the closing
/// `*/`.
fn tokenize_multiline_comment(
    &self,
    chars: &mut Peekable<Chars>,
) -> Result<Option<Token>, TokenizerError> {
    let mut s = String::new();
    // True when the previous char was '*', i.e. a '/' next would close the
    // comment. The pending '*' is withheld from `s` until we know it is not
    // part of the terminating `*/`.
    let mut maybe_closing_comment = false;
    // TODO: deal with nested comments
    loop {
        match chars.next() {
            Some(ch) => {
                if maybe_closing_comment {
                    if ch == '/' {
                        break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
                    } else {
                        // The pending '*' did not start `*/`; emit it now.
                        s.push('*');
                    }
                }
                maybe_closing_comment = ch == '*';
                if !maybe_closing_comment {
                    s.push(ch);
                }
            }
            None => {
                break Err(TokenizerError(
                    "Unexpected EOF while in a multi-line comment".to_string(),
                ));
            }
        }
    }
}
fn consume_and_return( fn consume_and_return(
&self, &self,
chars: &mut Peekable<Chars>, chars: &mut Peekable<Chars>,
@ -389,7 +527,7 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap(); let tokens = tokenizer.tokenize().unwrap();
let expected = vec![ let expected = vec![
Token::Keyword(String::from("SELECT")), Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1")), Token::Number(String::from("1")),
]; ];
@ -405,9 +543,9 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap(); let tokens = tokenizer.tokenize().unwrap();
let expected = vec![ let expected = vec![
Token::Keyword(String::from("SELECT")), Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Identifier(String::from("sqrt")), Token::make_word("sqrt", None),
Token::LParen, Token::LParen,
Token::Number(String::from("1")), Token::Number(String::from("1")),
Token::RParen, Token::RParen,
@ -424,23 +562,23 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap(); let tokens = tokenizer.tokenize().unwrap();
let expected = vec![ let expected = vec![
Token::Keyword(String::from("SELECT")), Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Mult, Token::Mult,
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword(String::from("FROM")), Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Identifier(String::from("customer")), Token::make_word("customer", None),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword(String::from("WHERE")), Token::make_keyword("WHERE"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Identifier(String::from("id")), Token::make_word("id", None),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Eq, Token::Eq,
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1")), Token::Number(String::from("1")),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword(String::from("LIMIT")), Token::make_keyword("LIMIT"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Number(String::from("5")), Token::Number(String::from("5")),
]; ];
@ -456,17 +594,17 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap(); let tokens = tokenizer.tokenize().unwrap();
let expected = vec![ let expected = vec![
Token::Keyword(String::from("SELECT")), Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Mult, Token::Mult,
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword(String::from("FROM")), Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Identifier(String::from("customer")), Token::make_word("customer", None),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword(String::from("WHERE")), Token::make_keyword("WHERE"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Identifier(String::from("salary")), Token::make_word("salary", None),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Neq, Token::Neq,
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
@ -491,7 +629,7 @@ mod tests {
Token::Char('ط'), Token::Char('ط'),
Token::Char('ف'), Token::Char('ف'),
Token::Char('ى'), Token::Char('ى'),
Token::Identifier("h".to_string()), Token::make_word("h", None),
]; ];
compare(expected, tokens); compare(expected, tokens);
} }
@ -507,20 +645,20 @@ mod tests {
let expected = vec![ let expected = vec![
Token::Whitespace(Whitespace::Newline), Token::Whitespace(Whitespace::Newline),
Token::Whitespace(Whitespace::Newline), Token::Whitespace(Whitespace::Newline),
Token::Keyword("SELECT".into()), Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Mult, Token::Mult,
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword("FROM".into()), Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword("TABLE".into()), Token::make_keyword("table"),
Token::Whitespace(Whitespace::Tab), Token::Whitespace(Whitespace::Tab),
Token::Char('م'), Token::Char('م'),
Token::Char('ص'), Token::Char('ص'),
Token::Char('ط'), Token::Char('ط'),
Token::Char('ف'), Token::Char('ف'),
Token::Char('ى'), Token::Char('ى'),
Token::Identifier("h".to_string()), Token::make_word("h", None),
]; ];
compare(expected, tokens); compare(expected, tokens);
} }
@ -533,16 +671,78 @@ mod tests {
let tokens = tokenizer.tokenize().unwrap(); let tokens = tokenizer.tokenize().unwrap();
let expected = vec![ let expected = vec![
Token::Identifier(String::from("a")), Token::make_word("a", None),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword("IS".to_string()), Token::make_keyword("IS"),
Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space),
Token::Keyword("NULL".to_string()), Token::make_keyword("NULL"),
]; ];
compare(expected, tokens); compare(expected, tokens);
} }
#[test]
fn tokenize_comment() {
    // A `--` comment is lexed as a whitespace token; the trailing newline is
    // kept as part of the comment's text so the token stream round-trips.
    let sql = String::from("0--this is a comment\n1");
    let dialect = GenericSqlDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, &sql);
    let tokens = tokenizer.tokenize().unwrap();
    let expected = vec![
        Token::Number("0".to_string()),
        Token::Whitespace(Whitespace::SingleLineComment(
            "this is a comment\n".to_string(),
        )),
        Token::Number("1".to_string()),
    ];
    compare(expected, tokens);
}
#[test]
fn tokenize_comment_at_eof() {
    // A `--` comment terminated by EOF rather than a newline is still lexed,
    // with no trailing '\n' appended to its text.
    let sql = String::from("--this is a comment");
    let dialect = GenericSqlDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, &sql);
    let tokens = tokenizer.tokenize().unwrap();
    let expected = vec![Token::Whitespace(Whitespace::SingleLineComment(
        "this is a comment".to_string(),
    ))];
    compare(expected, tokens);
}
#[test]
fn tokenize_multiline_comment() {
    // `* /` (with a space) inside the comment must not terminate it; only a
    // literal `*/` does.
    let sql = String::from("0/*multi-line\n* /comment*/1");
    let dialect = GenericSqlDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, &sql);
    let tokens = tokenizer.tokenize().unwrap();
    let expected = vec![
        Token::Number("0".to_string()),
        Token::Whitespace(Whitespace::MultiLineComment(
            "multi-line\n* /comment".to_string(),
        )),
        Token::Number("1".to_string()),
    ];
    compare(expected, tokens);
}
#[test]
fn tokenize_multiline_comment_with_even_asterisks() {
    // `/** Comment **/` — the comment closes at the first `*/`, so the
    // captured text keeps the extra asterisks on both ends.
    let sql = String::from("\n/** Comment **/\n");
    let dialect = GenericSqlDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, &sql);
    let tokens = tokenizer.tokenize().unwrap();
    let expected = vec![
        Token::Whitespace(Whitespace::Newline),
        Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())),
        Token::Whitespace(Whitespace::Newline),
    ];
    compare(expected, tokens);
}
fn compare(expected: Vec<Token>, actual: Vec<Token>) { fn compare(expected: Vec<Token>, actual: Vec<Token>) {
//println!("------------------------------"); //println!("------------------------------");
//println!("tokens = {:?}", actual); //println!("tokens = {:?}", actual);

View file

@ -4,25 +4,19 @@ extern crate sqlparser;
use sqlparser::dialect::AnsiSqlDialect; use sqlparser::dialect::AnsiSqlDialect;
use sqlparser::sqlast::*; use sqlparser::sqlast::*;
use sqlparser::sqlparser::*; use sqlparser::sqlparser::*;
use sqlparser::sqltokenizer::*;
#[test] #[test]
fn parse_simple_select() { fn parse_simple_select() {
let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1"); let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1");
let ast = parse_sql(&sql); let ast = Parser::parse_sql(&AnsiSqlDialect {}, sql).unwrap();
match ast { assert_eq!(1, ast.len());
ASTNode::SQLSelect { projection, .. } => { match ast.first().unwrap() {
SQLStatement::SQLSelect(SQLQuery {
body: SQLSetExpr::Select(SQLSelect { projection, .. }),
..
}) => {
assert_eq!(3, projection.len()); assert_eq!(3, projection.len());
} }
_ => assert!(false), _ => assert!(false),
} }
} }
fn parse_sql(sql: &str) -> ASTNode {
let dialect = AnsiSqlDialect {};
let mut tokenizer = Tokenizer::new(&dialect, &sql);
let tokens = tokenizer.tokenize().unwrap();
let mut parser = Parser::new(tokens);
let ast = parser.parse().unwrap();
ast
}

File diff suppressed because it is too large Load diff

View file

@ -13,34 +13,25 @@ fn test_prev_index() {
let sql: &str = "SELECT version()"; let sql: &str = "SELECT version()";
let mut parser = parser(sql); let mut parser = parser(sql);
assert_eq!(parser.prev_token(), None); assert_eq!(parser.prev_token(), None);
assert_eq!(parser.next_token(), Some(Token::Keyword("SELECT".into()))); assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT")));
assert_eq!( assert_eq!(parser.next_token(), Some(Token::make_word("version", None)));
parser.next_token(), assert_eq!(parser.prev_token(), Some(Token::make_word("version", None)));
Some(Token::Identifier("version".into())) assert_eq!(parser.peek_token(), Some(Token::make_word("version", None)));
); assert_eq!(parser.prev_token(), Some(Token::make_keyword("SELECT")));
assert_eq!(
parser.prev_token(),
Some(Token::Identifier("version".into()))
);
assert_eq!(
parser.peek_token(),
Some(Token::Identifier("version".into()))
);
assert_eq!(parser.prev_token(), Some(Token::Keyword("SELECT".into())));
assert_eq!(parser.prev_token(), None); assert_eq!(parser.prev_token(), None);
} }
#[test] #[test]
fn parse_simple_insert() { fn parse_simple_insert() {
let sql = String::from("INSERT INTO customer VALUES(1, 2, 3)"); let sql = String::from("INSERT INTO customer VALUES(1, 2, 3)");
match verified(&sql) { match verified_stmt(&sql) {
ASTNode::SQLInsert { SQLStatement::SQLInsert {
table_name, table_name,
columns, columns,
values, values,
.. ..
} => { } => {
assert_eq!(table_name, "customer"); assert_eq!(table_name.to_string(), "customer");
assert!(columns.is_empty()); assert!(columns.is_empty());
assert_eq!( assert_eq!(
vec![vec![ vec![vec![
@ -58,14 +49,14 @@ fn parse_simple_insert() {
#[test] #[test]
fn parse_common_insert() { fn parse_common_insert() {
let sql = String::from("INSERT INTO public.customer VALUES(1, 2, 3)"); let sql = String::from("INSERT INTO public.customer VALUES(1, 2, 3)");
match verified(&sql) { match verified_stmt(&sql) {
ASTNode::SQLInsert { SQLStatement::SQLInsert {
table_name, table_name,
columns, columns,
values, values,
.. ..
} => { } => {
assert_eq!(table_name, "public.customer"); assert_eq!(table_name.to_string(), "public.customer");
assert!(columns.is_empty()); assert!(columns.is_empty());
assert_eq!( assert_eq!(
vec![vec![ vec![vec![
@ -83,14 +74,14 @@ fn parse_common_insert() {
#[test] #[test]
fn parse_complex_insert() { fn parse_complex_insert() {
let sql = String::from("INSERT INTO db.public.customer VALUES(1, 2, 3)"); let sql = String::from("INSERT INTO db.public.customer VALUES(1, 2, 3)");
match verified(&sql) { match verified_stmt(&sql) {
ASTNode::SQLInsert { SQLStatement::SQLInsert {
table_name, table_name,
columns, columns,
values, values,
.. ..
} => { } => {
assert_eq!(table_name, "db.public.customer"); assert_eq!(table_name.to_string(), "db.public.customer");
assert!(columns.is_empty()); assert!(columns.is_empty());
assert_eq!( assert_eq!(
vec![vec![ vec![vec![
@ -108,21 +99,28 @@ fn parse_complex_insert() {
#[test] #[test]
fn parse_invalid_table_name() { fn parse_invalid_table_name() {
let mut parser = parser("db.public..customer"); let mut parser = parser("db.public..customer");
let ast = parser.parse_tablename(); let ast = parser.parse_object_name();
assert!(ast.is_err());
}
#[test]
fn parse_no_table_name() {
let mut parser = parser("");
let ast = parser.parse_object_name();
assert!(ast.is_err()); assert!(ast.is_err());
} }
#[test] #[test]
fn parse_insert_with_columns() { fn parse_insert_with_columns() {
let sql = String::from("INSERT INTO public.customer (id, name, active) VALUES(1, 2, 3)"); let sql = String::from("INSERT INTO public.customer (id, name, active) VALUES(1, 2, 3)");
match verified(&sql) { match verified_stmt(&sql) {
ASTNode::SQLInsert { SQLStatement::SQLInsert {
table_name, table_name,
columns, columns,
values, values,
.. ..
} => { } => {
assert_eq!(table_name, "public.customer"); assert_eq!(table_name.to_string(), "public.customer");
assert_eq!( assert_eq!(
columns, columns,
vec!["id".to_string(), "name".to_string(), "active".to_string()] vec!["id".to_string(), "name".to_string(), "active".to_string()]
@ -143,8 +141,7 @@ fn parse_insert_with_columns() {
#[test] #[test]
fn parse_insert_invalid() { fn parse_insert_invalid() {
let sql = String::from("INSERT public.customer (id, name, active) VALUES (1, 2, 3)"); let sql = String::from("INSERT public.customer (id, name, active) VALUES (1, 2, 3)");
let mut parser = parser(&sql); match Parser::parse_sql(&PostgreSqlDialect {}, sql) {
match parser.parse() {
Err(_) => {} Err(_) => {}
_ => assert!(false), _ => assert!(false),
} }
@ -165,9 +162,9 @@ fn parse_create_table_with_defaults() {
last_update timestamp without time zone DEFAULT now() NOT NULL, last_update timestamp without time zone DEFAULT now() NOT NULL,
active integer NOT NULL)", active integer NOT NULL)",
); );
match parse_sql(&sql) { match one_statement_parses_to(&sql, "") {
ASTNode::SQLCreateTable { name, columns } => { SQLStatement::SQLCreateTable { name, columns } => {
assert_eq!("public.customer", name); assert_eq!("public.customer", name.to_string());
assert_eq!(10, columns.len()); assert_eq!(10, columns.len());
let c_name = &columns[0]; let c_name = &columns[0];
@ -206,10 +203,9 @@ fn parse_create_table_from_pg_dump() {
release_year public.year, release_year public.year,
active integer active integer
)"); )");
let ast = parse_sql(&sql); match one_statement_parses_to(&sql, "") {
match ast { SQLStatement::SQLCreateTable { name, columns } => {
ASTNode::SQLCreateTable { name, columns } => { assert_eq!("public.customer", name.to_string());
assert_eq!("public.customer", name);
let c_customer_id = &columns[0]; let c_customer_id = &columns[0];
assert_eq!("customer_id", c_customer_id.name); assert_eq!("customer_id", c_customer_id.name);
@ -228,7 +224,7 @@ fn parse_create_table_from_pg_dump() {
let c_create_date1 = &columns[8]; let c_create_date1 = &columns[8];
assert_eq!( assert_eq!(
Some(Box::new(ASTNode::SQLCast { Some(ASTNode::SQLCast {
expr: Box::new(ASTNode::SQLCast { expr: Box::new(ASTNode::SQLCast {
expr: Box::new(ASTNode::SQLValue(Value::SingleQuotedString( expr: Box::new(ASTNode::SQLValue(Value::SingleQuotedString(
"now".to_string() "now".to_string()
@ -236,13 +232,16 @@ fn parse_create_table_from_pg_dump() {
data_type: SQLType::Text data_type: SQLType::Text
}), }),
data_type: SQLType::Date data_type: SQLType::Date
})), }),
c_create_date1.default c_create_date1.default
); );
let c_release_year = &columns[10]; let c_release_year = &columns[10];
assert_eq!( assert_eq!(
SQLType::Custom("public.year".to_string()), SQLType::Custom(SQLObjectName(vec![
"public".to_string(),
"year".to_string()
])),
c_release_year.data_type c_release_year.data_type
); );
} }
@ -261,9 +260,9 @@ fn parse_create_table_with_inherit() {
use_metric boolean DEFAULT true\ use_metric boolean DEFAULT true\
)", )",
); );
match verified(&sql) { match verified_stmt(&sql) {
ASTNode::SQLCreateTable { name, columns } => { SQLStatement::SQLCreateTable { name, columns } => {
assert_eq!("bazaar.settings", name); assert_eq!("bazaar.settings", name.to_string());
let c_name = &columns[0]; let c_name = &columns[0];
assert_eq!("settings_id", c_name.name); assert_eq!("settings_id", c_name.name);
@ -290,9 +289,9 @@ fn parse_alter_table_constraint_primary_key() {
ALTER TABLE bazaar.address \ ALTER TABLE bazaar.address \
ADD CONSTRAINT address_pkey PRIMARY KEY (address_id)", ADD CONSTRAINT address_pkey PRIMARY KEY (address_id)",
); );
match verified(&sql) { match verified_stmt(&sql) {
ASTNode::SQLAlterTable { name, .. } => { SQLStatement::SQLAlterTable { name, .. } => {
assert_eq!(name, "bazaar.address"); assert_eq!(name.to_string(), "bazaar.address");
} }
_ => assert!(false), _ => assert!(false),
} }
@ -303,9 +302,9 @@ fn parse_alter_table_constraint_foreign_key() {
let sql = String::from("\ let sql = String::from("\
ALTER TABLE public.customer \ ALTER TABLE public.customer \
ADD CONSTRAINT customer_address_id_fkey FOREIGN KEY (address_id) REFERENCES public.address(address_id)"); ADD CONSTRAINT customer_address_id_fkey FOREIGN KEY (address_id) REFERENCES public.address(address_id)");
match verified(&sql) { match verified_stmt(&sql) {
ASTNode::SQLAlterTable { name, .. } => { SQLStatement::SQLAlterTable { name, .. } => {
assert_eq!(name, "public.customer"); assert_eq!(name.to_string(), "public.customer");
} }
_ => assert!(false), _ => assert!(false),
} }
@ -333,7 +332,7 @@ Kwara & Kogi
PHP USD $ PHP USD $
\N Some other value \N Some other value
\\."#); \\."#);
let ast = parse_sql(&sql); let ast = one_statement_parses_to(&sql, "");
println!("{:#?}", ast); println!("{:#?}", ast);
//assert_eq!(sql, ast.to_string()); //assert_eq!(sql, ast.to_string());
} }
@ -341,7 +340,7 @@ PHP ₱ USD $
#[test] #[test]
fn parse_timestamps_example() { fn parse_timestamps_example() {
let sql = "2016-02-15 09:43:33"; let sql = "2016-02-15 09:43:33";
let _ = parse_sql(sql); let _ = parse_sql_expr(sql);
//TODO add assertion //TODO add assertion
//assert_eq!(sql, ast.to_string()); //assert_eq!(sql, ast.to_string());
} }
@ -349,7 +348,7 @@ fn parse_timestamps_example() {
#[test] #[test]
fn parse_timestamps_with_millis_example() { fn parse_timestamps_with_millis_example() {
let sql = "2017-11-02 19:15:42.308637"; let sql = "2017-11-02 19:15:42.308637";
let _ = parse_sql(sql); let _ = parse_sql_expr(sql);
//TODO add assertion //TODO add assertion
//assert_eq!(sql, ast.to_string()); //assert_eq!(sql, ast.to_string());
} }
@ -357,27 +356,43 @@ fn parse_timestamps_with_millis_example() {
#[test] #[test]
fn parse_example_value() { fn parse_example_value() {
let sql = "SARAH.LEWIS@sakilacustomer.org"; let sql = "SARAH.LEWIS@sakilacustomer.org";
let ast = parse_sql(sql); let ast = parse_sql_expr(sql);
assert_eq!(sql, ast.to_string()); assert_eq!(sql, ast.to_string());
} }
#[test] #[test]
fn parse_function_now() { fn parse_function_now() {
let sql = "now()"; let sql = "now()";
let ast = parse_sql(sql); let ast = parse_sql_expr(sql);
assert_eq!(sql, ast.to_string()); assert_eq!(sql, ast.to_string());
} }
fn verified(query: &str) -> ASTNode { fn verified_stmt(query: &str) -> SQLStatement {
let ast = parse_sql(query); one_statement_parses_to(query, query)
assert_eq!(query, &ast.to_string());
ast
} }
fn parse_sql(sql: &str) -> ASTNode { /// Ensures that `sql` parses as a single statement, optionally checking that
/// converting AST back to string equals to `canonical` (unless an empty string
/// is provided).
fn one_statement_parses_to(sql: &str, canonical: &str) -> SQLStatement {
let mut statements = parse_sql_statements(&sql).unwrap();
assert_eq!(statements.len(), 1);
let only_statement = statements.pop().unwrap();
if !canonical.is_empty() {
assert_eq!(canonical, only_statement.to_string())
}
only_statement
}
fn parse_sql_statements(sql: &str) -> Result<Vec<SQLStatement>, ParserError> {
Parser::parse_sql(&PostgreSqlDialect {}, sql.to_string())
}
fn parse_sql_expr(sql: &str) -> ASTNode {
debug!("sql: {}", sql); debug!("sql: {}", sql);
let mut parser = parser(sql); let mut parser = parser(sql);
let ast = parser.parse().unwrap(); let ast = parser.parse_expr().unwrap();
ast ast
} }