diff --git a/.github/workflows/license.yml b/.github/workflows/license.yml new file mode 100644 index 00000000..c851bff3 --- /dev/null +++ b/.github/workflows/license.yml @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: license + +# trigger for all PRs and changes to main +on: + push: + branches: + - main + pull_request: + +jobs: + + rat: + name: Release Audit Tool (RAT) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: 3.8 + - name: Audit licenses + run: ./dev/release/run-rat.sh . 
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index b5744e86..3abf9d38 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -19,6 +19,9 @@ name: Rust on: [push, pull_request] +permissions: + contents: read + jobs: codestyle: @@ -85,11 +88,8 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: ${{ matrix.rust }} + - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8 - name: Install Tarpaulin - uses: actions-rs/install@v0.1 - with: - crate: cargo-tarpaulin - version: 0.14.2 - use-tool-cache: true + run: cargo install cargo-tarpaulin - name: Test run: cargo test --all-features diff --git a/CHANGELOG.md b/CHANGELOG.md index 362a637d..a5511a05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ technically be breaking and thus will result in a `0.(N+1)` version. - Unreleased: Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. +- `0.56.0`: [changelog/0.56.0.md](changelog/0.56.0.md) - `0.55.0`: [changelog/0.55.0.md](changelog/0.55.0.md) - `0.54.0`: [changelog/0.54.0.md](changelog/0.54.0.md) - `0.53.0`: [changelog/0.53.0.md](changelog/0.53.0.md) diff --git a/Cargo.toml b/Cargo.toml index 99bfdc24..07e44f66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.55.0" +version = "0.57.0" authors = ["Apache DataFusion "] homepage = "https://github.com/apache/datafusion-sqlparser-rs" documentation = "https://docs.rs/sqlparser/" diff --git a/README.md b/README.md index d18a76b5..666be17c 100644 --- a/README.md +++ b/README.md @@ -89,10 +89,14 @@ keywords, the following should hold true for all SQL: ```rust // Parse SQL +let sql = "SELECT 'hello'"; let ast = Parser::parse_sql(&GenericDialect, sql).unwrap(); // The original SQL text can be generated from the AST assert_eq!(ast[0].to_string(), sql); + +// The SQL can also be 
pretty-printed with newlines and indentation +assert_eq!(format!("{:#}", ast[0]), "SELECT\n 'hello'"); ``` There are still some cases in this crate where different SQL with seemingly @@ -156,7 +160,8 @@ $ cargo run --features json_example --example cli FILENAME.sql [--dialectname] ## Users This parser is currently being used by the [DataFusion] query engine, [LocustDB], -[Ballista], [GlueSQL], [Opteryx], [Polars], [PRQL], [Qrlew], [JumpWire], and [ParadeDB]. +[Ballista], [GlueSQL], [Opteryx], [Polars], [PRQL], [Qrlew], [JumpWire], [ParadeDB], [CipherStash Proxy], +and [GreptimeDB]. If your project is using sqlparser-rs feel free to make a PR to add it to this list. @@ -275,3 +280,5 @@ licensed as above, without any additional terms or conditions. [sql-standard]: https://en.wikipedia.org/wiki/ISO/IEC_9075 [`Dialect`]: https://docs.rs/sqlparser/latest/sqlparser/dialect/trait.Dialect.html [`GenericDialect`]: https://docs.rs/sqlparser/latest/sqlparser/dialect/struct.GenericDialect.html +[CipherStash Proxy]: https://github.com/cipherstash/proxy +[GreptimeDB]: https://github.com/GreptimeTeam/greptimedb diff --git a/changelog/0.56.0.md b/changelog/0.56.0.md new file mode 100644 index 00000000..b3c8a67a --- /dev/null +++ b/changelog/0.56.0.md @@ -0,0 +1,102 @@ + + +# sqlparser-rs 0.56.0 Changelog + +This release consists of 48 commits from 19 contributors. See credits at the end of this changelog for more information. 
+ +**Other:** + +- Ignore escaped LIKE wildcards in MySQL [#1735](https://github.com/apache/datafusion-sqlparser-rs/pull/1735) (mvzink) +- Parse SET NAMES syntax in Postgres [#1752](https://github.com/apache/datafusion-sqlparser-rs/pull/1752) (mvzink) +- re-add support for nested comments in mssql [#1754](https://github.com/apache/datafusion-sqlparser-rs/pull/1754) (lovasoa) +- Extend support for INDEX parsing [#1707](https://github.com/apache/datafusion-sqlparser-rs/pull/1707) (LucaCappelletti94) +- Parse MySQL `ALTER TABLE DROP FOREIGN KEY` syntax [#1762](https://github.com/apache/datafusion-sqlparser-rs/pull/1762) (mvzink) +- add support for `with` clauses (CTEs) in `delete` statements [#1764](https://github.com/apache/datafusion-sqlparser-rs/pull/1764) (lovasoa) +- SET with a list of comma separated assignments [#1757](https://github.com/apache/datafusion-sqlparser-rs/pull/1757) (MohamedAbdeen21) +- Preserve MySQL-style `LIMIT <offset>, <limit>` syntax [#1765](https://github.com/apache/datafusion-sqlparser-rs/pull/1765) (mvzink) +- Add support for `DROP MATERIALIZED VIEW` [#1743](https://github.com/apache/datafusion-sqlparser-rs/pull/1743) (iffyio) +- Add `CASE` and `IF` statement support [#1741](https://github.com/apache/datafusion-sqlparser-rs/pull/1741) (iffyio) +- BigQuery: Add support for `CREATE SCHEMA` options [#1742](https://github.com/apache/datafusion-sqlparser-rs/pull/1742) (iffyio) +- Snowflake: Support dollar quoted comments [#1755](https://github.com/apache/datafusion-sqlparser-rs/pull/1755) +- Add LOCK operation for ALTER TABLE [#1768](https://github.com/apache/datafusion-sqlparser-rs/pull/1768) (MohamedAbdeen21) +- Add support for `RAISE` statement [#1766](https://github.com/apache/datafusion-sqlparser-rs/pull/1766) (iffyio) +- Add GLOBAL context/modifier to SET statements [#1767](https://github.com/apache/datafusion-sqlparser-rs/pull/1767) (MohamedAbdeen21) +- Parse `SUBSTR` as alias for `SUBSTRING` 
[#1769](https://github.com/apache/datafusion-sqlparser-rs/pull/1769) (mvzink) +- SET statements: scope modifier for multiple assignments [#1772](https://github.com/apache/datafusion-sqlparser-rs/pull/1772) (MohamedAbdeen21) +- Support qualified column names in `MATCH AGAINST` clause [#1774](https://github.com/apache/datafusion-sqlparser-rs/pull/1774) (tomershaniii) +- Mysql: Add support for := operator [#1779](https://github.com/apache/datafusion-sqlparser-rs/pull/1779) (barsela1) +- Add cipherstash-proxy to list of users in README.md [#1782](https://github.com/apache/datafusion-sqlparser-rs/pull/1782) (coderdan) +- Fix typos [#1785](https://github.com/apache/datafusion-sqlparser-rs/pull/1785) (jayvdb) +- Add support for Databricks TIMESTAMP_NTZ. [#1781](https://github.com/apache/datafusion-sqlparser-rs/pull/1781) (romanb) +- Enable double-dot-notation for mssql. [#1787](https://github.com/apache/datafusion-sqlparser-rs/pull/1787) (romanb) +- Fix: Snowflake ALTER SESSION cannot be followed by other statements. [#1786](https://github.com/apache/datafusion-sqlparser-rs/pull/1786) (romanb) +- Add GreptimeDB to the "Users" in README [#1788](https://github.com/apache/datafusion-sqlparser-rs/pull/1788) (MichaelScofield) +- Extend snowflake grant options support [#1794](https://github.com/apache/datafusion-sqlparser-rs/pull/1794) (yoavcloud) +- Fix clippy lint on rust 1.86 [#1796](https://github.com/apache/datafusion-sqlparser-rs/pull/1796) (iffyio) +- Allow single quotes in EXTRACT() for Redshift. [#1795](https://github.com/apache/datafusion-sqlparser-rs/pull/1795) (romanb) +- MSSQL: Add support for functionality `MERGE` output clause [#1790](https://github.com/apache/datafusion-sqlparser-rs/pull/1790) (dilovancelik) +- Support additional DuckDB integer types such as HUGEINT, UHUGEINT, etc [#1797](https://github.com/apache/datafusion-sqlparser-rs/pull/1797) (alexander-beedie) +- Add support for MSSQL IF/ELSE statements. 
[#1791](https://github.com/apache/datafusion-sqlparser-rs/pull/1791) (romanb) +- Allow literal backslash escapes for string literals in Redshift dialect. [#1801](https://github.com/apache/datafusion-sqlparser-rs/pull/1801) (romanb) +- Add support for MySQL's STRAIGHT_JOIN join operator. [#1802](https://github.com/apache/datafusion-sqlparser-rs/pull/1802) (romanb) +- Snowflake COPY INTO target columns, select items and optional alias [#1805](https://github.com/apache/datafusion-sqlparser-rs/pull/1805) (yoavcloud) +- Fix tokenization of qualified identifiers with numeric prefix. [#1803](https://github.com/apache/datafusion-sqlparser-rs/pull/1803) (romanb) +- Add support for `INHERITS` option in `CREATE TABLE` statement [#1806](https://github.com/apache/datafusion-sqlparser-rs/pull/1806) (LucaCappelletti94) +- Add `DROP TRIGGER` support for SQL Server [#1813](https://github.com/apache/datafusion-sqlparser-rs/pull/1813) (aharpervc) +- Snowflake: support nested join without parentheses [#1799](https://github.com/apache/datafusion-sqlparser-rs/pull/1799) (barsela1) +- Add support for parenthesized subquery as `IN` predicate [#1793](https://github.com/apache/datafusion-sqlparser-rs/pull/1793) (adamchainz) +- Fix `STRAIGHT_JOIN` constraint when table alias is absent [#1812](https://github.com/apache/datafusion-sqlparser-rs/pull/1812) (killertux) +- Add support for `PRINT` statement for SQL Server [#1811](https://github.com/apache/datafusion-sqlparser-rs/pull/1811) (aharpervc) +- enable `supports_filter_during_aggregation` for Generic dialect [#1815](https://github.com/apache/datafusion-sqlparser-rs/pull/1815) (goldmedal) +- Add support for `XMLTABLE` [#1817](https://github.com/apache/datafusion-sqlparser-rs/pull/1817) (lovasoa) +- Add `CREATE FUNCTION` support for SQL Server [#1808](https://github.com/apache/datafusion-sqlparser-rs/pull/1808) (aharpervc) +- Add `OR ALTER` support for `CREATE VIEW` [#1818](https://github.com/apache/datafusion-sqlparser-rs/pull/1818) 
(aharpervc) +- Add `DECLARE ... CURSOR FOR` support for SQL Server [#1821](https://github.com/apache/datafusion-sqlparser-rs/pull/1821) (aharpervc) +- Handle missing login in changelog generate script [#1823](https://github.com/apache/datafusion-sqlparser-rs/pull/1823) (iffyio) +- Snowflake: Add support for `CONNECT_BY_ROOT` [#1780](https://github.com/apache/datafusion-sqlparser-rs/pull/1780) (tomershaniii) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 8 Roman Borschel + 6 Ifeanyi Ubah + 5 Andrew Harper + 5 Michael Victor Zink + 4 Mohamed Abdeen + 3 Ophir LOJKINE + 2 Luca Cappelletti + 2 Yoav Cohen + 2 bar sela + 2 tomershaniii + 1 Adam Johnson + 1 Aleksei Piianin + 1 Alexander Beedie + 1 Bruno Clemente + 1 Dan Draper + 1 DilovanCelik + 1 Jax Liu + 1 John Vandenberg + 1 LFC +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. + diff --git a/changelog/0.57.0.md b/changelog/0.57.0.md new file mode 100644 index 00000000..200bb73a --- /dev/null +++ b/changelog/0.57.0.md @@ -0,0 +1,95 @@ + + +# sqlparser-rs 0.57.0 Changelog + +This release consists of 39 commits from 19 contributors. See credits at the end of this changelog for more information. 
+ +**Implemented enhancements:** + +- feat: Hive: support `SORT BY` direction [#1873](https://github.com/apache/datafusion-sqlparser-rs/pull/1873) (chenkovsky) + +**Other:** + +- Support some of pipe operators [#1759](https://github.com/apache/datafusion-sqlparser-rs/pull/1759) (simonvandel) +- Added support for `DROP DOMAIN` [#1828](https://github.com/apache/datafusion-sqlparser-rs/pull/1828) (LucaCappelletti94) +- Improve support for cursors for SQL Server [#1831](https://github.com/apache/datafusion-sqlparser-rs/pull/1831) (aharpervc) +- Add all missing table options to be handled in any order [#1747](https://github.com/apache/datafusion-sqlparser-rs/pull/1747) (benrsatori) +- Add `CREATE TRIGGER` support for SQL Server [#1810](https://github.com/apache/datafusion-sqlparser-rs/pull/1810) (aharpervc) +- Added support for `CREATE DOMAIN` [#1830](https://github.com/apache/datafusion-sqlparser-rs/pull/1830) (LucaCappelletti94) +- Allow stored procedures to be defined without `BEGIN`/`END` [#1834](https://github.com/apache/datafusion-sqlparser-rs/pull/1834) (aharpervc) +- Add support for the MATCH and REGEXP binary operators [#1840](https://github.com/apache/datafusion-sqlparser-rs/pull/1840) (lovasoa) +- Fix: parsing ident starting with underscore in certain dialects [#1835](https://github.com/apache/datafusion-sqlparser-rs/pull/1835) (MohamedAbdeen21) +- implement pretty-printing with `{:#}` [#1847](https://github.com/apache/datafusion-sqlparser-rs/pull/1847) (lovasoa) +- Fix big performance issue in string serialization [#1848](https://github.com/apache/datafusion-sqlparser-rs/pull/1848) (lovasoa) +- Add support for `DENY` statements [#1836](https://github.com/apache/datafusion-sqlparser-rs/pull/1836) (aharpervc) +- Postgresql: Add `REPLICA IDENTITY` operation for `ALTER TABLE` [#1844](https://github.com/apache/datafusion-sqlparser-rs/pull/1844) (MohamedAbdeen21) +- Add support for INCLUDE/EXCLUDE NULLS for UNPIVOT 
[#1849](https://github.com/apache/datafusion-sqlparser-rs/pull/1849) (Vedin) +- pretty print improvements [#1851](https://github.com/apache/datafusion-sqlparser-rs/pull/1851) (lovasoa) +- fix new rust 1.87 cargo clippy warnings [#1856](https://github.com/apache/datafusion-sqlparser-rs/pull/1856) (lovasoa) +- Update criterion requirement from 0.5 to 0.6 in /sqlparser_bench [#1857](https://github.com/apache/datafusion-sqlparser-rs/pull/1857) (dependabot[bot]) +- pretty-print CREATE TABLE statements [#1854](https://github.com/apache/datafusion-sqlparser-rs/pull/1854) (lovasoa) +- pretty-print CREATE VIEW statements [#1855](https://github.com/apache/datafusion-sqlparser-rs/pull/1855) (lovasoa) +- Handle optional datatypes properly in `CREATE FUNCTION` statements [#1826](https://github.com/apache/datafusion-sqlparser-rs/pull/1826) (LucaCappelletti94) +- Mysql: Add `SRID` column option [#1852](https://github.com/apache/datafusion-sqlparser-rs/pull/1852) (MohamedAbdeen21) +- Add support for table valued functions for SQL Server [#1839](https://github.com/apache/datafusion-sqlparser-rs/pull/1839) (aharpervc) +- Keep the COLUMN keyword only if it exists when dropping the column [#1862](https://github.com/apache/datafusion-sqlparser-rs/pull/1862) (git-hulk) +- Add support for parameter default values in SQL Server [#1866](https://github.com/apache/datafusion-sqlparser-rs/pull/1866) (aharpervc) +- Add support for `TABLESAMPLE` pipe operator [#1860](https://github.com/apache/datafusion-sqlparser-rs/pull/1860) (hendrikmakait) +- Adds support for mysql's drop index [#1864](https://github.com/apache/datafusion-sqlparser-rs/pull/1864) (dmzmk) +- Fix: GROUPING SETS accept values without parenthesis [#1867](https://github.com/apache/datafusion-sqlparser-rs/pull/1867) (Vedin) +- Add ICEBERG keyword support to ALTER TABLE statement [#1869](https://github.com/apache/datafusion-sqlparser-rs/pull/1869) (osipovartem) +- MySQL: Support `index_name` in FK constraints 
[#1871](https://github.com/apache/datafusion-sqlparser-rs/pull/1871) (MohamedAbdeen21) +- Postgres: Apply `ONLY` keyword per table in TRUNCATE stmt [#1872](https://github.com/apache/datafusion-sqlparser-rs/pull/1872) (MohamedAbdeen21) +- Fix `CASE` expression spans [#1874](https://github.com/apache/datafusion-sqlparser-rs/pull/1874) (eliaperantoni) +- MySQL: `[[NOT] ENFORCED]` in CHECK constraint [#1870](https://github.com/apache/datafusion-sqlparser-rs/pull/1870) (MohamedAbdeen21) +- Add support for `CREATE SCHEMA WITH ( <properties> )` [#1877](https://github.com/apache/datafusion-sqlparser-rs/pull/1877) (utay) +- Add support for `ALTER TABLE DROP INDEX` [#1865](https://github.com/apache/datafusion-sqlparser-rs/pull/1865) (vimko) +- chore: Replace archived actions-rs/install action [#1876](https://github.com/apache/datafusion-sqlparser-rs/pull/1876) (assignUser) +- Allow `IF NOT EXISTS` after table name for Snowflake [#1881](https://github.com/apache/datafusion-sqlparser-rs/pull/1881) (bombsimon) +- Support `DISTINCT AS { STRUCT | VALUE }` for BigQuery [#1880](https://github.com/apache/datafusion-sqlparser-rs/pull/1880) (bombsimon) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 7 Ophir LOJKINE + 6 Andrew Harper + 6 Mohamed Abdeen + 3 Luca Cappelletti + 2 Denys Tsomenko + 2 Simon Sawert + 1 Andrew Lamb + 1 Artem Osipov + 1 Chen Chongchen + 1 Dmitriy Mazurin + 1 Elia Perantoni + 1 Hendrik Makait + 1 Jacob Wujciak-Jens + 1 Simon Vandel Sillesen + 1 Yannick Utard + 1 benrsatori + 1 dependabot[bot] + 1 hulk + 1 vimko +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
+ diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index 52fd2e54..6f2b7c41 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -28,7 +28,8 @@ def print_pulls(repo_name, title, pulls): print() for (pull, commit) in pulls: url = "https://github.com/{}/pull/{}".format(repo_name, pull.number) - print("- {} [#{}]({}) ({})".format(pull.title, pull.number, url, commit.author.login)) + author = f"({commit.author.login})" if commit.author else '' + print("- {} [#{}]({}) {}".format(pull.title, pull.number, url, author)) print() @@ -161,4 +162,4 @@ def cli(args=None): generate_changelog(repo, project, args.tag1, args.tag2, args.version) if __name__ == "__main__": - cli() \ No newline at end of file + cli() diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 562eec2f..280b1bce 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -1,7 +1,8 @@ -# Files to exclude from the Apache Rat (license) check -.gitignore .tool-versions +target/* +**.gitignore +rat.txt dev/release/rat_exclude_files.txt -fuzz/.gitignore sqlparser_bench/img/flamegraph.svg - +**Cargo.lock +filtered_rat.txt diff --git a/examples/cli.rs b/examples/cli.rs index 0252fca7..08a40a6d 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -63,7 +63,7 @@ $ cargo run --example cli - [--dialectname] }; let contents = if filename == "-" { - println!("Parsing from stdin using {:?}", dialect); + println!("Parsing from stdin using {dialect:?}"); let mut buf = Vec::new(); stdin() .read_to_end(&mut buf) diff --git a/sqlparser_bench/Cargo.toml b/sqlparser_bench/Cargo.toml index 2c1f0ae4..01c59be7 100644 --- a/sqlparser_bench/Cargo.toml +++ b/sqlparser_bench/Cargo.toml @@ -26,7 +26,7 @@ edition = "2018" sqlparser = { path = "../" } [dev-dependencies] -criterion = "0.5" +criterion = "0.6" [[bench]] name = "sqlparser_bench" diff --git a/sqlparser_bench/benches/sqlparser_bench.rs 
b/sqlparser_bench/benches/sqlparser_bench.rs index a7768cbc..6132ee43 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -45,30 +45,29 @@ fn basic_queries(c: &mut Criterion) { let large_statement = { let expressions = (0..1000) - .map(|n| format!("FN_{}(COL_{})", n, n)) + .map(|n| format!("FN_{n}(COL_{n})")) .collect::>() .join(", "); let tables = (0..1000) - .map(|n| format!("TABLE_{}", n)) + .map(|n| format!("TABLE_{n}")) .collect::>() .join(" JOIN "); let where_condition = (0..1000) - .map(|n| format!("COL_{} = {}", n, n)) + .map(|n| format!("COL_{n} = {n}")) .collect::>() .join(" OR "); let order_condition = (0..1000) - .map(|n| format!("COL_{} DESC", n)) + .map(|n| format!("COL_{n} DESC")) .collect::>() .join(", "); format!( - "SELECT {} FROM {} WHERE {} ORDER BY {}", - expressions, tables, where_condition, order_condition + "SELECT {expressions} FROM {tables} WHERE {where_condition} ORDER BY {order_condition}" ) }; group.bench_function("parse_large_statement", |b| { - b.iter(|| Parser::parse_sql(&dialect, criterion::black_box(large_statement.as_str()))); + b.iter(|| Parser::parse_sql(&dialect, std::hint::black_box(large_statement.as_str()))); }); let large_statement = Parser::parse_sql(&dialect, large_statement.as_str()) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 57bc6744..0897f2db 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -36,7 +36,7 @@ pub enum EnumMember { Name(String), /// ClickHouse allows to specify an integer value for each enum value. /// - /// [clickhouse](https://clickhouse.com/docs/en/sql-reference/data-types/enum) + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/data-types/enum) NamedValue(String, Expr), } @@ -45,292 +45,327 @@ pub enum EnumMember { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum DataType { - /// Table type in [postgresql]. e.g. 
CREATE FUNCTION RETURNS TABLE(...) + /// Table type in [PostgreSQL], e.g. CREATE FUNCTION RETURNS TABLE(...). /// - /// [postgresql]: https://www.postgresql.org/docs/15/sql-createfunction.html - Table(Vec), - /// Fixed-length character type e.g. CHARACTER(10) + /// [PostgreSQL]: https://www.postgresql.org/docs/15/sql-createfunction.html + /// [MsSQL]: https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql?view=sql-server-ver16#c-create-a-multi-statement-table-valued-function + Table(Option>), + /// Table type with a name, e.g. CREATE FUNCTION RETURNS @result TABLE(...). + /// + /// [MsSQL]: https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql?view=sql-server-ver16#table + NamedTable { + /// Table name. + name: ObjectName, + /// Table columns. + columns: Vec, + }, + /// Fixed-length character type, e.g. CHARACTER(10). Character(Option), - /// Fixed-length char type e.g. CHAR(10) + /// Fixed-length char type, e.g. CHAR(10). Char(Option), - /// Character varying type e.g. CHARACTER VARYING(10) + /// Character varying type, e.g. CHARACTER VARYING(10). CharacterVarying(Option), - /// Char varying type e.g. CHAR VARYING(10) + /// Char varying type, e.g. CHAR VARYING(10). CharVarying(Option), - /// Variable-length character type e.g. VARCHAR(10) + /// Variable-length character type, e.g. VARCHAR(10). Varchar(Option), - /// Variable-length character type e.g. NVARCHAR(10) + /// Variable-length character type, e.g. NVARCHAR(10). Nvarchar(Option), - /// Uuid type + /// Uuid type. Uuid, - /// Large character object with optional length e.g. CHARACTER LARGE OBJECT, CHARACTER LARGE OBJECT(1000), [standard] + /// Large character object with optional length, + /// e.g. CHARACTER LARGE OBJECT, CHARACTER LARGE OBJECT(1000), [SQL Standard]. 
/// - /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-large-object-type + /// [SQL Standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-large-object-type CharacterLargeObject(Option), - /// Large character object with optional length e.g. CHAR LARGE OBJECT, CHAR LARGE OBJECT(1000), [standard] + /// Large character object with optional length, + /// e.g. CHAR LARGE OBJECT, CHAR LARGE OBJECT(1000), [SQL Standard]. /// - /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-large-object-type + /// [SQL Standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-large-object-type CharLargeObject(Option), - /// Large character object with optional length e.g. CLOB, CLOB(1000), [standard] + /// Large character object with optional length, + /// e.g. CLOB, CLOB(1000), [SQL Standard]. /// - /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-large-object-type + /// [SQL Standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-large-object-type /// [Oracle]: https://docs.oracle.com/javadb/10.10.1.2/ref/rrefclob.html Clob(Option), - /// Fixed-length binary type with optional length e.g. [standard], [MS SQL Server] + /// Fixed-length binary type with optional length, + /// see [SQL Standard], [MS SQL Server]. /// - /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-string-type + /// [SQL Standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-string-type /// [MS SQL Server]: https://learn.microsoft.com/pt-br/sql/t-sql/data-types/binary-and-varbinary-transact-sql?view=sql-server-ver16 Binary(Option), - /// Variable-length binary with optional length type e.g. 
[standard], [MS SQL Server] + /// Variable-length binary with optional length type, + /// see [SQL Standard], [MS SQL Server]. /// - /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-string-type + /// [SQL Standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-string-type /// [MS SQL Server]: https://learn.microsoft.com/pt-br/sql/t-sql/data-types/binary-and-varbinary-transact-sql?view=sql-server-ver16 Varbinary(Option), - /// Large binary object with optional length e.g. BLOB, BLOB(1000), [standard], [Oracle] + /// Large binary object with optional length, + /// see [SQL Standard], [Oracle]. /// - /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type + /// [SQL Standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type /// [Oracle]: https://docs.oracle.com/javadb/10.8.3.0/ref/rrefblob.html Blob(Option), - /// [MySQL] blob with up to 2**8 bytes + /// [MySQL] blob with up to 2**8 bytes. /// /// [MySQL]: https://dev.mysql.com/doc/refman/9.1/en/blob.html TinyBlob, - /// [MySQL] blob with up to 2**24 bytes + /// [MySQL] blob with up to 2**24 bytes. /// /// [MySQL]: https://dev.mysql.com/doc/refman/9.1/en/blob.html MediumBlob, - /// [MySQL] blob with up to 2**32 bytes + /// [MySQL] blob with up to 2**32 bytes. /// /// [MySQL]: https://dev.mysql.com/doc/refman/9.1/en/blob.html LongBlob, /// Variable-length binary data with optional length. /// - /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#bytes_type + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#bytes_type Bytes(Option), - /// Numeric type with optional precision and scale e.g. NUMERIC(10,2), [standard][1] + /// Numeric type with optional precision and scale, e.g. NUMERIC(10,2), [SQL Standard][1]. 
/// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type Numeric(ExactNumberInfo), - /// Decimal type with optional precision and scale e.g. DECIMAL(10,2), [standard][1] + /// Decimal type with optional precision and scale, e.g. DECIMAL(10,2), [SQL Standard][1]. /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type Decimal(ExactNumberInfo), - /// [BigNumeric] type used in BigQuery + /// [BigNumeric] type used in BigQuery. /// /// [BigNumeric]: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#bignumeric_literals BigNumeric(ExactNumberInfo), - /// This is alias for `BigNumeric` type used in BigQuery + /// This is an alias for the `BigNumeric` type used in BigQuery. /// /// [BigDecimal]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types BigDecimal(ExactNumberInfo), - /// Dec type with optional precision and scale e.g. DEC(10,2), [standard][1] + /// Dec type with optional precision and scale, e.g. DEC(10,2), [SQL Standard][1]. /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type Dec(ExactNumberInfo), - /// Floating point with optional precision e.g. FLOAT(8) + /// Floating point with optional precision, e.g. FLOAT(8). Float(Option), - /// Tiny integer with optional display width e.g. TINYINT or TINYINT(3) + /// Tiny integer with optional display width, e.g. TINYINT or TINYINT(3). TinyInt(Option), - /// Unsigned tiny integer with optional display width e.g. TINYINT UNSIGNED or TINYINT(3) UNSIGNED + /// Unsigned tiny integer with optional display width, + /// e.g. TINYINT UNSIGNED or TINYINT(3) UNSIGNED. TinyIntUnsigned(Option), - /// Int2 as alias for SmallInt in [postgresql] - /// Note: Int2 mean 2 bytes in postgres (not 2 bits) - /// Int2 with optional display width e.g. INT2 or INT2(5) + /// Unsigned tiny integer, e.g. 
UTINYINT + UTinyInt, + /// Int2 is an alias for SmallInt in [PostgreSQL]. + /// Note: Int2 means 2 bytes in PostgreSQL (not 2 bits). + /// Int2 with optional display width, e.g. INT2 or INT2(5). /// - /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + /// [PostgreSQL]: https://www.postgresql.org/docs/current/datatype.html Int2(Option), - /// Unsigned Int2 with optional display width e.g. INT2 UNSIGNED or INT2(5) UNSIGNED + /// Unsigned Int2 with optional display width, e.g. INT2 UNSIGNED or INT2(5) UNSIGNED. Int2Unsigned(Option), - /// Small integer with optional display width e.g. SMALLINT or SMALLINT(5) + /// Small integer with optional display width, e.g. SMALLINT or SMALLINT(5). SmallInt(Option), - /// Unsigned small integer with optional display width e.g. SMALLINT UNSIGNED or SMALLINT(5) UNSIGNED + /// Unsigned small integer with optional display width, + /// e.g. SMALLINT UNSIGNED or SMALLINT(5) UNSIGNED. SmallIntUnsigned(Option), - /// MySQL medium integer ([1]) with optional display width e.g. MEDIUMINT or MEDIUMINT(5) + /// Unsigned small integer, e.g. USMALLINT. + USmallInt, + /// MySQL medium integer ([1]) with optional display width, + /// e.g. MEDIUMINT or MEDIUMINT(5). /// /// [1]: https://dev.mysql.com/doc/refman/8.0/en/integer-types.html MediumInt(Option), - /// Unsigned medium integer ([1]) with optional display width e.g. MEDIUMINT UNSIGNED or MEDIUMINT(5) UNSIGNED + /// Unsigned medium integer ([1]) with optional display width, + /// e.g. MEDIUMINT UNSIGNED or MEDIUMINT(5) UNSIGNED. /// /// [1]: https://dev.mysql.com/doc/refman/8.0/en/integer-types.html MediumIntUnsigned(Option), - /// Int with optional display width e.g. INT or INT(11) + /// Int with optional display width, e.g. INT or INT(11). Int(Option), - /// Int4 as alias for Integer in [postgresql] - /// Note: Int4 mean 4 bytes in postgres (not 4 bits) - /// Int4 with optional display width e.g. Int4 or Int4(11) + /// Int4 is an alias for Integer in [PostgreSQL]. 
+ /// Note: Int4 means 4 bytes in PostgreSQL (not 4 bits). + /// Int4 with optional display width, e.g. Int4 or Int4(11). /// - /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + /// [PostgreSQL]: https://www.postgresql.org/docs/current/datatype.html Int4(Option), - /// Int8 as alias for Bigint in [postgresql] and integer type in [clickhouse] - /// Note: Int8 mean 8 bytes in [postgresql] (not 8 bits) - /// Int8 with optional display width e.g. INT8 or INT8(11) - /// Note: Int8 mean 8 bits in [clickhouse] + /// Int8 is an alias for BigInt in [PostgreSQL] and Integer type in [ClickHouse]. + /// Int8 with optional display width, e.g. INT8 or INT8(11). + /// Note: Int8 means 8 bytes in [PostgreSQL], but 8 bits in [ClickHouse]. /// - /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + /// [PostgreSQL]: https://www.postgresql.org/docs/current/datatype.html + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint Int8(Option), - /// Integer type in [clickhouse] - /// Note: Int16 mean 16 bits in [clickhouse] + /// Integer type in [ClickHouse]. + /// Note: Int16 means 16 bits in [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint Int16, - /// Integer type in [clickhouse] - /// Note: Int16 mean 32 bits in [clickhouse] + /// Integer type in [ClickHouse]. + /// Note: Int32 means 32 bits in [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint Int32, - /// Integer type in [bigquery], [clickhouse] + /// Integer type in [BigQuery], [ClickHouse]. 
/// - /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint Int64, - /// Integer type in [clickhouse] - /// Note: Int128 mean 128 bits in [clickhouse] + /// Integer type in [ClickHouse]. + /// Note: Int128 means 128 bits in [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint Int128, - /// Integer type in [clickhouse] - /// Note: Int256 mean 256 bits in [clickhouse] + /// Integer type in [ClickHouse]. + /// Note: Int256 means 256 bits in [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint Int256, - /// Integer with optional display width e.g. INTEGER or INTEGER(11) + /// Integer with optional display width, e.g. INTEGER or INTEGER(11). Integer(Option), - /// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED + /// Unsigned int with optional display width, e.g. INT UNSIGNED or INT(11) UNSIGNED. IntUnsigned(Option), - /// Unsigned int4 with optional display width e.g. INT4 UNSIGNED or INT4(11) UNSIGNED + /// Unsigned int4 with optional display width, e.g. INT4 UNSIGNED or INT4(11) UNSIGNED. Int4Unsigned(Option), - /// Unsigned integer with optional display width e.g. INTEGER UNSIGNED or INTEGER(11) UNSIGNED + /// Unsigned integer with optional display width, e.g. INTEGER UNSIGNED or INTEGER(11) UNSIGNED. IntegerUnsigned(Option), - /// Unsigned integer type in [clickhouse] - /// Note: UInt8 mean 8 bits in [clickhouse] + /// 128-bit integer type, e.g. 
HUGEINT. + HugeInt, + /// Unsigned 128-bit integer type, e.g. UHUGEINT. + UHugeInt, + /// Unsigned integer type in [ClickHouse]. + /// Note: UInt8 means 8 bits in [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint UInt8, - /// Unsigned integer type in [clickhouse] - /// Note: UInt16 mean 16 bits in [clickhouse] + /// Unsigned integer type in [ClickHouse]. + /// Note: UInt16 means 16 bits in [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint UInt16, - /// Unsigned integer type in [clickhouse] - /// Note: UInt32 mean 32 bits in [clickhouse] + /// Unsigned integer type in [ClickHouse]. + /// Note: UInt32 means 32 bits in [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint UInt32, - /// Unsigned integer type in [clickhouse] - /// Note: UInt64 mean 64 bits in [clickhouse] + /// Unsigned integer type in [ClickHouse]. + /// Note: UInt64 means 64 bits in [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint UInt64, - /// Unsigned integer type in [clickhouse] - /// Note: UInt128 mean 128 bits in [clickhouse] + /// Unsigned integer type in [ClickHouse]. + /// Note: UInt128 means 128 bits in [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint UInt128, - /// Unsigned integer type in [clickhouse] - /// Note: UInt256 mean 256 bits in [clickhouse] + /// Unsigned integer type in [ClickHouse]. 
+ /// Note: UInt256 means 256 bits in [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint UInt256, - /// Big integer with optional display width e.g. BIGINT or BIGINT(20) + /// Big integer with optional display width, e.g. BIGINT or BIGINT(20). BigInt(Option), - /// Unsigned big integer with optional display width e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED + /// Unsigned big integer with optional display width, e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED. BigIntUnsigned(Option), - /// Unsigned Int8 with optional display width e.g. INT8 UNSIGNED or INT8(11) UNSIGNED + /// Unsigned big integer, e.g. UBIGINT. + UBigInt, + /// Unsigned Int8 with optional display width, e.g. INT8 UNSIGNED or INT8(11) UNSIGNED. Int8Unsigned(Option), - /// Signed integer as used in [MySQL CAST] target types, without optional `INTEGER` suffix: - /// `SIGNED` + /// Signed integer as used in [MySQL CAST] target types, without optional `INTEGER` suffix, + /// e.g. `SIGNED` /// /// [MySQL CAST]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html Signed, - /// Signed integer as used in [MySQL CAST] target types, with optional `INTEGER` suffix: - /// `SIGNED INTEGER` + /// Signed integer as used in [MySQL CAST] target types, with optional `INTEGER` suffix, + /// e.g. `SIGNED INTEGER` /// /// [MySQL CAST]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html SignedInteger, - /// Signed integer as used in [MySQL CAST] target types, without optional `INTEGER` suffix: - /// `SIGNED` + /// Unsigned integer as used in [MySQL CAST] target types, without optional `INTEGER` suffix, + /// e.g.
`UNSIGNED` /// /// [MySQL CAST]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html Unsigned, - /// Unsigned integer as used in [MySQL CAST] target types, with optional `INTEGER` suffix: - /// `UNSIGNED INTEGER` + /// Unsigned integer as used in [MySQL CAST] target types, with optional `INTEGER` suffix, + /// e.g. `UNSIGNED INTEGER`. /// /// [MySQL CAST]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html UnsignedInteger, - /// Float4 as alias for Real in [postgresql] + /// Float4 is an alias for Real in [PostgreSQL]. /// - /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + /// [PostgreSQL]: https://www.postgresql.org/docs/current/datatype.html Float4, - /// Floating point in [clickhouse] + /// Floating point in [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float Float32, - /// Floating point in [bigquery] + /// Floating point in [BigQuery]. /// - /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float Float64, - /// Floating point e.g. REAL + /// Floating point, e.g. REAL. Real, - /// Float8 as alias for Double in [postgresql] + /// Float8 is an alias for Double in [PostgreSQL]. /// - /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + /// [PostgreSQL]: https://www.postgresql.org/docs/current/datatype.html Float8, /// Double Double(ExactNumberInfo), - /// Double PRECISION e.g. [standard], [postgresql] + /// Double Precision, see [SQL Standard], [PostgreSQL].
/// - /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#approximate-numeric-type - /// [postgresql]: https://www.postgresql.org/docs/current/datatype-numeric.html + /// [SQL Standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#approximate-numeric-type + /// [PostgreSQL]: https://www.postgresql.org/docs/current/datatype-numeric.html DoublePrecision, - /// Bool as alias for Boolean in [postgresql] + /// Bool is an alias for Boolean, see [PostgreSQL]. /// - /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + /// [PostgreSQL]: https://www.postgresql.org/docs/current/datatype.html Bool, - /// Boolean + /// Boolean type. Boolean, - /// Date + /// Date type. Date, - /// Date32 with the same range as Datetime64 + /// Date32 with the same range as Datetime64, see [ClickHouse][1]. /// /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/date32 Date32, - /// Time with optional time precision and time zone information e.g. [standard][1]. + /// Time with optional time precision and time zone information, see [SQL Standard][1]. /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type Time(Option, TimezoneInfo), - /// Datetime with optional time precision e.g. [MySQL][1]. + /// Datetime with optional time precision, see [MySQL][1]. /// /// [1]: https://dev.mysql.com/doc/refman/8.0/en/datetime.html Datetime(Option), - /// Datetime with time precision and optional timezone e.g. [ClickHouse][1]. + /// Datetime with time precision and optional timezone, see [ClickHouse][1]. /// /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64 Datetime64(u64, Option), - /// Timestamp with optional time precision and time zone information e.g. [standard][1]. + /// Timestamp with optional time precision and time zone information, see [SQL Standard][1].
/// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type Timestamp(Option, TimezoneInfo), - /// Interval + /// Databricks timestamp without time zone. See [1]. + /// + /// [1]: https://docs.databricks.com/aws/en/sql/language-manual/data-types/timestamp-ntz-type + TimestampNtz, + /// Interval type. Interval, - /// JSON type + /// JSON type. JSON, - /// Binary JSON type + /// Binary JSON type. JSONB, - /// Regclass used in postgresql serial + /// Regclass used in [PostgreSQL] serial. + /// + /// [PostgreSQL]: https://www.postgresql.org/docs/current/datatype.html Regclass, - /// Text + /// Text type. Text, - /// [MySQL] text with up to 2**8 bytes + /// [MySQL] text with up to 2**8 bytes. /// /// [MySQL]: https://dev.mysql.com/doc/refman/9.1/en/blob.html TinyText, - /// [MySQL] text with up to 2**24 bytes + /// [MySQL] text with up to 2**24 bytes. /// /// [MySQL]: https://dev.mysql.com/doc/refman/9.1/en/blob.html MediumText, - /// [MySQL] text with up to 2**32 bytes + /// [MySQL] text with up to 2**32 bytes. /// /// [MySQL]: https://dev.mysql.com/doc/refman/9.1/en/blob.html LongText, @@ -340,76 +375,85 @@ pub enum DataType { /// /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/fixedstring FixedString(u64), - /// Bytea - Bytea, - /// Bit string, e.g. [Postgres], [MySQL], or [MSSQL] + /// Bytea type, see [PostgreSQL]. /// - /// [Postgres]: https://www.postgresql.org/docs/current/datatype-bit.html + /// [PostgreSQL]: https://www.postgresql.org/docs/current/datatype-binary.html + Bytea, + /// Bit string, see [PostgreSQL], [MySQL], or [MSSQL]. + /// + /// [PostgreSQL]: https://www.postgresql.org/docs/current/datatype-bit.html /// [MySQL]: https://dev.mysql.com/doc/refman/9.1/en/bit-type.html /// [MSSQL]: https://learn.microsoft.com/en-us/sql/t-sql/data-types/bit-transact-sql?view=sql-server-ver16 Bit(Option), - /// `BIT VARYING(n)`: Variable-length bit string e.g.
[Postgres] + /// `BIT VARYING(n)`: Variable-length bit string, see [PostgreSQL]. /// - /// [Postgres]: https://www.postgresql.org/docs/current/datatype-bit.html + /// [PostgreSQL]: https://www.postgresql.org/docs/current/datatype-bit.html BitVarying(Option), - /// `VARBIT(n)`: Variable-length bit string. [Postgres] alias for `BIT VARYING` + /// `VARBIT(n)`: Variable-length bit string. [PostgreSQL] alias for `BIT VARYING`. /// - /// [Postgres]: https://www.postgresql.org/docs/current/datatype.html + /// [PostgreSQL]: https://www.postgresql.org/docs/current/datatype.html VarBit(Option), - /// - /// Custom type such as enums + /// Custom types. Custom(ObjectName, Vec), - /// Arrays + /// Arrays. Array(ArrayElemTypeDef), - /// Map + /// Map, see [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/map + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/map Map(Box, Box), - /// Tuple + /// Tuple, see [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple Tuple(Vec), - /// Nested + /// Nested type, see [ClickHouse]. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested Nested(Vec), - /// Enums + /// Enum type. Enum(Vec, Option), - /// Set + /// Set type. Set(Vec), - /// Struct + /// Struct type, see [Hive], [BigQuery]. 
/// - /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html - /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type + /// [Hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type Struct(Vec, StructBracketKind), - /// Union + /// Union type, see [DuckDB]. /// - /// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html + /// [DuckDB]: https://duckdb.org/docs/sql/data_types/union.html Union(Vec), /// Nullable - special marker NULL represents in ClickHouse as a data type. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable Nullable(Box), /// LowCardinality - changes the internal representation of other data types to be dictionary-encoded. /// - /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality + /// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality LowCardinality(Box), /// No type specified - only used with /// [`SQLiteDialect`](crate::dialect::SQLiteDialect), from statements such /// as `CREATE TABLE t1 (a)`. Unspecified, - /// Trigger data type, returned by functions associated with triggers + /// Trigger data type, returned by functions associated with triggers, see [PostgreSQL]. /// - /// [postgresql]: https://www.postgresql.org/docs/current/plpgsql-trigger.html + /// [PostgreSQL]: https://www.postgresql.org/docs/current/plpgsql-trigger.html Trigger, - /// Any data type, used in BigQuery UDF definitions for templated parameters + /// Any data type, used in BigQuery UDF definitions for templated parameters, see [BigQuery]. 
/// - /// [bigquery]: https://cloud.google.com/bigquery/docs/user-defined-functions#templated-sql-udf-parameters + /// [BigQuery]: https://cloud.google.com/bigquery/docs/user-defined-functions#templated-sql-udf-parameters AnyType, - /// geometric type + /// Geometric type, see [PostgreSQL]. /// - /// [Postgres]: https://www.postgresql.org/docs/9.5/functions-geometry.html + /// [PostgreSQL]: https://www.postgresql.org/docs/9.5/functions-geometry.html GeometricType(GeometricTypeKind), + /// PostgreSQL text search vectors, see [PostgreSQL]. + /// + /// [PostgreSQL]: https://www.postgresql.org/docs/17/datatype-textsearch.html + TsVector, + /// PostgreSQL text search query, see [PostgreSQL]. + /// + /// [PostgreSQL]: https://www.postgresql.org/docs/17/datatype-textsearch.html + TsQuery, } impl fmt::Display for DataType { @@ -499,6 +543,9 @@ impl fmt::Display for DataType { DataType::Int256 => { write!(f, "Int256") } + DataType::HugeInt => { + write!(f, "HUGEINT") + } DataType::Int4Unsigned(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, true) } @@ -517,6 +564,18 @@ impl fmt::Display for DataType { DataType::Int8Unsigned(zerofill) => { format_type_with_optional_length(f, "INT8", zerofill, true) } + DataType::UTinyInt => { + write!(f, "UTINYINT") + } + DataType::USmallInt => { + write!(f, "USMALLINT") + } + DataType::UBigInt => { + write!(f, "UBIGINT") + } + DataType::UHugeInt => { + write!(f, "UHUGEINT") + } DataType::UInt8 => { write!(f, "UInt8") } @@ -567,6 +626,7 @@ impl fmt::Display for DataType { DataType::Timestamp(precision, timezone_info) => { format_datetime_precision_and_tz(f, "TIMESTAMP", precision, timezone_info) } + DataType::TimestampNtz => write!(f, "TIMESTAMP_NTZ"), DataType::Datetime64(precision, timezone) => { format_clickhouse_datetime_precision_and_timezone( f, @@ -606,7 +666,7 @@ impl fmt::Display for DataType { } DataType::Enum(vals, bits) => { match bits { - Some(bits) => write!(f, "ENUM{}", bits), + Some(bits) => write!(f, 
"ENUM{bits}"), None => write!(f, "ENUM"), }?; write!(f, "(")?; @@ -654,16 +714,16 @@ impl fmt::Display for DataType { } // ClickHouse DataType::Nullable(data_type) => { - write!(f, "Nullable({})", data_type) + write!(f, "Nullable({data_type})") } DataType::FixedString(character_length) => { - write!(f, "FixedString({})", character_length) + write!(f, "FixedString({character_length})") } DataType::LowCardinality(data_type) => { - write!(f, "LowCardinality({})", data_type) + write!(f, "LowCardinality({data_type})") } DataType::Map(key_data_type, value_data_type) => { - write!(f, "Map({}, {})", key_data_type, value_data_type) + write!(f, "Map({key_data_type}, {value_data_type})") } DataType::Tuple(fields) => { write!(f, "Tuple({})", display_comma_separated(fields)) @@ -674,8 +734,20 @@ impl fmt::Display for DataType { DataType::Unspecified => Ok(()), DataType::Trigger => write!(f, "TRIGGER"), DataType::AnyType => write!(f, "ANY TYPE"), - DataType::Table(fields) => write!(f, "TABLE({})", display_comma_separated(fields)), - DataType::GeometricType(kind) => write!(f, "{}", kind), + DataType::Table(fields) => match fields { + Some(fields) => { + write!(f, "TABLE({})", display_comma_separated(fields)) + } + None => { + write!(f, "TABLE") + } + }, + DataType::NamedTable { name, columns } => { + write!(f, "{} TABLE ({})", name, display_comma_separated(columns)) + } + DataType::GeometricType(kind) => write!(f, "{kind}"), + DataType::TsVector => write!(f, "TSVECTOR"), + DataType::TsQuery => write!(f, "TSQUERY"), } } } @@ -777,19 +849,19 @@ pub enum StructBracketKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TimezoneInfo { - /// No information about time zone. E.g., TIMESTAMP + /// No information about time zone, e.g. TIMESTAMP None, - /// Temporal type 'WITH TIME ZONE'. E.g., TIMESTAMP WITH TIME ZONE, [standard], [Oracle] + /// Temporal type 'WITH TIME ZONE', e.g. 
TIMESTAMP WITH TIME ZONE, [SQL Standard], [Oracle] /// - /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type + /// [SQL Standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type /// [Oracle]: https://docs.oracle.com/en/database/oracle/oracle-database/12.2/nlspg/datetime-data-types-and-time-zone-support.html#GUID-3F1C388E-C651-43D5-ADBC-1A49E5C2CA05 WithTimeZone, - /// Temporal type 'WITHOUT TIME ZONE'. E.g., TIME WITHOUT TIME ZONE, [standard], [Postgresql] + /// Temporal type 'WITHOUT TIME ZONE', e.g. TIME WITHOUT TIME ZONE, [SQL Standard], [Postgresql] /// - /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type + /// [SQL Standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type /// [Postgresql]: https://www.postgresql.org/docs/current/datatype-datetime.html WithoutTimeZone, - /// Postgresql specific `WITH TIME ZONE` formatting, for both TIME and TIMESTAMP. E.g., TIMETZ, [Postgresql] + /// Postgresql specific `WITH TIME ZONE` formatting, for both TIME and TIMESTAMP, e.g. TIMETZ, [Postgresql] /// /// [Postgresql]: https://www.postgresql.org/docs/current/datatype-datetime.html Tz, @@ -818,18 +890,18 @@ impl fmt::Display for TimezoneInfo { } /// Additional information for `NUMERIC`, `DECIMAL`, and `DEC` data types -/// following the 2016 [standard]. +/// following the 2016 [SQL Standard]. /// -/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type +/// [SQL Standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ExactNumberInfo { - /// No additional information e.g. 
`DECIMAL` + /// No additional information, e.g. `DECIMAL` None, - /// Only precision information e.g. `DECIMAL(10)` + /// Only precision information, e.g. `DECIMAL(10)` Precision(u64), - /// Precision and scale information e.g. `DECIMAL(10,2)` + /// Precision and scale information, e.g. `DECIMAL(10,2)` PrecisionAndScale(u64, u64), } @@ -870,7 +942,7 @@ impl fmt::Display for CharacterLength { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { CharacterLength::IntegerLength { length, unit } => { - write!(f, "{}", length)?; + write!(f, "{length}")?; if let Some(unit) = unit { write!(f, " {unit}")?; } @@ -883,7 +955,7 @@ impl fmt::Display for CharacterLength { } } -/// Possible units for characters, initially based on 2016 ANSI [standard][1]. +/// Possible units for characters, initially based on 2016 ANSI [SQL Standard][1]. /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#char-length-units #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -925,7 +997,7 @@ impl fmt::Display for BinaryLength { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { BinaryLength::IntegerLength { length } => { - write!(f, "{}", length)?; + write!(f, "{length}")?; } BinaryLength::Max => { write!(f, "MAX")?; @@ -956,7 +1028,7 @@ pub enum ArrayElemTypeDef { /// Represents different types of geometric shapes which are commonly used in /// PostgreSQL/Redshift for spatial operations and geometry-related computations. 
/// -/// [Postgres]: https://www.postgresql.org/docs/9.5/functions-geometry.html +/// [PostgreSQL]: https://www.postgresql.org/docs/9.5/functions-geometry.html #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/ast/dcl.rs b/src/ast/dcl.rs index 735ab0cc..07989407 100644 --- a/src/ast/dcl.rs +++ b/src/ast/dcl.rs @@ -173,7 +173,7 @@ impl fmt::Display for AlterRoleOperation { in_database, } => { if let Some(database_name) = in_database { - write!(f, "IN DATABASE {} ", database_name)?; + write!(f, "IN DATABASE {database_name} ")?; } match config_value { @@ -187,7 +187,7 @@ impl fmt::Display for AlterRoleOperation { in_database, } => { if let Some(database_name) = in_database { - write!(f, "IN DATABASE {} ", database_name)?; + write!(f, "IN DATABASE {database_name} ")?; } match config_name { @@ -218,15 +218,15 @@ impl fmt::Display for Use { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str("USE ")?; match self { - Use::Catalog(name) => write!(f, "CATALOG {}", name), - Use::Schema(name) => write!(f, "SCHEMA {}", name), - Use::Database(name) => write!(f, "DATABASE {}", name), - Use::Warehouse(name) => write!(f, "WAREHOUSE {}", name), - Use::Role(name) => write!(f, "ROLE {}", name), + Use::Catalog(name) => write!(f, "CATALOG {name}"), + Use::Schema(name) => write!(f, "SCHEMA {name}"), + Use::Database(name) => write!(f, "DATABASE {name}"), + Use::Warehouse(name) => write!(f, "WAREHOUSE {name}"), + Use::Role(name) => write!(f, "ROLE {name}"), Use::SecondaryRoles(secondary_roles) => { - write!(f, "SECONDARY ROLES {}", secondary_roles) + write!(f, "SECONDARY ROLES {secondary_roles}") } - Use::Object(name) => write!(f, "{}", name), + Use::Object(name) => write!(f, "{name}"), Use::Default => write!(f, "DEFAULT"), } } diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index bb85eb06..51e05784 100644 --- 
a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -30,22 +30,48 @@ use sqlparser_derive::{Visit, VisitMut}; use crate::ast::value::escape_single_quote_string; use crate::ast::{ - display_comma_separated, display_separated, CommentDef, CreateFunctionBody, + display_comma_separated, display_separated, ArgMode, CommentDef, CreateFunctionBody, CreateFunctionUsing, DataType, Expr, FunctionBehavior, FunctionCalledOnNull, - FunctionDeterminismSpecifier, FunctionParallel, Ident, MySQLColumnPosition, ObjectName, - OperateFunctionArg, OrderByExpr, ProjectionSelect, SequenceOptions, SqlOption, Tag, Value, - ValueWithSpan, + FunctionDeterminismSpecifier, FunctionParallel, Ident, IndexColumn, MySQLColumnPosition, + ObjectName, OperateFunctionArg, OrderByExpr, ProjectionSelect, SequenceOptions, SqlOption, Tag, + Value, ValueWithSpan, }; use crate::keywords::Keyword; use crate::tokenizer::Token; +/// ALTER TABLE operation REPLICA IDENTITY values +/// See [Postgres ALTER TABLE docs](https://www.postgresql.org/docs/current/sql-altertable.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ReplicaIdentity { + None, + Full, + Default, + Index(Ident), +} + +impl fmt::Display for ReplicaIdentity { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ReplicaIdentity::None => f.write_str("NONE"), + ReplicaIdentity::Full => f.write_str("FULL"), + ReplicaIdentity::Default => f.write_str("DEFAULT"), + ReplicaIdentity::Index(idx) => write!(f, "USING INDEX {idx}"), + } + } +} + /// An `ALTER TABLE` (`Statement::AlterTable`) operation #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum AlterTableOperation { - /// `ADD ` - AddConstraint(TableConstraint), + /// `ADD [NOT VALID]` + AddConstraint { + 
constraint: TableConstraint, + not_valid: bool, + }, /// `ADD [COLUMN] [IF NOT EXISTS] ` AddColumn { /// `[COLUMN]`. @@ -114,9 +140,10 @@ pub enum AlterTableOperation { name: Ident, drop_behavior: Option, }, - /// `DROP [ COLUMN ] [ IF EXISTS ] [ CASCADE ]` + /// `DROP [ COLUMN ] [ IF EXISTS ] [ , , ... ] [ CASCADE ]` DropColumn { - column_name: Ident, + has_column_keyword: bool, + column_names: Vec, if_exists: bool, drop_behavior: Option, }, @@ -151,8 +178,24 @@ pub enum AlterTableOperation { }, /// `DROP PRIMARY KEY` /// - /// Note: this is a MySQL-specific operation. + /// Note: this is a [MySQL]-specific operation. + /// + /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html DropPrimaryKey, + /// `DROP FOREIGN KEY ` + /// + /// Note: this is a [MySQL]-specific operation. + /// + /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html + DropForeignKey { + name: Ident, + }, + /// `DROP INDEX ` + /// + /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html + DropIndex { + name: Ident, + }, /// `ENABLE ALWAYS RULE rewrite_rule_name` /// /// Note: this is a PostgreSQL-specific operation. @@ -198,6 +241,13 @@ pub enum AlterTableOperation { old_partitions: Vec, new_partitions: Vec, }, + /// REPLICA IDENTITY { DEFAULT | USING INDEX index_name | FULL | NOTHING } + /// + /// Note: this is a PostgreSQL-specific operation. + /// Please refer to [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-altertable.html) + ReplicaIdentity { + identity: ReplicaIdentity, + }, /// Add Partitions AddPartitions { if_not_exists: bool, @@ -278,6 +328,16 @@ pub enum AlterTableOperation { equals: bool, algorithm: AlterTableAlgorithm, }, + + /// `LOCK [=] { DEFAULT | NONE | SHARED | EXCLUSIVE }` + /// + /// [MySQL]-specific table alter lock. 
+ /// + /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html + Lock { + equals: bool, + lock: AlterTableLock, + }, /// `AUTO_INCREMENT [=] ` /// /// [MySQL]-specific table option for raising current auto increment value. @@ -287,6 +347,10 @@ pub enum AlterTableOperation { equals: bool, value: ValueWithSpan, }, + /// `VALIDATE CONSTRAINT ` + ValidateConstraint { + name: Ident, + }, } /// An `ALTER Policy` (`Statement::AlterPolicy`) operation @@ -356,6 +420,30 @@ impl fmt::Display for AlterTableAlgorithm { } } +/// [MySQL] `ALTER TABLE` lock. +/// +/// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AlterTableLock { + Default, + None, + Shared, + Exclusive, +} + +impl fmt::Display for AlterTableLock { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + Self::Default => "DEFAULT", + Self::None => "NONE", + Self::Shared => "SHARED", + Self::Exclusive => "EXCLUSIVE", + }) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -369,7 +457,7 @@ pub enum Owner { impl fmt::Display for Owner { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Owner::Ident(ident) => write!(f, "{}", ident), + Owner::Ident(ident) => write!(f, "{ident}"), Owner::CurrentRole => write!(f, "CURRENT_ROLE"), Owner::CurrentUser => write!(f, "CURRENT_USER"), Owner::SessionUser => write!(f, "SESSION_USER"), @@ -413,7 +501,16 @@ impl fmt::Display for AlterTableOperation { display_separated(new_partitions, " "), ine = if *if_not_exists { " IF NOT EXISTS" } else { "" } ), - AlterTableOperation::AddConstraint(c) => write!(f, "ADD {c}"), + AlterTableOperation::AddConstraint { + not_valid, + 
constraint, + } => { + write!(f, "ADD {constraint}")?; + if *not_valid { + write!(f, " NOT VALID")?; + } + Ok(()) + } AlterTableOperation::AddColumn { column_keyword, if_not_exists, @@ -444,7 +541,7 @@ impl fmt::Display for AlterTableOperation { if *if_not_exists { write!(f, " IF NOT EXISTS")?; } - write!(f, " {} ({})", name, query) + write!(f, " {name} ({query})") } AlterTableOperation::Algorithm { equals, algorithm } => { write!( @@ -459,7 +556,7 @@ impl fmt::Display for AlterTableOperation { if *if_exists { write!(f, " IF EXISTS")?; } - write!(f, " {}", name) + write!(f, " {name}") } AlterTableOperation::MaterializeProjection { if_exists, @@ -470,9 +567,9 @@ impl fmt::Display for AlterTableOperation { if *if_exists { write!(f, " IF EXISTS")?; } - write!(f, " {}", name)?; + write!(f, " {name}")?; if let Some(partition) = partition { - write!(f, " IN PARTITION {}", partition)?; + write!(f, " IN PARTITION {partition}")?; } Ok(()) } @@ -485,9 +582,9 @@ impl fmt::Display for AlterTableOperation { if *if_exists { write!(f, " IF EXISTS")?; } - write!(f, " {}", name)?; + write!(f, " {name}")?; if let Some(partition) = partition { - write!(f, " IN PARTITION {}", partition)?; + write!(f, " IN PARTITION {partition}")?; } Ok(()) } @@ -530,15 +627,19 @@ impl fmt::Display for AlterTableOperation { ) } AlterTableOperation::DropPrimaryKey => write!(f, "DROP PRIMARY KEY"), + AlterTableOperation::DropForeignKey { name } => write!(f, "DROP FOREIGN KEY {name}"), + AlterTableOperation::DropIndex { name } => write!(f, "DROP INDEX {name}"), AlterTableOperation::DropColumn { - column_name, + has_column_keyword, + column_names: column_name, if_exists, drop_behavior, } => write!( f, - "DROP COLUMN {}{}{}", + "DROP {}{}{}{}", + if *has_column_keyword { "COLUMN " } else { "" }, if *if_exists { "IF EXISTS " } else { "" }, - column_name, + display_comma_separated(column_name), match drop_behavior { None => "", Some(DropBehavior::Restrict) => " RESTRICT", @@ -681,6 +782,15 @@ impl 
fmt::Display for AlterTableOperation { value ) } + AlterTableOperation::Lock { equals, lock } => { + write!(f, "LOCK {}{}", if *equals { "= " } else { "" }, lock) + } + AlterTableOperation::ReplicaIdentity { identity } => { + write!(f, "REPLICA IDENTITY {identity}") + } + AlterTableOperation::ValidateConstraint { name } => { + write!(f, "VALIDATE CONSTRAINT {name}") + } } } } @@ -802,7 +912,10 @@ pub enum AlterColumnOperation { data_type: DataType, /// PostgreSQL specific using: Option, + /// Set to true if the statement includes the `SET DATA TYPE` keywords + had_set: bool, }, + /// `ADD GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [ ( sequence_options ) ]` /// /// Note: this is a PostgreSQL-specific operation. @@ -820,15 +933,22 @@ impl fmt::Display for AlterColumnOperation { AlterColumnOperation::SetDefault { value } => { write!(f, "SET DEFAULT {value}") } - AlterColumnOperation::DropDefault {} => { + AlterColumnOperation::DropDefault => { write!(f, "DROP DEFAULT") } - AlterColumnOperation::SetDataType { data_type, using } => { - if let Some(expr) = using { - write!(f, "SET DATA TYPE {data_type} USING {expr}") - } else { - write!(f, "SET DATA TYPE {data_type}") + AlterColumnOperation::SetDataType { + data_type, + using, + had_set, + } => { + if *had_set { + write!(f, "SET DATA ")?; } + write!(f, "TYPE {data_type}")?; + if let Some(expr) = using { + write!(f, " USING {expr}")?; + } + Ok(()) } AlterColumnOperation::AddGenerated { generated_as, @@ -888,7 +1008,7 @@ pub enum TableConstraint { /// [1]: IndexType index_type: Option, /// Identifiers of the columns that are unique. - columns: Vec, + columns: Vec, index_options: Vec, characteristics: Option, /// Optional Postgres nulls handling: `[ NULLS [ NOT ] DISTINCT ]` @@ -924,7 +1044,7 @@ pub enum TableConstraint { /// [1]: IndexType index_type: Option, /// Identifiers of the columns that form the primary key. 
- columns: Vec, + columns: Vec, index_options: Vec, characteristics: Option, }, @@ -935,6 +1055,9 @@ pub enum TableConstraint { /// }`). ForeignKey { name: Option, + /// MySQL-specific field + /// + index_name: Option, columns: Vec, foreign_table: ObjectName, referred_columns: Vec, @@ -942,10 +1065,13 @@ pub enum TableConstraint { on_update: Option, characteristics: Option, }, - /// `[ CONSTRAINT ] CHECK ()` + /// `[ CONSTRAINT ] CHECK () [[NOT] ENFORCED]` Check { name: Option, expr: Box, + /// MySQL-specific syntax + /// + enforced: Option, }, /// MySQLs [index definition][1] for index creation. Not present on ANSI so, for now, the usage /// is restricted to MySQL, as no other dialects that support this syntax were found. @@ -963,7 +1089,7 @@ pub enum TableConstraint { /// [1]: IndexType index_type: Option, /// Referred column identifier list. - columns: Vec, + columns: Vec, }, /// MySQLs [fulltext][1] definition. Since the [`SPATIAL`][2] definition is exactly the same, /// and MySQL displays both the same way, it is part of this definition as well. @@ -986,7 +1112,7 @@ pub enum TableConstraint { /// Optional index name. opt_index_name: Option, /// Referred column identifier list. 
- columns: Vec, + columns: Vec, }, } @@ -1045,6 +1171,7 @@ impl fmt::Display for TableConstraint { } TableConstraint::ForeignKey { name, + index_name, columns, foreign_table, referred_columns, @@ -1054,8 +1181,9 @@ impl fmt::Display for TableConstraint { } => { write!( f, - "{}FOREIGN KEY ({}) REFERENCES {}", + "{}FOREIGN KEY{} ({}) REFERENCES {}", display_constraint_name(name), + display_option_spaced(index_name), display_comma_separated(columns), foreign_table, )?; @@ -1069,12 +1197,21 @@ impl fmt::Display for TableConstraint { write!(f, " ON UPDATE {action}")?; } if let Some(characteristics) = characteristics { - write!(f, " {}", characteristics)?; + write!(f, " {characteristics}")?; } Ok(()) } - TableConstraint::Check { name, expr } => { - write!(f, "{}CHECK ({})", display_constraint_name(name), expr) + TableConstraint::Check { + name, + expr, + enforced, + } => { + write!(f, "{}CHECK ({})", display_constraint_name(name), expr)?; + if let Some(b) = enforced { + write!(f, " {}", if *b { "ENFORCED" } else { "NOT ENFORCED" }) + } else { + Ok(()) + } } TableConstraint::Index { display_as_key, @@ -1174,13 +1311,20 @@ impl fmt::Display for KeyOrIndexDisplay { /// [1]: https://dev.mysql.com/doc/refman/8.0/en/create-table.html /// [2]: https://dev.mysql.com/doc/refman/8.0/en/create-index.html /// [3]: https://www.postgresql.org/docs/14/sql-createindex.html -#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum IndexType { BTree, Hash, - // TODO add Postgresql's possible indexes + GIN, + GiST, + SPGiST, + BRIN, + Bloom, + /// Users may define their own index types, which would + /// not be covered by the above variants. 
+ Custom(Ident), } impl fmt::Display for IndexType { @@ -1188,6 +1332,12 @@ impl fmt::Display for IndexType { match self { Self::BTree => write!(f, "BTREE"), Self::Hash => write!(f, "HASH"), + Self::GIN => write!(f, "GIN"), + Self::GiST => write!(f, "GIST"), + Self::SPGiST => write!(f, "SPGIST"), + Self::BRIN => write!(f, "BRIN"), + Self::Bloom => write!(f, "BLOOM"), + Self::Custom(name) => write!(f, "{name}"), } } } @@ -1215,9 +1365,9 @@ impl fmt::Display for IndexOption { } } -/// [Postgres] unique index nulls handling option: `[ NULLS [ NOT ] DISTINCT ]` +/// [PostgreSQL] unique index nulls handling option: `[ NULLS [ NOT ] DISTINCT ]` /// -/// [Postgres]: https://www.postgresql.org/docs/17/sql-altertable.html +/// [PostgreSQL]: https://www.postgresql.org/docs/17/sql-altertable.html #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -1246,11 +1396,16 @@ impl fmt::Display for NullsDistinctOption { pub struct ProcedureParam { pub name: Ident, pub data_type: DataType, + pub mode: Option, } impl fmt::Display for ProcedureParam { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} {}", self.name, self.data_type) + if let Some(mode) = &self.mode { + write!(f, "{mode} {} {}", self.name, self.data_type) + } else { + write!(f, "{} {}", self.name, self.data_type) + } } } @@ -1300,17 +1455,41 @@ impl fmt::Display for ColumnDef { pub struct ViewColumnDef { pub name: Ident, pub data_type: Option, - pub options: Option>, + pub options: Option, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ColumnOptions { + CommaSeparated(Vec), + SpaceSeparated(Vec), +} + +impl ColumnOptions { + pub fn as_slice(&self) -> &[ColumnOption] { + match self { + 
ColumnOptions::CommaSeparated(options) => options.as_slice(), + ColumnOptions::SpaceSeparated(options) => options.as_slice(), + } + } } impl fmt::Display for ViewColumnDef { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.name)?; if let Some(data_type) = self.data_type.as_ref() { - write!(f, " {}", data_type)?; + write!(f, " {data_type}")?; } if let Some(options) = self.options.as_ref() { - write!(f, " {}", display_comma_separated(options.as_slice()))?; + match options { + ColumnOptions::CommaSeparated(column_options) => { + write!(f, " {}", display_comma_separated(column_options.as_slice()))?; + } + ColumnOptions::SpaceSeparated(column_options) => { + write!(f, " {}", display_separated(column_options.as_slice(), " "))? + } + } } Ok(()) } @@ -1530,7 +1709,7 @@ pub struct ColumnPolicyProperty { /// ``` /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table pub with: bool, - pub policy_name: Ident, + pub policy_name: ObjectName, pub using_columns: Option>, } @@ -1664,6 +1843,13 @@ pub enum ColumnOption { /// ``` /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table Tags(TagsColumnOption), + /// MySQL specific: Spatial reference identifier + /// Syntax: + /// ```sql + /// CREATE TABLE geom (g GEOMETRY NOT NULL SRID 4326); + /// ``` + /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/creating-spatial-indexes.html + Srid(Box), } impl fmt::Display for ColumnOption { @@ -1688,7 +1874,7 @@ impl fmt::Display for ColumnOption { } => { write!(f, "{}", if *is_primary { "PRIMARY KEY" } else { "UNIQUE" })?; if let Some(characteristics) = characteristics { - write!(f, " {}", characteristics)?; + write!(f, " {characteristics}")?; } Ok(()) } @@ -1710,7 +1896,7 @@ impl fmt::Display for ColumnOption { write!(f, " ON UPDATE {action}")?; } if let Some(characteristics) = characteristics { - write!(f, " {}", characteristics)?; + write!(f, " {characteristics}")?; } Ok(()) } @@ -1770,7 +1956,7 @@ impl 
fmt::Display for ColumnOption { write!(f, "{parameters}") } OnConflict(keyword) => { - write!(f, "ON CONFLICT {:?}", keyword)?; + write!(f, "ON CONFLICT {keyword:?}")?; Ok(()) } Policy(parameters) => { @@ -1779,6 +1965,9 @@ impl fmt::Display for ColumnOption { Tags(tags) => { write!(f, "{tags}") } + Srid(srid) => { + write!(f, "SRID {srid}") + } } } } @@ -2092,10 +2281,63 @@ impl fmt::Display for ClusteredBy { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// ```sql +/// CREATE DOMAIN name [ AS ] data_type +/// [ COLLATE collation ] +/// [ DEFAULT expression ] +/// [ domain_constraint [ ... ] ] +/// +/// where domain_constraint is: +/// +/// [ CONSTRAINT constraint_name ] +/// { NOT NULL | NULL | CHECK (expression) } +/// ``` +/// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-createdomain.html) +pub struct CreateDomain { + /// The name of the domain to be created. + pub name: ObjectName, + /// The data type of the domain. + pub data_type: DataType, + /// The collation of the domain. + pub collation: Option, + /// The default value of the domain. + pub default: Option, + /// The constraints of the domain. 
+ pub constraints: Vec, +} + +impl fmt::Display for CreateDomain { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "CREATE DOMAIN {name} AS {data_type}", + name = self.name, + data_type = self.data_type + )?; + if let Some(collation) = &self.collation { + write!(f, " COLLATE {collation}")?; + } + if let Some(default) = &self.default { + write!(f, " DEFAULT {default}")?; + } + if !self.constraints.is_empty() { + write!(f, " {}", display_separated(&self.constraints, " "))?; + } + Ok(()) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateFunction { + /// True if this is a `CREATE OR ALTER FUNCTION` statement + /// + /// [MsSql](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql?view=sql-server-ver16#or-alter) + pub or_alter: bool, pub or_replace: bool, pub temporary: bool, pub if_not_exists: bool, @@ -2114,15 +2356,15 @@ pub struct CreateFunction { /// /// IMMUTABLE | STABLE | VOLATILE /// - /// [Postgres](https://www.postgresql.org/docs/current/sql-createfunction.html) + /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) pub behavior: Option, /// CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT /// - /// [Postgres](https://www.postgresql.org/docs/current/sql-createfunction.html) + /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) pub called_on_null: Option, /// PARALLEL { UNSAFE | RESTRICTED | SAFE } /// - /// [Postgres](https://www.postgresql.org/docs/current/sql-createfunction.html) + /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) pub parallel: Option, /// USING ... 
(Hive only) pub using: Option, @@ -2158,9 +2400,10 @@ impl fmt::Display for CreateFunction { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, - "CREATE {or_replace}{temp}FUNCTION {if_not_exists}{name}", + "CREATE {or_alter}{or_replace}{temp}FUNCTION {if_not_exists}{name}", name = self.name, temp = if self.temporary { "TEMPORARY " } else { "" }, + or_alter = if self.or_alter { "OR ALTER " } else { "" }, or_replace = if self.or_replace { "OR REPLACE " } else { "" }, if_not_exists = if self.if_not_exists { "IF NOT EXISTS " @@ -2198,6 +2441,12 @@ impl fmt::Display for CreateFunction { if let Some(CreateFunctionBody::Return(function_body)) = &self.function_body { write!(f, " RETURN {function_body}")?; } + if let Some(CreateFunctionBody::AsReturnExpr(function_body)) = &self.function_body { + write!(f, " AS RETURN {function_body}")?; + } + if let Some(CreateFunctionBody::AsReturnSelect(function_body)) = &self.function_body { + write!(f, " AS RETURN {function_body}")?; + } if let Some(using) = &self.using { write!(f, " {using}")?; } @@ -2211,6 +2460,9 @@ impl fmt::Display for CreateFunction { if let Some(CreateFunctionBody::AsAfterOptions(function_body)) = &self.function_body { write!(f, " AS {function_body}")?; } + if let Some(CreateFunctionBody::AsBeginEnd(bes)) = &self.function_body { + write!(f, " AS {bes}")?; + } Ok(()) } } diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 8cfc6741..e179f5d7 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -29,17 +29,38 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; +use crate::display_utils::{indented_list, DisplayCommaSeparated, Indent, NewLine, SpaceOrNewline}; + pub use super::ddl::{ColumnDef, TableConstraint}; use super::{ display_comma_separated, display_separated, query::InputFormatClause, Assignment, ClusteredBy, - CommentDef, Expr, FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, - HiveRowFormat, Ident, InsertAliases, 
MysqlInsertPriority, ObjectName, OnCommit, OnInsert, - OneOrManyWithParens, OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting, SqlOption, - SqliteOnConflict, StorageSerializationPolicy, TableEngine, TableObject, TableWithJoins, Tag, + CommentDef, CreateTableOptions, Expr, FileFormat, FromTable, HiveDistributionStyle, HiveFormat, + HiveIOFormat, HiveRowFormat, Ident, IndexType, InsertAliases, MysqlInsertPriority, ObjectName, + OnCommit, OnInsert, OneOrManyWithParens, OrderByExpr, Query, RowAccessPolicy, SelectItem, + Setting, SqliteOnConflict, StorageSerializationPolicy, TableObject, TableWithJoins, Tag, WrappedCollection, }; +/// Index column type. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct IndexColumn { + pub column: OrderByExpr, + pub operator_class: Option, +} + +impl Display for IndexColumn { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.column)?; + if let Some(operator_class) = &self.operator_class { + write!(f, " {operator_class}")?; + } + Ok(()) + } +} + /// CREATE INDEX statement. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -49,8 +70,8 @@ pub struct CreateIndex { pub name: Option, #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub table_name: ObjectName, - pub using: Option, - pub columns: Vec, + pub using: Option, + pub columns: Vec, pub unique: bool, pub concurrently: bool, pub if_not_exists: bool, @@ -127,19 +148,17 @@ pub struct CreateTable { pub constraints: Vec, pub hive_distribution: HiveDistributionStyle, pub hive_formats: Option, - pub table_properties: Vec, - pub with_options: Vec, + pub table_options: CreateTableOptions, pub file_format: Option, pub location: Option, pub query: Option>, pub without_rowid: bool, pub like: Option, pub clone: Option, - pub engine: Option, + // For Hive dialect, the table comment is after the column definitions without `=`, + // so the `comment` field is optional and different than the comment field in the general options list. + // [Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable) pub comment: Option, - pub auto_increment_offset: Option, - pub default_charset: Option, - pub collation: Option, pub on_commit: Option, /// ClickHouse "ON CLUSTER" clause: /// @@ -156,13 +175,17 @@ pub struct CreateTable { pub partition_by: Option>, /// BigQuery: Table clustering column list. /// - pub cluster_by: Option>>, + /// Snowflake: Table clustering list which contains base column, expressions on base columns. + /// + pub cluster_by: Option>>, /// Hive: Table clustering column list. /// pub clustered_by: Option, - /// BigQuery: Table options list. - /// - pub options: Option>, + /// Postgres `INHERITs` clause, which contains the list of tables from which + /// the new table inherits. + /// + /// + pub inherits: Option>, /// SQLite "STRICT" clause. 
/// if the "STRICT" table-option keyword is added to the end, after the closing ")", /// then strict typing rules apply to that table. @@ -243,22 +266,27 @@ impl Display for CreateTable { name = self.name, )?; if let Some(on_cluster) = &self.on_cluster { - write!(f, " ON CLUSTER {}", on_cluster)?; + write!(f, " ON CLUSTER {on_cluster}")?; } if !self.columns.is_empty() || !self.constraints.is_empty() { - write!(f, " ({}", display_comma_separated(&self.columns))?; + f.write_str(" (")?; + NewLine.fmt(f)?; + Indent(DisplayCommaSeparated(&self.columns)).fmt(f)?; if !self.columns.is_empty() && !self.constraints.is_empty() { - write!(f, ", ")?; + f.write_str(",")?; + SpaceOrNewline.fmt(f)?; } - write!(f, "{})", display_comma_separated(&self.constraints))?; + Indent(DisplayCommaSeparated(&self.constraints)).fmt(f)?; + NewLine.fmt(f)?; + f.write_str(")")?; } else if self.query.is_none() && self.like.is_none() && self.clone.is_none() { // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens - write!(f, " ()")?; + f.write_str(" ()")?; } // Hive table comment should be after column definitions, please refer to: // [Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable) - if let Some(CommentDef::AfterColumnDefsWithoutEq(comment)) = &self.comment { + if let Some(comment) = &self.comment { write!(f, " COMMENT '{comment}'")?; } @@ -351,40 +379,22 @@ impl Display for CreateTable { } write!(f, " LOCATION '{}'", self.location.as_ref().unwrap())?; } - if !self.table_properties.is_empty() { - write!( - f, - " TBLPROPERTIES ({})", - display_comma_separated(&self.table_properties) - )?; - } - if !self.with_options.is_empty() { - write!(f, " WITH ({})", display_comma_separated(&self.with_options))?; - } - if let Some(engine) = &self.engine { - write!(f, " ENGINE={engine}")?; - } - if let Some(comment_def) = &self.comment { - match comment_def { - CommentDef::WithEq(comment) => { - write!(f, " COMMENT = '{comment}'")?; - } - 
CommentDef::WithoutEq(comment) => { - write!(f, " COMMENT '{comment}'")?; - } - // For CommentDef::AfterColumnDefsWithoutEq will be displayed after column definition - CommentDef::AfterColumnDefsWithoutEq(_) => (), - } + + match &self.table_options { + options @ CreateTableOptions::With(_) + | options @ CreateTableOptions::Plain(_) + | options @ CreateTableOptions::TableProperties(_) => write!(f, " {options}")?, + _ => (), } - if let Some(auto_increment_offset) = self.auto_increment_offset { - write!(f, " AUTO_INCREMENT {auto_increment_offset}")?; - } if let Some(primary_key) = &self.primary_key { - write!(f, " PRIMARY KEY {}", primary_key)?; + write!(f, " PRIMARY KEY {primary_key}")?; } if let Some(order_by) = &self.order_by { - write!(f, " ORDER BY {}", order_by)?; + write!(f, " ORDER BY {order_by}")?; + } + if let Some(inherits) = &self.inherits { + write!(f, " INHERITS ({})", display_comma_separated(inherits))?; } if let Some(partition_by) = self.partition_by.as_ref() { write!(f, " PARTITION BY {partition_by}")?; @@ -392,15 +402,9 @@ impl Display for CreateTable { if let Some(cluster_by) = self.cluster_by.as_ref() { write!(f, " CLUSTER BY {cluster_by}")?; } - - if let Some(options) = self.options.as_ref() { - write!( - f, - " OPTIONS({})", - display_comma_separated(options.as_slice()) - )?; + if let options @ CreateTableOptions::Options(_) = &self.table_options { + write!(f, " {options}")?; } - if let Some(external_volume) = self.external_volume.as_ref() { write!(f, " EXTERNAL_VOLUME = '{external_volume}'")?; } @@ -476,13 +480,6 @@ impl Display for CreateTable { write!(f, " WITH TAG ({})", display_comma_separated(tag.as_slice()))?; } - if let Some(default_charset) = &self.default_charset { - write!(f, " DEFAULT CHARSET={default_charset}")?; - } - if let Some(collation) = &self.collation { - write!(f, " COLLATE={collation}")?; - } - if self.on_commit.is_some() { let on_commit = match self.on_commit { Some(OnCommit::DeleteRows) => "ON COMMIT DELETE ROWS", @@ 
-591,28 +588,32 @@ impl Display for Insert { )?; } if !self.columns.is_empty() { - write!(f, "({}) ", display_comma_separated(&self.columns))?; + write!(f, "({})", display_comma_separated(&self.columns))?; + SpaceOrNewline.fmt(f)?; } if let Some(ref parts) = self.partitioned { if !parts.is_empty() { - write!(f, "PARTITION ({}) ", display_comma_separated(parts))?; + write!(f, "PARTITION ({})", display_comma_separated(parts))?; + SpaceOrNewline.fmt(f)?; } } if !self.after_columns.is_empty() { - write!(f, "({}) ", display_comma_separated(&self.after_columns))?; + write!(f, "({})", display_comma_separated(&self.after_columns))?; + SpaceOrNewline.fmt(f)?; } if let Some(settings) = &self.settings { - write!(f, "SETTINGS {} ", display_comma_separated(settings))?; + write!(f, "SETTINGS {}", display_comma_separated(settings))?; + SpaceOrNewline.fmt(f)?; } if let Some(source) = &self.source { - write!(f, "{source}")?; + source.fmt(f)?; } else if !self.assignments.is_empty() { - write!(f, "SET ")?; - write!(f, "{}", display_comma_separated(&self.assignments))?; + write!(f, "SET")?; + indented_list(f, &self.assignments)?; } else if let Some(format_clause) = &self.format_clause { - write!(f, "{format_clause}")?; + format_clause.fmt(f)?; } else if self.columns.is_empty() { write!(f, "DEFAULT VALUES")?; } @@ -632,7 +633,9 @@ impl Display for Insert { } if let Some(returning) = &self.returning { - write!(f, " RETURNING {}", display_comma_separated(returning))?; + SpaceOrNewline.fmt(f)?; + f.write_str("RETURNING")?; + indented_list(f, returning)?; } Ok(()) } @@ -661,32 +664,45 @@ pub struct Delete { impl Display for Delete { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "DELETE ")?; + f.write_str("DELETE")?; if !self.tables.is_empty() { - write!(f, "{} ", display_comma_separated(&self.tables))?; + indented_list(f, &self.tables)?; } match &self.from { FromTable::WithFromKeyword(from) => { - write!(f, "FROM {}", display_comma_separated(from))?; + 
f.write_str(" FROM")?; + indented_list(f, from)?; } FromTable::WithoutKeyword(from) => { - write!(f, "{}", display_comma_separated(from))?; + indented_list(f, from)?; } } if let Some(using) = &self.using { - write!(f, " USING {}", display_comma_separated(using))?; + SpaceOrNewline.fmt(f)?; + f.write_str("USING")?; + indented_list(f, using)?; } if let Some(selection) = &self.selection { - write!(f, " WHERE {selection}")?; + SpaceOrNewline.fmt(f)?; + f.write_str("WHERE")?; + SpaceOrNewline.fmt(f)?; + Indent(selection).fmt(f)?; } if let Some(returning) = &self.returning { - write!(f, " RETURNING {}", display_comma_separated(returning))?; + SpaceOrNewline.fmt(f)?; + f.write_str("RETURNING")?; + indented_list(f, returning)?; } if !self.order_by.is_empty() { - write!(f, " ORDER BY {}", display_comma_separated(&self.order_by))?; + SpaceOrNewline.fmt(f)?; + f.write_str("ORDER BY")?; + indented_list(f, &self.order_by)?; } if let Some(limit) = &self.limit { - write!(f, " LIMIT {limit}")?; + SpaceOrNewline.fmt(f)?; + f.write_str("LIMIT")?; + SpaceOrNewline.fmt(f)?; + Indent(limit).fmt(f)?; } Ok(()) } diff --git a/src/ast/helpers/key_value_options.rs b/src/ast/helpers/key_value_options.rs index 06f028dd..796bfd5e 100644 --- a/src/ast/helpers/key_value_options.rs +++ b/src/ast/helpers/key_value_options.rs @@ -67,7 +67,7 @@ impl fmt::Display for KeyValueOptions { } else { f.write_str(" ")?; } - write!(f, "{}", option)?; + write!(f, "{option}")?; } } Ok(()) diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 344e9dec..d66a869b 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -26,10 +26,12 @@ use sqlparser_derive::{Visit, VisitMut}; use super::super::dml::CreateTable; use crate::ast::{ - ClusteredBy, ColumnDef, CommentDef, Expr, FileFormat, HiveDistributionStyle, HiveFormat, Ident, - ObjectName, OnCommit, OneOrManyWithParens, Query, RowAccessPolicy, SqlOption, Statement, - 
StorageSerializationPolicy, TableConstraint, TableEngine, Tag, WrappedCollection, + ClusteredBy, ColumnDef, CommentDef, CreateTableOptions, Expr, FileFormat, + HiveDistributionStyle, HiveFormat, Ident, ObjectName, OnCommit, OneOrManyWithParens, Query, + RowAccessPolicy, Statement, StorageSerializationPolicy, TableConstraint, Tag, + WrappedCollection, }; + use crate::parser::ParserError; /// Builder for create table statement variant ([1]). @@ -76,27 +78,21 @@ pub struct CreateTableBuilder { pub constraints: Vec, pub hive_distribution: HiveDistributionStyle, pub hive_formats: Option, - pub table_properties: Vec, - pub with_options: Vec, pub file_format: Option, pub location: Option, pub query: Option>, pub without_rowid: bool, pub like: Option, pub clone: Option, - pub engine: Option, pub comment: Option, - pub auto_increment_offset: Option, - pub default_charset: Option, - pub collation: Option, pub on_commit: Option, pub on_cluster: Option, pub primary_key: Option>, pub order_by: Option>, pub partition_by: Option>, - pub cluster_by: Option>>, + pub cluster_by: Option>>, pub clustered_by: Option, - pub options: Option>, + pub inherits: Option>, pub strict: bool, pub copy_grants: bool, pub enable_schema_evolution: Option, @@ -112,6 +108,7 @@ pub struct CreateTableBuilder { pub catalog: Option, pub catalog_sync: Option, pub storage_serialization_policy: Option, + pub table_options: CreateTableOptions, } impl CreateTableBuilder { @@ -130,19 +127,13 @@ impl CreateTableBuilder { constraints: vec![], hive_distribution: HiveDistributionStyle::NONE, hive_formats: None, - table_properties: vec![], - with_options: vec![], file_format: None, location: None, query: None, without_rowid: false, like: None, clone: None, - engine: None, comment: None, - auto_increment_offset: None, - default_charset: None, - collation: None, on_commit: None, on_cluster: None, primary_key: None, @@ -150,7 +141,7 @@ impl CreateTableBuilder { partition_by: None, cluster_by: None, clustered_by: None, 
- options: None, + inherits: None, strict: false, copy_grants: false, enable_schema_evolution: None, @@ -166,6 +157,7 @@ impl CreateTableBuilder { catalog: None, catalog_sync: None, storage_serialization_policy: None, + table_options: CreateTableOptions::None, } } pub fn or_replace(mut self, or_replace: bool) -> Self { @@ -228,15 +220,6 @@ impl CreateTableBuilder { self } - pub fn table_properties(mut self, table_properties: Vec) -> Self { - self.table_properties = table_properties; - self - } - - pub fn with_options(mut self, with_options: Vec) -> Self { - self.with_options = with_options; - self - } pub fn file_format(mut self, file_format: Option) -> Self { self.file_format = file_format; self @@ -266,31 +249,11 @@ impl CreateTableBuilder { self } - pub fn engine(mut self, engine: Option) -> Self { - self.engine = engine; - self - } - - pub fn comment(mut self, comment: Option) -> Self { + pub fn comment_after_column_def(mut self, comment: Option) -> Self { self.comment = comment; self } - pub fn auto_increment_offset(mut self, offset: Option) -> Self { - self.auto_increment_offset = offset; - self - } - - pub fn default_charset(mut self, default_charset: Option) -> Self { - self.default_charset = default_charset; - self - } - - pub fn collation(mut self, collation: Option) -> Self { - self.collation = collation; - self - } - pub fn on_commit(mut self, on_commit: Option) -> Self { self.on_commit = on_commit; self @@ -316,7 +279,7 @@ impl CreateTableBuilder { self } - pub fn cluster_by(mut self, cluster_by: Option>>) -> Self { + pub fn cluster_by(mut self, cluster_by: Option>>) -> Self { self.cluster_by = cluster_by; self } @@ -326,8 +289,8 @@ impl CreateTableBuilder { self } - pub fn options(mut self, options: Option>) -> Self { - self.options = options; + pub fn inherits(mut self, inherits: Option>) -> Self { + self.inherits = inherits; self } @@ -415,6 +378,11 @@ impl CreateTableBuilder { self } + pub fn table_options(mut self, table_options: 
CreateTableOptions) -> Self { + self.table_options = table_options; + self + } + pub fn build(self) -> Statement { Statement::CreateTable(CreateTable { or_replace: self.or_replace, @@ -430,19 +398,13 @@ impl CreateTableBuilder { constraints: self.constraints, hive_distribution: self.hive_distribution, hive_formats: self.hive_formats, - table_properties: self.table_properties, - with_options: self.with_options, file_format: self.file_format, location: self.location, query: self.query, without_rowid: self.without_rowid, like: self.like, clone: self.clone, - engine: self.engine, comment: self.comment, - auto_increment_offset: self.auto_increment_offset, - default_charset: self.default_charset, - collation: self.collation, on_commit: self.on_commit, on_cluster: self.on_cluster, primary_key: self.primary_key, @@ -450,7 +412,7 @@ impl CreateTableBuilder { partition_by: self.partition_by, cluster_by: self.cluster_by, clustered_by: self.clustered_by, - options: self.options, + inherits: self.inherits, strict: self.strict, copy_grants: self.copy_grants, enable_schema_evolution: self.enable_schema_evolution, @@ -466,6 +428,7 @@ impl CreateTableBuilder { catalog: self.catalog, catalog_sync: self.catalog_sync, storage_serialization_policy: self.storage_serialization_policy, + table_options: self.table_options, }) } } @@ -491,19 +454,13 @@ impl TryFrom for CreateTableBuilder { constraints, hive_distribution, hive_formats, - table_properties, - with_options, file_format, location, query, without_rowid, like, clone, - engine, comment, - auto_increment_offset, - default_charset, - collation, on_commit, on_cluster, primary_key, @@ -511,7 +468,7 @@ impl TryFrom for CreateTableBuilder { partition_by, cluster_by, clustered_by, - options, + inherits, strict, copy_grants, enable_schema_evolution, @@ -527,6 +484,7 @@ impl TryFrom for CreateTableBuilder { catalog, catalog_sync, storage_serialization_policy, + table_options, }) => Ok(Self { or_replace, temporary, @@ -539,19 +497,13 @@ impl 
TryFrom for CreateTableBuilder { constraints, hive_distribution, hive_formats, - table_properties, - with_options, file_format, location, query, without_rowid, like, clone, - engine, comment, - auto_increment_offset, - default_charset, - collation, on_commit, on_cluster, primary_key, @@ -559,7 +511,7 @@ impl TryFrom for CreateTableBuilder { partition_by, cluster_by, clustered_by, - options, + inherits, strict, iceberg, copy_grants, @@ -577,6 +529,7 @@ impl TryFrom for CreateTableBuilder { catalog, catalog_sync, storage_serialization_policy, + table_options, }), _ => Err(ParserError::ParserError(format!( "Expected create table statement, but received: {stmt}" @@ -589,8 +542,9 @@ impl TryFrom for CreateTableBuilder { #[derive(Default)] pub(crate) struct CreateTableConfiguration { pub partition_by: Option>, - pub cluster_by: Option>>, - pub options: Option>, + pub cluster_by: Option>>, + pub inherits: Option>, + pub table_options: CreateTableOptions, } #[cfg(test)] diff --git a/src/ast/helpers/stmt_data_loading.rs b/src/ast/helpers/stmt_data_loading.rs index cc4fa12f..92a72727 100644 --- a/src/ast/helpers/stmt_data_loading.rs +++ b/src/ast/helpers/stmt_data_loading.rs @@ -21,15 +21,13 @@ #[cfg(not(feature = "std"))] use alloc::string::String; -#[cfg(not(feature = "std"))] -use alloc::vec::Vec; use core::fmt; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use crate::ast::helpers::key_value_options::KeyValueOptions; -use crate::ast::{Ident, ObjectName}; +use crate::ast::{Ident, ObjectName, SelectItem}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; @@ -44,6 +42,25 @@ pub struct StageParamsObject { pub credentials: KeyValueOptions, } +/// This enum enables support for both standard SQL select item expressions +/// and Snowflake-specific ones for data loading. 
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum StageLoadSelectItemKind { + SelectItem(SelectItem), + StageLoadSelectItem(StageLoadSelectItem), +} + +impl fmt::Display for StageLoadSelectItemKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match &self { + StageLoadSelectItemKind::SelectItem(item) => write!(f, "{item}"), + StageLoadSelectItemKind::StageLoadSelectItem(item) => write!(f, "{item}"), + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 72be3ff6..75e88f8a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -23,8 +23,12 @@ use alloc::{ string::{String, ToString}, vec::Vec, }; -use helpers::{attached_token::AttachedToken, stmt_data_loading::FileStagingCommand}; +use helpers::{ + attached_token::AttachedToken, + stmt_data_loading::{FileStagingCommand, StageLoadSelectItemKind}, +}; +use core::cmp::Ordering; use core::ops::Deref; use core::{ fmt::{self, Display}, @@ -37,7 +41,14 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::tokenizer::Span; +use crate::{ + display_utils::{indented_list, SpaceOrNewline}, + tokenizer::{Span, Token}, +}; +use crate::{ + display_utils::{Indent, NewLine}, + keywords::Keyword, +}; pub use self::data_type::{ ArrayElemTypeDef, BinaryLength, CharLengthUnits, CharacterLength, DataType, EnumMember, @@ -48,35 +59,38 @@ pub use self::dcl::{ }; pub use self::ddl::{ AlterColumnOperation, AlterConnectorOwner, AlterIndexOperation, AlterPolicyOperation, - AlterTableAlgorithm, AlterTableOperation, AlterType, AlterTypeAddValue, + AlterTableAlgorithm, AlterTableLock, AlterTableOperation, AlterType, AlterTypeAddValue, 
AlterTypeAddValuePosition, AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue, - ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnPolicy, ColumnPolicyProperty, - ConstraintCharacteristics, CreateConnector, CreateFunction, Deduplicate, DeferrableInitial, - DropBehavior, GeneratedAs, GeneratedExpressionMode, IdentityParameters, IdentityProperty, - IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, IndexOption, - IndexType, KeyOrIndexDisplay, NullsDistinctOption, Owner, Partition, ProcedureParam, - ReferentialAction, TableConstraint, TagsColumnOption, UserDefinedTypeCompositeAttributeDef, - UserDefinedTypeRepresentation, ViewColumnDef, + ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions, ColumnPolicy, + ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, CreateDomain, CreateFunction, + Deduplicate, DeferrableInitial, DropBehavior, GeneratedAs, GeneratedExpressionMode, + IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, + IdentityPropertyOrder, IndexOption, IndexType, KeyOrIndexDisplay, NullsDistinctOption, Owner, + Partition, ProcedureParam, ReferentialAction, ReplicaIdentity, TableConstraint, + TagsColumnOption, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, + ViewColumnDef, }; -pub use self::dml::{CreateIndex, CreateTable, Delete, Insert}; +pub use self::dml::{CreateIndex, CreateTable, Delete, IndexColumn, Insert}; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, - ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml, - FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, - InputFormatClause, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, - JsonTableColumn, JsonTableColumnErrorHandling, JsonTableNamedColumn, JsonTableNestedColumn, - LateralView, 
LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure, - NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OpenJsonTableColumn, - OrderBy, OrderByExpr, OrderByKind, OrderByOptions, PivotValueSource, ProjectionSelect, Query, - RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, - Select, SelectFlavor, SelectInto, SelectItem, SelectItemQualifiedWildcardKind, SetExpr, - SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableAliasColumnDef, - TableFactor, TableFunctionArgs, TableIndexHintForClause, TableIndexHintType, TableIndexHints, + ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, ExprWithAliasAndOrderBy, Fetch, ForClause, + ForJson, ForXml, FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, + IlikeSelectItem, InputFormatClause, Interpolate, InterpolateExpr, Join, JoinConstraint, + JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, JsonTableNamedColumn, + JsonTableNestedColumn, LateralView, LimitClause, LockClause, LockType, MatchRecognizePattern, + MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, + OffsetRows, OpenJsonTableColumn, OrderBy, OrderByExpr, OrderByKind, OrderByOptions, + PipeOperator, PivotValueSource, ProjectionSelect, Query, RenameSelectItem, + RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, + SelectFlavor, SelectInto, SelectItem, SelectItemQualifiedWildcardKind, SetExpr, SetOperator, + SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableAliasColumnDef, TableFactor, + TableFunctionArgs, TableIndexHintForClause, TableIndexHintType, TableIndexHints, TableIndexType, TableSample, TableSampleBucket, TableSampleKind, TableSampleMethod, TableSampleModifier, TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, UpdateTableFromKind, - ValueTableMode, Values, 
WildcardAdditionalOptions, With, WithFill, + ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, XmlNamespaceDefinition, + XmlPassingArgument, XmlPassingClause, XmlTableColumn, XmlTableColumnOption, }; pub use self::trigger::{ @@ -90,7 +104,8 @@ pub use self::value::{ }; use crate::ast::helpers::key_value_options::KeyValueOptions; -use crate::ast::helpers::stmt_data_loading::{StageLoadSelectItem, StageParamsObject}; +use crate::ast::helpers::stmt_data_loading::StageParamsObject; + #[cfg(feature = "visitor")] pub use visitor::*; @@ -127,9 +142,9 @@ where fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut delim = ""; for t in self.slice { - write!(f, "{delim}")?; + f.write_str(delim)?; delim = self.sep; - write!(f, "{t}")?; + t.fmt(f)?; } Ok(()) } @@ -149,8 +164,17 @@ where DisplaySeparated { slice, sep: ", " } } +/// Writes the given statements to the formatter, each ending with +/// a semicolon and space separated. +fn format_statement_list(f: &mut fmt::Formatter, statements: &[Statement]) -> fmt::Result { + write!(f, "{}", display_separated(statements, "; "))?; + // We manually insert semicolon for the last statement, + // since display_separated doesn't handle that case. + write!(f, ";") +} + /// An identifier, decomposed into its value or character data and the quote style. 
-#[derive(Debug, Clone, PartialOrd, Ord)] +#[derive(Debug, Clone)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Ident { @@ -192,6 +216,35 @@ impl core::hash::Hash for Ident { impl Eq for Ident {} +impl PartialOrd for Ident { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Ident { + fn cmp(&self, other: &Self) -> Ordering { + let Ident { + value, + quote_style, + // exhaustiveness check; we ignore spans in ordering + span: _, + } = self; + + let Ident { + value: other_value, + quote_style: other_quote_style, + // exhaustiveness check; we ignore spans in ordering + span: _, + } = other; + + // First compare by value, then by quote_style + value + .cmp(other_value) + .then_with(|| quote_style.cmp(other_quote_style)) + } +} + impl Ident { /// Create a new identifier with the given value and no quotes and an empty span. pub fn new(value: S) -> Self @@ -291,12 +344,14 @@ impl fmt::Display for ObjectName { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ObjectNamePart { Identifier(Ident), + Function(ObjectNamePartFunction), } impl ObjectNamePart { pub fn as_ident(&self) -> Option<&Ident> { match self { ObjectNamePart::Identifier(ident) => Some(ident), + ObjectNamePart::Function(_) => None, } } } @@ -304,11 +359,31 @@ impl ObjectNamePart { impl fmt::Display for ObjectNamePart { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - ObjectNamePart::Identifier(ident) => write!(f, "{}", ident), + ObjectNamePart::Identifier(ident) => write!(f, "{ident}"), + ObjectNamePart::Function(func) => write!(f, "{func}"), } } } +/// An object name part that consists of a function that dynamically +/// constructs identifiers. 
+/// +/// - [Snowflake](https://docs.snowflake.com/en/sql-reference/identifier-literal) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ObjectNamePartFunction { + pub name: Ident, + pub args: Vec, +} + +impl fmt::Display for ObjectNamePartFunction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}(", self.name)?; + write!(f, "{})", display_comma_separated(&self.args)) + } +} + /// Represents an Array Expression, either /// `ARRAY[..]`, or `[..]` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -399,28 +474,36 @@ impl fmt::Display for Interval { /// A field definition within a struct /// -/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type +/// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct StructField { pub field_name: Option, pub field_type: DataType, + /// Struct field options. 
+ /// See [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#column_name_and_column_schema) + pub options: Option>, } impl fmt::Display for StructField { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Some(name) = &self.field_name { - write!(f, "{name} {}", self.field_type) + write!(f, "{name} {}", self.field_type)?; } else { - write!(f, "{}", self.field_type) + write!(f, "{}", self.field_type)?; + } + if let Some(options) = &self.options { + write!(f, " OPTIONS({})", display_separated(options, ", ")) + } else { + Ok(()) } } } /// A field definition within a union /// -/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html +/// [DuckDB]: https://duckdb.org/docs/sql/data_types/union.html #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -437,7 +520,7 @@ impl fmt::Display for UnionField { /// A dictionary field within a dictionary. /// -/// [duckdb]: https://duckdb.org/docs/sql/data_types/struct#creating-structs +/// [DuckDB]: https://duckdb.org/docs/sql/data_types/struct#creating-structs #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -468,7 +551,7 @@ impl Display for Map { /// A map field within a map. 
/// -/// [duckdb]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps +/// [DuckDB]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -612,7 +695,12 @@ pub struct CaseWhen { impl fmt::Display for CaseWhen { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "WHEN {} THEN {}", self.condition, self.result) + f.write_str("WHEN ")?; + self.condition.fmt(f)?; + f.write_str(" THEN")?; + SpaceOrNewline.fmt(f)?; + Indent(&self.result).fmt(f)?; + Ok(()) } } @@ -651,17 +739,17 @@ pub enum Expr { /// such as maps, arrays, and lists: /// - Array /// - A 1-dim array `a[1]` will be represented like: - /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1)]` + /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1)]` /// - A 2-dim array `a[1][2]` will be represented like: - /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1), Subscript(2)]` + /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1), Subscript(2)]` /// - Map or Struct (Bracket-style) /// - A map `a['field1']` will be represented like: - /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field')]` + /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field')]` /// - A 2-dim map `a['field1']['field2']` will be represented like: - /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Subscript('field2')]` + /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Subscript('field2')]` /// - Struct (Dot-style) (only effect when the chain contains both subscript and expr) /// - A struct access `a[field1].field2` will be represented like: - /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Ident('field2')]` + /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Ident('field2')]` /// - If a struct access likes `a.field1.field2`, it will be represented by 
CompoundIdentifier([a, field1, field2]) CompoundFieldAccess { root: Box, @@ -743,7 +831,7 @@ pub enum Expr { any: bool, expr: Box, pattern: Box, - escape_char: Option, + escape_char: Option, }, /// `ILIKE` (case-insensitive `LIKE`) ILike { @@ -753,14 +841,14 @@ pub enum Expr { any: bool, expr: Box, pattern: Box, - escape_char: Option, + escape_char: Option, }, /// SIMILAR TO regex SimilarTo { negated: bool, expr: Box, pattern: Box, - escape_char: Option, + escape_char: Option, }, /// MySQL: RLIKE regex or REGEXP regex RLike { @@ -879,6 +967,10 @@ pub enum Expr { /// true if the expression is represented using the `SUBSTRING(expr, start, len)` syntax /// This flag is used for formatting. special: bool, + + /// true if the expression is represented using the `SUBSTR` shorthand + /// This flag is used for formatting. + shorthand: bool, }, /// ```sql /// TRIM([BOTH | LEADING | TRAILING] [ FROM] ) @@ -910,12 +1002,14 @@ pub enum Expr { Nested(Box), /// A literal value, such as string, number, date or NULL Value(ValueWithSpan), + /// Prefixed expression, e.g. introducer strings, projection prefix /// - IntroducedString { - introducer: String, + /// + Prefixed { + prefix: Ident, /// The value of the constant. /// Hint: you can unwrap the string value using `value.into_string()`. - value: Value, + value: Box, }, /// A constant of form ` 'value'`. /// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`), @@ -924,7 +1018,7 @@ pub enum Expr { data_type: DataType, /// The value of the constant. /// Hint: you can unwrap the string value using `value.into_string()`. - value: Value, + value: ValueWithSpan, }, /// Scalar function call e.g. 
`LEFT(foo, 5)` Function(Function), @@ -934,6 +1028,8 @@ pub enum Expr { /// not `< 0` nor `1, 2, 3` as allowed in a `` per /// Case { + case_token: AttachedToken, + end_token: AttachedToken, operand: Option>, conditions: Vec, else_result: Option>, @@ -1012,7 +1108,7 @@ pub enum Expr { /// [(1)]: https://dev.mysql.com/doc/refman/8.0/en/fulltext-search.html#function_match MatchAgainst { /// `(, , ...)`. - columns: Vec, + columns: Vec, /// ``. match_value: Value, /// `` @@ -1050,6 +1146,8 @@ pub enum Expr { /// [Databricks](https://docs.databricks.com/en/sql/language-manual/sql-ref-lambda-functions.html) /// [DuckDb](https://duckdb.org/docs/sql/functions/lambda.html) Lambda(LambdaFunction), + /// Checks membership of a value in a JSON array + MemberOf(MemberOf), } impl Expr { @@ -1136,8 +1234,8 @@ pub enum AccessExpr { impl fmt::Display for AccessExpr { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - AccessExpr::Dot(expr) => write!(f, ".{}", expr), - AccessExpr::Subscript(subscript) => write!(f, "[{}]", subscript), + AccessExpr::Dot(expr) => write!(f, ".{expr}"), + AccessExpr::Subscript(subscript) => write!(f, "[{subscript}]"), } } } @@ -1339,12 +1437,12 @@ impl fmt::Display for Expr { match self { Expr::Identifier(s) => write!(f, "{s}"), Expr::Wildcard(_) => f.write_str("*"), - Expr::QualifiedWildcard(prefix, _) => write!(f, "{}.*", prefix), + Expr::QualifiedWildcard(prefix, _) => write!(f, "{prefix}.*"), Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")), Expr::CompoundFieldAccess { root, access_chain } => { - write!(f, "{}", root)?; + write!(f, "{root}")?; for field in access_chain { - write!(f, "{}", field)?; + write!(f, "{field}")?; } Ok(()) } @@ -1412,7 +1510,7 @@ impl fmt::Display for Expr { } => match escape_char { Some(ch) => write!( f, - "{} {}LIKE {}{} ESCAPE '{}'", + "{} {}LIKE {}{} ESCAPE {}", expr, if *negated { "NOT " } else { "" }, if *any { "ANY " } else { "" }, @@ -1437,7 +1535,7 @@ impl fmt::Display 
for Expr { } => match escape_char { Some(ch) => write!( f, - "{} {}ILIKE {}{} ESCAPE '{}'", + "{} {}ILIKE {}{} ESCAPE {}", expr, if *negated { "NOT " } else { "" }, if *any { "ANY" } else { "" }, @@ -1473,7 +1571,7 @@ impl fmt::Display for Expr { } => { let not_ = if *negated { "NOT " } else { "" }; if form.is_none() { - write!(f, "{} IS {}NORMALIZED", expr, not_) + write!(f, "{expr} IS {not_}NORMALIZED") } else { write!( f, @@ -1492,7 +1590,7 @@ impl fmt::Display for Expr { } => match escape_char { Some(ch) => write!( f, - "{} {}SIMILAR TO {} ESCAPE '{}'", + "{} {}SIMILAR TO {} ESCAPE {}", expr, if *negated { "NOT " } else { "" }, pattern, @@ -1635,28 +1733,36 @@ impl fmt::Display for Expr { Expr::Collate { expr, collation } => write!(f, "{expr} COLLATE {collation}"), Expr::Nested(ast) => write!(f, "({ast})"), Expr::Value(v) => write!(f, "{v}"), - Expr::IntroducedString { introducer, value } => write!(f, "{introducer} {value}"), + Expr::Prefixed { prefix, value } => write!(f, "{prefix} {value}"), Expr::TypedString { data_type, value } => { write!(f, "{data_type}")?; write!(f, " {value}") } - Expr::Function(fun) => write!(f, "{fun}"), + Expr::Function(fun) => fun.fmt(f), Expr::Case { + case_token: _, + end_token: _, operand, conditions, else_result, } => { - write!(f, "CASE")?; + f.write_str("CASE")?; if let Some(operand) = operand { - write!(f, " {operand}")?; + f.write_str(" ")?; + operand.fmt(f)?; } for when in conditions { - write!(f, " {when}")?; + SpaceOrNewline.fmt(f)?; + Indent(when).fmt(f)?; } if let Some(else_result) = else_result { - write!(f, " ELSE {else_result}")?; + SpaceOrNewline.fmt(f)?; + Indent("ELSE").fmt(f)?; + SpaceOrNewline.fmt(f)?; + Indent(Indent(else_result)).fmt(f)?; } - write!(f, " END") + SpaceOrNewline.fmt(f)?; + f.write_str("END") } Expr::Exists { subquery, negated } => write!( f, @@ -1708,8 +1814,13 @@ impl fmt::Display for Expr { substring_from, substring_for, special, + shorthand, } => { - write!(f, "SUBSTRING({expr}")?; + 
f.write_str("SUBSTR")?; + if !*shorthand { + f.write_str("ING")?; + } + write!(f, "({expr}")?; if let Some(from_part) = substring_from { if *special { write!(f, ", {from_part}")?; @@ -1782,7 +1893,7 @@ impl fmt::Display for Expr { } } Expr::Named { expr, name } => { - write!(f, "{} AS {}", expr, name) + write!(f, "{expr} AS {name}") } Expr::Dictionary(fields) => { write!(f, "{{{}}}", display_comma_separated(fields)) @@ -1825,6 +1936,7 @@ impl fmt::Display for Expr { } Expr::Prior(expr) => write!(f, "PRIOR {expr}"), Expr::Lambda(lambda) => write!(f, "{lambda}"), + Expr::MemberOf(member_of) => write!(f, "{member_of}"), } } } @@ -1840,8 +1952,14 @@ pub enum WindowType { impl Display for WindowType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - WindowType::WindowSpec(spec) => write!(f, "({})", spec), - WindowType::NamedWindow(name) => write!(f, "{}", name), + WindowType::WindowSpec(spec) => { + f.write_str("(")?; + NewLine.fmt(f)?; + Indent(spec).fmt(f)?; + NewLine.fmt(f)?; + f.write_str(")") + } + WindowType::NamedWindow(name) => name.fmt(f), } } } @@ -1869,14 +1987,19 @@ pub struct WindowSpec { impl fmt::Display for WindowSpec { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut delim = ""; + let mut is_first = true; if let Some(window_name) = &self.window_name { - delim = " "; + if !is_first { + SpaceOrNewline.fmt(f)?; + } + is_first = false; write!(f, "{window_name}")?; } if !self.partition_by.is_empty() { - f.write_str(delim)?; - delim = " "; + if !is_first { + SpaceOrNewline.fmt(f)?; + } + is_first = false; write!( f, "PARTITION BY {}", @@ -1884,12 +2007,16 @@ impl fmt::Display for WindowSpec { )?; } if !self.order_by.is_empty() { - f.write_str(delim)?; - delim = " "; + if !is_first { + SpaceOrNewline.fmt(f)?; + } + is_first = false; write!(f, "ORDER BY {}", display_comma_separated(&self.order_by))?; } if let Some(window_frame) = &self.window_frame { - f.write_str(delim)?; + if !is_first { + SpaceOrNewline.fmt(f)?; + } 
if let Some(end_bound) = &window_frame.end_bound { write!( f, @@ -2078,6 +2205,345 @@ pub enum Password { NullPassword, } +/// A `CASE` statement. +/// +/// Examples: +/// ```sql +/// CASE +/// WHEN EXISTS(SELECT 1) +/// THEN SELECT 1 FROM T; +/// WHEN EXISTS(SELECT 2) +/// THEN SELECT 1 FROM U; +/// ELSE +/// SELECT 1 FROM V; +/// END CASE; +/// ``` +/// +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#case_search_expression) +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/snowflake-scripting/case) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CaseStatement { + /// The `CASE` token that starts the statement. + pub case_token: AttachedToken, + pub match_expr: Option, + pub when_blocks: Vec, + pub else_block: Option, + /// The last token of the statement (`END` or `CASE`). + pub end_case_token: AttachedToken, +} + +impl fmt::Display for CaseStatement { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let CaseStatement { + case_token: _, + match_expr, + when_blocks, + else_block, + end_case_token: AttachedToken(end), + } = self; + + write!(f, "CASE")?; + + if let Some(expr) = match_expr { + write!(f, " {expr}")?; + } + + if !when_blocks.is_empty() { + write!(f, " {}", display_separated(when_blocks, " "))?; + } + + if let Some(else_block) = else_block { + write!(f, " {else_block}")?; + } + + write!(f, " END")?; + + if let Token::Word(w) = &end.token { + if w.keyword == Keyword::CASE { + write!(f, " CASE")?; + } + } + + Ok(()) + } +} + +/// An `IF` statement. 
+/// +/// Example (BigQuery or Snowflake): +/// ```sql +/// IF TRUE THEN +/// SELECT 1; +/// SELECT 2; +/// ELSEIF TRUE THEN +/// SELECT 3; +/// ELSE +/// SELECT 4; +/// END IF +/// ``` +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#if) +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/snowflake-scripting/if) +/// +/// Example (MSSQL): +/// ```sql +/// IF 1=1 SELECT 1 ELSE SELECT 2 +/// ``` +/// [MSSQL](https://learn.microsoft.com/en-us/sql/t-sql/language-elements/if-else-transact-sql?view=sql-server-ver16) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct IfStatement { + pub if_block: ConditionalStatementBlock, + pub elseif_blocks: Vec, + pub else_block: Option, + pub end_token: Option, +} + +impl fmt::Display for IfStatement { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let IfStatement { + if_block, + elseif_blocks, + else_block, + end_token, + } = self; + + write!(f, "{if_block}")?; + + for elseif_block in elseif_blocks { + write!(f, " {elseif_block}")?; + } + + if let Some(else_block) = else_block { + write!(f, " {else_block}")?; + } + + if let Some(AttachedToken(end_token)) = end_token { + write!(f, " END {end_token}")?; + } + + Ok(()) + } +} + +/// A `WHILE` statement. 
+/// +/// Example: +/// ```sql +/// WHILE @@FETCH_STATUS = 0 +/// BEGIN +/// FETCH NEXT FROM c1 INTO @var1, @var2; +/// END +/// ``` +/// +/// [MsSql](https://learn.microsoft.com/en-us/sql/t-sql/language-elements/while-transact-sql) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct WhileStatement { + pub while_block: ConditionalStatementBlock, +} + +impl fmt::Display for WhileStatement { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let WhileStatement { while_block } = self; + write!(f, "{while_block}")?; + Ok(()) + } +} + +/// A block within a [Statement::Case] or [Statement::If] or [Statement::While]-like statement +/// +/// Example 1: +/// ```sql +/// WHEN EXISTS(SELECT 1) THEN SELECT 1; +/// ``` +/// +/// Example 2: +/// ```sql +/// IF TRUE THEN SELECT 1; SELECT 2; +/// ``` +/// +/// Example 3: +/// ```sql +/// ELSE SELECT 1; SELECT 2; +/// ``` +/// +/// Example 4: +/// ```sql +/// WHILE @@FETCH_STATUS = 0 +/// BEGIN +/// FETCH NEXT FROM c1 INTO @var1, @var2; +/// END +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ConditionalStatementBlock { + pub start_token: AttachedToken, + pub condition: Option, + pub then_token: Option, + pub conditional_statements: ConditionalStatements, +} + +impl ConditionalStatementBlock { + pub fn statements(&self) -> &Vec { + self.conditional_statements.statements() + } +} + +impl fmt::Display for ConditionalStatementBlock { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let ConditionalStatementBlock { + start_token: AttachedToken(start_token), + condition, + then_token, + conditional_statements, + } = self; + + write!(f, "{start_token}")?; + + if let Some(condition) = condition { + write!(f, " 
{condition}")?; + } + + if then_token.is_some() { + write!(f, " THEN")?; + } + + if !conditional_statements.statements().is_empty() { + write!(f, " {conditional_statements}")?; + } + + Ok(()) + } +} + +/// A list of statements in a [ConditionalStatementBlock]. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ConditionalStatements { + /// SELECT 1; SELECT 2; SELECT 3; ... + Sequence { statements: Vec }, + /// BEGIN SELECT 1; SELECT 2; SELECT 3; ... END + BeginEnd(BeginEndStatements), +} + +impl ConditionalStatements { + pub fn statements(&self) -> &Vec { + match self { + ConditionalStatements::Sequence { statements } => statements, + ConditionalStatements::BeginEnd(bes) => &bes.statements, + } + } +} + +impl fmt::Display for ConditionalStatements { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ConditionalStatements::Sequence { statements } => { + if !statements.is_empty() { + format_statement_list(f, statements)?; + } + Ok(()) + } + ConditionalStatements::BeginEnd(bes) => write!(f, "{bes}"), + } + } +} + +/// Represents a list of statements enclosed within `BEGIN` and `END` keywords. 
+/// Example: +/// ```sql +/// BEGIN +/// SELECT 1; +/// SELECT 2; +/// END +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct BeginEndStatements { + pub begin_token: AttachedToken, + pub statements: Vec, + pub end_token: AttachedToken, +} + +impl fmt::Display for BeginEndStatements { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let BeginEndStatements { + begin_token: AttachedToken(begin_token), + statements, + end_token: AttachedToken(end_token), + } = self; + + if begin_token.token != Token::EOF { + write!(f, "{begin_token} ")?; + } + if !statements.is_empty() { + format_statement_list(f, statements)?; + } + if end_token.token != Token::EOF { + write!(f, " {end_token}")?; + } + Ok(()) + } +} + +/// A `RAISE` statement. +/// +/// Examples: +/// ```sql +/// RAISE USING MESSAGE = 'error'; +/// +/// RAISE myerror; +/// ``` +/// +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#raise) +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/snowflake-scripting/raise) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct RaiseStatement { + pub value: Option, +} + +impl fmt::Display for RaiseStatement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let RaiseStatement { value } = self; + + write!(f, "RAISE")?; + if let Some(value) = value { + write!(f, " {value}")?; + } + + Ok(()) + } +} + +/// Represents the error value of a [RaiseStatement]. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum RaiseStatementValue { + /// `RAISE USING MESSAGE = 'error'` + UsingMessage(Expr), + /// `RAISE myerror` + Expr(Expr), +} + +impl fmt::Display for RaiseStatementValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + RaiseStatementValue::Expr(expr) => write!(f, "{expr}"), + RaiseStatementValue::UsingMessage(expr) => write!(f, "USING MESSAGE = {expr}"), + } + } +} + /// Represents an expression assignment within a variable `DECLARE` statement. /// /// Examples: @@ -2147,10 +2613,11 @@ impl fmt::Display for DeclareAssignment { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum DeclareType { - /// Cursor variable type. e.g. [Snowflake] [Postgres] + /// Cursor variable type. e.g. [Snowflake] [PostgreSQL] [MsSql] /// /// [Snowflake]: https://docs.snowflake.com/en/developer-guide/snowflake-scripting/cursors#declaring-a-cursor - /// [Postgres]: https://www.postgresql.org/docs/current/plpgsql-cursors.html + /// [PostgreSQL]: https://www.postgresql.org/docs/current/plpgsql-cursors.html + /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-cursor-transact-sql Cursor, /// Result set variable type. [Snowflake] @@ -2189,7 +2656,7 @@ impl fmt::Display for DeclareType { } /// A `DECLARE` statement. 
-/// [Postgres] [Snowflake] [BigQuery] +/// [PostgreSQL] [Snowflake] [BigQuery] /// /// Examples: /// ```sql @@ -2197,7 +2664,7 @@ impl fmt::Display for DeclareType { /// DECLARE liahona CURSOR FOR SELECT * FROM films; /// ``` /// -/// [Postgres]: https://www.postgresql.org/docs/current/sql-declare.html +/// [PostgreSQL]: https://www.postgresql.org/docs/current/sql-declare.html /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/snowflake-scripting/declare /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#declare #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -2319,6 +2786,18 @@ pub enum CreateTableOptions { /// /// Options(Vec), + + /// Plain options, options which are not part on any declerative statement e.g. WITH/OPTIONS/... + /// + Plain(Vec), + + TableProperties(Vec), +} + +impl Default for CreateTableOptions { + fn default() -> Self { + Self::None + } } impl fmt::Display for CreateTableOptions { @@ -2330,6 +2809,12 @@ impl fmt::Display for CreateTableOptions { CreateTableOptions::Options(options) => { write!(f, "OPTIONS({})", display_comma_separated(options)) } + CreateTableOptions::TableProperties(options) => { + write!(f, "TBLPROPERTIES ({})", display_comma_separated(options)) + } + CreateTableOptions::Plain(options) => { + write!(f, "{}", display_separated(options, " ")) + } CreateTableOptions::None => Ok(()), } } @@ -2393,6 +2878,202 @@ pub enum CreatePolicyCommand { Delete, } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum Set { + /// SQL Standard-style + /// SET a = 1; + SingleAssignment { + scope: Option, + hivevar: bool, + variable: ObjectName, + values: Vec, + }, + /// Snowflake-style + /// SET (a, b, ..) 
= (1, 2, ..); + ParenthesizedAssignments { + variables: Vec, + values: Vec, + }, + /// MySQL-style + /// SET a = 1, b = 2, ..; + MultipleAssignments { assignments: Vec }, + /// MS-SQL session + /// + /// See + SetSessionParam(SetSessionParamKind), + /// ```sql + /// SET [ SESSION | LOCAL ] ROLE role_name + /// ``` + /// + /// Sets session state. Examples: [ANSI][1], [Postgresql][2], [MySQL][3], and [Oracle][4] + /// + /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#set-role-statement + /// [2]: https://www.postgresql.org/docs/14/sql-set-role.html + /// [3]: https://dev.mysql.com/doc/refman/8.0/en/set-role.html + /// [4]: https://docs.oracle.com/cd/B19306_01/server.102/b14200/statements_10004.htm + SetRole { + /// Non-ANSI optional identifier to inform if the role is defined inside the current session (`SESSION`) or transaction (`LOCAL`). + context_modifier: Option, + /// Role name. If NONE is specified, then the current role name is removed. + role_name: Option, + }, + /// ```sql + /// SET TIME ZONE + /// ``` + /// + /// Note: this is a PostgreSQL-specific statements + /// `SET TIME ZONE ` is an alias for `SET timezone TO ` in PostgreSQL + /// However, we allow it for all dialects. + SetTimeZone { local: bool, value: Expr }, + /// ```sql + /// SET NAMES 'charset_name' [COLLATE 'collation_name'] + /// ``` + SetNames { + charset_name: Ident, + collation_name: Option, + }, + /// ```sql + /// SET NAMES DEFAULT + /// ``` + /// + /// Note: this is a MySQL-specific statement. + SetNamesDefault {}, + /// ```sql + /// SET TRANSACTION ... 
+ /// ``` + SetTransaction { + modes: Vec, + snapshot: Option, + session: bool, + }, +} + +impl Display for Set { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::ParenthesizedAssignments { variables, values } => write!( + f, + "SET ({}) = ({})", + display_comma_separated(variables), + display_comma_separated(values) + ), + Self::MultipleAssignments { assignments } => { + write!(f, "SET {}", display_comma_separated(assignments)) + } + Self::SetRole { + context_modifier, + role_name, + } => { + let role_name = role_name.clone().unwrap_or_else(|| Ident::new("NONE")); + write!( + f, + "SET {modifier}ROLE {role_name}", + modifier = context_modifier.map(|m| format!("{m}")).unwrap_or_default() + ) + } + Self::SetSessionParam(kind) => write!(f, "SET {kind}"), + Self::SetTransaction { + modes, + snapshot, + session, + } => { + if *session { + write!(f, "SET SESSION CHARACTERISTICS AS TRANSACTION")?; + } else { + write!(f, "SET TRANSACTION")?; + } + if !modes.is_empty() { + write!(f, " {}", display_comma_separated(modes))?; + } + if let Some(snapshot_id) = snapshot { + write!(f, " SNAPSHOT {snapshot_id}")?; + } + Ok(()) + } + Self::SetTimeZone { local, value } => { + f.write_str("SET ")?; + if *local { + f.write_str("LOCAL ")?; + } + write!(f, "TIME ZONE {value}") + } + Self::SetNames { + charset_name, + collation_name, + } => { + write!(f, "SET NAMES {charset_name}")?; + + if let Some(collation) = collation_name { + f.write_str(" COLLATE ")?; + f.write_str(collation)?; + }; + + Ok(()) + } + Self::SetNamesDefault {} => { + f.write_str("SET NAMES DEFAULT")?; + + Ok(()) + } + Set::SingleAssignment { + scope, + hivevar, + variable, + values, + } => { + write!( + f, + "SET {}{}{} = {}", + scope.map(|s| format!("{s}")).unwrap_or_default(), + if *hivevar { "HIVEVAR:" } else { "" }, + variable, + display_comma_separated(values) + ) + } + } + } +} + +/// Convert a `Set` into a `Statement`. 
+/// Convenience function, instead of writing `Statement::Set(Set::Set...{...})` +impl From for Statement { + fn from(set: Set) -> Self { + Statement::Set(set) + } +} + +/// A representation of a `WHEN` arm with all the identifiers catched and the statements to execute +/// for the arm. +/// +/// Snowflake: +/// BigQuery: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ExceptionWhen { + pub idents: Vec, + pub statements: Vec, +} + +impl Display for ExceptionWhen { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "WHEN {idents} THEN", + idents = display_separated(&self.idents, " OR ") + )?; + + if !self.statements.is_empty() { + write!(f, " ")?; + format_statement_list(f, &self.statements)?; + } + + Ok(()) + } +} + /// A top-level statement (SELECT, INSERT, CREATE, etc.) #[allow(clippy::large_enum_variant)] #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -2418,6 +3099,7 @@ pub enum Statement { compute_statistics: bool, has_table_keyword: bool, }, + Set(Set), /// ```sql /// TRUNCATE /// ``` @@ -2428,9 +3110,6 @@ pub enum Statement { /// TABLE - optional keyword; table: bool, /// Postgres-specific option - /// [ TRUNCATE TABLE ONLY ] - only: bool, - /// Postgres-specific option /// [ RESTART IDENTITY | CONTINUE IDENTITY ] identity: Option, /// Postgres-specific option @@ -2482,6 +3161,14 @@ pub enum Statement { file_format: Option, source: Box, }, + /// A `CASE` statement. + Case(CaseStatement), + /// An `IF` statement. + If(IfStatement), + /// A `WHILE` statement. + While(WhileStatement), + /// A `RAISE` statement. 
+ Raise(RaiseStatement), /// ```sql /// CALL /// ``` @@ -2517,10 +3204,11 @@ pub enum Statement { CopyIntoSnowflake { kind: CopyIntoSnowflakeKind, into: ObjectName, + into_columns: Option>, from_obj: Option, from_obj_alias: Option, stage_params: StageParamsObject, - from_transformations: Option>, + from_transformations: Option>, from_query: Option>, files: Option>, pattern: Option, @@ -2530,6 +3218,11 @@ pub enum Statement { partition: Option>, }, /// ```sql + /// OPEN cursor_name + /// ``` + /// Opens a cursor. + Open(OpenStatement), + /// ```sql /// CLOSE /// ``` /// Closes the portal underlying an open cursor. @@ -2562,6 +3255,10 @@ pub enum Statement { /// CREATE VIEW /// ``` CreateView { + /// True if this is a `CREATE OR ALTER VIEW` statement + /// + /// [MsSql](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-view-transact-sql) + or_alter: bool, or_replace: bool, materialized: bool, /// View name @@ -2607,7 +3304,7 @@ pub enum Statement { /// ```sql /// CREATE ROLE /// ``` - /// See [postgres](https://www.postgresql.org/docs/current/sql-createrole.html) + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-createrole.html) CreateRole { names: Vec, if_not_exists: bool, @@ -2633,7 +3330,7 @@ pub enum Statement { /// ```sql /// CREATE SECRET /// ``` - /// See [duckdb](https://duckdb.org/docs/sql/statements/create_secret.html) + /// See [DuckDB](https://duckdb.org/docs/sql/statements/create_secret.html) CreateSecret { or_replace: bool, temporary: Option, @@ -2643,6 +3340,8 @@ pub enum Statement { secret_type: Ident, options: Vec, }, + /// A `CREATE SERVER` statement. 
+ CreateServer(CreateServerStatement), /// ```sql /// CREATE POLICY /// ``` @@ -2677,6 +3376,9 @@ pub enum Statement { /// For example: `ALTER TABLE table_name ON CLUSTER cluster_name ADD COLUMN c UInt32` /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/update) on_cluster: Option, + /// Snowflake "ICEBERG" clause for Iceberg tables + /// + iceberg: bool, }, /// ```sql /// ALTER INDEX @@ -2801,6 +3503,9 @@ pub enum Statement { purge: bool, /// MySQL-specific "TEMPORARY" keyword temporary: bool, + /// MySQL-specific drop index syntax, which requires table specification + /// See + table: Option, }, /// ```sql /// DROP FUNCTION @@ -2813,6 +3518,14 @@ pub enum Statement { drop_behavior: Option, }, /// ```sql + /// DROP DOMAIN + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-dropdomain.html) + /// + /// DROP DOMAIN [ IF EXISTS ] name [, ...] [ CASCADE | RESTRICT ] + /// + DropDomain(DropDomain), + /// ```sql /// DROP PROCEDURE /// ``` DropProcedure { @@ -2845,7 +3558,10 @@ pub enum Statement { /// DROP CONNECTOR /// ``` /// See [Hive](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362034#LanguageManualDDL-DropConnector) - DropConnector { if_exists: bool, name: Ident }, + DropConnector { + if_exists: bool, + name: Ident, + }, /// ```sql /// DECLARE /// ``` @@ -2853,7 +3569,9 @@ pub enum Statement { /// /// Note: this is a PostgreSQL-specific statement, /// but may also compatible with other SQL. 
- Declare { stmts: Vec }, + Declare { + stmts: Vec, + }, /// ```sql /// CREATE EXTENSION [ IF NOT EXISTS ] extension_name /// [ WITH ] [ SCHEMA schema_name ] @@ -2892,6 +3610,7 @@ pub enum Statement { /// Cursor name name: Ident, direction: FetchDirection, + position: FetchPosition, /// Optional, It's possible to fetch rows form cursor to the table into: Option, }, @@ -2915,69 +3634,23 @@ pub enum Statement { /// /// Note: this is a PostgreSQL-specific statement, /// but may also compatible with other SQL. - Discard { object_type: DiscardObject }, - /// ```sql - /// SET [ SESSION | LOCAL ] ROLE role_name - /// ``` - /// - /// Sets session state. Examples: [ANSI][1], [Postgresql][2], [MySQL][3], and [Oracle][4] - /// - /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#set-role-statement - /// [2]: https://www.postgresql.org/docs/14/sql-set-role.html - /// [3]: https://dev.mysql.com/doc/refman/8.0/en/set-role.html - /// [4]: https://docs.oracle.com/cd/B19306_01/server.102/b14200/statements_10004.htm - SetRole { - /// Non-ANSI optional identifier to inform if the role is defined inside the current session (`SESSION`) or transaction (`LOCAL`). - context_modifier: ContextModifier, - /// Role name. If NONE is specified, then the current role name is removed. - role_name: Option, + Discard { + object_type: DiscardObject, }, - /// ```sql - /// SET = expression; - /// SET (variable[, ...]) = (expression[, ...]); - /// ``` - /// - /// Note: this is not a standard SQL statement, but it is supported by at - /// least MySQL and PostgreSQL. Not all MySQL-specific syntactic forms are - /// supported yet. 
- SetVariable { - local: bool, - hivevar: bool, - variables: OneOrManyWithParens, - value: Vec, - }, - /// ```sql - /// SET TIME ZONE - /// ``` - /// - /// Note: this is a PostgreSQL-specific statements - /// `SET TIME ZONE ` is an alias for `SET timezone TO ` in PostgreSQL - SetTimeZone { local: bool, value: Expr }, - /// ```sql - /// SET NAMES 'charset_name' [COLLATE 'collation_name'] - /// ``` - /// - /// Note: this is a MySQL-specific statement. - SetNames { - charset_name: String, - collation_name: Option, - }, - /// ```sql - /// SET NAMES DEFAULT - /// ``` - /// - /// Note: this is a MySQL-specific statement. - SetNamesDefault {}, /// `SHOW FUNCTIONS` /// /// Note: this is a Presto-specific statement. - ShowFunctions { filter: Option }, + ShowFunctions { + filter: Option, + }, /// ```sql /// SHOW /// ``` /// /// Note: this is a PostgreSQL-specific statement. - ShowVariable { variable: Vec }, + ShowVariable { + variable: Vec, + }, /// ```sql /// SHOW [GLOBAL | SESSION] STATUS [LIKE 'pattern' | WHERE expr] /// ``` @@ -3061,7 +3734,9 @@ pub enum Statement { /// ``` /// /// Note: this is a MySQL-specific statement. - ShowCollation { filter: Option }, + ShowCollation { + filter: Option, + }, /// ```sql /// `USE ...` /// ``` @@ -3089,29 +3764,24 @@ pub enum Statement { /// END; /// ``` statements: Vec, - /// Statements of an exception clause. + /// Exception handling with exception clauses. /// Example: /// ```sql - /// BEGIN - /// SELECT 1; - /// EXCEPTION WHEN ERROR THEN - /// SELECT 2; - /// SELECT 3; - /// END; + /// EXCEPTION + /// WHEN EXCEPTION_1 THEN + /// SELECT 2; + /// WHEN EXCEPTION_2 OR EXCEPTION_3 THEN + /// SELECT 3; + /// WHEN OTHER THEN + /// SELECT 4; + /// ``` /// - exception_statements: Option>, + /// + exception: Option>, /// TRUE if the statement has an `END` keyword. has_end_keyword: bool, }, /// ```sql - /// SET TRANSACTION ... 
- /// ``` - SetTransaction { - modes: Vec, - snapshot: Option, - session: bool, - }, - /// ```sql /// COMMENT ON ... /// ``` /// @@ -3152,6 +3822,30 @@ pub enum Statement { /// ` | AUTHORIZATION | AUTHORIZATION ` schema_name: SchemaName, if_not_exists: bool, + /// Schema properties. + /// + /// ```sql + /// CREATE SCHEMA myschema WITH (key1='value1'); + /// ``` + /// + /// [Trino](https://trino.io/docs/current/sql/create-schema.html) + with: Option>, + /// Schema options. + /// + /// ```sql + /// CREATE SCHEMA myschema OPTIONS(key1='value1'); + /// ``` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_schema_statement) + options: Option>, + /// Default collation specification for the schema. + /// + /// ```sql + /// CREATE SCHEMA myschema DEFAULT COLLATE 'und:ci'; + /// ``` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_schema_statement) + default_collate_spec: Option, }, /// ```sql /// CREATE DATABASE @@ -3168,8 +3862,9 @@ pub enum Statement { /// /// Supported variants: /// 1. [Hive](https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl#LanguageManualDDL-Create/Drop/ReloadFunction) - /// 2. [Postgres](https://www.postgresql.org/docs/15/sql-createfunction.html) + /// 2. [PostgreSQL](https://www.postgresql.org/docs/15/sql-createfunction.html) /// 3. [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement) + /// 4. 
[MsSql](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql) CreateFunction(CreateFunction), /// CREATE TRIGGER /// @@ -3183,7 +3878,12 @@ pub enum Statement { /// ``` /// /// Postgres: + /// SQL Server: CreateTrigger { + /// True if this is a `CREATE OR ALTER TRIGGER` statement + /// + /// [MsSql](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-trigger-transact-sql?view=sql-server-ver16#arguments) + or_alter: bool, /// The `OR REPLACE` clause is used to re-create the trigger if it already exists. /// /// Example: @@ -3244,7 +3944,9 @@ pub enum Statement { /// Triggering conditions condition: Option, /// Execute logic block - exec_body: TriggerExecBody, + exec_body: Option, + /// For SQL dialects with statement(s) for a body + statements: Option, /// The characteristic of the trigger, which include whether the trigger is `DEFERRABLE`, `INITIALLY DEFERRED`, or `INITIALLY IMMEDIATE`, characteristics: Option, }, @@ -3268,7 +3970,8 @@ pub enum Statement { or_alter: bool, name: ObjectName, params: Option>, - body: Vec, + language: Option, + body: ConditionalStatements, }, /// ```sql /// CREATE MACRO @@ -3313,9 +4016,15 @@ pub enum Statement { objects: Option, grantees: Vec, with_grant_option: bool, + as_grantor: Option, granted_by: Option, + current_grants: Option, }, /// ```sql + /// DENY privileges ON object TO grantees + /// ``` + Deny(DenyStatement), + /// ```sql /// REVOKE privileges ON objects FROM grantees /// ``` Revoke { @@ -3330,7 +4039,10 @@ pub enum Statement { /// ``` /// /// Note: this is a PostgreSQL-specific statement. 
- Deallocate { name: Ident, prepare: bool }, + Deallocate { + name: Ident, + prepare: bool, + }, /// ```sql /// An `EXECUTE` statement /// ``` @@ -3416,11 +4128,15 @@ pub enum Statement { /// SAVEPOINT /// ``` /// Define a new savepoint within the current transaction - Savepoint { name: Ident }, + Savepoint { + name: Ident, + }, /// ```sql /// RELEASE [ SAVEPOINT ] savepoint_name /// ``` - ReleaseSavepoint { name: Ident }, + ReleaseSavepoint { + name: Ident, + }, /// A `MERGE` statement. /// /// ```sql @@ -3428,6 +4144,7 @@ pub enum Statement { /// ``` /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + /// [MSSQL](https://learn.microsoft.com/en-us/sql/t-sql/statements/merge-transact-sql?view=sql-server-ver16) Merge { /// optional INTO keyword into: bool, @@ -3439,6 +4156,8 @@ pub enum Statement { on: Box, /// Specifies the actions to perform when values match or do not match. clauses: Vec, + // Specifies the output to save changes in MSSQL + output: Option, }, /// ```sql /// CACHE [ FLAG ] TABLE [ OPTIONS('K1' = 'V1', 'K2' = V2) ] [ AS ] [ ] @@ -3481,6 +4200,8 @@ pub enum Statement { sequence_options: Vec, owned_by: Option, }, + /// A `CREATE DOMAIN` statement. + CreateDomain(CreateDomain), /// ```sql /// CREATE TYPE /// ``` @@ -3500,7 +4221,9 @@ pub enum Statement { /// LOCK TABLES [READ [LOCAL] | [LOW_PRIORITY] WRITE] /// ``` /// Note: this is a MySQL-specific statement. 
See - LockTables { tables: Vec }, + LockTables { + tables: Vec, + }, /// ```sql /// UNLOCK TABLES /// ``` @@ -3534,18 +4257,22 @@ pub enum Statement { /// listen for a notification channel /// /// See Postgres - LISTEN { channel: Ident }, + LISTEN { + channel: Ident, + }, /// ```sql /// UNLISTEN /// ``` /// stop listening for a notification /// /// See Postgres - UNLISTEN { channel: Ident }, + UNLISTEN { + channel: Ident, + }, /// ```sql /// NOTIFY channel [ , payload ] /// ``` - /// send a notification event together with an optional “payload” string to channel + /// send a notification event together with an optional "payload" string to channel /// /// See Postgres NOTIFY { @@ -3581,10 +4308,6 @@ pub enum Statement { /// Snowflake `REMOVE` /// See: Remove(FileStagingCommand), - /// MS-SQL session - /// - /// See - SetSessionParam(SetSessionParamKind), /// RaiseError (MSSQL) /// RAISERROR ( { msg_id | msg_str | @local_variable } /// { , severity , state } @@ -3598,6 +4321,40 @@ pub enum Statement { arguments: Vec, options: Vec, }, + /// ```sql + /// PRINT msg_str | @local_variable | string_expr + /// ``` + /// + /// See: + Print(PrintStatement), + /// ```sql + /// RETURN [ expression ] + /// ``` + /// + /// See [ReturnStatement] + Return(ReturnStatement), +} + +/// ```sql +/// {COPY | REVOKE} CURRENT GRANTS +/// ``` +/// +/// - [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/grant-ownership#optional-parameters) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CurrentGrantsKind { + CopyCurrentGrants, + RevokeCurrentGrants, +} + +impl fmt::Display for CurrentGrantsKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CurrentGrantsKind::CopyCurrentGrants => write!(f, "COPY CURRENT GRANTS"), + CurrentGrantsKind::RevokeCurrentGrants => write!(f, "REVOKE CURRENT GRANTS"), + } + } } 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -3620,6 +4377,28 @@ impl fmt::Display for RaisErrorOption { } impl fmt::Display for Statement { + /// Formats a SQL statement with support for pretty printing. + /// + /// When using the alternate flag (`{:#}`), the statement will be formatted with proper + /// indentation and line breaks. For example: + /// + /// ``` + /// # use sqlparser::dialect::GenericDialect; + /// # use sqlparser::parser::Parser; + /// let sql = "SELECT a, b FROM table_1"; + /// let ast = Parser::parse_sql(&GenericDialect, sql).unwrap(); + /// + /// // Regular formatting + /// assert_eq!(format!("{}", ast[0]), "SELECT a, b FROM table_1"); + /// + /// // Pretty printing + /// assert_eq!(format!("{:#}", ast[0]), + /// r#"SELECT + /// a, + /// b + /// FROM + /// table_1"#); + /// ``` // Clippy thinks this function is too complicated, but it is painful to // split up without extracting structs for each `Statement` variant. #[allow(clippy::cognitive_complexity)] @@ -3635,7 +4414,8 @@ impl fmt::Display for Statement { } => { write!(f, "FLUSH")?; if let Some(location) = location { - write!(f, " {location}")?; + f.write_str(" ")?; + location.fmt(f)?; } write!(f, " {object_type}")?; @@ -3673,7 +4453,7 @@ impl fmt::Display for Statement { write!(f, "{describe_alias} ")?; if let Some(format) = hive_format { - write!(f, "{} ", format)?; + write!(f, "{format} ")?; } if *has_table_keyword { write!(f, "TABLE ")?; @@ -3717,7 +4497,7 @@ impl fmt::Display for Statement { write!(f, "{statement}") } - Statement::Query(s) => write!(f, "{s}"), + Statement::Query(s) => s.fmt(f), Statement::Declare { stmts } => { write!(f, "DECLARE ")?; write!(f, "{}", display_separated(stmts, "; ")) @@ -3725,11 +4505,10 @@ impl fmt::Display for Statement { Statement::Fetch { name, direction, + position, into, } => { - write!(f, "FETCH {direction} ")?; - - write!(f, "IN {name}")?; + write!(f, "FETCH {direction} {position} {name}")?; if let Some(into) = into { 
write!(f, " INTO {into}")?; @@ -3776,17 +4555,15 @@ impl fmt::Display for Statement { table_names, partitions, table, - only, identity, cascade, on_cluster, } => { let table = if *table { "TABLE " } else { "" }; - let only = if *only { "ONLY " } else { "" }; write!( f, - "TRUNCATE {table}{only}{table_names}", + "TRUNCATE {table}{table_names}", table_names = display_comma_separated(table_names) )?; @@ -3813,6 +4590,18 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::Case(stmt) => { + write!(f, "{stmt}") + } + Statement::If(stmt) => { + write!(f, "{stmt}") + } + Statement::While(stmt) => { + write!(f, "{stmt}") + } + Statement::Raise(stmt) => { + write!(f, "{stmt}") + } Statement::AttachDatabase { schema_name, database_file_name, @@ -3893,7 +4682,7 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::Insert(insert) => write!(f, "{insert}"), + Statement::Insert(insert) => insert.fmt(f), Statement::Install { extension_name: name, } => write!(f, "INSTALL {name}"), @@ -3956,29 +4745,42 @@ impl fmt::Display for Statement { returning, or, } => { - write!(f, "UPDATE ")?; + f.write_str("UPDATE ")?; if let Some(or) = or { - write!(f, "{or} ")?; + or.fmt(f)?; + f.write_str(" ")?; } - write!(f, "{table}")?; + table.fmt(f)?; if let Some(UpdateTableFromKind::BeforeSet(from)) = from { - write!(f, " FROM {}", display_comma_separated(from))?; + SpaceOrNewline.fmt(f)?; + f.write_str("FROM")?; + indented_list(f, from)?; } if !assignments.is_empty() { - write!(f, " SET {}", display_comma_separated(assignments))?; + SpaceOrNewline.fmt(f)?; + f.write_str("SET")?; + indented_list(f, assignments)?; } if let Some(UpdateTableFromKind::AfterSet(from)) = from { - write!(f, " FROM {}", display_comma_separated(from))?; + SpaceOrNewline.fmt(f)?; + f.write_str("FROM")?; + indented_list(f, from)?; } if let Some(selection) = selection { - write!(f, " WHERE {selection}")?; + SpaceOrNewline.fmt(f)?; + f.write_str("WHERE")?; + SpaceOrNewline.fmt(f)?; + Indent(selection).fmt(f)?; } 
if let Some(returning) = returning { - write!(f, " RETURNING {}", display_comma_separated(returning))?; + SpaceOrNewline.fmt(f)?; + f.write_str("RETURNING")?; + indented_list(f, returning)?; } Ok(()) } - Statement::Delete(delete) => write!(f, "{delete}"), + Statement::Delete(delete) => delete.fmt(f), + Statement::Open(open) => open.fmt(f), Statement::Close { cursor } => { write!(f, "CLOSE {cursor}")?; @@ -4004,7 +4806,9 @@ impl fmt::Display for Statement { Ok(()) } Statement::CreateFunction(create_function) => create_function.fmt(f), + Statement::CreateDomain(create_domain) => create_domain.fmt(f), Statement::CreateTrigger { + or_alter, or_replace, is_constraint, name, @@ -4017,19 +4821,30 @@ impl fmt::Display for Statement { condition, include_each, exec_body, + statements, characteristics, } => { write!( f, - "CREATE {or_replace}{is_constraint}TRIGGER {name} {period}", + "CREATE {or_alter}{or_replace}{is_constraint}TRIGGER {name} ", + or_alter = if *or_alter { "OR ALTER " } else { "" }, or_replace = if *or_replace { "OR REPLACE " } else { "" }, is_constraint = if *is_constraint { "CONSTRAINT " } else { "" }, )?; - if !events.is_empty() { - write!(f, " {}", display_separated(events, " OR "))?; + if exec_body.is_some() { + write!(f, "{period}")?; + if !events.is_empty() { + write!(f, " {}", display_separated(events, " OR "))?; + } + write!(f, " ON {table_name}")?; + } else { + write!(f, "ON {table_name}")?; + write!(f, " {period}")?; + if !events.is_empty() { + write!(f, " {}", display_separated(events, ", "))?; + } } - write!(f, " ON {table_name}")?; if let Some(referenced_table_name) = referenced_table_name { write!(f, " FROM {referenced_table_name}")?; @@ -4045,13 +4860,19 @@ impl fmt::Display for Statement { if *include_each { write!(f, " FOR EACH {trigger_object}")?; - } else { + } else if exec_body.is_some() { write!(f, " FOR {trigger_object}")?; } if let Some(condition) = condition { write!(f, " WHEN {condition}")?; } - write!(f, " EXECUTE {exec_body}") + if 
let Some(exec_body) = exec_body { + write!(f, " EXECUTE {exec_body}")?; + } + if let Some(statements) = statements { + write!(f, " AS {statements}")?; + } + Ok(()) } Statement::DropTrigger { if_exists, @@ -4076,6 +4897,7 @@ impl fmt::Display for Statement { name, or_alter, params, + language, body, } => { write!( @@ -4090,11 +4912,12 @@ impl fmt::Display for Statement { write!(f, " ({})", display_comma_separated(p))?; } } - write!( - f, - " AS BEGIN {body} END", - body = display_separated(body, "; ") - ) + + if let Some(language) = language { + write!(f, " LANGUAGE {language}")?; + } + + write!(f, " AS {body}") } Statement::CreateMacro { or_replace, @@ -4119,6 +4942,7 @@ impl fmt::Display for Statement { Ok(()) } Statement::CreateView { + or_alter, name, or_replace, columns, @@ -4135,7 +4959,8 @@ impl fmt::Display for Statement { } => { write!( f, - "CREATE {or_replace}", + "CREATE {or_alter}{or_replace}", + or_alter = if *or_alter { "OR ALTER " } else { "" }, or_replace = if *or_replace { "OR REPLACE " } else { "" }, )?; if let Some(params) = params { @@ -4172,7 +4997,9 @@ impl fmt::Display for Statement { if matches!(options, CreateTableOptions::Options(_)) { write!(f, " {options}")?; } - write!(f, " AS {query}")?; + f.write_str(" AS")?; + SpaceOrNewline.fmt(f)?; + query.fmt(f)?; if *with_no_schema_binding { write!(f, " WITH NO SCHEMA BINDING")?; } @@ -4398,6 +5225,9 @@ impl fmt::Display for Statement { write!(f, " )")?; Ok(()) } + Statement::CreateServer(stmt) => { + write!(f, "{stmt}") + } Statement::CreatePolicy { name, table_name, @@ -4448,15 +5278,21 @@ impl fmt::Display for Statement { operations, location, on_cluster, + iceberg, } => { - write!(f, "ALTER TABLE ")?; + if *iceberg { + write!(f, "ALTER ICEBERG TABLE ")?; + } else { + write!(f, "ALTER TABLE ")?; + } + if *if_exists { write!(f, "IF EXISTS ")?; } if *only { write!(f, "ONLY ")?; } - write!(f, "{name} ", name = name)?; + write!(f, "{name} ")?; if let Some(cluster) = on_cluster { write!(f, "ON 
CLUSTER {cluster} ")?; } @@ -4534,7 +5370,7 @@ impl fmt::Display for Statement { )?; if !session_params.options.is_empty() { if *set { - write!(f, " {}", session_params)?; + write!(f, " {session_params}")?; } else { let options = session_params .options @@ -4554,17 +5390,24 @@ impl fmt::Display for Statement { restrict, purge, temporary, - } => write!( - f, - "DROP {}{}{} {}{}{}{}", - if *temporary { "TEMPORARY " } else { "" }, - object_type, - if *if_exists { " IF EXISTS" } else { "" }, - display_comma_separated(names), - if *cascade { " CASCADE" } else { "" }, - if *restrict { " RESTRICT" } else { "" }, - if *purge { " PURGE" } else { "" } - ), + table, + } => { + write!( + f, + "DROP {}{}{} {}{}{}{}", + if *temporary { "TEMPORARY " } else { "" }, + object_type, + if *if_exists { " IF EXISTS" } else { "" }, + display_comma_separated(names), + if *cascade { " CASCADE" } else { "" }, + if *restrict { " RESTRICT" } else { "" }, + if *purge { " PURGE" } else { "" }, + )?; + if let Some(table_name) = table.as_ref() { + write!(f, " ON {table_name}")?; + }; + Ok(()) + } Statement::DropFunction { if_exists, func_desc, @@ -4581,6 +5424,21 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::DropDomain(DropDomain { + if_exists, + name, + drop_behavior, + }) => { + write!( + f, + "DROP DOMAIN{} {name}", + if *if_exists { " IF EXISTS" } else { "" }, + )?; + if let Some(op) = drop_behavior { + write!(f, " {op}")?; + } + Ok(()) + } Statement::DropProcedure { if_exists, proc_desc, @@ -4645,60 +5503,7 @@ impl fmt::Display for Statement { write!(f, "DISCARD {object_type}")?; Ok(()) } - Self::SetRole { - context_modifier, - role_name, - } => { - let role_name = role_name.clone().unwrap_or_else(|| Ident::new("NONE")); - write!(f, "SET{context_modifier} ROLE {role_name}") - } - Statement::SetVariable { - local, - variables, - hivevar, - value, - } => { - f.write_str("SET ")?; - if *local { - f.write_str("LOCAL ")?; - } - let parenthesized = matches!(variables, 
OneOrManyWithParens::Many(_)); - write!( - f, - "{hivevar}{name} = {l_paren}{value}{r_paren}", - hivevar = if *hivevar { "HIVEVAR:" } else { "" }, - name = variables, - l_paren = parenthesized.then_some("(").unwrap_or_default(), - value = display_comma_separated(value), - r_paren = parenthesized.then_some(")").unwrap_or_default(), - ) - } - Statement::SetTimeZone { local, value } => { - f.write_str("SET ")?; - if *local { - f.write_str("LOCAL ")?; - } - write!(f, "TIME ZONE {value}") - } - Statement::SetNames { - charset_name, - collation_name, - } => { - f.write_str("SET NAMES ")?; - f.write_str(charset_name)?; - - if let Some(collation) = collation_name { - f.write_str(" COLLATE ")?; - f.write_str(collation)?; - }; - - Ok(()) - } - Statement::SetNamesDefault {} => { - f.write_str("SET NAMES DEFAULT")?; - - Ok(()) - } + Self::Set(set) => write!(f, "{set}"), Statement::ShowVariable { variable } => { write!(f, "SHOW")?; if !variable.is_empty() { @@ -4849,12 +5654,12 @@ impl fmt::Display for Statement { transaction, modifier, statements, - exception_statements, + exception, has_end_keyword, } => { if *syntax_begin { if let Some(modifier) = *modifier { - write!(f, "BEGIN {}", modifier)?; + write!(f, "BEGIN {modifier}")?; } else { write!(f, "BEGIN")?; } @@ -4868,18 +5673,13 @@ impl fmt::Display for Statement { write!(f, " {}", display_comma_separated(modes))?; } if !statements.is_empty() { - write!(f, " {}", display_separated(statements, "; "))?; - // We manually insert semicolon for the last statement, - // since display_separated doesn't handle that case. - write!(f, ";")?; + write!(f, " ")?; + format_statement_list(f, statements)?; } - if let Some(exception_statements) = exception_statements { - write!(f, " EXCEPTION WHEN ERROR THEN")?; - if !exception_statements.is_empty() { - write!(f, " {}", display_separated(exception_statements, "; "))?; - // We manually insert semicolon for the last statement, - // since display_separated doesn't handle that case. 
- write!(f, ";")?; + if let Some(exception_when) = exception { + write!(f, " EXCEPTION")?; + for when in exception_when { + write!(f, " {when}")?; } } if *has_end_keyword { @@ -4887,24 +5687,6 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::SetTransaction { - modes, - snapshot, - session, - } => { - if *session { - write!(f, "SET SESSION CHARACTERISTICS AS TRANSACTION")?; - } else { - write!(f, "SET TRANSACTION")?; - } - if !modes.is_empty() { - write!(f, " {}", display_comma_separated(modes))?; - } - if let Some(snapshot_id) = snapshot { - write!(f, " SNAPSHOT {snapshot_id}")?; - } - Ok(()) - } Statement::Commit { chain, end: end_syntax, @@ -4913,7 +5695,7 @@ impl fmt::Display for Statement { if *end_syntax { write!(f, "END")?; if let Some(modifier) = *modifier { - write!(f, " {}", modifier)?; + write!(f, " {modifier}")?; } if *chain { write!(f, " AND CHAIN")?; @@ -4939,12 +5721,31 @@ impl fmt::Display for Statement { Statement::CreateSchema { schema_name, if_not_exists, - } => write!( - f, - "CREATE SCHEMA {if_not_exists}{name}", - if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, - name = schema_name - ), + with, + options, + default_collate_spec, + } => { + write!( + f, + "CREATE SCHEMA {if_not_exists}{name}", + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + name = schema_name + )?; + + if let Some(collate) = default_collate_spec { + write!(f, " DEFAULT COLLATE {collate}")?; + } + + if let Some(with) = with { + write!(f, " WITH ({})", display_comma_separated(with))?; + } + + if let Some(options) = options { + write!(f, " OPTIONS({})", display_comma_separated(options))?; + } + + Ok(()) + } Statement::Assert { condition, message } => { write!(f, "ASSERT {condition}")?; if let Some(m) = message { @@ -4957,7 +5758,9 @@ impl fmt::Display for Statement { objects, grantees, with_grant_option, + as_grantor, granted_by, + current_grants, } => { write!(f, "GRANT {privileges} ")?; if let Some(objects) = objects { @@ 
-4967,11 +5770,18 @@ impl fmt::Display for Statement { if *with_grant_option { write!(f, " WITH GRANT OPTION")?; } + if let Some(current_grants) = current_grants { + write!(f, " {current_grants}")?; + } + if let Some(grantor) = as_grantor { + write!(f, " AS {grantor}")?; + } if let Some(grantor) = granted_by { write!(f, " GRANTED BY {grantor}")?; } Ok(()) } + Statement::Deny(s) => write!(f, "{s}"), Statement::Revoke { privileges, objects, @@ -4988,7 +5798,7 @@ impl fmt::Display for Statement { write!(f, " GRANTED BY {grantor}")?; } if let Some(cascade) = cascade { - write!(f, " {}", cascade)?; + write!(f, " {cascade}")?; } Ok(()) } @@ -5068,6 +5878,7 @@ impl fmt::Display for Statement { source, on, clauses, + output, } => { write!( f, @@ -5075,7 +5886,11 @@ impl fmt::Display for Statement { int = if *into { " INTO" } else { "" } )?; write!(f, "ON {on} ")?; - write!(f, "{}", display_separated(clauses, " ")) + write!(f, "{}", display_separated(clauses, " "))?; + if let Some(output) = output { + write!(f, " {output}")?; + } + Ok(()) } Statement::Cache { table_name, @@ -5162,13 +5977,13 @@ impl fmt::Display for Statement { if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, )?; if !directory_table_params.options.is_empty() { - write!(f, " DIRECTORY=({})", directory_table_params)?; + write!(f, " DIRECTORY=({directory_table_params})")?; } if !file_format.options.is_empty() { - write!(f, " FILE_FORMAT=({})", file_format)?; + write!(f, " FILE_FORMAT=({file_format})")?; } if !copy_options.options.is_empty() { - write!(f, " COPY_OPTIONS=({})", copy_options)?; + write!(f, " COPY_OPTIONS=({copy_options})")?; } if comment.is_some() { write!(f, " COMMENT='{}'", comment.as_ref().unwrap())?; @@ -5178,6 +5993,7 @@ impl fmt::Display for Statement { Statement::CopyIntoSnowflake { kind, into, + into_columns, from_obj, from_obj_alias, stage_params, @@ -5190,7 +6006,10 @@ impl fmt::Display for Statement { validation_mode, partition, } => { - write!(f, "COPY INTO {}", 
into)?; + write!(f, "COPY INTO {into}")?; + if let Some(into_columns) = into_columns { + write!(f, " ({})", display_comma_separated(into_columns))?; + } if let Some(from_transformations) = from_transformations { // Data load with transformation if let Some(from_stage) = from_obj { @@ -5203,12 +6022,12 @@ impl fmt::Display for Statement { )?; } if let Some(from_obj_alias) = from_obj_alias { - write!(f, " AS {}", from_obj_alias)?; + write!(f, " AS {from_obj_alias}")?; } write!(f, ")")?; } else if let Some(from_obj) = from_obj { // Standard data load - write!(f, " FROM {}{}", from_obj, stage_params)?; + write!(f, " FROM {from_obj}{stage_params}")?; if let Some(from_obj_alias) = from_obj_alias { write!(f, " AS {from_obj_alias}")?; } @@ -5221,24 +6040,24 @@ impl fmt::Display for Statement { write!(f, " FILES = ('{}')", display_separated(files, "', '"))?; } if let Some(pattern) = pattern { - write!(f, " PATTERN = '{}'", pattern)?; + write!(f, " PATTERN = '{pattern}'")?; } if let Some(partition) = partition { write!(f, " PARTITION BY {partition}")?; } if !file_format.options.is_empty() { - write!(f, " FILE_FORMAT=({})", file_format)?; + write!(f, " FILE_FORMAT=({file_format})")?; } if !copy_options.options.is_empty() { match kind { CopyIntoSnowflakeKind::Table => { - write!(f, " COPY_OPTIONS=({})", copy_options)? + write!(f, " COPY_OPTIONS=({copy_options})")? 
} CopyIntoSnowflakeKind::Location => write!(f, " {copy_options}")?, } } if let Some(validation_mode) = validation_mode { - write!(f, " VALIDATION_MODE = {}", validation_mode)?; + write!(f, " VALIDATION_MODE = {validation_mode}")?; } Ok(()) } @@ -5284,10 +6103,10 @@ impl fmt::Display for Statement { } => { write!(f, "OPTIMIZE TABLE {name}")?; if let Some(on_cluster) = on_cluster { - write!(f, " ON CLUSTER {on_cluster}", on_cluster = on_cluster)?; + write!(f, " ON CLUSTER {on_cluster}")?; } if let Some(partition) = partition { - write!(f, " {partition}", partition = partition)?; + write!(f, " {partition}")?; } if *include_final { write!(f, " FINAL")?; @@ -5332,10 +6151,10 @@ impl fmt::Display for Statement { } Ok(()) } - + Statement::Print(s) => write!(f, "{s}"), + Statement::Return(r) => write!(f, "{r}"), Statement::List(command) => write!(f, "LIST {command}"), Statement::Remove(command) => write!(f, "REMOVE {command}"), - Statement::SetSessionParam(kind) => write!(f, "SET {kind}"), } } } @@ -5399,6 +6218,28 @@ impl fmt::Display for SequenceOptions { } } +/// Assignment for a `SET` statement (name [=|TO] value) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct SetAssignment { + pub scope: Option, + pub name: ObjectName, + pub value: Expr, +} + +impl fmt::Display for SetAssignment { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}{} = {}", + self.scope.map(|s| format!("{s}")).unwrap_or_default(), + self.name, + self.value + ) + } +} + /// Target of a `TRUNCATE TABLE` command /// /// Note this is its own struct because `visit_relation` requires an `ObjectName` (not a `Vec`) @@ -5409,10 +6250,17 @@ pub struct TruncateTableTarget { /// name of the table being truncated #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub name: ObjectName, + /// Postgres-specific option + /// [ 
TRUNCATE TABLE ONLY ] + /// + pub only: bool, } impl fmt::Display for TruncateTableTarget { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.only { + write!(f, "ONLY ")?; + }; write!(f, "{}", self.name) } } @@ -5681,6 +6529,28 @@ impl fmt::Display for FetchDirection { } } +/// The "position" for a FETCH statement. +/// +/// [MsSql](https://learn.microsoft.com/en-us/sql/t-sql/language-elements/fetch-transact-sql) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FetchPosition { + From, + In, +} + +impl fmt::Display for FetchPosition { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FetchPosition::From => f.write_str("FROM")?, + FetchPosition::In => f.write_str("IN")?, + }; + + Ok(()) + } +} + /// A privilege on a database object (table, sequence, etc.). #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -5704,6 +6574,9 @@ pub enum Action { }, Delete, EvolveSchema, + Exec { + obj_type: Option, + }, Execute { obj_type: Option, }, @@ -5719,10 +6592,10 @@ pub enum Action { ManageReleases, ManageVersions, Modify { - modify_type: ActionModifyType, + modify_type: Option, }, Monitor { - monitor_type: ActionMonitorType, + monitor_type: Option, }, Operate, OverrideShareRestrictions, @@ -5755,7 +6628,7 @@ impl fmt::Display for Action { match self { Action::AddSearchOptimization => f.write_str("ADD SEARCH OPTIMIZATION")?, Action::Apply { apply_type } => write!(f, "APPLY {apply_type}")?, - Action::ApplyBudget => f.write_str("APPLY BUDGET")?, + Action::ApplyBudget => f.write_str("APPLYBUDGET")?, Action::AttachListing => f.write_str("ATTACH LISTING")?, Action::AttachPolicy => f.write_str("ATTACH POLICY")?, Action::Audit => f.write_str("AUDIT")?, @@ -5770,6 +6643,12 @@ impl fmt::Display for Action { Action::DatabaseRole { 
role } => write!(f, "DATABASE ROLE {role}")?, Action::Delete => f.write_str("DELETE")?, Action::EvolveSchema => f.write_str("EVOLVE SCHEMA")?, + Action::Exec { obj_type } => { + f.write_str("EXEC")?; + if let Some(obj_type) = obj_type { + write!(f, " {obj_type}")? + } + } Action::Execute { obj_type } => { f.write_str("EXECUTE")?; if let Some(obj_type) = obj_type { @@ -5783,8 +6662,18 @@ impl fmt::Display for Action { Action::Manage { manage_type } => write!(f, "MANAGE {manage_type}")?, Action::ManageReleases => f.write_str("MANAGE RELEASES")?, Action::ManageVersions => f.write_str("MANAGE VERSIONS")?, - Action::Modify { modify_type } => write!(f, "MODIFY {modify_type}")?, - Action::Monitor { monitor_type } => write!(f, "MONITOR {monitor_type}")?, + Action::Modify { modify_type } => { + write!(f, "MODIFY")?; + if let Some(modify_type) = modify_type { + write!(f, " {modify_type}")?; + } + } + Action::Monitor { monitor_type } => { + write!(f, "MONITOR")?; + if let Some(monitor_type) = monitor_type { + write!(f, " {monitor_type}")? 
+ } + } Action::Operate => f.write_str("OPERATE")?, Action::OverrideShareRestrictions => f.write_str("OVERRIDE SHARE RESTRICTIONS")?, Action::Ownership => f.write_str("OWNERSHIP")?, @@ -6073,7 +6962,7 @@ impl fmt::Display for GranteeName { match self { GranteeName::ObjectName(name) => name.fmt(f), GranteeName::UserHost { user, host } => { - write!(f, "{}@{}", user, host) + write!(f, "{user}@{host}") } } } @@ -6088,6 +6977,24 @@ pub enum GrantObjects { AllSequencesInSchema { schemas: Vec }, /// Grant privileges on `ALL TABLES IN SCHEMA [, ...]` AllTablesInSchema { schemas: Vec }, + /// Grant privileges on `ALL VIEWS IN SCHEMA [, ...]` + AllViewsInSchema { schemas: Vec }, + /// Grant privileges on `ALL MATERIALIZED VIEWS IN SCHEMA [, ...]` + AllMaterializedViewsInSchema { schemas: Vec }, + /// Grant privileges on `ALL EXTERNAL TABLES IN SCHEMA [, ...]` + AllExternalTablesInSchema { schemas: Vec }, + /// Grant privileges on `FUTURE SCHEMAS IN DATABASE [, ...]` + FutureSchemasInDatabase { databases: Vec }, + /// Grant privileges on `FUTURE TABLES IN SCHEMA [, ...]` + FutureTablesInSchema { schemas: Vec }, + /// Grant privileges on `FUTURE VIEWS IN SCHEMA [, ...]` + FutureViewsInSchema { schemas: Vec }, + /// Grant privileges on `FUTURE EXTERNAL TABLES IN SCHEMA [, ...]` + FutureExternalTablesInSchema { schemas: Vec }, + /// Grant privileges on `FUTURE MATERIALIZED VIEWS IN SCHEMA [, ...]` + FutureMaterializedViewsInSchema { schemas: Vec }, + /// Grant privileges on `FUTURE SEQUENCES IN SCHEMA [, ...]` + FutureSequencesInSchema { schemas: Vec }, /// Grant privileges on specific databases Databases(Vec), /// Grant privileges on specific schemas @@ -6102,6 +7009,39 @@ pub enum GrantObjects { Warehouses(Vec), /// Grant privileges on specific integrations Integrations(Vec), + /// Grant privileges on resource monitors + ResourceMonitors(Vec), + /// Grant privileges on users + Users(Vec), + /// Grant privileges on compute pools + ComputePools(Vec), + /// Grant privileges on 
connections + Connections(Vec), + /// Grant privileges on failover groups + FailoverGroup(Vec), + /// Grant privileges on replication group + ReplicationGroup(Vec), + /// Grant privileges on external volumes + ExternalVolumes(Vec), + /// Grant privileges on a procedure. In dialects that + /// support overloading, the argument types must be specified. + /// + /// For example: + /// `GRANT USAGE ON PROCEDURE foo(varchar) TO ROLE role1` + Procedure { + name: ObjectName, + arg_types: Vec, + }, + + /// Grant privileges on a function. In dialects that + /// support overloading, the argument types must be specified. + /// + /// For example: + /// `GRANT USAGE ON FUNCTION foo(varchar) TO ROLE role1` + Function { + name: ObjectName, + arg_types: Vec, + }, } impl fmt::Display for GrantObjects { @@ -6142,10 +7082,139 @@ impl fmt::Display for GrantObjects { display_comma_separated(schemas) ) } + GrantObjects::AllExternalTablesInSchema { schemas } => { + write!( + f, + "ALL EXTERNAL TABLES IN SCHEMA {}", + display_comma_separated(schemas) + ) + } + GrantObjects::AllViewsInSchema { schemas } => { + write!( + f, + "ALL VIEWS IN SCHEMA {}", + display_comma_separated(schemas) + ) + } + GrantObjects::AllMaterializedViewsInSchema { schemas } => { + write!( + f, + "ALL MATERIALIZED VIEWS IN SCHEMA {}", + display_comma_separated(schemas) + ) + } + GrantObjects::FutureSchemasInDatabase { databases } => { + write!( + f, + "FUTURE SCHEMAS IN DATABASE {}", + display_comma_separated(databases) + ) + } + GrantObjects::FutureTablesInSchema { schemas } => { + write!( + f, + "FUTURE TABLES IN SCHEMA {}", + display_comma_separated(schemas) + ) + } + GrantObjects::FutureExternalTablesInSchema { schemas } => { + write!( + f, + "FUTURE EXTERNAL TABLES IN SCHEMA {}", + display_comma_separated(schemas) + ) + } + GrantObjects::FutureViewsInSchema { schemas } => { + write!( + f, + "FUTURE VIEWS IN SCHEMA {}", + display_comma_separated(schemas) + ) + } + GrantObjects::FutureMaterializedViewsInSchema { 
schemas } => { + write!( + f, + "FUTURE MATERIALIZED VIEWS IN SCHEMA {}", + display_comma_separated(schemas) + ) + } + GrantObjects::FutureSequencesInSchema { schemas } => { + write!( + f, + "FUTURE SEQUENCES IN SCHEMA {}", + display_comma_separated(schemas) + ) + } + GrantObjects::ResourceMonitors(objects) => { + write!(f, "RESOURCE MONITOR {}", display_comma_separated(objects)) + } + GrantObjects::Users(objects) => { + write!(f, "USER {}", display_comma_separated(objects)) + } + GrantObjects::ComputePools(objects) => { + write!(f, "COMPUTE POOL {}", display_comma_separated(objects)) + } + GrantObjects::Connections(objects) => { + write!(f, "CONNECTION {}", display_comma_separated(objects)) + } + GrantObjects::FailoverGroup(objects) => { + write!(f, "FAILOVER GROUP {}", display_comma_separated(objects)) + } + GrantObjects::ReplicationGroup(objects) => { + write!(f, "REPLICATION GROUP {}", display_comma_separated(objects)) + } + GrantObjects::ExternalVolumes(objects) => { + write!(f, "EXTERNAL VOLUME {}", display_comma_separated(objects)) + } + GrantObjects::Procedure { name, arg_types } => { + write!(f, "PROCEDURE {name}")?; + if !arg_types.is_empty() { + write!(f, "({})", display_comma_separated(arg_types))?; + } + Ok(()) + } + GrantObjects::Function { name, arg_types } => { + write!(f, "FUNCTION {name}")?; + if !arg_types.is_empty() { + write!(f, "({})", display_comma_separated(arg_types))?; + } + Ok(()) + } } } } +/// A `DENY` statement +/// +/// [MsSql](https://learn.microsoft.com/en-us/sql/t-sql/statements/deny-transact-sql) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct DenyStatement { + pub privileges: Privileges, + pub objects: GrantObjects, + pub grantees: Vec, + pub granted_by: Option, + pub cascade: Option, +} + +impl fmt::Display for DenyStatement { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result 
{ + write!(f, "DENY {}", self.privileges)?; + write!(f, " ON {}", self.objects)?; + if !self.grantees.is_empty() { + write!(f, " TO {}", display_comma_separated(&self.grantees))?; + } + if let Some(cascade) = &self.cascade { + write!(f, " {cascade}")?; + } + if let Some(granted_by) = &self.granted_by { + write!(f, " AS {granted_by}")?; + } + Ok(()) + } +} + /// SQL assignment `foo = expr` as used in SQLUpdate #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -6177,7 +7246,7 @@ pub enum AssignmentTarget { impl fmt::Display for AssignmentTarget { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - AssignmentTarget::ColumnName(column) => write!(f, "{}", column), + AssignmentTarget::ColumnName(column) => write!(f, "{column}"), AssignmentTarget::Tuple(columns) => write!(f, "({})", display_comma_separated(columns)), } } @@ -6301,6 +7370,19 @@ impl fmt::Display for CloseCursor { } } +/// A Drop Domain statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct DropDomain { + /// Whether to drop the domain if it exists + pub if_exists: bool, + /// The name of the domain to drop + pub name: ObjectName, + /// The behavior to apply when dropping the domain + pub drop_behavior: Option, +} + /// A function call #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -6377,7 +7459,8 @@ impl fmt::Display for Function { } if let Some(o) = &self.over { - write!(f, " OVER {o}")?; + f.write_str(" OVER ")?; + o.fmt(f)?; } if self.uses_odbc_syntax { @@ -6408,8 +7491,8 @@ impl fmt::Display for FunctionArguments { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { FunctionArguments::None => Ok(()), - FunctionArguments::Subquery(query) => write!(f, "({})", 
query), - FunctionArguments::List(args) => write!(f, "({})", args), + FunctionArguments::Subquery(query) => write!(f, "({query})"), + FunctionArguments::List(args) => write!(f, "({args})"), } } } @@ -6430,7 +7513,7 @@ pub struct FunctionArgumentList { impl fmt::Display for FunctionArgumentList { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { if let Some(duplicate_treatment) = self.duplicate_treatment { - write!(f, "{} ", duplicate_treatment)?; + write!(f, "{duplicate_treatment} ")?; } write!(f, "{}", display_comma_separated(&self.args))?; if !self.clauses.is_empty() { @@ -6490,7 +7573,7 @@ impl fmt::Display for FunctionArgumentClause { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { FunctionArgumentClause::IgnoreOrRespectNulls(null_treatment) => { - write!(f, "{}", null_treatment) + write!(f, "{null_treatment}") } FunctionArgumentClause::OrderBy(order_by) => { write!(f, "ORDER BY {}", display_comma_separated(order_by)) @@ -6663,6 +7746,7 @@ impl fmt::Display for HavingBoundKind { pub enum ObjectType { Table, View, + MaterializedView, Index, Schema, Database, @@ -6677,6 +7761,7 @@ impl fmt::Display for ObjectType { f.write_str(match self { ObjectType::Table => "TABLE", ObjectType::View => "VIEW", + ObjectType::MaterializedView => "MATERIALIZED VIEW", ObjectType::Index => "INDEX", ObjectType::Schema => "SCHEMA", ObjectType::Database => "DATABASE", @@ -6927,17 +8012,29 @@ pub enum SqlOption { range_direction: Option, for_values: Vec, }, + /// Comment parameter (supports `=` and no `=` syntax) + Comment(CommentDef), + /// MySQL TableSpace option + /// + TableSpace(TablespaceOption), + /// An option representing a key value pair, where the value is a parenthesized list and with an optional name + /// e.g. + /// + /// UNION = (tbl_name\[,tbl_name\]...) 
+ /// ENGINE = ReplicatedMergeTree('/table_name','{replica}', ver) + /// ENGINE = SummingMergeTree(\[columns\]) + NamedParenthesizedList(NamedParenthesizedList), } impl fmt::Display for SqlOption { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - SqlOption::Clustered(c) => write!(f, "{}", c), + SqlOption::Clustered(c) => write!(f, "{c}"), SqlOption::Ident(ident) => { - write!(f, "{}", ident) + write!(f, "{ident}") } SqlOption::KeyValue { key: name, value } => { - write!(f, "{} = {}", name, value) + write!(f, "{name} = {value}") } SqlOption::Partition { column_name, @@ -6958,10 +8055,54 @@ impl fmt::Display for SqlOption { display_comma_separated(for_values) ) } + SqlOption::TableSpace(tablespace_option) => { + write!(f, "TABLESPACE {}", tablespace_option.name)?; + match tablespace_option.storage { + Some(StorageType::Disk) => write!(f, " STORAGE DISK"), + Some(StorageType::Memory) => write!(f, " STORAGE MEMORY"), + None => Ok(()), + } + } + SqlOption::Comment(comment) => match comment { + CommentDef::WithEq(comment) => { + write!(f, "COMMENT = '{comment}'") + } + CommentDef::WithoutEq(comment) => { + write!(f, "COMMENT '{comment}'") + } + }, + SqlOption::NamedParenthesizedList(value) => { + write!(f, "{} = ", value.key)?; + if let Some(key) = &value.name { + write!(f, "{key}")?; + } + if !value.values.is_empty() { + write!(f, "({})", display_comma_separated(&value.values))? 
+ } + Ok(()) + } } } } +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum StorageType { + Disk, + Memory, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// MySql TableSpace option +/// +pub struct TablespaceOption { + pub name: String, + pub storage: Option, +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -6976,6 +8117,70 @@ impl fmt::Display for SecretOption { } } +/// A `CREATE SERVER` statement. +/// +/// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createserver.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CreateServerStatement { + pub name: ObjectName, + pub if_not_exists: bool, + pub server_type: Option, + pub version: Option, + pub foreign_data_wrapper: ObjectName, + pub options: Option>, +} + +impl fmt::Display for CreateServerStatement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let CreateServerStatement { + name, + if_not_exists, + server_type, + version, + foreign_data_wrapper, + options, + } = self; + + write!( + f, + "CREATE SERVER {if_not_exists}{name} ", + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + )?; + + if let Some(st) = server_type { + write!(f, "TYPE {st} ")?; + } + + if let Some(v) = version { + write!(f, "VERSION {v} ")?; + } + + write!(f, "FOREIGN DATA WRAPPER {foreign_data_wrapper}")?; + + if let Some(o) = options { + write!(f, " OPTIONS ({o})", o = display_comma_separated(o))?; + } + + 
Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CreateServerOption { + pub key: Ident, + pub value: Ident, +} + +impl fmt::Display for CreateServerOption { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} {}", self.key, self.value) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -6990,7 +8195,7 @@ impl fmt::Display for AttachDuckDBDatabaseOption { AttachDuckDBDatabaseOption::ReadOnly(Some(true)) => write!(f, "READ_ONLY true"), AttachDuckDBDatabaseOption::ReadOnly(Some(false)) => write!(f, "READ_ONLY false"), AttachDuckDBDatabaseOption::ReadOnly(None) => write!(f, "READ_ONLY"), - AttachDuckDBDatabaseOption::Type(t) => write!(f, "TYPE {}", t), + AttachDuckDBDatabaseOption::Type(t) => write!(f, "TYPE {t}"), } } } @@ -7210,7 +8415,7 @@ impl fmt::Display for CopyTarget { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use CopyTarget::*; match self { - Stdin { .. 
} => write!(f, "STDIN"), + Stdin => write!(f, "STDIN"), Stdout => write!(f, "STDOUT"), File { filename } => write!(f, "'{}'", value::escape_single_quote_string(filename)), Program { command } => write!( @@ -7538,6 +8743,35 @@ impl Display for MergeClause { } } +/// A Output Clause in the end of a 'MERGE' Statement +/// +/// Example: +/// OUTPUT $action, deleted.* INTO dbo.temp_products; +/// [mssql](https://learn.microsoft.com/en-us/sql/t-sql/queries/output-clause-transact-sql) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct OutputClause { + pub select_items: Vec, + pub into_table: SelectInto, +} + +impl fmt::Display for OutputClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let OutputClause { + select_items, + into_table, + } = self; + + write!( + f, + "OUTPUT {} {}", + display_comma_separated(select_items), + into_table + ) + } +} + #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -7615,30 +8849,30 @@ impl fmt::Display for FlushLocation { } } -/// Optional context modifier for statements that can be or `LOCAL`, or `SESSION`. +/// Optional context modifier for statements that can be or `LOCAL`, `GLOBAL`, or `SESSION`. #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ContextModifier { - /// No context defined. Each dialect defines the default in this scenario. - None, /// `LOCAL` identifier, usually related to transactional states. 
Local, /// `SESSION` identifier Session, + /// `GLOBAL` identifier + Global, } impl fmt::Display for ContextModifier { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Self::None => { - write!(f, "") - } Self::Local => { - write!(f, " LOCAL") + write!(f, "LOCAL ") } Self::Session => { - write!(f, " SESSION") + write!(f, "SESSION ") + } + Self::Global => { + write!(f, "GLOBAL ") } } } @@ -7837,7 +9071,8 @@ impl fmt::Display for FunctionDeterminismSpecifier { /// where within the statement, the body shows up. /// /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11 -/// [Postgres]: https://www.postgresql.org/docs/15/sql-createfunction.html +/// [PostgreSQL]: https://www.postgresql.org/docs/15/sql-createfunction.html +/// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -7866,6 +9101,22 @@ pub enum CreateFunctionBody { /// /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11 AsAfterOptions(Expr), + /// Function body with statements before the `RETURN` keyword. + /// + /// Example: + /// ```sql + /// CREATE FUNCTION my_scalar_udf(a INT, b INT) + /// RETURNS INT + /// AS + /// BEGIN + /// DECLARE c INT; + /// SET c = a + b; + /// RETURN c; + /// END + /// ``` + /// + /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql + AsBeginEnd(BeginEndStatements), /// Function body expression using the 'RETURN' keyword. 
/// /// Example: @@ -7875,8 +9126,32 @@ pub enum CreateFunctionBody { /// RETURN a + b; /// ``` /// - /// [Postgres]: https://www.postgresql.org/docs/current/sql-createfunction.html + /// [PostgreSQL]: https://www.postgresql.org/docs/current/sql-createfunction.html Return(Expr), + + /// Function body expression using the 'AS RETURN' keywords + /// + /// Example: + /// ```sql + /// CREATE FUNCTION myfunc(a INT, b INT) + /// RETURNS TABLE + /// AS RETURN (SELECT a + b AS sum); + /// ``` + /// + /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql + AsReturnExpr(Expr), + + /// Function body expression using the 'AS RETURN' keywords, with an un-parenthesized SELECT query + /// + /// Example: + /// ```sql + /// CREATE FUNCTION myfunc(a INT, b INT) + /// RETURNS TABLE + /// AS RETURN SELECT a + b AS sum; + /// ``` + /// + /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql?view=sql-server-ver16#select_stmt + AsReturnSelect(Select), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -8181,27 +9456,20 @@ impl Display for CreateViewParams { } } -/// Engine of DB. Some warehouse has parameters of engine, e.g. [clickhouse] -/// -/// [clickhouse]: https://clickhouse.com/docs/en/engines/table-engines #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct TableEngine { - pub name: String, - pub parameters: Option>, -} - -impl Display for TableEngine { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.name)?; - - if let Some(parameters) = self.parameters.as_ref() { - write!(f, "({})", display_comma_separated(parameters))?; - } - - Ok(()) - } +/// Key/Value, where the value is a (optionally named) list of identifiers +/// +/// ```sql +/// UNION = (tbl_name[,tbl_name]...) 
+/// ENGINE = ReplicatedMergeTree('/table_name','{replica}', ver) +/// ENGINE = SummingMergeTree([columns]) +/// ``` +pub struct NamedParenthesizedList { + pub key: Ident, + pub name: Option, + pub values: Vec, } /// Snowflake `WITH ROW ACCESS POLICY policy_name ON (identifier, ...)` @@ -8240,12 +9508,12 @@ impl Display for RowAccessPolicy { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Tag { - pub key: Ident, + pub key: ObjectName, pub value: String, } impl Tag { - pub fn new(key: Ident, value: String) -> Self { + pub fn new(key: ObjectName, value: String) -> Self { Self { key, value } } } @@ -8265,18 +9533,12 @@ pub enum CommentDef { /// Does not include `=` when printing the comment, as `COMMENT 'comment'` WithEq(String), WithoutEq(String), - // For Hive dialect, the table comment is after the column definitions without `=`, - // so we need to add an extra variant to allow to identify this case when displaying. 
- // [Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable) - AfterColumnDefsWithoutEq(String), } impl Display for CommentDef { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - CommentDef::WithEq(comment) - | CommentDef::WithoutEq(comment) - | CommentDef::AfterColumnDefsWithoutEq(comment) => write!(f, "{comment}"), + CommentDef::WithEq(comment) | CommentDef::WithoutEq(comment) => write!(f, "{comment}"), } } } @@ -8456,10 +9718,10 @@ impl fmt::Display for ShowStatementIn { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.clause)?; if let Some(parent_type) = &self.parent_type { - write!(f, " {}", parent_type)?; + write!(f, " {parent_type}")?; } if let Some(parent_name) = &self.parent_name { - write!(f, " {}", parent_name)?; + write!(f, " {parent_name}")?; } Ok(()) } @@ -8540,7 +9802,7 @@ impl fmt::Display for TableObject { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::TableName(table_name) => write!(f, "{table_name}"), - Self::TableFunction(func) => write!(f, "FUNCTION {}", func), + Self::TableFunction(func) => write!(f, "FUNCTION {func}"), } } } @@ -8701,8 +9963,107 @@ pub enum CopyIntoSnowflakeKind { Location, } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct PrintStatement { + pub message: Box, +} + +impl fmt::Display for PrintStatement { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "PRINT {}", self.message) + } +} + +/// Represents a `Return` statement. 
+/// +/// [MsSql triggers](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-trigger-transact-sql) +/// [MsSql functions](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ReturnStatement { + pub value: Option, +} + +impl fmt::Display for ReturnStatement { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match &self.value { + Some(ReturnStatementValue::Expr(expr)) => write!(f, "RETURN {expr}"), + None => write!(f, "RETURN"), + } + } +} + +/// Variants of a `RETURN` statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ReturnStatementValue { + Expr(Expr), +} + +/// Represents an `OPEN` statement. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct OpenStatement { + /// Cursor name + pub cursor_name: Ident, +} + +impl fmt::Display for OpenStatement { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "OPEN {}", self.cursor_name) + } +} + +/// Specifies Include / Exclude NULL within UNPIVOT command. 
+/// For example +/// `UNPIVOT (column1 FOR new_column IN (col3, col4, col5, col6))` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum NullInclusion { + IncludeNulls, + ExcludeNulls, +} + +impl fmt::Display for NullInclusion { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + NullInclusion::IncludeNulls => write!(f, "INCLUDE NULLS"), + NullInclusion::ExcludeNulls => write!(f, "EXCLUDE NULLS"), + } + } +} + +/// Checks membership of a value in a JSON array +/// +/// Syntax: +/// ```sql +/// MEMBER OF() +/// ``` +/// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/json-search-functions.html#operator_member-of) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MemberOf { + pub value: Box, + pub array: Box, +} + +impl fmt::Display for MemberOf { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} MEMBER OF({})", self.value, self.array) + } +} + #[cfg(test)] mod tests { + use crate::tokenizer::Location; + use super::*; #[test] @@ -8998,4 +10359,16 @@ mod tests { test_steps(OneOrManyWithParens::Many(vec![2]), vec![2], 3); test_steps(OneOrManyWithParens::Many(vec![3, 4]), vec![3, 4], 4); } + + // Tests that the position in the code of an `Ident` does not affect its + // ordering. 
+ #[test] + fn test_ident_ord() { + let mut a = Ident::with_span(Span::new(Location::new(1, 1), Location::new(1, 1)), "a"); + let mut b = Ident::with_span(Span::new(Location::new(2, 2), Location::new(2, 2)), "b"); + + assert!(a < b); + std::mem::swap(&mut a.span, &mut b.span); + assert!(a < b); + } } diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 66a35fee..d0bb05e3 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -139,6 +139,11 @@ pub enum BinaryOperator { DuckIntegerDivide, /// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division MyIntegerDivide, + /// MATCH operator, e.g. `a MATCH b` (SQLite-specific) + /// See + Match, + /// REGEXP operator, e.g. `a REGEXP b` (SQLite-specific) + Regexp, /// Support for custom operators (such as Postgres custom operators) Custom(String), /// Bitwise XOR, e.g. `a # b` (PostgreSQL-specific) @@ -321,6 +326,9 @@ pub enum BinaryOperator { /// `~=` Same as? (PostgreSQL/Redshift geometric operator) /// See TildeEq, + /// ':=' Assignment Operator + /// See + Assignment, } impl fmt::Display for BinaryOperator { @@ -347,6 +355,8 @@ impl fmt::Display for BinaryOperator { BinaryOperator::BitwiseXor => f.write_str("^"), BinaryOperator::DuckIntegerDivide => f.write_str("//"), BinaryOperator::MyIntegerDivide => f.write_str("DIV"), + BinaryOperator::Match => f.write_str("MATCH"), + BinaryOperator::Regexp => f.write_str("REGEXP"), BinaryOperator::Custom(s) => f.write_str(s), BinaryOperator::PGBitwiseXor => f.write_str("#"), BinaryOperator::PGBitwiseShiftLeft => f.write_str("<<"), @@ -394,6 +404,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::QuestionDoublePipe => f.write_str("?||"), BinaryOperator::At => f.write_str("@"), BinaryOperator::TildeEq => f.write_str("~="), + BinaryOperator::Assignment => f.write_str(":="), } } } diff --git a/src/ast/query.rs b/src/ast/query.rs index bed99111..7ffb64d9 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -27,6 +27,7 
@@ use sqlparser_derive::{Visit, VisitMut}; use crate::{ ast::*, + display_utils::{indented_list, SpaceOrNewline}, tokenizer::{Token, TokenWithSpan}, }; @@ -43,14 +44,8 @@ pub struct Query { pub body: Box, /// ORDER BY pub order_by: Option, - /// `LIMIT { | ALL }` - pub limit: Option, - - /// `LIMIT { } BY { ,,... } }` - pub limit_by: Vec, - - /// `OFFSET [ { ROW | ROWS } ]` - pub offset: Option, + /// `LIMIT ... OFFSET ... | LIMIT , ` + pub limit_clause: Option, /// `FETCH { FIRST | NEXT } [ PERCENT ] { ROW | ROWS } | { ONLY | WITH TIES }` pub fetch: Option, /// `FOR { UPDATE | SHARE } [ OF table_name ] [ SKIP LOCKED | NOWAIT ]` @@ -68,40 +63,49 @@ pub struct Query { /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/format) /// (ClickHouse-specific) pub format_clause: Option, + + /// Pipe operator + pub pipe_operators: Vec, } impl fmt::Display for Query { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { if let Some(ref with) = self.with { - write!(f, "{with} ")?; + with.fmt(f)?; + SpaceOrNewline.fmt(f)?; } - write!(f, "{}", self.body)?; + self.body.fmt(f)?; if let Some(ref order_by) = self.order_by { - write!(f, " {order_by}")?; + f.write_str(" ")?; + order_by.fmt(f)?; } - if let Some(ref limit) = self.limit { - write!(f, " LIMIT {limit}")?; - } - if let Some(ref offset) = self.offset { - write!(f, " {offset}")?; - } - if !self.limit_by.is_empty() { - write!(f, " BY {}", display_separated(&self.limit_by, ", "))?; + + if let Some(ref limit_clause) = self.limit_clause { + limit_clause.fmt(f)?; } if let Some(ref settings) = self.settings { - write!(f, " SETTINGS {}", display_comma_separated(settings))?; + f.write_str(" SETTINGS ")?; + display_comma_separated(settings).fmt(f)?; } if let Some(ref fetch) = self.fetch { - write!(f, " {fetch}")?; + f.write_str(" ")?; + fetch.fmt(f)?; } if !self.locks.is_empty() { - write!(f, " {}", display_separated(&self.locks, " "))?; + f.write_str(" ")?; + display_separated(&self.locks, " ").fmt(f)?; 
} if let Some(ref for_clause) = self.for_clause { - write!(f, " {}", for_clause)?; + f.write_str(" ")?; + for_clause.fmt(f)?; } if let Some(ref format) = self.format_clause { - write!(f, " {}", format)?; + f.write_str(" ")?; + format.fmt(f)?; + } + for pipe_operator in &self.pipe_operators { + f.write_str(" |> ")?; + pipe_operator.fmt(f)?; } Ok(()) } @@ -156,6 +160,7 @@ pub enum SetExpr { Values(Values), Insert(Statement), Update(Statement), + Delete(Statement), Table(Box), } @@ -173,28 +178,39 @@ impl SetExpr { impl fmt::Display for SetExpr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - SetExpr::Select(s) => write!(f, "{s}"), - SetExpr::Query(q) => write!(f, "({q})"), - SetExpr::Values(v) => write!(f, "{v}"), - SetExpr::Insert(v) => write!(f, "{v}"), - SetExpr::Update(v) => write!(f, "{v}"), - SetExpr::Table(t) => write!(f, "{t}"), + SetExpr::Select(s) => s.fmt(f), + SetExpr::Query(q) => { + f.write_str("(")?; + q.fmt(f)?; + f.write_str(")") + } + SetExpr::Values(v) => v.fmt(f), + SetExpr::Insert(v) => v.fmt(f), + SetExpr::Update(v) => v.fmt(f), + SetExpr::Delete(v) => v.fmt(f), + SetExpr::Table(t) => t.fmt(f), SetExpr::SetOperation { left, right, op, set_quantifier, } => { - write!(f, "{left} {op}")?; + left.fmt(f)?; + SpaceOrNewline.fmt(f)?; + op.fmt(f)?; match set_quantifier { SetQuantifier::All | SetQuantifier::Distinct | SetQuantifier::ByName | SetQuantifier::AllByName - | SetQuantifier::DistinctByName => write!(f, " {set_quantifier}")?, - SetQuantifier::None => write!(f, "{set_quantifier}")?, + | SetQuantifier::DistinctByName => { + f.write_str(" ")?; + set_quantifier.fmt(f)?; + } + SetQuantifier::None => {} } - write!(f, " {right}")?; + SpaceOrNewline.fmt(f)?; + right.fmt(f)?; Ok(()) } } @@ -245,7 +261,7 @@ impl fmt::Display for SetQuantifier { SetQuantifier::ByName => write!(f, "BY NAME"), SetQuantifier::AllByName => write!(f, "ALL BY NAME"), SetQuantifier::DistinctByName => write!(f, "DISTINCT BY NAME"), - SetQuantifier::None => 
write!(f, ""), + SetQuantifier::None => Ok(()), } } } @@ -305,6 +321,11 @@ pub struct Select { pub top_before_distinct: bool, /// projection expressions pub projection: Vec, + /// Excluded columns from the projection expression which are not specified + /// directly after a wildcard. + /// + /// [Redshift](https://docs.aws.amazon.com/redshift/latest/dg/r_EXCLUDE_list.html) + pub exclude: Option, /// INTO pub into: Option, /// FROM @@ -325,7 +346,7 @@ pub struct Select { /// DISTRIBUTE BY (Hive) pub distribute_by: Vec, /// SORT BY (Hive) - pub sort_by: Vec, + pub sort_by: Vec, /// HAVING pub having: Option, /// WINDOW AS @@ -360,90 +381,126 @@ impl fmt::Display for Select { } if let Some(value_table_mode) = self.value_table_mode { - write!(f, " {value_table_mode}")?; + f.write_str(" ")?; + value_table_mode.fmt(f)?; } if let Some(ref top) = self.top { if self.top_before_distinct { - write!(f, " {top}")?; + f.write_str(" ")?; + top.fmt(f)?; } } if let Some(ref distinct) = self.distinct { - write!(f, " {distinct}")?; + f.write_str(" ")?; + distinct.fmt(f)?; } if let Some(ref top) = self.top { if !self.top_before_distinct { - write!(f, " {top}")?; + f.write_str(" ")?; + top.fmt(f)?; } } if !self.projection.is_empty() { - write!(f, " {}", display_comma_separated(&self.projection))?; + indented_list(f, &self.projection)?; + } + + if let Some(exclude) = &self.exclude { + write!(f, " {exclude}")?; } if let Some(ref into) = self.into { - write!(f, " {into}")?; + f.write_str(" ")?; + into.fmt(f)?; } if self.flavor == SelectFlavor::Standard && !self.from.is_empty() { - write!(f, " FROM {}", display_comma_separated(&self.from))?; + SpaceOrNewline.fmt(f)?; + f.write_str("FROM")?; + indented_list(f, &self.from)?; } if !self.lateral_views.is_empty() { for lv in &self.lateral_views { - write!(f, "{lv}")?; + lv.fmt(f)?; } } if let Some(ref prewhere) = self.prewhere { - write!(f, " PREWHERE {prewhere}")?; + f.write_str(" PREWHERE ")?; + prewhere.fmt(f)?; } if let Some(ref selection) 
= self.selection { - write!(f, " WHERE {selection}")?; + SpaceOrNewline.fmt(f)?; + f.write_str("WHERE")?; + SpaceOrNewline.fmt(f)?; + Indent(selection).fmt(f)?; } match &self.group_by { - GroupByExpr::All(_) => write!(f, " {}", self.group_by)?, + GroupByExpr::All(_) => { + SpaceOrNewline.fmt(f)?; + self.group_by.fmt(f)?; + } GroupByExpr::Expressions(exprs, _) => { if !exprs.is_empty() { - write!(f, " {}", self.group_by)? + SpaceOrNewline.fmt(f)?; + self.group_by.fmt(f)?; } } } if !self.cluster_by.is_empty() { - write!( - f, - " CLUSTER BY {}", - display_comma_separated(&self.cluster_by) - )?; + SpaceOrNewline.fmt(f)?; + f.write_str("CLUSTER BY")?; + SpaceOrNewline.fmt(f)?; + Indent(display_comma_separated(&self.cluster_by)).fmt(f)?; } if !self.distribute_by.is_empty() { - write!( - f, - " DISTRIBUTE BY {}", - display_comma_separated(&self.distribute_by) - )?; + SpaceOrNewline.fmt(f)?; + f.write_str("DISTRIBUTE BY")?; + SpaceOrNewline.fmt(f)?; + display_comma_separated(&self.distribute_by).fmt(f)?; } if !self.sort_by.is_empty() { - write!(f, " SORT BY {}", display_comma_separated(&self.sort_by))?; + SpaceOrNewline.fmt(f)?; + f.write_str("SORT BY")?; + SpaceOrNewline.fmt(f)?; + Indent(display_comma_separated(&self.sort_by)).fmt(f)?; } if let Some(ref having) = self.having { - write!(f, " HAVING {having}")?; + SpaceOrNewline.fmt(f)?; + f.write_str("HAVING")?; + SpaceOrNewline.fmt(f)?; + Indent(having).fmt(f)?; } if self.window_before_qualify { if !self.named_window.is_empty() { - write!(f, " WINDOW {}", display_comma_separated(&self.named_window))?; + SpaceOrNewline.fmt(f)?; + f.write_str("WINDOW")?; + SpaceOrNewline.fmt(f)?; + display_comma_separated(&self.named_window).fmt(f)?; } if let Some(ref qualify) = self.qualify { - write!(f, " QUALIFY {qualify}")?; + SpaceOrNewline.fmt(f)?; + f.write_str("QUALIFY")?; + SpaceOrNewline.fmt(f)?; + qualify.fmt(f)?; } } else { if let Some(ref qualify) = self.qualify { - write!(f, " QUALIFY {qualify}")?; + SpaceOrNewline.fmt(f)?; 
+ f.write_str("QUALIFY")?; + SpaceOrNewline.fmt(f)?; + qualify.fmt(f)?; } if !self.named_window.is_empty() { - write!(f, " WINDOW {}", display_comma_separated(&self.named_window))?; + SpaceOrNewline.fmt(f)?; + f.write_str("WINDOW")?; + SpaceOrNewline.fmt(f)?; + display_comma_separated(&self.named_window).fmt(f)?; } } if let Some(ref connect_by) = self.connect_by { - write!(f, " {connect_by}")?; + SpaceOrNewline.fmt(f)?; + connect_by.fmt(f)?; } Ok(()) } @@ -549,12 +606,12 @@ pub struct With { impl fmt::Display for With { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "WITH {}{}", - if self.recursive { "RECURSIVE " } else { "" }, - display_comma_separated(&self.cte_tables) - ) + f.write_str("WITH ")?; + if self.recursive { + f.write_str("RECURSIVE ")?; + } + display_comma_separated(&self.cte_tables).fmt(f)?; + Ok(()) } } @@ -601,8 +658,24 @@ pub struct Cte { impl fmt::Display for Cte { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self.materialized.as_ref() { - None => write!(f, "{} AS ({})", self.alias, self.query)?, - Some(materialized) => write!(f, "{} AS {materialized} ({})", self.alias, self.query)?, + None => { + self.alias.fmt(f)?; + f.write_str(" AS (")?; + NewLine.fmt(f)?; + Indent(&self.query).fmt(f)?; + NewLine.fmt(f)?; + f.write_str(")")?; + } + Some(materialized) => { + self.alias.fmt(f)?; + f.write_str(" AS ")?; + materialized.fmt(f)?; + f.write_str(" (")?; + NewLine.fmt(f)?; + Indent(&self.query).fmt(f)?; + NewLine.fmt(f)?; + f.write_str(")")?; + } }; if let Some(ref fr) = self.from { write!(f, " FROM {fr}")?; @@ -915,18 +988,21 @@ impl fmt::Display for ReplaceSelectElement { impl fmt::Display for SelectItem { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use core::fmt::Write; match &self { - SelectItem::UnnamedExpr(expr) => write!(f, "{expr}"), - SelectItem::ExprWithAlias { expr, alias } => write!(f, "{expr} AS {alias}"), + SelectItem::UnnamedExpr(expr) => expr.fmt(f), + SelectItem::ExprWithAlias { 
expr, alias } => { + expr.fmt(f)?; + f.write_str(" AS ")?; + alias.fmt(f) + } SelectItem::QualifiedWildcard(kind, additional_options) => { - write!(f, "{kind}")?; - write!(f, "{additional_options}")?; - Ok(()) + kind.fmt(f)?; + additional_options.fmt(f) } SelectItem::Wildcard(additional_options) => { - write!(f, "*")?; - write!(f, "{additional_options}")?; - Ok(()) + f.write_char('*')?; + additional_options.fmt(f) } } } @@ -942,9 +1018,10 @@ pub struct TableWithJoins { impl fmt::Display for TableWithJoins { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.relation)?; + self.relation.fmt(f)?; for join in &self.joins { - write!(f, "{join}")?; + SpaceOrNewline.fmt(f)?; + join.fmt(f)?; } Ok(()) } @@ -979,7 +1056,7 @@ impl fmt::Display for ConnectBy { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Setting { pub key: Ident, - pub value: Value, + pub value: Expr, } impl fmt::Display for Setting { @@ -1013,6 +1090,26 @@ impl fmt::Display for ExprWithAlias { } } +/// An expression optionally followed by an alias and order by options. 
+/// +/// Example: +/// ```sql +/// 42 AS myint ASC +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ExprWithAliasAndOrderBy { + pub expr: ExprWithAlias, + pub order_by: OrderByOptions, +} + +impl fmt::Display for ExprWithAliasAndOrderBy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}{}", self.expr, self.order_by) + } +} + /// Arguments to a table-valued function #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1095,7 +1192,7 @@ impl fmt::Display for TableIndexHints { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{} {} ", self.hint_type, self.index_type)?; if let Some(for_clause) = &self.for_clause { - write!(f, "FOR {} ", for_clause)?; + write!(f, "FOR {for_clause} ")?; } write!(f, "({})", display_comma_separated(&self.index_names)) } @@ -1248,7 +1345,7 @@ pub enum TableFactor { /// /// Syntax: /// ```sql - /// table UNPIVOT(value FOR name IN (column1, [ column2, ... ])) [ alias ] + /// table UNPIVOT [ { INCLUDE | EXCLUDE } NULLS ] (value FOR name IN (column1, [ column2, ... ])) [ alias ] /// ``` /// /// See . @@ -1257,6 +1354,7 @@ pub enum TableFactor { value: Ident, name: Ident, columns: Vec, + null_inclusion: Option, alias: Option, }, /// A `MATCH_RECOGNIZE` operation on a table. @@ -1280,13 +1378,43 @@ pub enum TableFactor { symbols: Vec, alias: Option, }, + /// The `XMLTABLE` table-valued function. + /// Part of the SQL standard, supported by PostgreSQL, Oracle, and DB2. 
+ /// + /// + /// + /// ```sql + /// SELECT xmltable.* + /// FROM xmldata, + /// XMLTABLE('//ROWS/ROW' + /// PASSING data + /// COLUMNS id int PATH '@id', + /// ordinality FOR ORDINALITY, + /// "COUNTRY_NAME" text, + /// country_id text PATH 'COUNTRY_ID', + /// size_sq_km float PATH 'SIZE[@unit = "sq_km"]', + /// size_other text PATH 'concat(SIZE[@unit!="sq_km"], " ", SIZE[@unit!="sq_km"]/@unit)', + /// premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified' + /// ); + /// ```` + XmlTable { + /// Optional XMLNAMESPACES clause (empty if not present) + namespaces: Vec, + /// The row-generating XPath expression. + row_expression: Expr, + /// The PASSING clause specifying the document expression. + passing: XmlPassingClause, + /// The columns to be extracted from each generated row. + columns: Vec, + /// The alias for the table. + alias: Option, + }, } /// The table sample modifier options #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] - pub enum TableSampleKind { /// Table sample located before the table alias option BeforeTableAlias(Box), @@ -1340,7 +1468,7 @@ impl fmt::Display for TableSampleQuantity { } write!(f, "{}", self.value)?; if let Some(unit) = &self.unit { - write!(f, " {}", unit)?; + write!(f, " {unit}")?; } if self.parenthesized { write!(f, ")")?; @@ -1433,28 +1561,28 @@ impl fmt::Display for TableSampleBucket { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "BUCKET {} OUT OF {}", self.bucket, self.total)?; if let Some(on) = &self.on { - write!(f, " ON {}", on)?; + write!(f, " ON {on}")?; } Ok(()) } } impl fmt::Display for TableSample { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, " {}", self.modifier)?; + write!(f, "{}", self.modifier)?; if let Some(name) = &self.name { - write!(f, " {}", name)?; + write!(f, " {name}")?; } if let Some(quantity) = &self.quantity { - 
write!(f, " {}", quantity)?; + write!(f, " {quantity}")?; } if let Some(seed) = &self.seed { - write!(f, " {}", seed)?; + write!(f, " {seed}")?; } if let Some(bucket) = &self.bucket { - write!(f, " ({})", bucket)?; + write!(f, " ({bucket})")?; } if let Some(offset) = &self.offset { - write!(f, " OFFSET {}", offset)?; + write!(f, " OFFSET {offset}")?; } Ok(()) } @@ -1532,7 +1660,7 @@ impl fmt::Display for RowsPerMatch { RowsPerMatch::AllRows(mode) => { write!(f, "ALL ROWS PER MATCH")?; if let Some(mode) = mode { - write!(f, " {}", mode)?; + write!(f, " {mode}")?; } Ok(()) } @@ -1658,7 +1786,7 @@ impl fmt::Display for MatchRecognizePattern { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use MatchRecognizePattern::*; match self { - Symbol(symbol) => write!(f, "{}", symbol), + Symbol(symbol) => write!(f, "{symbol}"), Exclude(symbol) => write!(f, "{{- {symbol} -}}"), Permute(symbols) => write!(f, "PERMUTE({})", display_comma_separated(symbols)), Concat(patterns) => write!(f, "{}", display_separated(patterns, " ")), @@ -1721,9 +1849,9 @@ impl fmt::Display for TableFactor { sample, index_hints, } => { - write!(f, "{name}")?; + name.fmt(f)?; if let Some(json_path) = json_path { - write!(f, "{json_path}")?; + json_path.fmt(f)?; } if !partitions.is_empty() { write!(f, "PARTITION ({})", display_comma_separated(partitions))?; @@ -1743,7 +1871,7 @@ impl fmt::Display for TableFactor { write!(f, " WITH ORDINALITY")?; } if let Some(TableSampleKind::BeforeTableAlias(sample)) = sample { - write!(f, "{sample}")?; + write!(f, " {sample}")?; } if let Some(alias) = alias { write!(f, " AS {alias}")?; @@ -1758,7 +1886,7 @@ impl fmt::Display for TableFactor { write!(f, "{version}")?; } if let Some(TableSampleKind::AfterTableAlias(sample)) = sample { - write!(f, "{sample}")?; + write!(f, " {sample}")?; } Ok(()) } @@ -1770,7 +1898,11 @@ impl fmt::Display for TableFactor { if *lateral { write!(f, "LATERAL ")?; } - write!(f, "({subquery})")?; + f.write_str("(")?; + 
NewLine.fmt(f)?; + Indent(subquery).fmt(f)?; + NewLine.fmt(f)?; + f.write_str(")")?; if let Some(alias) = alias { write!(f, " AS {alias}")?; } @@ -1893,15 +2025,19 @@ impl fmt::Display for TableFactor { } TableFactor::Unpivot { table, + null_inclusion, value, name, columns, alias, } => { + write!(f, "{table} UNPIVOT")?; + if let Some(null_inclusion) = null_inclusion { + write!(f, " {null_inclusion} ")?; + } write!( f, - "{} UNPIVOT({} FOR {} IN ({}))", - table, + "({} FOR {} IN ({}))", value, name, display_comma_separated(columns) @@ -1945,6 +2081,31 @@ impl fmt::Display for TableFactor { } Ok(()) } + TableFactor::XmlTable { + row_expression, + passing, + columns, + alias, + namespaces, + } => { + write!(f, "XMLTABLE(")?; + if !namespaces.is_empty() { + write!( + f, + "XMLNAMESPACES({}), ", + display_comma_separated(namespaces) + )?; + } + write!( + f, + "{row_expression}{passing} COLUMNS {columns})", + columns = display_comma_separated(columns) + )?; + if let Some(alias) = alias { + write!(f, " AS {alias}")?; + } + Ok(()) + } } } } @@ -1996,7 +2157,7 @@ impl fmt::Display for TableAliasColumnDef { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.name)?; if let Some(ref data_type) = self.data_type { - write!(f, " {}", data_type)?; + write!(f, " {data_type}")?; } Ok(()) } @@ -2059,113 +2220,104 @@ impl fmt::Display for Join { Suffix(constraint) } if self.global { - write!(f, " GLOBAL")?; + write!(f, "GLOBAL ")?; } match &self.join_operator { - JoinOperator::Join(constraint) => write!( - f, - " {}JOIN {}{}", + JoinOperator::Join(constraint) => f.write_fmt(format_args!( + "{}JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::Inner(constraint) => write!( - f, - " {}INNER JOIN {}{}", + )), + JoinOperator::Inner(constraint) => f.write_fmt(format_args!( + "{}INNER JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::Left(constraint) => write!( - f, - " {}LEFT JOIN {}{}", + )), 
+ JoinOperator::Left(constraint) => f.write_fmt(format_args!( + "{}LEFT JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::LeftOuter(constraint) => write!( - f, - " {}LEFT OUTER JOIN {}{}", + )), + JoinOperator::LeftOuter(constraint) => f.write_fmt(format_args!( + "{}LEFT OUTER JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::Right(constraint) => write!( - f, - " {}RIGHT JOIN {}{}", + )), + JoinOperator::Right(constraint) => f.write_fmt(format_args!( + "{}RIGHT JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::RightOuter(constraint) => write!( - f, - " {}RIGHT OUTER JOIN {}{}", + )), + JoinOperator::RightOuter(constraint) => f.write_fmt(format_args!( + "{}RIGHT OUTER JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::FullOuter(constraint) => write!( - f, - " {}FULL JOIN {}{}", + )), + JoinOperator::FullOuter(constraint) => f.write_fmt(format_args!( + "{}FULL JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::CrossJoin => write!(f, " CROSS JOIN {}", self.relation), - JoinOperator::Semi(constraint) => write!( - f, - " {}SEMI JOIN {}{}", + )), + JoinOperator::CrossJoin => f.write_fmt(format_args!("CROSS JOIN {}", self.relation)), + JoinOperator::Semi(constraint) => f.write_fmt(format_args!( + "{}SEMI JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::LeftSemi(constraint) => write!( - f, - " {}LEFT SEMI JOIN {}{}", + )), + JoinOperator::LeftSemi(constraint) => f.write_fmt(format_args!( + "{}LEFT SEMI JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::RightSemi(constraint) => write!( - f, - " {}RIGHT SEMI JOIN {}{}", + )), + JoinOperator::RightSemi(constraint) => f.write_fmt(format_args!( + "{}RIGHT SEMI JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::Anti(constraint) => write!( - 
f, - " {}ANTI JOIN {}{}", + )), + JoinOperator::Anti(constraint) => f.write_fmt(format_args!( + "{}ANTI JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::LeftAnti(constraint) => write!( - f, - " {}LEFT ANTI JOIN {}{}", + )), + JoinOperator::LeftAnti(constraint) => f.write_fmt(format_args!( + "{}LEFT ANTI JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::RightAnti(constraint) => write!( - f, - " {}RIGHT ANTI JOIN {}{}", + )), + JoinOperator::RightAnti(constraint) => f.write_fmt(format_args!( + "{}RIGHT ANTI JOIN {}{}", prefix(constraint), self.relation, suffix(constraint) - ), - JoinOperator::CrossApply => write!(f, " CROSS APPLY {}", self.relation), - JoinOperator::OuterApply => write!(f, " OUTER APPLY {}", self.relation), + )), + JoinOperator::CrossApply => f.write_fmt(format_args!("CROSS APPLY {}", self.relation)), + JoinOperator::OuterApply => f.write_fmt(format_args!("OUTER APPLY {}", self.relation)), JoinOperator::AsOf { match_condition, constraint, - } => write!( - f, - " ASOF JOIN {} MATCH_CONDITION ({match_condition}){}", + } => f.write_fmt(format_args!( + "ASOF JOIN {} MATCH_CONDITION ({match_condition}){}", self.relation, suffix(constraint) - ), + )), + JoinOperator::StraightJoin(constraint) => f.write_fmt(format_args!( + "STRAIGHT_JOIN {}{}", + self.relation, + suffix(constraint) + )), } } } @@ -2206,6 +2358,10 @@ pub enum JoinOperator { match_condition: Expr, constraint: JoinConstraint, }, + /// STRAIGHT_JOIN (non-standard) + /// + /// See . 
+ StraightJoin(JoinConstraint), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -2251,7 +2407,7 @@ impl fmt::Display for OrderBy { write!(f, " {}", display_comma_separated(exprs))?; } OrderByKind::All(all) => { - write!(f, " ALL{}", all)?; + write!(f, " ALL{all}")?; } } @@ -2282,7 +2438,7 @@ impl fmt::Display for OrderByExpr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}{}", self.expr, self.options)?; if let Some(ref with_fill) = self.with_fill { - write!(f, " {}", with_fill)? + write!(f, " {with_fill}")? } Ok(()) } @@ -2305,13 +2461,13 @@ impl fmt::Display for WithFill { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "WITH FILL")?; if let Some(ref from) = self.from { - write!(f, " FROM {}", from)?; + write!(f, " FROM {from}")?; } if let Some(ref to) = self.to { - write!(f, " TO {}", to)?; + write!(f, " TO {to}")?; } if let Some(ref step) = self.step { - write!(f, " STEP {}", step)?; + write!(f, " STEP {step}")?; } Ok(()) } @@ -2340,7 +2496,7 @@ impl fmt::Display for InterpolateExpr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.column)?; if let Some(ref expr) = self.expr { - write!(f, " AS {}", expr)?; + write!(f, " AS {expr}")?; } Ok(()) } @@ -2372,6 +2528,58 @@ impl fmt::Display for OrderByOptions { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum LimitClause { + /// Standard SQL syntax + /// + /// `LIMIT [BY ,,...] [OFFSET ]` + LimitOffset { + /// `LIMIT { | ALL }` + limit: Option, + /// `OFFSET [ { ROW | ROWS } ]` + offset: Option, + /// `BY { ,,... } }` + /// + /// [ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/limit-by) + limit_by: Vec, + }, + /// [MySQL]-specific syntax; the order of expressions is reversed. 
+ /// + /// `LIMIT , ` + /// + /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/select.html + OffsetCommaLimit { offset: Expr, limit: Expr }, +} + +impl fmt::Display for LimitClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + LimitClause::LimitOffset { + limit, + limit_by, + offset, + } => { + if let Some(ref limit) = limit { + write!(f, " LIMIT {limit}")?; + } + if let Some(ref offset) = offset { + write!(f, " {offset}")?; + } + if !limit_by.is_empty() { + debug_assert!(limit.is_some()); + write!(f, " BY {}", display_separated(limit_by, ", "))?; + } + Ok(()) + } + LimitClause::OffsetCommaLimit { offset, limit } => { + write!(f, " LIMIT {offset}, {limit}") + } + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -2407,6 +2615,296 @@ impl fmt::Display for OffsetRows { } } +/// Pipe syntax, first introduced in Google BigQuery. +/// Example: +/// +/// ```sql +/// FROM Produce +/// |> WHERE sales > 0 +/// |> AGGREGATE SUM(sales) AS total_sales, COUNT(*) AS num_sales +/// GROUP BY item; +/// ``` +/// +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum PipeOperator { + /// Limits the number of rows to return in a query, with an optional OFFSET clause to skip over rows. + /// + /// Syntax: `|> LIMIT [OFFSET ]` + /// + /// See more at + Limit { expr: Expr, offset: Option }, + /// Filters the results of the input table. + /// + /// Syntax: `|> WHERE ` + /// + /// See more at + Where { expr: Expr }, + /// `ORDER BY [ASC|DESC], ...` + OrderBy { exprs: Vec }, + /// Produces a new table with the listed columns, similar to the outermost SELECT clause in a table subquery in standard syntax. 
+ /// + /// Syntax `|> SELECT [[AS] alias], ...` + /// + /// See more at + Select { exprs: Vec }, + /// Propagates the existing table and adds computed columns, similar to SELECT *, new_column in standard syntax. + /// + /// Syntax: `|> EXTEND [[AS] alias], ...` + /// + /// See more at + Extend { exprs: Vec }, + /// Replaces the value of a column in the current table, similar to SELECT * REPLACE (expression AS column) in standard syntax. + /// + /// Syntax: `|> SET = , ...` + /// + /// See more at + Set { assignments: Vec }, + /// Removes listed columns from the current table, similar to SELECT * EXCEPT (column) in standard syntax. + /// + /// Syntax: `|> DROP , ...` + /// + /// See more at + Drop { columns: Vec }, + /// Introduces a table alias for the input table, similar to applying the AS alias clause on a table subquery in standard syntax. + /// + /// Syntax: `|> AS ` + /// + /// See more at + As { alias: Ident }, + /// Performs aggregation on data across grouped rows or an entire table. + /// + /// Syntax: `|> AGGREGATE [[AS] alias], ...` + /// + /// Syntax: + /// ```norust + /// |> AGGREGATE [ [[AS] alias], ...] + /// GROUP BY [AS alias], ... + /// ``` + /// + /// See more at + Aggregate { + full_table_exprs: Vec, + group_by_expr: Vec, + }, + /// Selects a random sample of rows from the input table. + /// Syntax: `|> TABLESAMPLE SYSTEM (10 PERCENT) + /// See more at + TableSample { sample: Box }, + /// Renames columns in the input table. + /// + /// Syntax: `|> RENAME old_name AS new_name, ...` + /// + /// See more at + Rename { mappings: Vec }, + /// Combines the input table with one or more tables using UNION. + /// + /// Syntax: `|> UNION [ALL|DISTINCT] (), (), ...` + /// + /// See more at + Union { + set_quantifier: SetQuantifier, + queries: Vec, + }, + /// Returns only the rows that are present in both the input table and the specified tables. 
+ /// + /// Syntax: `|> INTERSECT [DISTINCT] (), (), ...` + /// + /// See more at + Intersect { + set_quantifier: SetQuantifier, + queries: Vec, + }, + /// Returns only the rows that are present in the input table but not in the specified tables. + /// + /// Syntax: `|> EXCEPT DISTINCT (), (), ...` + /// + /// See more at + Except { + set_quantifier: SetQuantifier, + queries: Vec, + }, + /// Calls a table function or procedure that returns a table. + /// + /// Syntax: `|> CALL function_name(args) [AS alias]` + /// + /// See more at + Call { + function: Function, + alias: Option, + }, + /// Pivots data from rows to columns. + /// + /// Syntax: `|> PIVOT(aggregate_function(column) FOR pivot_column IN (value1, value2, ...)) [AS alias]` + /// + /// See more at + Pivot { + aggregate_functions: Vec, + value_column: Vec, + value_source: PivotValueSource, + alias: Option, + }, + /// The `UNPIVOT` pipe operator transforms columns into rows. + /// + /// Syntax: + /// ```sql + /// |> UNPIVOT(value_column FOR name_column IN (column1, column2, ...)) [alias] + /// ``` + /// + /// See more at + Unpivot { + value_column: Ident, + name_column: Ident, + unpivot_columns: Vec, + alias: Option, + }, + /// Joins the input table with another table. + /// + /// Syntax: `|> [JOIN_TYPE] JOIN
[alias] ON ` or `|> [JOIN_TYPE] JOIN
[alias] USING ()` + /// + /// See more at + Join(Join), +} + +impl fmt::Display for PipeOperator { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + PipeOperator::Select { exprs } => { + write!(f, "SELECT {}", display_comma_separated(exprs.as_slice())) + } + PipeOperator::Extend { exprs } => { + write!(f, "EXTEND {}", display_comma_separated(exprs.as_slice())) + } + PipeOperator::Set { assignments } => { + write!(f, "SET {}", display_comma_separated(assignments.as_slice())) + } + PipeOperator::Drop { columns } => { + write!(f, "DROP {}", display_comma_separated(columns.as_slice())) + } + PipeOperator::As { alias } => { + write!(f, "AS {alias}") + } + PipeOperator::Limit { expr, offset } => { + write!(f, "LIMIT {expr}")?; + if let Some(offset) = offset { + write!(f, " OFFSET {offset}")?; + } + Ok(()) + } + PipeOperator::Aggregate { + full_table_exprs, + group_by_expr, + } => { + write!(f, "AGGREGATE")?; + if !full_table_exprs.is_empty() { + write!( + f, + " {}", + display_comma_separated(full_table_exprs.as_slice()) + )?; + } + if !group_by_expr.is_empty() { + write!(f, " GROUP BY {}", display_comma_separated(group_by_expr))?; + } + Ok(()) + } + + PipeOperator::Where { expr } => { + write!(f, "WHERE {expr}") + } + PipeOperator::OrderBy { exprs } => { + write!(f, "ORDER BY {}", display_comma_separated(exprs.as_slice())) + } + + PipeOperator::TableSample { sample } => { + write!(f, "{sample}") + } + PipeOperator::Rename { mappings } => { + write!(f, "RENAME {}", display_comma_separated(mappings)) + } + PipeOperator::Union { + set_quantifier, + queries, + } => Self::fmt_set_operation(f, "UNION", set_quantifier, queries), + PipeOperator::Intersect { + set_quantifier, + queries, + } => Self::fmt_set_operation(f, "INTERSECT", set_quantifier, queries), + PipeOperator::Except { + set_quantifier, + queries, + } => Self::fmt_set_operation(f, "EXCEPT", set_quantifier, queries), + PipeOperator::Call { function, alias } => { + write!(f, "CALL 
{function}")?; + Self::fmt_optional_alias(f, alias) + } + PipeOperator::Pivot { + aggregate_functions, + value_column, + value_source, + alias, + } => { + write!( + f, + "PIVOT({} FOR {} IN ({}))", + display_comma_separated(aggregate_functions), + Expr::CompoundIdentifier(value_column.to_vec()), + value_source + )?; + Self::fmt_optional_alias(f, alias) + } + PipeOperator::Unpivot { + value_column, + name_column, + unpivot_columns, + alias, + } => { + write!( + f, + "UNPIVOT({} FOR {} IN ({}))", + value_column, + name_column, + display_comma_separated(unpivot_columns) + )?; + Self::fmt_optional_alias(f, alias) + } + PipeOperator::Join(join) => write!(f, "{join}"), + } + } +} + +impl PipeOperator { + /// Helper function to format optional alias for pipe operators + fn fmt_optional_alias(f: &mut fmt::Formatter<'_>, alias: &Option) -> fmt::Result { + if let Some(alias) = alias { + write!(f, " AS {alias}")?; + } + Ok(()) + } + + /// Helper function to format set operations (UNION, INTERSECT, EXCEPT) with queries + fn fmt_set_operation( + f: &mut fmt::Formatter<'_>, + operation: &str, + set_quantifier: &SetQuantifier, + queries: &[Query], + ) -> fmt::Result { + write!(f, "{operation}")?; + match set_quantifier { + SetQuantifier::None => {} + _ => { + write!(f, " {set_quantifier}")?; + } + } + write!(f, " ")?; + let parenthesized_queries: Vec = + queries.iter().map(|query| format!("({query})")).collect(); + write!(f, "{}", display_comma_separated(&parenthesized_queries)) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -2560,13 +3058,14 @@ pub struct Values { impl fmt::Display for Values { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "VALUES ")?; + f.write_str("VALUES")?; let prefix = if self.explicit_row { "ROW" } else { "" }; let mut delim = ""; for row in &self.rows { - write!(f, "{delim}")?; - delim = 
", "; - write!(f, "{prefix}({})", display_comma_separated(row))?; + f.write_str(delim)?; + delim = ","; + SpaceOrNewline.fmt(f)?; + Indent(format_args!("{prefix}({})", display_comma_separated(row))).fmt(f)?; } Ok(()) } @@ -2653,8 +3152,9 @@ impl fmt::Display for GroupByExpr { Ok(()) } GroupByExpr::Expressions(col_names, modifiers) => { - let col_names = display_comma_separated(col_names); - write!(f, "GROUP BY {col_names}")?; + f.write_str("GROUP BY")?; + SpaceOrNewline.fmt(f)?; + Indent(display_comma_separated(col_names)).fmt(f)?; if !modifiers.is_empty() { write!(f, " {}", display_separated(modifiers, " "))?; } @@ -2678,7 +3178,7 @@ pub enum FormatClause { impl fmt::Display for FormatClause { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - FormatClause::Identifier(ident) => write!(f, "FORMAT {}", ident), + FormatClause::Identifier(ident) => write!(f, "FORMAT {ident}"), FormatClause::Null => write!(f, "FORMAT NULL"), } } @@ -2740,9 +3240,9 @@ impl fmt::Display for ForClause { without_array_wrapper, } => { write!(f, "FOR JSON ")?; - write!(f, "{}", for_json)?; + write!(f, "{for_json}")?; if let Some(root) = root { - write!(f, ", ROOT('{}')", root)?; + write!(f, ", ROOT('{root}')")?; } if *include_null_values { write!(f, ", INCLUDE_NULL_VALUES")?; @@ -2760,7 +3260,7 @@ impl fmt::Display for ForClause { r#type, } => { write!(f, "FOR XML ")?; - write!(f, "{}", for_xml)?; + write!(f, "{for_xml}")?; if *binary_base64 { write!(f, ", BINARY BASE64")?; } @@ -2768,7 +3268,7 @@ impl fmt::Display for ForClause { write!(f, ", TYPE")?; } if let Some(root) = root { - write!(f, ", ROOT('{}')", root)?; + write!(f, ", ROOT('{root}')")?; } if *elements { write!(f, ", ELEMENTS")?; @@ -2795,7 +3295,7 @@ impl fmt::Display for ForXml { ForXml::Raw(root) => { write!(f, "RAW")?; if let Some(root) = root { - write!(f, "('{}')", root)?; + write!(f, "('{root}')")?; } Ok(()) } @@ -2804,7 +3304,7 @@ impl fmt::Display for ForXml { ForXml::Path(root) => { write!(f, 
"PATH")?; if let Some(root) = root { - write!(f, "('{}')", root)?; + write!(f, "('{root}')")?; } Ok(()) } @@ -2867,7 +3367,7 @@ impl fmt::Display for JsonTableColumn { JsonTableColumn::Named(json_table_named_column) => { write!(f, "{json_table_named_column}") } - JsonTableColumn::ForOrdinality(ident) => write!(f, "{} FOR ORDINALITY", ident), + JsonTableColumn::ForOrdinality(ident) => write!(f, "{ident} FOR ORDINALITY"), JsonTableColumn::Nested(json_table_nested_column) => { write!(f, "{json_table_nested_column}") } @@ -2933,10 +3433,10 @@ impl fmt::Display for JsonTableNamedColumn { self.path )?; if let Some(on_empty) = &self.on_empty { - write!(f, " {} ON EMPTY", on_empty)?; + write!(f, " {on_empty} ON EMPTY")?; } if let Some(on_error) = &self.on_error { - write!(f, " {} ON ERROR", on_error)?; + write!(f, " {on_error} ON ERROR")?; } Ok(()) } @@ -2958,7 +3458,7 @@ impl fmt::Display for JsonTableColumnErrorHandling { match self { JsonTableColumnErrorHandling::Null => write!(f, "NULL"), JsonTableColumnErrorHandling::Default(json_string) => { - write!(f, "DEFAULT {}", json_string) + write!(f, "DEFAULT {json_string}") } JsonTableColumnErrorHandling::Error => write!(f, "ERROR"), } @@ -3000,15 +3500,19 @@ impl fmt::Display for OpenJsonTableColumn { } /// BigQuery supports ValueTables which have 2 modes: -/// `SELECT AS STRUCT` -/// `SELECT AS VALUE` +/// `SELECT [ALL | DISTINCT] AS STRUCT` +/// `SELECT [ALL | DISTINCT] AS VALUE` +/// /// +/// #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ValueTableMode { AsStruct, AsValue, + DistinctAsStruct, + DistinctAsValue, } impl fmt::Display for ValueTableMode { @@ -3016,6 +3520,8 @@ impl fmt::Display for ValueTableMode { match self { ValueTableMode::AsStruct => write!(f, "AS STRUCT"), ValueTableMode::AsValue => write!(f, "AS VALUE"), + ValueTableMode::DistinctAsStruct => write!(f, 
"DISTINCT AS STRUCT"), + ValueTableMode::DistinctAsValue => write!(f, "DISTINCT AS VALUE"), } } } @@ -3032,3 +3538,133 @@ pub enum UpdateTableFromKind { /// For Example: `UPDATE SET t1.name='aaa' FROM t1` AfterSet(Vec), } + +/// Defines the options for an XmlTable column: Named or ForOrdinality +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum XmlTableColumnOption { + /// A named column with a type, optional path, and default value. + NamedInfo { + /// The type of the column to be extracted. + r#type: DataType, + /// The path to the column to be extracted. If None, defaults to the column name. + path: Option, + /// Default value if path does not match + default: Option, + /// Whether the column is nullable (NULL=true, NOT NULL=false) + nullable: bool, + }, + /// The FOR ORDINALITY marker + ForOrdinality, +} + +/// A single column definition in XMLTABLE +/// +/// ```sql +/// COLUMNS +/// id int PATH '@id', +/// ordinality FOR ORDINALITY, +/// "COUNTRY_NAME" text, +/// country_id text PATH 'COUNTRY_ID', +/// size_sq_km float PATH 'SIZE[@unit = "sq_km"]', +/// size_other text PATH 'concat(SIZE[@unit!="sq_km"], " ", SIZE[@unit!="sq_km"]/@unit)', +/// premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified' +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct XmlTableColumn { + /// The name of the column. 
+ pub name: Ident, + /// Column options: type/path/default or FOR ORDINALITY + pub option: XmlTableColumnOption, +} + +impl fmt::Display for XmlTableColumn { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.name)?; + match &self.option { + XmlTableColumnOption::NamedInfo { + r#type, + path, + default, + nullable, + } => { + write!(f, " {type}")?; + if let Some(p) = path { + write!(f, " PATH {p}")?; + } + if let Some(d) = default { + write!(f, " DEFAULT {d}")?; + } + if !*nullable { + write!(f, " NOT NULL")?; + } + Ok(()) + } + XmlTableColumnOption::ForOrdinality => { + write!(f, " FOR ORDINALITY") + } + } + } +} + +/// Argument passed in the XMLTABLE PASSING clause +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct XmlPassingArgument { + pub expr: Expr, + pub alias: Option, + pub by_value: bool, // True if BY VALUE is specified +} + +impl fmt::Display for XmlPassingArgument { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.by_value { + write!(f, "BY VALUE ")?; + } + write!(f, "{}", self.expr)?; + if let Some(alias) = &self.alias { + write!(f, " AS {alias}")?; + } + Ok(()) + } +} + +/// The PASSING clause for XMLTABLE +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct XmlPassingClause { + pub arguments: Vec, +} + +impl fmt::Display for XmlPassingClause { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if !self.arguments.is_empty() { + write!(f, " PASSING {}", display_comma_separated(&self.arguments))?; + } + Ok(()) + } +} + +/// Represents a single XML namespace definition in the XMLNAMESPACES clause. 
+/// +/// `namespace_uri AS namespace_name` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct XmlNamespaceDefinition { + /// The namespace URI (a text expression). + pub uri: Expr, + /// The alias for the namespace (a simple identifier). + pub name: Ident, +} + +impl fmt::Display for XmlNamespaceDefinition { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} AS {}", self.uri, self.name) + } +} diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 38e9e258..3e82905e 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -15,27 +15,30 @@ // specific language governing permissions and limitations // under the License. -use crate::ast::query::SelectItemQualifiedWildcardKind; +use crate::ast::{query::SelectItemQualifiedWildcardKind, ColumnOptions}; use core::iter; use crate::tokenizer::Span; use super::{ dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation, - AlterIndexOperation, AlterTableOperation, Array, Assignment, AssignmentTarget, CloseCursor, - ClusteredIndex, ColumnDef, ColumnOption, ColumnOptionDef, ConflictTarget, ConnectBy, + AlterIndexOperation, AlterTableOperation, Array, Assignment, AssignmentTarget, AttachedToken, + BeginEndStatements, CaseStatement, CloseCursor, ClusteredIndex, ColumnDef, ColumnOption, + ColumnOptionDef, ConditionalStatementBlock, ConditionalStatements, ConflictTarget, ConnectBy, ConstraintCharacteristics, CopySource, CreateIndex, CreateTable, CreateTableOptions, Cte, Delete, DoUpdate, ExceptSelectItem, ExcludeSelectItem, Expr, ExprWithAlias, Fetch, FromTable, Function, FunctionArg, FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, - FunctionArguments, GroupByExpr, HavingBound, IlikeSelectItem, Insert, Interpolate, - InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonPath, JsonPathElem, LateralView, - MatchRecognizePattern, 
Measure, NamedWindowDefinition, ObjectName, ObjectNamePart, Offset, - OnConflict, OnConflictAction, OnInsert, OrderBy, OrderByExpr, OrderByKind, Partition, - PivotValueSource, ProjectionSelect, Query, ReferentialAction, RenameSelectItem, - ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SqlOption, - Statement, Subscript, SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, - TableFactor, TableObject, TableOptionsClustered, TableWithJoins, UpdateTableFromKind, Use, - Value, Values, ViewColumnDef, WildcardAdditionalOptions, With, WithFill, + FunctionArguments, GroupByExpr, HavingBound, IfStatement, IlikeSelectItem, IndexColumn, Insert, + Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonPath, JsonPathElem, + LateralView, LimitClause, MatchRecognizePattern, Measure, NamedParenthesizedList, + NamedWindowDefinition, ObjectName, ObjectNamePart, Offset, OnConflict, OnConflictAction, + OnInsert, OpenStatement, OrderBy, OrderByExpr, OrderByKind, Partition, PivotValueSource, + ProjectionSelect, Query, RaiseStatement, RaiseStatementValue, ReferentialAction, + RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, + SetExpr, SqlOption, Statement, Subscript, SymbolDefinition, TableAlias, TableAliasColumnDef, + TableConstraint, TableFactor, TableObject, TableOptionsClustered, TableWithJoins, + UpdateTableFromKind, Use, Value, Values, ViewColumnDef, WhileStatement, + WildcardAdditionalOptions, With, WithFill, }; /// Given an iterator of spans, return the [Span::union] of all spans. 
@@ -94,14 +97,13 @@ impl Spanned for Query { with, body, order_by, - limit, - limit_by, - offset, + limit_clause, fetch, - locks: _, // todo - for_clause: _, // todo, mssql specific - settings: _, // todo, clickhouse specific - format_clause: _, // todo, clickhouse specific + locks: _, // todo + for_clause: _, // todo, mssql specific + settings: _, // todo, clickhouse specific + format_clause: _, // todo, clickhouse specific + pipe_operators: _, // todo bigquery specific } = self; union_spans( @@ -109,14 +111,31 @@ impl Spanned for Query { .map(|i| i.span()) .chain(core::iter::once(body.span())) .chain(order_by.as_ref().map(|i| i.span())) - .chain(limit.as_ref().map(|i| i.span())) - .chain(limit_by.iter().map(|i| i.span())) - .chain(offset.as_ref().map(|i| i.span())) + .chain(limit_clause.as_ref().map(|i| i.span())) .chain(fetch.as_ref().map(|i| i.span())), ) } } +impl Spanned for LimitClause { + fn span(&self) -> Span { + match self { + LimitClause::LimitOffset { + limit, + offset, + limit_by, + } => union_spans( + limit + .iter() + .map(|i| i.span()) + .chain(offset.as_ref().map(|i| i.span())) + .chain(limit_by.iter().map(|i| i.span())), + ), + LimitClause::OffsetCommaLimit { offset, limit } => offset.span().union(&limit.span()), + } + } +} + impl Spanned for Offset { fn span(&self) -> Span { let Offset { @@ -191,6 +210,7 @@ impl Spanned for SetExpr { SetExpr::Insert(statement) => statement.span(), SetExpr::Table(_) => Span::empty(), SetExpr::Update(statement) => statement.span(), + SetExpr::Delete(statement) => statement.span(), } } } @@ -229,11 +249,7 @@ impl Spanned for Values { /// - [Statement::Fetch] /// - [Statement::Flush] /// - [Statement::Discard] -/// - [Statement::SetRole] -/// - [Statement::SetVariable] -/// - [Statement::SetTimeZone] -/// - [Statement::SetNames] -/// - [Statement::SetNamesDefault] +/// - [Statement::Set] /// - [Statement::ShowFunctions] /// - [Statement::ShowVariable] /// - [Statement::ShowStatus] @@ -243,7 +259,6 @@ impl Spanned 
for Values { /// - [Statement::ShowTables] /// - [Statement::ShowCollation] /// - [Statement::StartTransaction] -/// - [Statement::SetTransaction] /// - [Statement::Comment] /// - [Statement::Commit] /// - [Statement::Rollback] @@ -297,7 +312,6 @@ impl Spanned for Statement { table_names, partitions, table: _, - only: _, identity: _, cascade: _, on_cluster: _, @@ -323,6 +337,10 @@ impl Spanned for Statement { file_format: _, source, } => source.span(), + Statement::Case(stmt) => stmt.span(), + Statement::If(stmt) => stmt.span(), + Statement::While(stmt) => stmt.span(), + Statement::Raise(stmt) => stmt.span(), Statement::Call(function) => function.span(), Statement::Copy { source, @@ -334,6 +352,7 @@ impl Spanned for Statement { } => source.span(), Statement::CopyIntoSnowflake { into: _, + into_columns: _, from_obj: _, from_obj_alias: _, stage_params: _, @@ -347,6 +366,7 @@ impl Spanned for Statement { from_query: _, partition: _, } => Span::empty(), + Statement::Open(open) => open.span(), Statement::Close { cursor } => match cursor { CloseCursor::All => Span::empty(), CloseCursor::Specific { name } => name.span, @@ -367,6 +387,7 @@ impl Spanned for Statement { ), Statement::Delete(delete) => delete.span(), Statement::CreateView { + or_alter: _, or_replace: _, materialized: _, name, @@ -402,6 +423,7 @@ impl Spanned for Statement { Statement::CreateIndex(create_index) => create_index.span(), Statement::CreateRole { .. } => Span::empty(), Statement::CreateSecret { .. } => Span::empty(), + Statement::CreateServer { .. } => Span::empty(), Statement::CreateConnector { .. } => Span::empty(), Statement::AlterTable { name, @@ -410,6 +432,7 @@ impl Spanned for Statement { operations, location: _, on_cluster, + iceberg: _, } => union_spans( core::iter::once(name.span()) .chain(operations.iter().map(|i| i.span())) @@ -436,6 +459,7 @@ impl Spanned for Statement { Statement::DetachDuckDBDatabase { .. } => Span::empty(), Statement::Drop { .. 
} => Span::empty(), Statement::DropFunction { .. } => Span::empty(), + Statement::DropDomain { .. } => Span::empty(), Statement::DropProcedure { .. } => Span::empty(), Statement::DropSecret { .. } => Span::empty(), Statement::Declare { .. } => Span::empty(), @@ -444,11 +468,7 @@ impl Spanned for Statement { Statement::Fetch { .. } => Span::empty(), Statement::Flush { .. } => Span::empty(), Statement::Discard { .. } => Span::empty(), - Statement::SetRole { .. } => Span::empty(), - Statement::SetVariable { .. } => Span::empty(), - Statement::SetTimeZone { .. } => Span::empty(), - Statement::SetNames { .. } => Span::empty(), - Statement::SetNamesDefault {} => Span::empty(), + Statement::Set(_) => Span::empty(), Statement::ShowFunctions { .. } => Span::empty(), Statement::ShowVariable { .. } => Span::empty(), Statement::ShowStatus { .. } => Span::empty(), @@ -459,13 +479,13 @@ impl Spanned for Statement { Statement::ShowCollation { .. } => Span::empty(), Statement::Use(u) => u.span(), Statement::StartTransaction { .. } => Span::empty(), - Statement::SetTransaction { .. } => Span::empty(), Statement::Comment { .. } => Span::empty(), Statement::Commit { .. } => Span::empty(), Statement::Rollback { .. } => Span::empty(), Statement::CreateSchema { .. } => Span::empty(), Statement::CreateDatabase { .. } => Span::empty(), Statement::CreateFunction { .. } => Span::empty(), + Statement::CreateDomain { .. } => Span::empty(), Statement::CreateTrigger { .. } => Span::empty(), Statement::DropTrigger { .. } => Span::empty(), Statement::CreateProcedure { .. } => Span::empty(), @@ -473,6 +493,7 @@ impl Spanned for Statement { Statement::CreateStage { .. } => Span::empty(), Statement::Assert { .. } => Span::empty(), Statement::Grant { .. } => Span::empty(), + Statement::Deny { .. } => Span::empty(), Statement::Revoke { .. } => Span::empty(), Statement::Deallocate { .. } => Span::empty(), Statement::Execute { .. 
} => Span::empty(), @@ -507,8 +528,9 @@ impl Spanned for Statement { Statement::UNLISTEN { .. } => Span::empty(), Statement::RenameTable { .. } => Span::empty(), Statement::RaisError { .. } => Span::empty(), + Statement::Print { .. } => Span::empty(), + Statement::Return { .. } => Span::empty(), Statement::List(..) | Statement::Remove(..) => Span::empty(), - Statement::SetSessionParam { .. } => Span::empty(), } } } @@ -549,27 +571,21 @@ impl Spanned for CreateTable { constraints, hive_distribution: _, // hive specific hive_formats: _, // hive specific - table_properties, - with_options, - file_format: _, // enum - location: _, // string, no span + file_format: _, // enum + location: _, // string, no span query, without_rowid: _, // bool like, clone, - engine: _, // todo - comment: _, // todo, no span - auto_increment_offset: _, // u32, no span - default_charset: _, // string, no span - collation: _, // string, no span - on_commit: _, // enum + comment: _, // todo, no span + on_commit: _, on_cluster: _, // todo, clickhouse specific primary_key: _, // todo, clickhouse specific order_by: _, // todo, clickhouse specific partition_by: _, // todo, BigQuery specific cluster_by: _, // todo, BigQuery specific clustered_by: _, // todo, Hive specific - options: _, // todo, BigQuery specific + inherits: _, // todo, PostgreSQL specific strict: _, // bool copy_grants: _, // bool enable_schema_evolution: _, // bool @@ -584,15 +600,15 @@ impl Spanned for CreateTable { base_location: _, // todo, Snowflake specific catalog: _, // todo, Snowflake specific catalog_sync: _, // todo, Snowflake specific - storage_serialization_policy: _, // todo, Snowflake specific + storage_serialization_policy: _, + table_options, } = self; union_spans( core::iter::once(name.span()) + .chain(core::iter::once(table_options.span())) .chain(columns.iter().map(|i| i.span())) .chain(constraints.iter().map(|i| i.span())) - .chain(table_properties.iter().map(|i| i.span())) - .chain(with_options.iter().map(|i| 
i.span())) .chain(query.iter().map(|i| i.span())) .chain(like.iter().map(|i| i.span())) .chain(clone.iter().map(|i| i.span())), @@ -636,7 +652,7 @@ impl Spanned for TableConstraint { name.iter() .map(|i| i.span) .chain(index_name.iter().map(|i| i.span)) - .chain(columns.iter().map(|i| i.span)) + .chain(columns.iter().map(|i| i.span())) .chain(characteristics.iter().map(|i| i.span())), ), TableConstraint::PrimaryKey { @@ -650,12 +666,13 @@ impl Spanned for TableConstraint { name.iter() .map(|i| i.span) .chain(index_name.iter().map(|i| i.span)) - .chain(columns.iter().map(|i| i.span)) + .chain(columns.iter().map(|i| i.span())) .chain(characteristics.iter().map(|i| i.span())), ), TableConstraint::ForeignKey { name, columns, + index_name, foreign_table, referred_columns, on_delete, @@ -664,6 +681,7 @@ impl Spanned for TableConstraint { } => union_spans( name.iter() .map(|i| i.span) + .chain(index_name.iter().map(|i| i.span)) .chain(columns.iter().map(|i| i.span)) .chain(core::iter::once(foreign_table.span())) .chain(referred_columns.iter().map(|i| i.span)) @@ -671,9 +689,11 @@ impl Spanned for TableConstraint { .chain(on_update.iter().map(|i| i.span())) .chain(characteristics.iter().map(|i| i.span())), ), - TableConstraint::Check { name, expr } => { - expr.span().union_opt(&name.as_ref().map(|i| i.span)) - } + TableConstraint::Check { + name, + expr, + enforced: _, + } => expr.span().union_opt(&name.as_ref().map(|i| i.span)), TableConstraint::Index { display_as_key: _, name, @@ -682,7 +702,7 @@ impl Spanned for TableConstraint { } => union_spans( name.iter() .map(|i| i.span) - .chain(columns.iter().map(|i| i.span)), + .chain(columns.iter().map(|i| i.span())), ), TableConstraint::FulltextOrSpatial { fulltext: _, @@ -693,7 +713,7 @@ impl Spanned for TableConstraint { opt_index_name .iter() .map(|i| i.span) - .chain(columns.iter().map(|i| i.span)), + .chain(columns.iter().map(|i| i.span())), ), } } @@ -704,7 +724,7 @@ impl Spanned for CreateIndex { let CreateIndex { name, 
table_name, - using, + using: _, columns, unique: _, // bool concurrently: _, // bool @@ -719,8 +739,7 @@ impl Spanned for CreateIndex { name.iter() .map(|i| i.span()) .chain(core::iter::once(table_name.span())) - .chain(using.iter().map(|i| i.span)) - .chain(columns.iter().map(|i| i.span())) + .chain(columns.iter().map(|i| i.column.span())) .chain(include.iter().map(|i| i.span)) .chain(with.iter().map(|i| i.span())) .chain(predicate.iter().map(|i| i.span())), @@ -728,6 +747,98 @@ impl Spanned for CreateIndex { } } +impl Spanned for IndexColumn { + fn span(&self) -> Span { + self.column.span() + } +} + +impl Spanned for CaseStatement { + fn span(&self) -> Span { + let CaseStatement { + case_token: AttachedToken(start), + match_expr: _, + when_blocks: _, + else_block: _, + end_case_token: AttachedToken(end), + } = self; + + union_spans([start.span, end.span].into_iter()) + } +} + +impl Spanned for IfStatement { + fn span(&self) -> Span { + let IfStatement { + if_block, + elseif_blocks, + else_block, + end_token, + } = self; + + union_spans( + iter::once(if_block.span()) + .chain(elseif_blocks.iter().map(|b| b.span())) + .chain(else_block.as_ref().map(|b| b.span())) + .chain(end_token.as_ref().map(|AttachedToken(t)| t.span)), + ) + } +} + +impl Spanned for WhileStatement { + fn span(&self) -> Span { + let WhileStatement { while_block } = self; + + while_block.span() + } +} + +impl Spanned for ConditionalStatements { + fn span(&self) -> Span { + match self { + ConditionalStatements::Sequence { statements } => { + union_spans(statements.iter().map(|s| s.span())) + } + ConditionalStatements::BeginEnd(bes) => bes.span(), + } + } +} + +impl Spanned for ConditionalStatementBlock { + fn span(&self) -> Span { + let ConditionalStatementBlock { + start_token: AttachedToken(start_token), + condition, + then_token, + conditional_statements, + } = self; + + union_spans( + iter::once(start_token.span) + .chain(condition.as_ref().map(|c| c.span())) + 
.chain(then_token.as_ref().map(|AttachedToken(t)| t.span)) + .chain(iter::once(conditional_statements.span())), + ) + } +} + +impl Spanned for RaiseStatement { + fn span(&self) -> Span { + let RaiseStatement { value } = self; + + union_spans(value.iter().map(|value| value.span())) + } +} + +impl Spanned for RaiseStatementValue { + fn span(&self) -> Span { + match self { + RaiseStatementValue::UsingMessage(expr) => expr.span(), + RaiseStatementValue::Expr(expr) => expr.span(), + } + } +} + /// # partial span /// /// Missing spans: @@ -772,6 +883,7 @@ impl Spanned for ColumnOption { ColumnOption::OnConflict(..) => Span::empty(), ColumnOption::Policy(..) => Span::empty(), ColumnOption::Tags(..) => Span::empty(), + ColumnOption::Srid(..) => Span::empty(), } } } @@ -813,6 +925,7 @@ impl Spanned for AlterColumnOperation { AlterColumnOperation::SetDataType { data_type: _, using, + had_set: _, } => using.as_ref().map_or(Span::empty(), |u| u.span()), AlterColumnOperation::AddGenerated { .. } => Span::empty(), } @@ -880,10 +993,13 @@ impl Spanned for ViewColumnDef { options, } = self; - union_spans( - core::iter::once(name.span) - .chain(options.iter().flat_map(|i| i.iter().map(|k| k.span()))), - ) + name.span.union_opt(&options.as_ref().map(|o| o.span())) + } +} + +impl Spanned for ColumnOptions { + fn span(&self) -> Span { + union_spans(self.as_slice().iter().map(|i| i.span())) } } @@ -900,6 +1016,14 @@ impl Spanned for SqlOption { } => union_spans( core::iter::once(column_name.span).chain(for_values.iter().map(|i| i.span())), ), + SqlOption::TableSpace(_) => Span::empty(), + SqlOption::Comment(_) => Span::empty(), + SqlOption::NamedParenthesizedList(NamedParenthesizedList { + key: name, + name: value, + values, + }) => union_spans(core::iter::once(name.span).chain(values.iter().map(|i| i.span))) + .union_opt(&value.as_ref().map(|i| i.span)), } } } @@ -936,7 +1060,11 @@ impl Spanned for CreateTableOptions { match self { CreateTableOptions::None => Span::empty(), 
CreateTableOptions::With(vec) => union_spans(vec.iter().map(|i| i.span())), - CreateTableOptions::Options(vec) => union_spans(vec.iter().map(|i| i.span())), + CreateTableOptions::Options(vec) => { + union_spans(vec.as_slice().iter().map(|i| i.span())) + } + CreateTableOptions::Plain(vec) => union_spans(vec.iter().map(|i| i.span())), + CreateTableOptions::TableProperties(vec) => union_spans(vec.iter().map(|i| i.span())), } } } @@ -948,7 +1076,10 @@ impl Spanned for CreateTableOptions { impl Spanned for AlterTableOperation { fn span(&self) -> Span { match self { - AlterTableOperation::AddConstraint(table_constraint) => table_constraint.span(), + AlterTableOperation::AddConstraint { + constraint, + not_valid: _, + } => constraint.span(), AlterTableOperation::AddColumn { column_keyword: _, if_not_exists: _, @@ -980,10 +1111,11 @@ impl Spanned for AlterTableOperation { drop_behavior: _, } => name.span, AlterTableOperation::DropColumn { - column_name, + has_column_keyword: _, + column_names, if_exists: _, drop_behavior: _, - } => column_name.span, + } => union_spans(column_names.iter().map(|i| i.span)), AlterTableOperation::AttachPartition { partition } => partition.span(), AlterTableOperation::DetachPartition { partition } => partition.span(), AlterTableOperation::FreezePartition { @@ -999,6 +1131,8 @@ impl Spanned for AlterTableOperation { .span() .union_opt(&with_name.as_ref().map(|n| n.span)), AlterTableOperation::DropPrimaryKey => Span::empty(), + AlterTableOperation::DropForeignKey { name } => name.span, + AlterTableOperation::DropIndex { name } => name.span, AlterTableOperation::EnableAlwaysRule { name } => name.span, AlterTableOperation::EnableAlwaysTrigger { name } => name.span, AlterTableOperation::EnableReplicaRule { name } => name.span, @@ -1064,6 +1198,9 @@ impl Spanned for AlterTableOperation { AlterTableOperation::ResumeRecluster => Span::empty(), AlterTableOperation::Algorithm { .. } => Span::empty(), AlterTableOperation::AutoIncrement { value, .. 
} => value.span(), + AlterTableOperation::Lock { .. } => Span::empty(), + AlterTableOperation::ReplicaIdentity { .. } => Span::empty(), + AlterTableOperation::ValidateConstraint { name } => name.span, } } } @@ -1279,7 +1416,6 @@ impl Spanned for AssignmentTarget { /// f.e. `IS NULL ` reports as `::span`. /// /// Missing spans: -/// - [Expr::TypedString] # missing span for data_type /// - [Expr::MatchAgainst] # MySQL specific /// - [Expr::RLike] # MySQL specific /// - [Expr::Struct] # BigQuery specific @@ -1429,6 +1565,7 @@ impl Spanned for Expr { substring_from, substring_for, special: _, + shorthand: _, } => union_spans( core::iter::once(expr.span()) .chain(substring_from.as_ref().map(|i| i.span())) @@ -1448,20 +1585,26 @@ impl Spanned for Expr { .map(|items| union_spans(items.iter().map(|i| i.span()))), ), ), - Expr::IntroducedString { value, .. } => value.span(), + Expr::Prefixed { value, .. } => value.span(), Expr::Case { + case_token, + end_token, operand, conditions, else_result, } => union_spans( - operand - .as_ref() - .map(|i| i.span()) - .into_iter() - .chain(conditions.iter().flat_map(|case_when| { - [case_when.condition.span(), case_when.result.span()] - })) - .chain(else_result.as_ref().map(|i| i.span())), + iter::once(case_token.0.span) + .chain( + operand + .as_ref() + .map(|i| i.span()) + .into_iter() + .chain(conditions.iter().flat_map(|case_when| { + [case_when.condition.span(), case_when.result.span()] + })) + .chain(else_result.as_ref().map(|i| i.span())), + ) + .chain(iter::once(end_token.0.span)), ), Expr::Exists { subquery, .. 
} => subquery.span(), Expr::Subquery(query) => query.span(), @@ -1481,6 +1624,7 @@ impl Spanned for Expr { Expr::OuterJoin(expr) => expr.span(), Expr::Prior(expr) => expr.span(), Expr::Lambda(_) => Span::empty(), + Expr::MemberOf(member_of) => member_of.value.span().union(&member_of.array.span()), } } } @@ -1527,6 +1671,10 @@ impl Spanned for ObjectNamePart { fn span(&self) -> Span { match self { ObjectNamePart::Identifier(ident) => ident.span, + ObjectNamePart::Function(func) => func + .name + .span + .union(&union_spans(func.args.iter().map(|i| i.span()))), } } } @@ -1812,6 +1960,7 @@ impl Spanned for TableFactor { .chain(alias.as_ref().map(|alias| alias.span())), ), TableFactor::JsonTable { .. } => Span::empty(), + TableFactor::XmlTable { .. } => Span::empty(), TableFactor::Pivot { table, aggregate_functions, @@ -1830,6 +1979,7 @@ impl Spanned for TableFactor { TableFactor::Unpivot { table, value, + null_inclusion: _, name, columns, alias, @@ -2034,6 +2184,7 @@ impl Spanned for JoinOperator { } => match_condition.span().union(&constraint.span()), JoinOperator::Anti(join_constraint) => join_constraint.span(), JoinOperator::Semi(join_constraint) => join_constraint.span(), + JoinOperator::StraightJoin(join_constraint) => join_constraint.span(), } } } @@ -2069,6 +2220,7 @@ impl Spanned for Select { distinct: _, // todo top: _, // todo, mysql specific projection, + exclude: _, into, from, lateral_views, @@ -2183,6 +2335,28 @@ impl Spanned for TableObject { } } +impl Spanned for BeginEndStatements { + fn span(&self) -> Span { + let BeginEndStatements { + begin_token, + statements, + end_token, + } = self; + union_spans( + core::iter::once(begin_token.0.span) + .chain(statements.iter().map(|i| i.span())) + .chain(core::iter::once(end_token.0.span)), + ) + } +} + +impl Spanned for OpenStatement { + fn span(&self) -> Span { + let OpenStatement { cursor_name } = self; + cursor_name.span + } +} + #[cfg(test)] pub mod tests { use crate::dialect::{Dialect, GenericDialect, 
SnowflakeDialect}; @@ -2322,4 +2496,16 @@ pub mod tests { assert_eq!(test.get_source(body_span), "SELECT cte.* FROM cte"); } + + #[test] + fn test_case_expr_span() { + let dialect = &GenericDialect; + let mut test = SpanTest::new(dialect, "CASE 1 WHEN 2 THEN 3 ELSE 4 END"); + let expr = test.0.parse_expr().unwrap(); + let expr_span = expr.span(); + assert_eq!( + test.get_source(expr_span), + "CASE 1 WHEN 2 THEN 3 ELSE 4 END" + ); + } } diff --git a/src/ast/trigger.rs b/src/ast/trigger.rs index cf1c8c46..2c64e423 100644 --- a/src/ast/trigger.rs +++ b/src/ast/trigger.rs @@ -110,6 +110,7 @@ impl fmt::Display for TriggerEvent { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TriggerPeriod { + For, After, Before, InsteadOf, @@ -118,6 +119,7 @@ pub enum TriggerPeriod { impl fmt::Display for TriggerPeriod { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + TriggerPeriod::For => write!(f, "FOR"), TriggerPeriod::After => write!(f, "AFTER"), TriggerPeriod::Before => write!(f, "BEFORE"), TriggerPeriod::InsteadOf => write!(f, "INSTEAD OF"), diff --git a/src/ast/value.rs b/src/ast/value.rs index 77e2e0e8..fdfa6a67 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -116,7 +116,6 @@ impl From for Value { derive(Visit, VisitMut), visit(with = "visit_value") )] - pub enum Value { /// Numeric literal #[cfg(not(feature = "bigdecimal"))] @@ -456,30 +455,38 @@ impl fmt::Display for EscapeQuotedString<'_> { // | `"A\"B\"A"` | default | `DoubleQuotedString(String::from("A\"B\"A"))` | `"A""B""A"` | let quote = self.quote; let mut previous_char = char::default(); - let mut peekable_chars = self.string.chars().peekable(); - while let Some(&ch) = peekable_chars.peek() { + let mut start_idx = 0; + let mut peekable_chars = self.string.char_indices().peekable(); + while let Some(&(idx, ch)) = peekable_chars.peek() { match ch { char if char == quote => { if previous_char == '\\' { - 
write!(f, "{char}")?; + // the quote is already escaped with a backslash, skip peekable_chars.next(); continue; } peekable_chars.next(); - if peekable_chars.peek().map(|c| *c == quote).unwrap_or(false) { - write!(f, "{char}{char}")?; - peekable_chars.next(); - } else { - write!(f, "{char}{char}")?; + match peekable_chars.peek() { + Some((_, c)) if *c == quote => { + // the quote is already escaped with another quote, skip + peekable_chars.next(); + } + _ => { + // The quote is not escaped. + // Including idx in the range, so the quote at idx will be printed twice: + // in this call to write_str() and in the next one. + f.write_str(&self.string[start_idx..=idx])?; + start_idx = idx; + } } } _ => { - write!(f, "{ch}")?; peekable_chars.next(); } } previous_char = ch; } + f.write_str(&self.string[start_idx..])?; Ok(()) } } @@ -543,16 +550,16 @@ impl fmt::Display for EscapeUnicodeStringLiteral<'_> { write!(f, r#"\\"#)?; } x if x.is_ascii() => { - write!(f, "{}", c)?; + write!(f, "{c}")?; } _ => { let codepoint = c as u32; // if the character fits in 32 bits, we can use the \XXXX format // otherwise, we need to use the \+XXXXXX format if codepoint <= 0xFFFF { - write!(f, "\\{:04X}", codepoint)?; + write!(f, "\\{codepoint:04X}")?; } else { - write!(f, "\\+{:06X}", codepoint)?; + write!(f, "\\+{codepoint:06X}")?; } } } diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index a5d355fe..8e0a3139 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -523,7 +523,7 @@ where /// // Remove all select limits in sub-queries /// visit_expressions_mut(&mut statements, |expr| { /// if let Expr::Subquery(q) = expr { -/// q.limit = None +/// q.limit_clause = None; /// } /// ControlFlow::<()>::Continue(()) /// }); @@ -647,7 +647,7 @@ where /// // Remove all select limits in outer statements (not in sub-queries) /// visit_statements_mut(&mut statements, |stmt| { /// if let Statement::Query(q) = stmt { -/// q.limit = None +/// q.limit_clause = None; /// } /// 
ControlFlow::<()>::Continue(()) /// }); @@ -741,7 +741,7 @@ mod tests { } } - fn do_visit(sql: &str, visitor: &mut V) -> Statement { + fn do_visit>(sql: &str, visitor: &mut V) -> Statement { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let s = Parser::new(&dialect) @@ -749,7 +749,8 @@ mod tests { .parse_statement() .unwrap(); - s.visit(visitor); + let flow = s.visit(visitor); + assert_eq!(flow, ControlFlow::Continue(())); s } @@ -925,10 +926,10 @@ mod tests { #[test] fn overflow() { let cond = (0..1000) - .map(|n| format!("X = {}", n)) + .map(|n| format!("X = {n}")) .collect::>() .join(" OR "); - let sql = format!("SELECT x where {0}", cond); + let sql = format!("SELECT x where {cond}"); let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, sql.as_str()).tokenize().unwrap(); @@ -938,7 +939,8 @@ mod tests { .unwrap(); let mut visitor = QuickVisitor {}; - s.visit(&mut visitor); + let flow = s.visit(&mut visitor); + assert_eq!(flow, ControlFlow::Continue(())); } } @@ -969,7 +971,7 @@ mod visit_mut_tests { } } - fn do_visit_mut(sql: &str, visitor: &mut V) -> Statement { + fn do_visit_mut>(sql: &str, visitor: &mut V) -> Statement { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let mut s = Parser::new(&dialect) @@ -977,7 +979,8 @@ mod visit_mut_tests { .parse_statement() .unwrap(); - s.visit(visitor); + let flow = s.visit(visitor); + assert_eq!(flow, ControlFlow::Continue(())); s } diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 49fb24f1..c2cd507c 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -46,7 +46,11 @@ pub struct BigQueryDialect; impl Dialect for BigQueryDialect { fn parse_statement(&self, parser: &mut Parser) -> Option> { - self.maybe_parse_statement(parser) + if parser.parse_keyword(Keyword::BEGIN) { + return Some(parser.parse_begin_exception_end()); + } + + None } /// See @@ -136,49 +140,8 @@ impl 
Dialect for BigQueryDialect { fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool { !RESERVED_FOR_COLUMN_ALIAS.contains(kw) } -} -impl BigQueryDialect { - fn maybe_parse_statement(&self, parser: &mut Parser) -> Option> { - if parser.peek_keyword(Keyword::BEGIN) { - return Some(self.parse_begin(parser)); - } - None - } - - /// Parse a `BEGIN` statement. - /// - fn parse_begin(&self, parser: &mut Parser) -> Result { - parser.expect_keyword(Keyword::BEGIN)?; - - let statements = parser.parse_statement_list(&[Keyword::EXCEPTION, Keyword::END])?; - - let has_exception_when_clause = parser.parse_keywords(&[ - Keyword::EXCEPTION, - Keyword::WHEN, - Keyword::ERROR, - Keyword::THEN, - ]); - let exception_statements = if has_exception_when_clause { - if !parser.peek_keyword(Keyword::END) { - Some(parser.parse_statement_list(&[Keyword::END])?) - } else { - Some(Default::default()) - } - } else { - None - }; - - parser.expect_keyword(Keyword::END)?; - - Ok(Statement::StartTransaction { - begin: true, - statements, - exception_statements, - has_end_keyword: true, - transaction: None, - modifier: None, - modes: Default::default(), - }) + fn supports_pipe_operator(&self) -> bool { + true } } diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index 3366c670..fa18463a 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -94,4 +94,8 @@ impl Dialect for DuckDbDialect { fn supports_order_by_all(&self) -> bool { true } + + fn supports_select_wildcard_exclude(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 041d44bb..be2cc007 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -52,6 +52,10 @@ impl Dialect for GenericDialect { true } + fn supports_left_associative_joins_without_parens(&self) -> bool { + true + } + fn supports_connect_by(&self) -> bool { true } @@ -108,6 +112,14 @@ impl Dialect for GenericDialect { true } + fn supports_from_first_select(&self) -> bool { + true + } + + fn 
supports_projection_trailing_commas(&self) -> bool { + true + } + fn supports_asc_desc_in_column_definition(&self) -> bool { true } @@ -155,4 +167,20 @@ impl Dialect for GenericDialect { fn supports_match_against(&self) -> bool { true } + + fn supports_set_names(&self) -> bool { + true + } + + fn supports_comma_separated_set_assignments(&self) -> bool { + true + } + + fn supports_filter_during_aggregation(&self) -> bool { + true + } + + fn supports_select_wildcard_exclude(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 1c32bc51..deb5719d 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -49,7 +49,7 @@ pub use self::postgresql::PostgreSqlDialect; pub use self::redshift::RedshiftSqlDialect; pub use self::snowflake::SnowflakeDialect; pub use self::sqlite::SQLiteDialect; -use crate::ast::{ColumnOption, Expr, Statement}; +use crate::ast::{ColumnOption, Expr, GranteesType, Ident, ObjectNamePart, Statement}; pub use crate::keywords; use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; @@ -201,6 +201,33 @@ pub trait Dialect: Debug + Any { false } + /// Determine whether the dialect strips the backslash when escaping LIKE wildcards (%, _). + /// + /// [MySQL] has a special case when escaping single quoted strings which leaves these unescaped + /// so they can be used in LIKE patterns without double-escaping (as is necessary in other + /// escaping dialects, such as [Snowflake]). Generally, special characters have escaping rules + /// causing them to be replaced with a different byte sequences (e.g. `'\0'` becoming the zero + /// byte), and the default if an escaped character does not have a specific escaping rule is to + /// strip the backslash (e.g. there is no rule for `h`, so `'\h' = 'h'`). MySQL's special case + /// for ignoring LIKE wildcard escapes is to *not* strip the backslash, so that `'\%' = '\\%'`. + /// This applies to all string literals though, not just those used in LIKE patterns. 
+ /// + /// ```text + /// mysql> select '\_', hex('\\'), hex('_'), hex('\_'); + /// +----+-----------+----------+-----------+ + /// | \_ | hex('\\') | hex('_') | hex('\_') | + /// +----+-----------+----------+-----------+ + /// | \_ | 5C | 5F | 5C5F | + /// +----+-----------+----------+-----------+ + /// 1 row in set (0.00 sec) + /// ``` + /// + /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/string-literals.html + /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/functions/like#usage-notes + fn ignores_wildcard_escapes(&self) -> bool { + false + } + /// Determine if the dialect supports string literals with `U&` prefix. /// This is used to specify Unicode code points in string literals. /// For example, in PostgreSQL, the following is a valid string literal: @@ -251,6 +278,34 @@ pub trait Dialect: Debug + Any { false } + /// Indicates whether the dialect supports left-associative join parsing + /// by default when parentheses are omitted in nested joins. + /// + /// Most dialects (like MySQL or Postgres) assume **left-associative** precedence, + /// so a query like: + /// + /// ```sql + /// SELECT * FROM t1 NATURAL JOIN t5 INNER JOIN t0 ON ... + /// ``` + /// is interpreted as: + /// ```sql + /// ((t1 NATURAL JOIN t5) INNER JOIN t0 ON ...) + /// ``` + /// and internally represented as a **flat list** of joins. + /// + /// In contrast, some dialects (e.g. **Snowflake**) assume **right-associative** + /// precedence and interpret the same query as: + /// ```sql + /// (t1 NATURAL JOIN (t5 INNER JOIN t0 ON ...)) + /// ``` + /// which results in a **nested join** structure in the AST. + /// + /// If this method returns `false`, the parser must build nested join trees + /// even in the absence of parentheses to reflect the correct associativity + fn supports_left_associative_joins_without_parens(&self) -> bool { + true + } + /// Returns true if the dialect supports the `(+)` syntax for OUTER JOIN. 
fn supports_outer_join_operator(&self) -> bool { false @@ -372,6 +427,16 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports multiple `SET` statements + /// in a single statement. + /// + /// ```sql + /// SET variable = expression [, variable = expression]; + /// ``` + fn supports_comma_separated_set_assignments(&self) -> bool { + false + } + /// Returns true if the dialect supports an `EXCEPT` clause following a /// wildcard in a select list. /// @@ -481,6 +546,20 @@ pub trait Dialect: Debug + Any { false } + /// Return true if the dialect supports pipe operator. + /// + /// Example: + /// ```sql + /// SELECT * + /// FROM table + /// |> limit 1 + /// ``` + /// + /// See + fn supports_pipe_operator(&self) -> bool { + false + } + /// Does the dialect support MySQL-style `'user'@'host'` grantee syntax? fn supports_user_host_grantee(&self) -> bool { false @@ -491,6 +570,26 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports an exclude option + /// following a wildcard in the projection section. For example: + /// `SELECT * EXCLUDE col1 FROM tbl`. + /// + /// [Redshift](https://docs.aws.amazon.com/redshift/latest/dg/r_EXCLUDE_list.html) + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/select) + fn supports_select_wildcard_exclude(&self) -> bool { + false + } + + /// Returns true if the dialect supports an exclude option + /// as the last item in the projection section, not necessarily + /// after a wildcard. For example: + /// `SELECT *, c1, c2 EXCLUDE c3 FROM tbl` + /// + /// [Redshift](https://docs.aws.amazon.com/redshift/latest/dg/r_EXCLUDE_list.html) + fn supports_select_exclude(&self) -> bool { + false + } + /// Dialect-specific infix parser override /// /// This method is called to parse the next infix expression. 
@@ -536,7 +635,7 @@ pub trait Dialect: Debug + Any { } let token = parser.peek_token(); - debug!("get_next_precedence_full() {:?}", token); + debug!("get_next_precedence_full() {token:?}"); match token.token { Token::Word(w) if w.keyword == Keyword::OR => Ok(p!(Or)), Token::Word(w) if w.keyword == Keyword::AND => Ok(p!(And)), @@ -568,7 +667,9 @@ pub trait Dialect: Debug + Any { Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::MEMBER => Ok(p!(Like)), _ => Ok(self.prec_unknown()), }, Token::Word(w) if w.keyword == Keyword::IS => Ok(p!(Is)), @@ -579,11 +680,14 @@ pub trait Dialect: Debug + Any { Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::MEMBER => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(p!(Between)), Token::Word(w) if w.keyword == Keyword::DIV => Ok(p!(MulDivModOp)), Token::Period => Ok(p!(Period)), - Token::Eq + Token::Assignment + | Token::Eq | Token::Lt | Token::LtEq | Token::Neq @@ -850,6 +954,17 @@ pub trait Dialect: Debug + Any { keywords::RESERVED_FOR_TABLE_FACTOR } + /// Returns reserved keywords that may prefix a select item expression + /// e.g. 
`SELECT CONNECT_BY_ROOT name FROM Tbl2` (Snowflake) + fn get_reserved_keywords_for_select_item_operator(&self) -> &[Keyword] { + &[] + } + + /// Returns grantee types that should be treated as identifiers + fn get_reserved_grantees_types(&self) -> &[GranteesType] { + &[] + } + /// Returns true if this dialect supports the `TABLESAMPLE` option /// before the table alias option. For example: /// @@ -897,11 +1012,17 @@ pub trait Dialect: Debug + Any { explicit || self.is_column_alias(kw, parser) } + /// Returns true if the specified keyword should be parsed as a table identifier. + /// See [keywords::RESERVED_FOR_TABLE_ALIAS] + fn is_table_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool { + !keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw) + } + /// Returns true if the specified keyword should be parsed as a table factor alias. /// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided /// to enable looking ahead if needed. - fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool { - explicit || !keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw) + fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + explicit || self.is_table_alias(kw, parser) } /// Returns true if this dialect supports querying historical table data @@ -953,6 +1074,47 @@ pub trait Dialect: Debug + Any { fn supports_order_by_all(&self) -> bool { false } + + /// Returns true if the dialect supports `SET NAMES [COLLATE ]`. + /// + /// - [MySQL](https://dev.mysql.com/doc/refman/8.4/en/set-names.html) + /// - [PostgreSQL](https://www.postgresql.org/docs/17/sql-set.html) + /// + /// Note: Postgres doesn't support the `COLLATE` clause, but we permissively parse it anyway. + fn supports_set_names(&self) -> bool { + false + } + + fn supports_space_separated_column_options(&self) -> bool { + false + } + + /// Returns true if the dialect supports the `USING` clause in an `ALTER COLUMN` statement. 
+ /// Example: + /// ```sql + /// ALTER TABLE tbl ALTER COLUMN col SET DATA TYPE USING ` + /// ``` + fn supports_alter_column_type_using(&self) -> bool { + false + } + + /// Returns true if the dialect supports `ALTER TABLE tbl DROP COLUMN c1, ..., cn` + fn supports_comma_separated_drop_column_list(&self) -> bool { + false + } + + /// Returns true if the dialect considers the specified ident as a function + /// that returns an identifier. Typically used to generate identifiers + /// programmatically. + /// + /// - [Snowflake](https://docs.snowflake.com/en/sql-reference/identifier-literal) + fn is_identifier_generating_function_name( + &self, + _ident: &Ident, + _name_parts: &[ObjectNamePart], + ) -> bool { + false + } } /// This represents the operators for which precedence must be defined diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index 980f5ec3..36bd222b 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -15,7 +15,19 @@ // specific language governing permissions and limitations // under the License. 
+use crate::ast::helpers::attached_token::AttachedToken; +use crate::ast::{ + BeginEndStatements, ConditionalStatementBlock, ConditionalStatements, GranteesType, + IfStatement, Statement, TriggerObject, +}; use crate::dialect::Dialect; +use crate::keywords::{self, Keyword}; +use crate::parser::{Parser, ParserError}; +use crate::tokenizer::Token; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; + +const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[Keyword::IF, Keyword::ELSE]; /// A [`Dialect`] for [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/) #[derive(Debug)] @@ -40,6 +52,10 @@ impl Dialect for MsSqlDialect { || ch == '_' } + fn identifier_quote_style(&self, _identifier: &str) -> Option { + Some('[') + } + /// SQL Server has `CONVERT(type, value)` instead of `CONVERT(value, type)` /// fn convert_type_before_value(&self) -> bool { @@ -82,6 +98,7 @@ impl Dialect for MsSqlDialect { fn supports_start_transaction_modifier(&self) -> bool { true } + fn supports_end_transaction_modifier(&self) -> bool { true } @@ -95,4 +112,187 @@ impl Dialect for MsSqlDialect { fn supports_timestamp_versioning(&self) -> bool { true } + + /// See + fn supports_nested_comments(&self) -> bool { + true + } + + /// See + fn supports_object_name_double_dot_notation(&self) -> bool { + true + } + + /// See + fn get_reserved_grantees_types(&self) -> &[GranteesType] { + &[GranteesType::Public] + } + + fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool { + !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw) && !RESERVED_FOR_COLUMN_ALIAS.contains(kw) + } + + fn parse_statement(&self, parser: &mut Parser) -> Option> { + if parser.peek_keyword(Keyword::IF) { + Some(self.parse_if_stmt(parser)) + } else if parser.parse_keywords(&[Keyword::CREATE, Keyword::TRIGGER]) { + Some(self.parse_create_trigger(parser, false)) + } else if parser.parse_keywords(&[ + Keyword::CREATE, + Keyword::OR, + Keyword::ALTER, + Keyword::TRIGGER, + ]) { + 
Some(self.parse_create_trigger(parser, true)) + } else { + None + } + } +} + +impl MsSqlDialect { + /// ```sql + /// IF boolean_expression + /// { sql_statement | statement_block } + /// [ ELSE + /// { sql_statement | statement_block } ] + /// ``` + fn parse_if_stmt(&self, parser: &mut Parser) -> Result { + let if_token = parser.expect_keyword(Keyword::IF)?; + + let condition = parser.parse_expr()?; + + let if_block = if parser.peek_keyword(Keyword::BEGIN) { + let begin_token = parser.expect_keyword(Keyword::BEGIN)?; + let statements = self.parse_statement_list(parser, Some(Keyword::END))?; + let end_token = parser.expect_keyword(Keyword::END)?; + ConditionalStatementBlock { + start_token: AttachedToken(if_token), + condition: Some(condition), + then_token: None, + conditional_statements: ConditionalStatements::BeginEnd(BeginEndStatements { + begin_token: AttachedToken(begin_token), + statements, + end_token: AttachedToken(end_token), + }), + } + } else { + let stmt = parser.parse_statement()?; + ConditionalStatementBlock { + start_token: AttachedToken(if_token), + condition: Some(condition), + then_token: None, + conditional_statements: ConditionalStatements::Sequence { + statements: vec![stmt], + }, + } + }; + + let mut prior_statement_ended_with_semi_colon = false; + while let Token::SemiColon = parser.peek_token_ref().token { + parser.advance_token(); + prior_statement_ended_with_semi_colon = true; + } + + let mut else_block = None; + if parser.peek_keyword(Keyword::ELSE) { + let else_token = parser.expect_keyword(Keyword::ELSE)?; + if parser.peek_keyword(Keyword::BEGIN) { + let begin_token = parser.expect_keyword(Keyword::BEGIN)?; + let statements = self.parse_statement_list(parser, Some(Keyword::END))?; + let end_token = parser.expect_keyword(Keyword::END)?; + else_block = Some(ConditionalStatementBlock { + start_token: AttachedToken(else_token), + condition: None, + then_token: None, + conditional_statements: 
ConditionalStatements::BeginEnd(BeginEndStatements { + begin_token: AttachedToken(begin_token), + statements, + end_token: AttachedToken(end_token), + }), + }); + } else { + let stmt = parser.parse_statement()?; + else_block = Some(ConditionalStatementBlock { + start_token: AttachedToken(else_token), + condition: None, + then_token: None, + conditional_statements: ConditionalStatements::Sequence { + statements: vec![stmt], + }, + }); + } + } else if prior_statement_ended_with_semi_colon { + parser.prev_token(); + } + + Ok(Statement::If(IfStatement { + if_block, + else_block, + elseif_blocks: Vec::new(), + end_token: None, + })) + } + + /// Parse `CREATE TRIGGER` for [MsSql] + /// + /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/statements/create-trigger-transact-sql + fn parse_create_trigger( + &self, + parser: &mut Parser, + or_alter: bool, + ) -> Result { + let name = parser.parse_object_name(false)?; + parser.expect_keyword_is(Keyword::ON)?; + let table_name = parser.parse_object_name(false)?; + let period = parser.parse_trigger_period()?; + let events = parser.parse_comma_separated(Parser::parse_trigger_event)?; + + parser.expect_keyword_is(Keyword::AS)?; + let statements = Some(parser.parse_conditional_statements(&[Keyword::END])?); + + Ok(Statement::CreateTrigger { + or_alter, + or_replace: false, + is_constraint: false, + name, + period, + events, + table_name, + referenced_table_name: None, + referencing: Vec::new(), + trigger_object: TriggerObject::Statement, + include_each: false, + condition: None, + exec_body: None, + statements, + characteristics: None, + }) + } + + /// Parse a sequence of statements, optionally separated by semicolon. + /// + /// Stops parsing when reaching EOF or the given keyword. 
+ fn parse_statement_list( + &self, + parser: &mut Parser, + terminal_keyword: Option, + ) -> Result, ParserError> { + let mut stmts = Vec::new(); + loop { + if let Token::EOF = parser.peek_token_ref().token { + break; + } + if let Some(term) = terminal_keyword { + if parser.peek_keyword(term) { + break; + } + } + stmts.push(parser.parse_statement()?); + while let Token::SemiColon = parser.peek_token_ref().token { + parser.advance_token(); + } + } + Ok(stmts) + } } diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 8a0da87e..f69e4243 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -27,7 +27,12 @@ use crate::{ use super::keywords; -const RESERVED_FOR_TABLE_ALIAS_MYSQL: &[Keyword] = &[Keyword::USE, Keyword::IGNORE, Keyword::FORCE]; +const RESERVED_FOR_TABLE_ALIAS_MYSQL: &[Keyword] = &[ + Keyword::USE, + Keyword::IGNORE, + Keyword::FORCE, + Keyword::STRAIGHT_JOIN, +]; /// A [`Dialect`] for [MySQL](https://www.mysql.com/) #[derive(Debug)] @@ -62,6 +67,10 @@ impl Dialect for MySqlDialect { true } + fn ignores_wildcard_escapes(&self) -> bool { + true + } + fn supports_numeric_prefix(&self) -> bool { true } @@ -133,6 +142,14 @@ impl Dialect for MySqlDialect { fn supports_match_against(&self) -> bool { true } + + fn supports_set_names(&self) -> bool { + true + } + + fn supports_comma_separated_set_assignments(&self) -> bool { + true + } } /// `LOCK TABLES` diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 57ed0b68..b2d4014c 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -104,7 +104,7 @@ impl Dialect for PostgreSqlDialect { fn get_next_precedence(&self, parser: &Parser) -> Option> { let token = parser.peek_token(); - debug!("get_next_precedence() {:?}", token); + debug!("get_next_precedence() {token:?}"); // we only return some custom value here when the behaviour (not merely the numeric value) differs // from the default implementation @@ -254,4 +254,12 @@ impl Dialect for PostgreSqlDialect { fn 
supports_geometric_types(&self) -> bool { true } + + fn supports_set_names(&self) -> bool { + true + } + + fn supports_alter_column_type_using(&self) -> bool { + true + } } diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index 25b8f164..8ffed98a 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -80,13 +80,15 @@ impl Dialect for RedshiftSqlDialect { } fn is_identifier_start(&self, ch: char) -> bool { - // Extends Postgres dialect with sharp - PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' + // Extends Postgres dialect with sharp and UTF-8 multibyte chars + // https://docs.aws.amazon.com/redshift/latest/dg/r_names.html + PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' || !ch.is_ascii() } fn is_identifier_part(&self, ch: char) -> bool { - // Extends Postgres dialect with sharp - PostgreSqlDialect {}.is_identifier_part(ch) || ch == '#' + // Extends Postgres dialect with sharp and UTF-8 multibyte chars + // https://docs.aws.amazon.com/redshift/latest/dg/r_names.html + PostgreSqlDialect {}.is_identifier_part(ch) || ch == '#' || !ch.is_ascii() } /// redshift has `CONVERT(type, value)` instead of `CONVERT(value, type)` @@ -121,4 +123,20 @@ impl Dialect for RedshiftSqlDialect { fn supports_array_typedef_with_brackets(&self) -> bool { true } + + fn allow_extract_single_quotes(&self) -> bool { + true + } + + fn supports_string_literal_backslash_escape(&self) -> bool { + true + } + + fn supports_select_wildcard_exclude(&self) -> bool { + true + } + + fn supports_select_exclude(&self) -> bool { + true + } } diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 72252b27..3b1eff39 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -20,17 +20,17 @@ use crate::alloc::string::ToString; use crate::ast::helpers::key_value_options::{KeyValueOption, KeyValueOptionType, KeyValueOptions}; use crate::ast::helpers::stmt_create_table::CreateTableBuilder; use crate::ast::helpers::stmt_data_loading::{ 
- FileStagingCommand, StageLoadSelectItem, StageParamsObject, + FileStagingCommand, StageLoadSelectItem, StageLoadSelectItemKind, StageParamsObject, }; use crate::ast::{ ColumnOption, ColumnPolicy, ColumnPolicyProperty, CopyIntoSnowflakeKind, Ident, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, - IdentityPropertyOrder, ObjectName, RowAccessPolicy, ShowObjects, Statement, TagsColumnOption, - WrappedCollection, + IdentityPropertyOrder, ObjectName, ObjectNamePart, RowAccessPolicy, ShowObjects, SqlOption, + Statement, TagsColumnOption, WrappedCollection, }; use crate::dialect::{Dialect, Precedence}; use crate::keywords::Keyword; -use crate::parser::{Parser, ParserError}; +use crate::parser::{IsOptional, Parser, ParserError}; use crate::tokenizer::{Token, Word}; #[cfg(not(feature = "std"))] use alloc::boxed::Box; @@ -44,6 +44,7 @@ use alloc::{format, vec}; use super::keywords::RESERVED_FOR_IDENTIFIER; use sqlparser::ast::StorageSerializationPolicy; +const RESERVED_KEYWORDS_FOR_SELECT_ITEM_OPERATOR: [Keyword; 1] = [Keyword::CONNECT_BY_ROOT]; /// A [`Dialect`] for [Snowflake](https://www.snowflake.com/) #[derive(Debug, Default)] pub struct SnowflakeDialect; @@ -130,6 +131,10 @@ impl Dialect for SnowflakeDialect { } fn parse_statement(&self, parser: &mut Parser) -> Option> { + if parser.parse_keyword(Keyword::BEGIN) { + return Some(parser.parse_begin_exception_end()); + } + if parser.parse_keywords(&[Keyword::ALTER, Keyword::SESSION]) { // ALTER SESSION let set = match parser.parse_one_of_keywords(&[Keyword::SET, Keyword::UNSET]) { @@ -278,6 +283,10 @@ impl Dialect for SnowflakeDialect { true } + fn supports_left_associative_joins_without_parens(&self) -> bool { + false + } + fn is_reserved_for_identifier(&self, kw: Keyword) -> bool { // Unreserve some keywords that Snowflake accepts as identifiers // See: https://docs.snowflake.com/en/sql-reference/reserved-keywords @@ -292,9 +301,8 @@ impl Dialect for SnowflakeDialect { true } - 
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { - explicit - || match kw { + fn is_column_alias(&self, kw: &Keyword, parser: &mut Parser) -> bool { + match kw { // The following keywords can be considered an alias as long as // they are not followed by other tokens that may change their meaning // e.g. `SELECT * EXCEPT (col1) FROM tbl` @@ -310,9 +318,11 @@ impl Dialect for SnowflakeDialect { } // `FETCH` can be considered an alias as long as it's not followed by `FIRST`` or `NEXT` - // which would give it a different meanins, for example: `SELECT 1 FETCH FIRST 10 ROWS` - not an alias - Keyword::FETCH - if parser.peek_keyword(Keyword::FIRST) || parser.peek_keyword(Keyword::NEXT) => + // which would give it a different meanings, for example: + // `SELECT 1 FETCH FIRST 10 ROWS` - not an alias + // `SELECT 1 FETCH 10` - not an alias + Keyword::FETCH if parser.peek_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT]).is_some() + || matches!(parser.peek_token().token, Token::Number(_, _)) => { false } @@ -337,6 +347,86 @@ impl Dialect for SnowflakeDialect { } } + fn is_table_alias(&self, kw: &Keyword, parser: &mut Parser) -> bool { + match kw { + // The following keywords can be considered an alias as long as + // they are not followed by other tokens that may change their meaning + Keyword::LIMIT + | Keyword::RETURNING + | Keyword::INNER + | Keyword::USING + | Keyword::PIVOT + | Keyword::UNPIVOT + | Keyword::EXCEPT + | Keyword::MATCH_RECOGNIZE + | Keyword::OFFSET + if !matches!(parser.peek_token_ref().token, Token::SemiColon | Token::EOF) => + { + false + } + + // `FETCH` can be considered an alias as long as it's not followed by `FIRST`` or `NEXT` + // which would give it a different meanings, for example: + // `SELECT * FROM tbl FETCH FIRST 10 ROWS` - not an alias + // `SELECT * FROM tbl FETCH 10` - not an alias + Keyword::FETCH + if parser + .peek_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT]) + .is_some() + || 
matches!(parser.peek_token().token, Token::Number(_, _)) => + { + false + } + + // All sorts of join-related keywords can be considered aliases unless additional + // keywords change their meaning. + Keyword::RIGHT | Keyword::LEFT | Keyword::SEMI | Keyword::ANTI + if parser + .peek_one_of_keywords(&[Keyword::JOIN, Keyword::OUTER]) + .is_some() => + { + false + } + Keyword::GLOBAL if parser.peek_keyword(Keyword::FULL) => false, + + // Reserved keywords by the Snowflake dialect, which seem to be less strictive + // than what is listed in `keywords::RESERVED_FOR_TABLE_ALIAS`. The following + // keywords were tested with the this statement: `SELECT .* FROM tbl `. + Keyword::WITH + | Keyword::ORDER + | Keyword::SELECT + | Keyword::WHERE + | Keyword::GROUP + | Keyword::HAVING + | Keyword::LATERAL + | Keyword::UNION + | Keyword::INTERSECT + | Keyword::MINUS + | Keyword::ON + | Keyword::JOIN + | Keyword::INNER + | Keyword::CROSS + | Keyword::FULL + | Keyword::LEFT + | Keyword::RIGHT + | Keyword::NATURAL + | Keyword::USING + | Keyword::ASOF + | Keyword::MATCH_CONDITION + | Keyword::SET + | Keyword::QUALIFY + | Keyword::FOR + | Keyword::START + | Keyword::CONNECT + | Keyword::SAMPLE + | Keyword::TABLESAMPLE + | Keyword::FROM => false, + + // Any other word is considered an alias + _ => true, + } + } + /// See: fn supports_timestamp_versioning(&self) -> bool { true @@ -346,6 +436,40 @@ impl Dialect for SnowflakeDialect { fn supports_group_by_expr(&self) -> bool { true } + + /// See: + fn get_reserved_keywords_for_select_item_operator(&self) -> &[Keyword] { + &RESERVED_KEYWORDS_FOR_SELECT_ITEM_OPERATOR + } + + fn supports_space_separated_column_options(&self) -> bool { + true + } + + fn supports_comma_separated_drop_column_list(&self) -> bool { + true + } + + fn is_identifier_generating_function_name( + &self, + ident: &Ident, + name_parts: &[ObjectNamePart], + ) -> bool { + ident.quote_style.is_none() + && ident.value.to_lowercase() == "identifier" + && !name_parts + .iter() 
+ .any(|p| matches!(p, ObjectNamePart::Function(_))) + } + + // For example: `SELECT IDENTIFIER('alias1').* FROM tbl AS alias1` + fn supports_select_expr_star(&self) -> bool { + true + } + + fn supports_select_wildcard_exclude(&self) -> bool { + true + } } fn parse_file_staging_command(kw: Keyword, parser: &mut Parser) -> Result { @@ -411,6 +535,8 @@ pub fn parse_create_table( // "CREATE TABLE x COPY GRANTS (c INT)" and "CREATE TABLE x (c INT) COPY GRANTS" are both // accepted by Snowflake + let mut plain_options = vec![]; + loop { let next_token = parser.next_token(); match &next_token.token { @@ -422,7 +548,9 @@ pub fn parse_create_table( Keyword::COMMENT => { // Rewind the COMMENT keyword parser.prev_token(); - builder = builder.comment(parser.parse_optional_inline_comment()?); + if let Some(comment_def) = parser.parse_optional_inline_comment()? { + plain_options.push(SqlOption::Comment(comment_def)) + } } Keyword::AS => { let query = parser.parse_query()?; @@ -443,7 +571,7 @@ pub fn parse_create_table( parser.expect_keyword_is(Keyword::BY)?; parser.expect_token(&Token::LParen)?; let cluster_by = Some(WrappedCollection::Parentheses( - parser.parse_comma_separated(|p| p.parse_identifier())?, + parser.parse_comma_separated(|p| p.parse_expr())?, )); parser.expect_token(&Token::RParen)?; @@ -550,6 +678,9 @@ pub fn parse_create_table( builder.storage_serialization_policy = Some(parse_storage_serialization_policy(parser)?); } + Keyword::IF if parser.parse_keywords(&[Keyword::NOT, Keyword::EXISTS]) => { + builder = builder.if_not_exists(true); + } _ => { return parser.expected("end of statement", next_token); } @@ -583,6 +714,13 @@ pub fn parse_create_table( } } } + let table_options = if !plain_options.is_empty() { + crate::ast::CreateTableOptions::Plain(plain_options) + } else { + crate::ast::CreateTableOptions::None + }; + + builder = builder.table_options(table_options); if iceberg && builder.base_location.is_none() { return Err(ParserError::ParserError( @@ -644,10 
+782,7 @@ pub fn parse_create_stage( // [ comment ] if parser.parse_keyword(Keyword::COMMENT) { parser.expect_token(&Token::Eq)?; - comment = Some(match parser.next_token().token { - Token::SingleQuotedString(word) => Ok(word), - _ => parser.expected("a comment statement", parser.peek_token()), - }?) + comment = Some(parser.parse_comment_value()?); } Ok(Statement::CreateStage { @@ -686,6 +821,7 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result ident.push('~'), Token::Mod => ident.push('%'), Token::Div => ident.push('/'), + Token::Plus => ident.push('+'), Token::Word(w) => ident.push_str(&w.to_string()), _ => return parser.expected("stage name identifier", parser.peek_token()), } @@ -725,7 +861,7 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result { }; let mut files: Vec = vec![]; - let mut from_transformations: Option> = None; + let mut from_transformations: Option> = None; let mut from_stage_alias = None; let mut from_stage = None; let mut stage_params = StageParamsObject { @@ -747,6 +883,11 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result { stage_params = parse_stage_params(parser)?; } + let into_columns = match &parser.peek_token().token { + Token::LParen => Some(parser.parse_parenthesized_column_list(IsOptional::Optional, true)?), + _ => None, + }; + parser.expect_keyword_is(Keyword::FROM)?; match parser.next_token().token { Token::LParen if kind == CopyIntoSnowflakeKind::Table => { @@ -758,15 +899,10 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result { from_stage = Some(parse_snowflake_stage_name(parser)?); stage_params = parse_stage_params(parser)?; - // as - from_stage_alias = if parser.parse_keyword(Keyword::AS) { - Some(match parser.next_token().token { - Token::Word(w) => Ok(Ident::new(w.value)), - _ => parser.expected("stage alias", parser.peek_token()), - }?) - } else { - None - }; + // Parse an optional alias + from_stage_alias = parser + .maybe_parse_table_alias()? 
+ .map(|table_alias| table_alias.name); parser.expect_token(&Token::RParen)?; } Token::LParen if kind == CopyIntoSnowflakeKind::Location => { @@ -849,6 +985,7 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result { Ok(Statement::CopyIntoSnowflake { kind, into, + into_columns, from_obj: from_stage, from_obj_alias: from_stage_alias, stage_params, @@ -869,86 +1006,93 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result { fn parse_select_items_for_data_load( parser: &mut Parser, -) -> Result>, ParserError> { - // [.]$[.] [ , [.]$[.] ... ] - let mut select_items: Vec = vec![]; +) -> Result>, ParserError> { + let mut select_items: Vec = vec![]; loop { - let mut alias: Option = None; - let mut file_col_num: i32 = 0; - let mut element: Option = None; - let mut item_as: Option = None; + match parser.maybe_parse(parse_select_item_for_data_load)? { + // [.]$[.] [ , [.]$[.] ... ] + Some(item) => select_items.push(StageLoadSelectItemKind::StageLoadSelectItem(item)), + // Fallback, try to parse a standard SQL select item + None => select_items.push(StageLoadSelectItemKind::SelectItem( + parser.parse_select_item()?, + )), + } + if matches!(parser.peek_token_ref().token, Token::Comma) { + parser.advance_token(); + } else { + break; + } + } + Ok(Some(select_items)) +} - let next_token = parser.next_token(); - match next_token.token { +fn parse_select_item_for_data_load( + parser: &mut Parser, +) -> Result { + let mut alias: Option = None; + let mut file_col_num: i32 = 0; + let mut element: Option = None; + let mut item_as: Option = None; + + let next_token = parser.next_token(); + match next_token.token { + Token::Placeholder(w) => { + file_col_num = w.to_string().split_off(1).parse::().map_err(|e| { + ParserError::ParserError(format!("Could not parse '{w}' as i32: {e}")) + })?; + Ok(()) + } + Token::Word(w) => { + alias = Some(Ident::new(w.value)); + Ok(()) + } + _ => parser.expected("alias or file_col_num", next_token), + }?; + + if alias.is_some() { + 
parser.expect_token(&Token::Period)?; + // now we get col_num token + let col_num_token = parser.next_token(); + match col_num_token.token { Token::Placeholder(w) => { file_col_num = w.to_string().split_off(1).parse::().map_err(|e| { ParserError::ParserError(format!("Could not parse '{w}' as i32: {e}")) })?; Ok(()) } - Token::Word(w) => { - alias = Some(Ident::new(w.value)); - Ok(()) - } - _ => parser.expected("alias or file_col_num", next_token), + _ => parser.expected("file_col_num", col_num_token), }?; + } - if alias.is_some() { - parser.expect_token(&Token::Period)?; - // now we get col_num token - let col_num_token = parser.next_token(); - match col_num_token.token { - Token::Placeholder(w) => { - file_col_num = w.to_string().split_off(1).parse::().map_err(|e| { - ParserError::ParserError(format!("Could not parse '{w}' as i32: {e}")) - })?; - Ok(()) - } - _ => parser.expected("file_col_num", col_num_token), - }?; + // try extracting optional element + match parser.next_token().token { + Token::Colon => { + // parse element + element = Some(Ident::new(match parser.next_token().token { + Token::Word(w) => Ok(w.value), + _ => parser.expected("file_col_num", parser.peek_token()), + }?)); } - - // try extracting optional element - match parser.next_token().token { - Token::Colon => { - // parse element - element = Some(Ident::new(match parser.next_token().token { - Token::Word(w) => Ok(w.value), - _ => parser.expected("file_col_num", parser.peek_token()), - }?)); - } - _ => { - // element not present move back - parser.prev_token(); - } - } - - // as - if parser.parse_keyword(Keyword::AS) { - item_as = Some(match parser.next_token().token { - Token::Word(w) => Ok(Ident::new(w.value)), - _ => parser.expected("column item alias", parser.peek_token()), - }?); - } - - select_items.push(StageLoadSelectItem { - alias, - file_col_num, - element, - item_as, - }); - - match parser.next_token().token { - Token::Comma => { - // continue - } - _ => { - parser.prev_token(); // 
need to move back - break; - } + _ => { + // element not present move back + parser.prev_token(); } } - Ok(Some(select_items)) + + // as + if parser.parse_keyword(Keyword::AS) { + item_as = Some(match parser.next_token().token { + Token::Word(w) => Ok(Ident::new(w.value)), + _ => parser.expected("column item alias", parser.peek_token()), + }?); + } + + Ok(StageLoadSelectItem { + alias, + file_col_num, + element, + item_as, + }) } fn parse_stage_params(parser: &mut Parser) -> Result { @@ -1016,9 +1160,15 @@ fn parse_session_options( let mut options: Vec = Vec::new(); let empty = String::new; loop { - match parser.next_token().token { - Token::Comma => continue, + let next_token = parser.peek_token(); + match next_token.token { + Token::SemiColon | Token::EOF => break, + Token::Comma => { + parser.advance_token(); + continue; + } Token::Word(key) => { + parser.advance_token(); if set { let option = parse_option(parser, key)?; options.push(option); @@ -1031,21 +1181,17 @@ fn parse_session_options( } } _ => { - if parser.peek_token().token == Token::EOF { - break; - } - return parser.expected("another option", parser.peek_token()); + return parser.expected("another option or end of statement", next_token); } } } - options - .is_empty() - .then(|| { - Err(ParserError::ParserError( - "expected at least one option".to_string(), - )) - }) - .unwrap_or(Ok(options)) + if options.is_empty() { + Err(ParserError::ParserError( + "expected at least one option".to_string(), + )) + } else { + Ok(options) + } } /// Parses options provided within parentheses like: @@ -1150,7 +1296,7 @@ fn parse_column_policy_property( parser: &mut Parser, with: bool, ) -> Result { - let policy_name = parser.parse_identifier()?; + let policy_name = parser.parse_object_name(false)?; let using_columns = if parser.parse_keyword(Keyword::USING) { parser.expect_token(&Token::LParen)?; let columns = parser.parse_comma_separated(|p| p.parse_identifier())?; diff --git a/src/dialect/sqlite.rs 
b/src/dialect/sqlite.rs index 138c4692..847e0d13 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -15,7 +15,11 @@ // specific language governing permissions and limitations // under the License. -use crate::ast::Statement; +#[cfg(not(feature = "std"))] +use alloc::boxed::Box; + +use crate::ast::BinaryOperator; +use crate::ast::{Expr, Statement}; use crate::dialect::Dialect; use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; @@ -70,6 +74,27 @@ impl Dialect for SQLiteDialect { } } + fn parse_infix( + &self, + parser: &mut crate::parser::Parser, + expr: &crate::ast::Expr, + _precedence: u8, + ) -> Option> { + // Parse MATCH and REGEXP as operators + // See + for (keyword, op) in [ + (Keyword::REGEXP, BinaryOperator::Regexp), + (Keyword::MATCH, BinaryOperator::Match), + ] { + if parser.parse_keyword(keyword) { + let left = Box::new(expr.clone()); + let right = Box::new(parser.parse_expr().unwrap()); + return Some(Ok(Expr::BinaryOp { left, op, right })); + } + } + None + } + fn supports_in_empty_list(&self) -> bool { true } diff --git a/src/display_utils.rs b/src/display_utils.rs new file mode 100644 index 00000000..ba36fccd --- /dev/null +++ b/src/display_utils.rs @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Utilities for formatting SQL AST nodes with pretty printing support. +//! +//! The module provides formatters that implement the `Display` trait with support +//! for both regular (`{}`) and pretty (`{:#}`) formatting modes. Pretty printing +//! adds proper indentation and line breaks to make SQL statements more readable. + +use core::fmt::{self, Display, Write}; + +/// A wrapper around a value that adds an indent to the value when displayed with {:#}. +pub(crate) struct Indent(pub T); + +const INDENT: &str = " "; + +impl Display for Indent +where + T: Display, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if f.alternate() { + f.write_str(INDENT)?; + write!(Indent(f), "{:#}", self.0) + } else { + self.0.fmt(f) + } + } +} + +/// Adds an indent to the inner writer +impl Write for Indent +where + T: Write, +{ + fn write_str(&mut self, s: &str) -> fmt::Result { + self.0.write_str(s)?; + // Our NewLine and SpaceOrNewline utils always print individual newlines as a single-character string. + if s == "\n" { + self.0.write_str(INDENT)?; + } + Ok(()) + } +} + +/// A value that inserts a newline when displayed with {:#}, but not when displayed with {}. +pub(crate) struct NewLine; + +impl Display for NewLine { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if f.alternate() { + f.write_char('\n') + } else { + Ok(()) + } + } +} + +/// A value that inserts a space when displayed with {}, but a newline when displayed with {:#}. +pub(crate) struct SpaceOrNewline; + +impl Display for SpaceOrNewline { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if f.alternate() { + f.write_char('\n') + } else { + f.write_char(' ') + } + } +} + +/// A value that displays a comma-separated list of values. +/// When pretty-printed (using {:#}), it displays each value on a new line. 
+pub(crate) struct DisplayCommaSeparated<'a, T: fmt::Display>(pub(crate) &'a [T]); + +impl fmt::Display for DisplayCommaSeparated<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut first = true; + for t in self.0 { + if !first { + f.write_char(',')?; + SpaceOrNewline.fmt(f)?; + } + first = false; + t.fmt(f)?; + } + Ok(()) + } +} + +/// Displays a whitespace, followed by a comma-separated list that is indented when pretty-printed. +pub(crate) fn indented_list(f: &mut fmt::Formatter, items: &[T]) -> fmt::Result { + SpaceOrNewline.fmt(f)?; + Indent(DisplayCommaSeparated(items)).fmt(f) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_indent() { + struct TwoLines; + + impl Display for TwoLines { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("line 1")?; + SpaceOrNewline.fmt(f)?; + f.write_str("line 2") + } + } + + let indent = Indent(TwoLines); + assert_eq!( + indent.to_string(), + TwoLines.to_string(), + "Only the alternate form should be indented" + ); + assert_eq!(format!("{:#}", indent), " line 1\n line 2"); + } +} diff --git a/src/keywords.rs b/src/keywords.rs index a6854f07..9e689a6d 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -18,14 +18,14 @@ //! This module defines //! 1) a list of constants for every keyword //! 2) an `ALL_KEYWORDS` array with every keyword in it -//! This is not a list of *reserved* keywords: some of these can be -//! parsed as identifiers if the parser decides so. This means that -//! new keywords can be added here without affecting the parse result. +//! This is not a list of *reserved* keywords: some of these can be +//! parsed as identifiers if the parser decides so. This means that +//! new keywords can be added here without affecting the parse result. //! -//! As a matter of fact, most of these keywords are not used at all -//! and could be removed. +//! As a matter of fact, most of these keywords are not used at all +//! and could be removed. //! 
3) a `RESERVED_FOR_TABLE_ALIAS` array with keywords reserved in a -//! "table alias" context. +//! "table alias" context. #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -83,6 +83,7 @@ define_keywords!( ADMIN, AFTER, AGAINST, + AGGREGATE, AGGREGATION, ALERT, ALGORITHM, @@ -115,9 +116,11 @@ define_keywords!( AUTHENTICATION, AUTHORIZATION, AUTO, + AUTOEXTEND_SIZE, AUTOINCREMENT, AUTO_INCREMENT, AVG, + AVG_ROW_LENGTH, AVRO, BACKWARD, BASE64, @@ -137,11 +140,13 @@ define_keywords!( BIT, BLOB, BLOCK, + BLOOM, BLOOMFILTER, BOOL, BOOLEAN, BOTH, BOX, + BRIN, BROWSE, BTREE, BUCKET, @@ -171,11 +176,13 @@ define_keywords!( CHANNEL, CHAR, CHARACTER, + CHARACTERISTICS, CHARACTERS, CHARACTER_LENGTH, CHARSET, CHAR_LENGTH, CHECK, + CHECKSUM, CIRCLE, CLEAR, CLOB, @@ -204,6 +211,7 @@ define_keywords!( CONNECT, CONNECTION, CONNECTOR, + CONNECT_BY_ROOT, CONSTRAINT, CONTAINS, CONTINUE, @@ -264,11 +272,13 @@ define_keywords!( DEFINED, DEFINER, DELAYED, + DELAY_KEY_WRITE, DELETE, DELIMITED, DELIMITER, DELTA, DENSE_RANK, + DENY, DEREF, DESC, DESCRIBE, @@ -283,6 +293,7 @@ define_keywords!( DISTRIBUTE, DIV, DO, + DOMAIN, DOUBLE, DOW, DOY, @@ -294,6 +305,7 @@ define_keywords!( ELEMENT, ELEMENTS, ELSE, + ELSEIF, EMPTY, ENABLE, ENABLE_SCHEMA_EVOLUTION, @@ -306,6 +318,7 @@ define_keywords!( END_PARTITION, ENFORCED, ENGINE, + ENGINE_ATTRIBUTE, ENUM, ENUM16, ENUM8, @@ -333,6 +346,7 @@ define_keywords!( EXPLAIN, EXPLICIT, EXPORT, + EXTEND, EXTENDED, EXTENSION, EXTERNAL, @@ -381,11 +395,14 @@ define_keywords!( FUNCTION, FUNCTIONS, FUSION, + FUTURE, GENERAL, GENERATE, GENERATED, GEOGRAPHY, GET, + GIN, + GIST, GLOBAL, GRANT, GRANTED, @@ -405,6 +422,7 @@ define_keywords!( HOSTS, HOUR, HOURS, + HUGEINT, ICEBERG, ID, IDENTITY, @@ -423,6 +441,7 @@ define_keywords!( INDEX, INDICATOR, INHERIT, + INHERITS, INITIALLY, INNER, INOUT, @@ -432,6 +451,7 @@ define_keywords!( INPUTFORMAT, INSENSITIVE, INSERT, + INSERT_METHOD, INSTALL, INSTANT, INSTEAD, @@ -468,6 +488,7 @@ define_keywords!( JULIAN, 
KEY, KEYS, + KEY_BLOCK_SIZE, KILL, LAG, LANGUAGE, @@ -521,12 +542,14 @@ define_keywords!( MAX, MAXVALUE, MAX_DATA_EXTENSION_TIME_IN_DAYS, + MAX_ROWS, MEASURES, MEDIUMBLOB, MEDIUMINT, MEDIUMTEXT, MEMBER, MERGE, + MESSAGE, METADATA, METHOD, METRIC, @@ -541,6 +564,7 @@ define_keywords!( MINUTE, MINUTES, MINVALUE, + MIN_ROWS, MOD, MODE, MODIFIES, @@ -553,6 +577,7 @@ define_keywords!( MULTISET, MUTATION, NAME, + NAMES, NANOSECOND, NANOSECONDS, NATIONAL, @@ -622,8 +647,10 @@ define_keywords!( ORDER, ORDINALITY, ORGANIZATION, + OTHER, OUT, OUTER, + OUTPUT, OUTPUTFORMAT, OVER, OVERFLOW, @@ -636,6 +663,7 @@ define_keywords!( OWNERSHIP, PACKAGE, PACKAGES, + PACK_KEYS, PARALLEL, PARAMETER, PARQUET, @@ -643,6 +671,7 @@ define_keywords!( PARTITION, PARTITIONED, PARTITIONS, + PASSING, PASSWORD, PAST, PATH, @@ -675,6 +704,7 @@ define_keywords!( PRESERVE, PREWHERE, PRIMARY, + PRINT, PRIOR, PRIVILEGES, PROCEDURE, @@ -688,6 +718,7 @@ define_keywords!( QUARTER, QUERY, QUOTE, + RAISE, RAISERROR, RANGE, RANK, @@ -729,6 +760,7 @@ define_keywords!( REPLICATION, RESET, RESOLVE, + RESOURCE, RESPECT, RESTART, RESTRICT, @@ -754,6 +786,7 @@ define_keywords!( ROW, ROWID, ROWS, + ROW_FORMAT, ROW_NUMBER, RULE, RUN, @@ -768,6 +801,7 @@ define_keywords!( SEARCH, SECOND, SECONDARY, + SECONDARY_ENGINE_ATTRIBUTE, SECONDS, SECRET, SECURITY, @@ -782,6 +816,7 @@ define_keywords!( SERDE, SERDEPROPERTIES, SERIALIZABLE, + SERVER, SERVICE, SESSION, SESSION_USER, @@ -790,6 +825,7 @@ define_keywords!( SETS, SETTINGS, SHARE, + SHARED, SHARING, SHOW, SIGNED, @@ -805,11 +841,13 @@ define_keywords!( SPATIAL, SPECIFIC, SPECIFICTYPE, + SPGIST, SQL, SQLEXCEPTION, SQLSTATE, SQLWARNING, SQRT, + SRID, STABLE, STAGE, START, @@ -817,19 +855,25 @@ define_keywords!( STATEMENT, STATIC, STATISTICS, + STATS_AUTO_RECALC, + STATS_PERSISTENT, + STATS_SAMPLE_PAGES, STATUS, STDDEV_POP, STDDEV_SAMP, STDIN, STDOUT, STEP, + STORAGE, STORAGE_INTEGRATION, STORAGE_SERIALIZATION_POLICY, STORED, + STRAIGHT_JOIN, STRICT, STRING, STRUCT, 
SUBMULTISET, + SUBSTR, SUBSTRING, SUBSTRING_REGEX, SUCCEEDS, @@ -847,6 +891,7 @@ define_keywords!( TABLE, TABLES, TABLESAMPLE, + TABLESPACE, TAG, TARGET, TASK, @@ -863,6 +908,7 @@ define_keywords!( TIME, TIMESTAMP, TIMESTAMPTZ, + TIMESTAMP_NTZ, TIMETZ, TIMEZONE, TIMEZONE_ABBR, @@ -891,9 +937,13 @@ define_keywords!( TRY, TRY_CAST, TRY_CONVERT, + TSQUERY, + TSVECTOR, TUPLE, TYPE, + UBIGINT, UESCAPE, + UHUGEINT, UINT128, UINT16, UINT256, @@ -927,9 +977,12 @@ define_keywords!( USER, USER_RESOURCES, USING, + USMALLINT, + UTINYINT, UUID, VACUUM, VALID, + VALIDATE, VALIDATION_MODE, VALUE, VALUES, @@ -957,6 +1010,7 @@ define_keywords!( WHEN, WHENEVER, WHERE, + WHILE, WIDTH_BUCKET, WINDOW, WITH, @@ -964,8 +1018,11 @@ define_keywords!( WITHOUT, WITHOUT_ARRAY_WRAPPER, WORK, + WRAPPER, WRITE, XML, + XMLNAMESPACES, + XMLTABLE, XOR, YEAR, YEARS, @@ -1038,6 +1095,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::SAMPLE, Keyword::TABLESAMPLE, Keyword::FROM, + Keyword::OPEN, ]; /// Can't be used as a column alias, so that `SELECT alias` @@ -1061,6 +1119,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::FETCH, Keyword::UNION, Keyword::EXCEPT, + Keyword::EXCLUDE, Keyword::INTERSECT, Keyword::MINUS, Keyword::CLUSTER, @@ -1072,7 +1131,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::END, ]; -// Global list of reserved keywords alloweed after FROM. +// Global list of reserved keywords allowed after FROM. // Parser should call Dialect::get_reserved_keyword_after_from // to allow for each dialect to customize the list. pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[ diff --git a/src/lib.rs b/src/lib.rs index 5d72f9f0..dbfd1791 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,6 +64,27 @@ //! // The original SQL text can be generated from the AST //! assert_eq!(ast[0].to_string(), sql); //! ``` +//! +//! # Pretty Printing +//! +//! 
SQL statements can be pretty-printed with proper indentation and line breaks using the alternate flag (`{:#}`): +//! +//! ``` +//! # use sqlparser::dialect::GenericDialect; +//! # use sqlparser::parser::Parser; +//! let sql = "SELECT a, b FROM table_1"; +//! let ast = Parser::parse_sql(&GenericDialect, sql).unwrap(); +//! +//! // Pretty print with indentation and line breaks +//! let pretty_sql = format!("{:#}", ast[0]); +//! assert_eq!(pretty_sql, r#" +//! SELECT +//! a, +//! b +//! FROM +//! table_1 +//! "#.trim()); +//! ``` //! [sqlparser crates.io page]: https://crates.io/crates/sqlparser //! [`Parser::parse_sql`]: crate::parser::Parser::parse_sql //! [`Parser::new`]: crate::parser::Parser::new @@ -128,6 +149,10 @@ #![cfg_attr(not(feature = "std"), no_std)] #![allow(clippy::upper_case_acronyms)] +// Permit large enum variants to keep a unified, expressive AST. +// Splitting complex nodes (expressions, statements, types) into separate types +// would bloat the API and hide intent. Extra memory is a worthwhile tradeoff. +#![allow(clippy::large_enum_variant)] // Allow proc-macros to find this crate extern crate self as sqlparser; @@ -142,6 +167,7 @@ extern crate pretty_assertions; pub mod ast; #[macro_use] pub mod dialect; +mod display_utils; pub mod keywords; pub mod parser; pub mod tokenizer; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f234fcc0..47b63da8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -222,6 +222,9 @@ pub struct ParserOptions { /// Controls how literal values are unescaped. See /// [`Tokenizer::with_unescape`] for more details. pub unescape: bool, + /// Controls if the parser expects a semi-colon token + /// between statements. Default is `true`. 
+ pub require_semicolon_stmt_delimiter: bool, } impl Default for ParserOptions { @@ -229,6 +232,7 @@ impl Default for ParserOptions { Self { trailing_commas: false, unescape: true, + require_semicolon_stmt_delimiter: true, } } } @@ -436,7 +440,7 @@ impl<'a> Parser<'a> { /// /// See example on [`Parser::new()`] for an example pub fn try_with_sql(self, sql: &str) -> Result { - debug!("Parsing sql '{}'...", sql); + debug!("Parsing sql '{sql}'..."); let tokens = Tokenizer::new(self.dialect, sql) .with_unescape(self.options.unescape) .tokenize_with_location()?; @@ -467,6 +471,10 @@ impl<'a> Parser<'a> { expecting_statement_delimiter = false; } + if !self.options.require_semicolon_stmt_delimiter { + expecting_statement_delimiter = false; + } + match self.peek_token().token { Token::EOF => break, @@ -528,6 +536,22 @@ impl<'a> Parser<'a> { Keyword::DESCRIBE => self.parse_explain(DescribeAlias::Describe), Keyword::EXPLAIN => self.parse_explain(DescribeAlias::Explain), Keyword::ANALYZE => self.parse_analyze(), + Keyword::CASE => { + self.prev_token(); + self.parse_case_stmt() + } + Keyword::IF => { + self.prev_token(); + self.parse_if_stmt() + } + Keyword::WHILE => { + self.prev_token(); + self.parse_while() + } + Keyword::RAISE => { + self.prev_token(); + self.parse_raise_stmt() + } Keyword::SELECT | Keyword::WITH | Keyword::VALUES | Keyword::FROM => { self.prev_token(); self.parse_query().map(Statement::Query) @@ -558,20 +582,22 @@ impl<'a> Parser<'a> { Keyword::ALTER => self.parse_alter(), Keyword::CALL => self.parse_call(), Keyword::COPY => self.parse_copy(), + Keyword::OPEN => { + self.prev_token(); + self.parse_open() + } Keyword::CLOSE => self.parse_close(), Keyword::SET => self.parse_set(), Keyword::SHOW => self.parse_show(), Keyword::USE => self.parse_use(), Keyword::GRANT => self.parse_grant(), + Keyword::DENY => { + self.prev_token(); + self.parse_deny() + } Keyword::REVOKE => self.parse_revoke(), Keyword::START => self.parse_start_transaction(), - // `BEGIN` is a 
nonstandard but common alias for the - // standard `START TRANSACTION` statement. It is supported - // by at least PostgreSQL and MySQL. Keyword::BEGIN => self.parse_begin(), - // `END` is a nonstandard but common alias for the - // standard `COMMIT TRANSACTION` statement. It is supported - // by PostgreSQL. Keyword::END => self.parse_end(), Keyword::SAVEPOINT => self.parse_savepoint(), Keyword::RELEASE => self.parse_release(), @@ -605,6 +631,8 @@ impl<'a> Parser<'a> { } // `COMMENT` is snowflake specific https://docs.snowflake.com/en/sql-reference/sql/comment Keyword::COMMENT if self.dialect.supports_comment_on() => self.parse_comment(), + Keyword::PRINT => self.parse_print(), + Keyword::RETURN => self.parse_return(), _ => self.expected("an SQL statement", next_token), }, Token::LParen => { @@ -615,6 +643,170 @@ impl<'a> Parser<'a> { } } + /// Parse a `CASE` statement. + /// + /// See [Statement::Case] + pub fn parse_case_stmt(&mut self) -> Result { + let case_token = self.expect_keyword(Keyword::CASE)?; + + let match_expr = if self.peek_keyword(Keyword::WHEN) { + None + } else { + Some(self.parse_expr()?) + }; + + self.expect_keyword_is(Keyword::WHEN)?; + let when_blocks = self.parse_keyword_separated(Keyword::WHEN, |parser| { + parser.parse_conditional_statement_block(&[Keyword::WHEN, Keyword::ELSE, Keyword::END]) + })?; + + let else_block = if self.parse_keyword(Keyword::ELSE) { + Some(self.parse_conditional_statement_block(&[Keyword::END])?) + } else { + None + }; + + let mut end_case_token = self.expect_keyword(Keyword::END)?; + if self.peek_keyword(Keyword::CASE) { + end_case_token = self.expect_keyword(Keyword::CASE)?; + } + + Ok(Statement::Case(CaseStatement { + case_token: AttachedToken(case_token), + match_expr, + when_blocks, + else_block, + end_case_token: AttachedToken(end_case_token), + })) + } + + /// Parse an `IF` statement. 
+ /// + /// See [Statement::If] + pub fn parse_if_stmt(&mut self) -> Result { + self.expect_keyword_is(Keyword::IF)?; + let if_block = self.parse_conditional_statement_block(&[ + Keyword::ELSE, + Keyword::ELSEIF, + Keyword::END, + ])?; + + let elseif_blocks = if self.parse_keyword(Keyword::ELSEIF) { + self.parse_keyword_separated(Keyword::ELSEIF, |parser| { + parser.parse_conditional_statement_block(&[ + Keyword::ELSEIF, + Keyword::ELSE, + Keyword::END, + ]) + })? + } else { + vec![] + }; + + let else_block = if self.parse_keyword(Keyword::ELSE) { + Some(self.parse_conditional_statement_block(&[Keyword::END])?) + } else { + None + }; + + self.expect_keyword_is(Keyword::END)?; + let end_token = self.expect_keyword(Keyword::IF)?; + + Ok(Statement::If(IfStatement { + if_block, + elseif_blocks, + else_block, + end_token: Some(AttachedToken(end_token)), + })) + } + + /// Parse a `WHILE` statement. + /// + /// See [Statement::While] + fn parse_while(&mut self) -> Result { + self.expect_keyword_is(Keyword::WHILE)?; + let while_block = self.parse_conditional_statement_block(&[Keyword::END])?; + + Ok(Statement::While(WhileStatement { while_block })) + } + + /// Parses an expression and associated list of statements + /// belonging to a conditional statement like `IF` or `WHEN` or `WHILE`. 
+ /// + /// Example: + /// ```sql + /// IF condition THEN statement1; statement2; + /// ``` + fn parse_conditional_statement_block( + &mut self, + terminal_keywords: &[Keyword], + ) -> Result { + let start_token = self.get_current_token().clone(); // self.expect_keyword(keyword)?; + let mut then_token = None; + + let condition = match &start_token.token { + Token::Word(w) if w.keyword == Keyword::ELSE => None, + Token::Word(w) if w.keyword == Keyword::WHILE => { + let expr = self.parse_expr()?; + Some(expr) + } + _ => { + let expr = self.parse_expr()?; + then_token = Some(AttachedToken(self.expect_keyword(Keyword::THEN)?)); + Some(expr) + } + }; + + let conditional_statements = self.parse_conditional_statements(terminal_keywords)?; + + Ok(ConditionalStatementBlock { + start_token: AttachedToken(start_token), + condition, + then_token, + conditional_statements, + }) + } + + /// Parse a BEGIN/END block or a sequence of statements + /// This could be inside of a conditional (IF, CASE, WHILE etc.) or an object body defined optionally BEGIN/END and one or more statements. + pub(crate) fn parse_conditional_statements( + &mut self, + terminal_keywords: &[Keyword], + ) -> Result { + let conditional_statements = if self.peek_keyword(Keyword::BEGIN) { + let begin_token = self.expect_keyword(Keyword::BEGIN)?; + let statements = self.parse_statement_list(terminal_keywords)?; + let end_token = self.expect_keyword(Keyword::END)?; + + ConditionalStatements::BeginEnd(BeginEndStatements { + begin_token: AttachedToken(begin_token), + statements, + end_token: AttachedToken(end_token), + }) + } else { + ConditionalStatements::Sequence { + statements: self.parse_statement_list(terminal_keywords)?, + } + }; + Ok(conditional_statements) + } + + /// Parse a `RAISE` statement. 
+ /// + /// See [Statement::Raise] + pub fn parse_raise_stmt(&mut self) -> Result { + self.expect_keyword_is(Keyword::RAISE)?; + + let value = if self.parse_keywords(&[Keyword::USING, Keyword::MESSAGE]) { + self.expect_token(&Token::Eq)?; + Some(RaiseStatementValue::UsingMessage(self.parse_expr()?)) + } else { + self.maybe_parse(|parser| parser.parse_expr().map(RaiseStatementValue::Expr))? + }; + + Ok(Statement::Raise(RaiseStatement { value })) + } + pub fn parse_comment(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); @@ -776,12 +968,13 @@ impl<'a> Parser<'a> { pub fn parse_truncate(&mut self) -> Result { let table = self.parse_keyword(Keyword::TABLE); - let only = self.parse_keyword(Keyword::ONLY); let table_names = self - .parse_comma_separated(|p| p.parse_object_name(false))? + .parse_comma_separated(|p| { + Ok((p.parse_keyword(Keyword::ONLY), p.parse_object_name(false)?)) + })? .into_iter() - .map(|n| TruncateTableTarget { name: n }) + .map(|(only, name)| TruncateTableTarget { name, only }) .collect(); let mut partitions = None; @@ -812,7 +1005,6 @@ impl<'a> Parser<'a> { table_names, partitions, table, - only, identity, cascade, on_cluster, @@ -1015,6 +1207,25 @@ impl<'a> Parser<'a> { self.parse_subexpr(self.dialect.prec_unknown()) } + pub fn parse_expr_with_alias_and_order_by( + &mut self, + ) -> Result { + let expr = self.parse_expr()?; + + fn validator(explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool { + explicit || !&[Keyword::ASC, Keyword::DESC, Keyword::GROUP].contains(kw) + } + let alias = self.parse_optional_alias_inner(None, validator)?; + let order_by = OrderByOptions { + asc: self.parse_asc_desc(), + nulls_first: None, + }; + Ok(ExprWithAliasAndOrderBy { + expr: ExprWithAlias { expr, alias }, + order_by, + }) + } + /// Parse tokens until the precedence changes. 
pub fn parse_subexpr(&mut self, precedence: u8) -> Result { let _guard = self.recursion_counter.try_decrease()?; @@ -1023,10 +1234,10 @@ impl<'a> Parser<'a> { expr = self.parse_compound_expr(expr, vec![])?; - debug!("prefix: {:?}", expr); + debug!("prefix: {expr:?}"); loop { let next_precedence = self.get_next_precedence()?; - debug!("next precedence: {:?}", next_precedence); + debug!("next precedence: {next_precedence:?}"); if precedence >= next_precedence { break; @@ -1178,7 +1389,10 @@ impl<'a> Parser<'a> { Keyword::POSITION if self.peek_token_ref().token == Token::LParen => { Ok(Some(self.parse_position_expr(w.clone().into_ident(w_span))?)) } - Keyword::SUBSTRING => Ok(Some(self.parse_substring_expr()?)), + Keyword::SUBSTR | Keyword::SUBSTRING => { + self.prev_token(); + Ok(Some(self.parse_substring()?)) + } Keyword::OVERLAY => Ok(Some(self.parse_overlay_expr()?)), Keyword::TRIM => Ok(Some(self.parse_trim_expr()?)), Keyword::INTERVAL => Ok(Some(self.parse_interval()?)), @@ -1251,9 +1465,9 @@ impl<'a> Parser<'a> { | Token::HexStringLiteral(_) if w.value.starts_with('_') => { - Ok(Expr::IntroducedString { - introducer: w.value.clone(), - value: self.parse_introduced_string_value()?, + Ok(Expr::Prefixed { + prefix: w.clone().into_ident(w_span), + value: self.parse_introduced_string_expr()?.into(), }) } // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html @@ -1262,9 +1476,9 @@ impl<'a> Parser<'a> { | Token::HexStringLiteral(_) if w.value.starts_with('_') => { - Ok(Expr::IntroducedString { - introducer: w.value.clone(), - value: self.parse_introduced_string_value()?, + Ok(Expr::Prefixed { + prefix: w.clone().into_ident(w_span), + value: self.parse_introduced_string_expr()?.into(), }) } Token::Arrow if self.dialect.supports_lambda_functions() => { @@ -1315,7 +1529,7 @@ impl<'a> Parser<'a> { DataType::Custom(..) 
=> parser_err!("dummy", loc), data_type => Ok(Expr::TypedString { data_type, - value: parser.parse_value()?.value, + value: parser.parse_value()?, }), } })?; @@ -1425,8 +1639,7 @@ impl<'a> Parser<'a> { Token::QuestionPipe => UnaryOperator::QuestionPipe, _ => { return Err(ParserError::ParserError(format!( - "Unexpected token in unary operator parsing: {:?}", - tok + "Unexpected token in unary operator parsing: {tok:?}" ))) } }; @@ -1503,10 +1716,9 @@ impl<'a> Parser<'a> { } fn parse_geometric_type(&mut self, kind: GeometricTypeKind) -> Result { - let value: Value = self.parse_value()?.value; Ok(Expr::TypedString { data_type: DataType::GeometricType(kind), - value, + value: self.parse_value()?, }) } @@ -1695,6 +1907,15 @@ impl<'a> Parser<'a> { }) } + fn keyword_to_modifier(k: Keyword) -> Option { + match k { + Keyword::LOCAL => Some(ContextModifier::Local), + Keyword::GLOBAL => Some(ContextModifier::Global), + Keyword::SESSION => Some(ContextModifier::Session), + _ => None, + } + } + /// Check if the root is an identifier and all fields are identifiers. 
fn is_all_ident(root: &Expr, fields: &[AccessExpr]) -> bool { if !matches!(root, Expr::Identifier(_)) { @@ -2059,6 +2280,7 @@ impl<'a> Parser<'a> { } pub fn parse_case_expr(&mut self) -> Result { + let case_token = AttachedToken(self.get_current_token().clone()); let mut operand = None; if !self.parse_keyword(Keyword::WHEN) { operand = Some(Box::new(self.parse_expr()?)); @@ -2079,8 +2301,10 @@ impl<'a> Parser<'a> { } else { None }; - self.expect_keyword_is(Keyword::END)?; + let end_token = AttachedToken(self.expect_keyword(Keyword::END)?); Ok(Expr::Case { + case_token, + end_token, operand, conditions, else_result, @@ -2279,8 +2503,16 @@ impl<'a> Parser<'a> { } } - pub fn parse_substring_expr(&mut self) -> Result { - // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3]) + // { SUBSTRING | SUBSTR } ( [FROM 1] [FOR 3]) + pub fn parse_substring(&mut self) -> Result { + let shorthand = match self.expect_one_of_keywords(&[Keyword::SUBSTR, Keyword::SUBSTRING])? { + Keyword::SUBSTR => true, + Keyword::SUBSTRING => false, + _ => { + self.prev_token(); + return self.expected("SUBSTR or SUBSTRING", self.peek_token()); + } + }; self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; let mut from_expr = None; @@ -2300,6 +2532,7 @@ impl<'a> Parser<'a> { substring_from: from_expr.map(Box::new), substring_for: to_expr.map(Box::new), special, + shorthand, }) } @@ -2334,10 +2567,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let mut trim_where = None; if let Token::Word(word) = self.peek_token().token { - if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING] - .iter() - .any(|d| word.keyword == *d) - { + if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING].contains(&word.keyword) { trim_where = Some(self.parse_trim_where()?); } } @@ -2353,7 +2583,7 @@ impl<'a> Parser<'a> { trim_characters: None, }) } else if self.consume_token(&Token::Comma) - && dialect_of!(self is SnowflakeDialect | BigQueryDialect | GenericDialect) + && dialect_of!(self is DuckDbDialect | 
SnowflakeDialect | BigQueryDialect | GenericDialect) { let characters = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; @@ -2547,7 +2777,7 @@ impl<'a> Parser<'a> { if self.dialect.supports_dictionary_syntax() { self.prev_token(); // Put back the '{' - return self.parse_duckdb_struct_literal(); + return self.parse_dictionary(); } self.expected("an expression", token) @@ -2559,7 +2789,7 @@ impl<'a> Parser<'a> { /// This method will raise an error if the column list is empty or with invalid identifiers, /// the match expression is not a literal string, or if the search modifier is not valid. pub fn parse_match_against(&mut self) -> Result { - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let columns = self.parse_parenthesized_qualified_column_list(Mandatory, false)?; self.expect_keyword_is(Keyword::AGAINST)?; @@ -2810,7 +3040,6 @@ impl<'a> Parser<'a> { where F: FnMut(&mut Parser<'a>) -> Result<(StructField, MatchedTrailingBracket), ParserError>, { - let start_token = self.peek_token(); self.expect_keyword_is(Keyword::STRUCT)?; // Nothing to do if we have no type information. @@ -2823,16 +3052,10 @@ impl<'a> Parser<'a> { let trailing_bracket = loop { let (def, trailing_bracket) = elem_parser(self)?; field_defs.push(def); - if !self.consume_token(&Token::Comma) { + // The struct field definition is finished if it occurs `>>` or comma. + if trailing_bracket.0 || !self.consume_token(&Token::Comma) { break trailing_bracket; } - - // Angle brackets are balanced so we only expect the trailing `>>` after - // we've matched all field types for the current struct. - // e.g. 
this is invalid syntax `STRUCT>>, INT>(NULL)` - if trailing_bracket.0 { - return parser_err!("unmatched > in STRUCT definition", start_token.span.start); - } }; Ok(( @@ -2852,6 +3075,7 @@ impl<'a> Parser<'a> { Ok(StructField { field_name: Some(field_name), field_type, + options: None, }) }); self.expect_token(&Token::RParen)?; @@ -2885,10 +3109,12 @@ impl<'a> Parser<'a> { let (field_type, trailing_bracket) = self.parse_data_type_helper()?; + let options = self.maybe_parse_options(Keyword::OPTIONS)?; Ok(( StructField { field_name, field_type, + options, }, trailing_bracket, )) @@ -2920,7 +3146,7 @@ impl<'a> Parser<'a> { Ok(fields) } - /// DuckDB specific: Parse a duckdb [dictionary] + /// DuckDB and ClickHouse specific: Parse a duckdb [dictionary] or a clickhouse [map] setting /// /// Syntax: /// @@ -2929,18 +3155,18 @@ impl<'a> Parser<'a> { /// ``` /// /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs - fn parse_duckdb_struct_literal(&mut self) -> Result { + /// [map]: https://clickhouse.com/docs/operations/settings/settings#additional_table_filters + fn parse_dictionary(&mut self) -> Result { self.expect_token(&Token::LBrace)?; - let fields = - self.parse_comma_separated0(Self::parse_duckdb_dictionary_field, Token::RBrace)?; + let fields = self.parse_comma_separated0(Self::parse_dictionary_field, Token::RBrace)?; self.expect_token(&Token::RBrace)?; Ok(Expr::Dictionary(fields)) } - /// Parse a field for a duckdb [dictionary] + /// Parse a field for a duckdb [dictionary] or a clickhouse [map] setting /// /// Syntax /// @@ -2949,7 +3175,8 @@ impl<'a> Parser<'a> { /// ``` /// /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs - fn parse_duckdb_dictionary_field(&mut self) -> Result { + /// [map]: https://clickhouse.com/docs/operations/settings/settings#additional_table_filters + fn parse_dictionary_field(&mut self) -> Result { let key = self.parse_identifier()?; self.expect_token(&Token::Colon)?; @@ -3088,6 
+3315,7 @@ impl<'a> Parser<'a> { let regular_binary_operator = match &tok.token { Token::Spaceship => Some(BinaryOperator::Spaceship), Token::DoubleEq => Some(BinaryOperator::Eq), + Token::Assignment => Some(BinaryOperator::Assignment), Token::Eq => Some(BinaryOperator::Eq), Token::Neq => Some(BinaryOperator::NotEq), Token::Gt => Some(BinaryOperator::Gt), @@ -3388,6 +3616,19 @@ impl<'a> Parser<'a> { self.expected("IN or BETWEEN after NOT", self.peek_token()) } } + Keyword::MEMBER => { + if self.parse_keyword(Keyword::OF) { + self.expect_token(&Token::LParen)?; + let array = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::MemberOf(MemberOf { + value: Box::new(expr), + array: Box::new(array), + })) + } else { + self.expected("OF after MEMBER", self.peek_token()) + } + } // Can only happen if `get_next_precedence` got out of sync with this function _ => parser_err!( format!("No infix parser for token {:?}", tok.token), @@ -3421,9 +3662,9 @@ impl<'a> Parser<'a> { } /// Parse the `ESCAPE CHAR` portion of `LIKE`, `ILIKE`, and `SIMILAR TO` - pub fn parse_escape_char(&mut self) -> Result, ParserError> { + pub fn parse_escape_char(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::ESCAPE) { - Ok(Some(self.parse_literal_string()?)) + Ok(Some(self.parse_value()?.into())) } else { Ok(None) } @@ -3596,15 +3837,13 @@ impl<'a> Parser<'a> { }); } self.expect_token(&Token::LParen)?; - let in_op = if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { - self.prev_token(); - Expr::InSubquery { + let in_op = match self.maybe_parse(|p| p.parse_query())? { + Some(subquery) => Expr::InSubquery { expr: Box::new(expr), - subquery: self.parse_query()?, + subquery, negated, - } - } else { - Expr::InList { + }, + None => Expr::InList { expr: Box::new(expr), list: if self.dialect.supports_in_empty_list() { self.parse_comma_separated0(Parser::parse_expr, Token::RParen)? 
@@ -3612,7 +3851,7 @@ impl<'a> Parser<'a> { self.parse_comma_separated(Parser::parse_expr)? }, negated, - } + }, }; self.expect_token(&Token::RParen)?; Ok(in_op) @@ -3633,7 +3872,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a postgresql casting style which is in the form of `expr::datatype`. + /// Parse a PostgreSQL casting style which is in the form of `expr::datatype`. pub fn parse_pg_cast(&mut self, expr: Expr) -> Result { Ok(Expr::Cast { kind: CastKind::DoubleColon, @@ -3955,6 +4194,18 @@ impl<'a> Parser<'a> { true } + /// If the current token is one of the given `keywords`, returns the keyword + /// that matches, without consuming the token. Otherwise, returns [`None`]. + #[must_use] + pub fn peek_one_of_keywords(&self, keywords: &[Keyword]) -> Option { + for keyword in keywords { + if self.peek_keyword(*keyword) { + return Some(*keyword); + } + } + None + } + /// If the current token is one of the given `keywords`, consume the token /// and return the keyword that matches. Otherwise, no tokens are consumed /// and returns [`None`]. @@ -4282,10 +4533,14 @@ impl<'a> Parser<'a> { ) -> Result, ParserError> { let mut values = vec![]; loop { - if let Token::Word(w) = &self.peek_nth_token_ref(0).token { - if w.quote_style.is_none() && terminal_keywords.contains(&w.keyword) { - break; + match &self.peek_nth_token_ref(0).token { + Token::EOF => break, + Token::Word(w) => { + if w.quote_style.is_none() && terminal_keywords.contains(&w.keyword) { + break; + } } + _ => {} } values.push(self.parse_statement()?); @@ -4302,7 +4557,8 @@ impl<'a> Parser<'a> { } /// Run a parser method `f`, reverting back to the current position if unsuccessful. - /// Returns `None` if `f` returns an error + /// Returns `ParserError::RecursionLimitExceeded` if `f` returns a `RecursionLimitExceeded`. + /// Returns `Ok(None)` if `f` returns any other error. 
pub fn maybe_parse(&mut self, f: F) -> Result, ParserError> where F: FnMut(&mut Parser) -> Result, @@ -4382,17 +4638,19 @@ impl<'a> Parser<'a> { self.parse_create_table(or_replace, temporary, global, transient) } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { self.prev_token(); - self.parse_create_view(or_replace, temporary, create_view_params) + self.parse_create_view(or_alter, or_replace, temporary, create_view_params) } else if self.parse_keyword(Keyword::POLICY) { self.parse_create_policy() } else if self.parse_keyword(Keyword::EXTERNAL) { self.parse_create_external_table(or_replace) } else if self.parse_keyword(Keyword::FUNCTION) { - self.parse_create_function(or_replace, temporary) + self.parse_create_function(or_alter, or_replace, temporary) + } else if self.parse_keyword(Keyword::DOMAIN) { + self.parse_create_domain() } else if self.parse_keyword(Keyword::TRIGGER) { - self.parse_create_trigger(or_replace, false) + self.parse_create_trigger(or_alter, or_replace, false) } else if self.parse_keywords(&[Keyword::CONSTRAINT, Keyword::TRIGGER]) { - self.parse_create_trigger(or_replace, true) + self.parse_create_trigger(or_alter, or_replace, true) } else if self.parse_keyword(Keyword::MACRO) { self.parse_create_macro(or_replace, temporary) } else if self.parse_keyword(Keyword::SECRET) { @@ -4424,6 +4682,8 @@ impl<'a> Parser<'a> { self.parse_create_procedure(or_alter) } else if self.parse_keyword(Keyword::CONNECTOR) { self.parse_create_connector() + } else if self.parse_keyword(Keyword::SERVER) { + self.parse_pg_create_server() } else { self.expected("an object type after CREATE", self.peek_token()) } @@ -4614,9 +4874,30 @@ impl<'a> Parser<'a> { let schema_name = self.parse_schema_name()?; + let default_collate_spec = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::COLLATE]) { + Some(self.parse_expr()?) + } else { + None + }; + + let with = if self.peek_keyword(Keyword::WITH) { + Some(self.parse_options(Keyword::WITH)?) 
+ } else { + None + }; + + let options = if self.peek_keyword(Keyword::OPTIONS) { + Some(self.parse_options(Keyword::OPTIONS)?) + } else { + None + }; + Ok(Statement::CreateSchema { schema_name, if_not_exists, + with, + options, + default_collate_spec, }) } @@ -4683,6 +4964,7 @@ impl<'a> Parser<'a> { pub fn parse_create_function( &mut self, + or_alter: bool, or_replace: bool, temporary: bool, ) -> Result { @@ -4694,15 +4976,17 @@ impl<'a> Parser<'a> { self.parse_create_macro(or_replace, temporary) } else if dialect_of!(self is BigQueryDialect) { self.parse_bigquery_create_function(or_replace, temporary) + } else if dialect_of!(self is MsSqlDialect) { + self.parse_mssql_create_function(or_alter, or_replace, temporary) } else { self.prev_token(); self.expected("an object type after CREATE", self.peek_token()) } } - /// Parse `CREATE FUNCTION` for [Postgres] + /// Parse `CREATE FUNCTION` for [PostgreSQL] /// - /// [Postgres]: https://www.postgresql.org/docs/15/sql-createfunction.html + /// [PostgreSQL]: https://www.postgresql.org/docs/15/sql-createfunction.html fn parse_postgres_create_function( &mut self, or_replace: bool, @@ -4808,6 +5092,7 @@ impl<'a> Parser<'a> { } Ok(Statement::CreateFunction(CreateFunction { + or_alter: false, or_replace, temporary, name, @@ -4841,6 +5126,7 @@ impl<'a> Parser<'a> { let using = self.parse_optional_create_function_using()?; Ok(Statement::CreateFunction(CreateFunction { + or_alter: false, or_replace, temporary, name, @@ -4868,22 +5154,7 @@ impl<'a> Parser<'a> { temporary: bool, ) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let name = self.parse_object_name(false)?; - - let parse_function_param = - |parser: &mut Parser| -> Result { - let name = parser.parse_identifier()?; - let data_type = parser.parse_data_type()?; - Ok(OperateFunctionArg { - mode: None, - name: Some(name), - data_type, - default_expr: None, - }) - }; - self.expect_token(&Token::LParen)?; - let args = 
self.parse_comma_separated0(parse_function_param, Token::RParen)?; - self.expect_token(&Token::RParen)?; + let (name, args) = self.parse_create_function_name_and_params()?; let return_type = if self.parse_keyword(Keyword::RETURNS) { Some(self.parse_data_type()?) @@ -4930,6 +5201,7 @@ impl<'a> Parser<'a> { }; Ok(Statement::CreateFunction(CreateFunction { + or_alter: false, or_replace, temporary, if_not_exists, @@ -4948,6 +5220,122 @@ impl<'a> Parser<'a> { })) } + /// Parse `CREATE FUNCTION` for [MsSql] + /// + /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql + fn parse_mssql_create_function( + &mut self, + or_alter: bool, + or_replace: bool, + temporary: bool, + ) -> Result { + let (name, args) = self.parse_create_function_name_and_params()?; + + self.expect_keyword(Keyword::RETURNS)?; + + let return_table = self.maybe_parse(|p| { + let return_table_name = p.parse_identifier()?; + + p.expect_keyword_is(Keyword::TABLE)?; + p.prev_token(); + + let table_column_defs = match p.parse_data_type()? { + DataType::Table(Some(table_column_defs)) if !table_column_defs.is_empty() => { + table_column_defs + } + _ => parser_err!( + "Expected table column definitions after TABLE keyword", + p.peek_token().span.start + )?, + }; + + Ok(DataType::NamedTable { + name: ObjectName(vec![ObjectNamePart::Identifier(return_table_name)]), + columns: table_column_defs, + }) + })?; + + let return_type = if return_table.is_some() { + return_table + } else { + Some(self.parse_data_type()?) 
+ }; + + let _ = self.parse_keyword(Keyword::AS); + + let function_body = if self.peek_keyword(Keyword::BEGIN) { + let begin_token = self.expect_keyword(Keyword::BEGIN)?; + let statements = self.parse_statement_list(&[Keyword::END])?; + let end_token = self.expect_keyword(Keyword::END)?; + + Some(CreateFunctionBody::AsBeginEnd(BeginEndStatements { + begin_token: AttachedToken(begin_token), + statements, + end_token: AttachedToken(end_token), + })) + } else if self.parse_keyword(Keyword::RETURN) { + if self.peek_token() == Token::LParen { + Some(CreateFunctionBody::AsReturnExpr(self.parse_expr()?)) + } else if self.peek_keyword(Keyword::SELECT) { + let select = self.parse_select()?; + Some(CreateFunctionBody::AsReturnSelect(select)) + } else { + parser_err!( + "Expected a subquery (or bare SELECT statement) after RETURN", + self.peek_token().span.start + )? + } + } else { + parser_err!("Unparsable function body", self.peek_token().span.start)? + }; + + Ok(Statement::CreateFunction(CreateFunction { + or_alter, + or_replace, + temporary, + if_not_exists: false, + name, + args: Some(args), + return_type, + function_body, + language: None, + determinism_specifier: None, + options: None, + remote_connection: None, + using: None, + behavior: None, + called_on_null: None, + parallel: None, + })) + } + + fn parse_create_function_name_and_params( + &mut self, + ) -> Result<(ObjectName, Vec), ParserError> { + let name = self.parse_object_name(false)?; + let parse_function_param = + |parser: &mut Parser| -> Result { + let name = parser.parse_identifier()?; + let data_type = parser.parse_data_type()?; + let default_expr = if parser.consume_token(&Token::Eq) { + Some(parser.parse_expr()?) 
+ } else { + None + }; + + Ok(OperateFunctionArg { + mode: None, + name: Some(name), + data_type, + default_expr, + }) + }; + self.expect_token(&Token::LParen)?; + let args = self.parse_comma_separated0(parse_function_param, Token::RParen)?; + self.expect_token(&Token::RParen)?; + Ok((name, args)) + } + fn parse_function_arg(&mut self) -> Result { let mode = if self.parse_keyword(Keyword::IN) { Some(ArgMode::In) @@ -4962,12 +5350,21 @@ impl<'a> Parser<'a> { // parse: [ argname ] argtype let mut name = None; let mut data_type = self.parse_data_type()?; - if let DataType::Custom(n, _) = &data_type { - // the first token is actually a name - match n.0[0].clone() { - ObjectNamePart::Identifier(ident) => name = Some(ident), + + // To check whether the first token is a name or a type, we need to + // peek the next token, which if it is another type keyword, then the + // first token is a name and not a type in itself. + let data_type_idx = self.get_current_index(); + if let Some(next_data_type) = self.maybe_parse(|parser| parser.parse_data_type())? { + let token = self.token_at(data_type_idx); + + // We ensure that the token is a `Word` token, and not other special tokens. 
+ if !matches!(token.token, Token::Word(_)) { + return self.expected("a name or type", token.clone()); } - data_type = self.parse_data_type()?; + + name = Some(Ident::new(token.to_string())); + data_type = next_data_type; } let default_expr = if self.parse_keyword(Keyword::DEFAULT) || self.consume_token(&Token::Eq) @@ -4990,7 +5387,7 @@ impl<'a> Parser<'a> { /// DROP TRIGGER [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] /// ``` pub fn parse_drop_trigger(&mut self) -> Result { - if !dialect_of!(self is PostgreSqlDialect | GenericDialect | MySqlDialect) { + if !dialect_of!(self is PostgreSqlDialect | GenericDialect | MySqlDialect | MsSqlDialect) { self.prev_token(); return self.expected("an object type after DROP", self.peek_token()); } @@ -5018,10 +5415,11 @@ impl<'a> Parser<'a> { pub fn parse_create_trigger( &mut self, + or_alter: bool, or_replace: bool, is_constraint: bool, ) -> Result { - if !dialect_of!(self is PostgreSqlDialect | GenericDialect | MySqlDialect) { + if !dialect_of!(self is PostgreSqlDialect | GenericDialect | MySqlDialect | MsSqlDialect) { self.prev_token(); return self.expected("an object type after CREATE", self.peek_token()); } @@ -5067,6 +5465,7 @@ impl<'a> Parser<'a> { let exec_body = self.parse_trigger_exec_body()?; Ok(Statement::CreateTrigger { + or_alter, or_replace, is_constraint, name, @@ -5078,7 +5477,8 @@ impl<'a> Parser<'a> { trigger_object, include_each, condition, - exec_body, + exec_body: Some(exec_body), + statements: None, characteristics, }) } @@ -5086,10 +5486,12 @@ impl<'a> Parser<'a> { pub fn parse_trigger_period(&mut self) -> Result { Ok( match self.expect_one_of_keywords(&[ + Keyword::FOR, Keyword::BEFORE, Keyword::AFTER, Keyword::INSTEAD, ])? 
{ + Keyword::FOR => TriggerPeriod::For, Keyword::BEFORE => TriggerPeriod::Before, Keyword::AFTER => TriggerPeriod::After, Keyword::INSTEAD => self @@ -5228,12 +5630,17 @@ impl<'a> Parser<'a> { }; let location = hive_formats.location.clone(); let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; + let table_options = if !table_properties.is_empty() { + CreateTableOptions::TableProperties(table_properties) + } else { + CreateTableOptions::None + }; Ok(CreateTableBuilder::new(table_name) .columns(columns) .constraints(constraints) .hive_distribution(hive_distribution) .hive_formats(Some(hive_formats)) - .table_properties(table_properties) + .table_options(table_options) .or_replace(or_replace) .if_not_exists(if_not_exists) .external(true) @@ -5274,6 +5681,7 @@ impl<'a> Parser<'a> { pub fn parse_create_view( &mut self, + or_alter: bool, or_replace: bool, temporary: bool, create_view_params: Option, @@ -5320,11 +5728,7 @@ impl<'a> Parser<'a> { && self.parse_keyword(Keyword::COMMENT) { self.expect_token(&Token::Eq)?; - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(str) => Some(str), - _ => self.expected("string literal", next_token)?, - } + Some(self.parse_comment_value()?) } else { None }; @@ -5342,6 +5746,7 @@ impl<'a> Parser<'a> { ]); Ok(Statement::CreateView { + or_alter, name, columns, query, @@ -5658,6 +6063,35 @@ impl<'a> Parser<'a> { Ok(owner) } + /// Parses a [Statement::CreateDomain] statement. + fn parse_create_domain(&mut self) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword_is(Keyword::AS)?; + let data_type = self.parse_data_type()?; + let collation = if self.parse_keyword(Keyword::COLLATE) { + Some(self.parse_identifier()?) + } else { + None + }; + let default = if self.parse_keyword(Keyword::DEFAULT) { + Some(self.parse_expr()?) + } else { + None + }; + let mut constraints = Vec::new(); + while let Some(constraint) = self.parse_optional_table_constraint()? 
{ + constraints.push(constraint); + } + + Ok(Statement::CreateDomain(CreateDomain { + name, + data_type, + collation, + default, + constraints, + })) + } + /// ```sql /// CREATE POLICY name ON table_name [ AS { PERMISSIVE | RESTRICTIVE } ] /// [ FOR { ALL | SELECT | INSERT | UPDATE | DELETE } ] @@ -5793,6 +6227,8 @@ impl<'a> Parser<'a> { ObjectType::Table } else if self.parse_keyword(Keyword::VIEW) { ObjectType::View + } else if self.parse_keywords(&[Keyword::MATERIALIZED, Keyword::VIEW]) { + ObjectType::MaterializedView } else if self.parse_keyword(Keyword::INDEX) { ObjectType::Index } else if self.parse_keyword(Keyword::ROLE) { @@ -5813,6 +6249,8 @@ impl<'a> Parser<'a> { return self.parse_drop_policy(); } else if self.parse_keyword(Keyword::CONNECTOR) { return self.parse_drop_connector(); + } else if self.parse_keyword(Keyword::DOMAIN) { + return self.parse_drop_domain(); } else if self.parse_keyword(Keyword::PROCEDURE) { return self.parse_drop_procedure(); } else if self.parse_keyword(Keyword::SECRET) { @@ -5823,7 +6261,7 @@ impl<'a> Parser<'a> { return self.parse_drop_extension(); } else { return self.expected( - "CONNECTOR, DATABASE, EXTENSION, FUNCTION, INDEX, POLICY, PROCEDURE, ROLE, SCHEMA, SECRET, SEQUENCE, STAGE, TABLE, TRIGGER, TYPE, or VIEW after DROP", + "CONNECTOR, DATABASE, EXTENSION, FUNCTION, INDEX, POLICY, PROCEDURE, ROLE, SCHEMA, SECRET, SEQUENCE, STAGE, TABLE, TRIGGER, TYPE, VIEW, or MATERIALIZED VIEW after DROP", self.peek_token(), ); }; @@ -5845,6 +6283,11 @@ impl<'a> Parser<'a> { loc ); } + let table = if self.parse_keyword(Keyword::ON) { + Some(self.parse_object_name(false)?) 
+ } else { + None + }; Ok(Statement::Drop { object_type, if_exists, @@ -5853,6 +6296,7 @@ impl<'a> Parser<'a> { restrict, purge, temporary, + table, }) } @@ -5908,6 +6352,20 @@ impl<'a> Parser<'a> { Ok(Statement::DropConnector { if_exists, name }) } + /// ```sql + /// DROP DOMAIN [ IF EXISTS ] name [ CASCADE | RESTRICT ] + /// ``` + fn parse_drop_domain(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_object_name(false)?; + let drop_behavior = self.parse_optional_drop_behavior(); + Ok(Statement::DropDomain(DropDomain { + if_exists, + name, + drop_behavior, + })) + } + /// ```sql /// DROP PROCEDURE [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] /// [ CASCADE | RESTRICT ] @@ -6208,7 +6666,7 @@ impl<'a> Parser<'a> { /// DECLARE // { // { @local_variable [AS] data_type [ = value ] } - // | { @cursor_variable_name CURSOR } + // | { @cursor_variable_name CURSOR [ FOR ] } // } [ ,...n ] /// ``` /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-local-variable-transact-sql?view=sql-server-ver16 @@ -6224,14 +6682,19 @@ impl<'a> Parser<'a> { /// ```text // { // { @local_variable [AS] data_type [ = value ] } - // | { @cursor_variable_name CURSOR } + // | { @cursor_variable_name CURSOR [ FOR ]} // } [ ,...n ] /// ``` /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-local-variable-transact-sql?view=sql-server-ver16 pub fn parse_mssql_declare_stmt(&mut self) -> Result { let name = { let ident = self.parse_identifier()?; - if !ident.value.starts_with('@') { + if !ident.value.starts_with('@') + && !matches!( + self.peek_token().token, + Token::Word(w) if w.keyword == Keyword::CURSOR + ) + { Err(ParserError::TokenizerError( "Invalid MsSql variable declaration.".to_string(), )) @@ -6255,7 +6718,14 @@ impl<'a> Parser<'a> { _ => (None, Some(self.parse_data_type()?)), }; - let assignment = 
self.parse_mssql_variable_declaration_expression()?; + let (for_query, assignment) = if self.peek_keyword(Keyword::FOR) { + self.next_token(); + let query = Some(self.parse_query()?); + (query, None) + } else { + let assignment = self.parse_mssql_variable_declaration_expression()?; + (None, assignment) + }; Ok(Declare { names: vec![name], @@ -6266,7 +6736,7 @@ impl<'a> Parser<'a> { sensitive: None, scroll: None, hold: None, - for_query: None, + for_query, }) } @@ -6359,7 +6829,15 @@ impl<'a> Parser<'a> { } }; - self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])?; + let position = if self.peek_keyword(Keyword::FROM) { + self.expect_keyword(Keyword::FROM)?; + FetchPosition::From + } else if self.peek_keyword(Keyword::IN) { + self.expect_keyword(Keyword::IN)?; + FetchPosition::In + } else { + return parser_err!("Expected FROM or IN", self.peek_token().span.start); + }; let name = self.parse_identifier()?; @@ -6372,6 +6850,7 @@ impl<'a> Parser<'a> { Ok(Statement::Fetch { name, direction, + position, into, }) } @@ -6406,13 +6885,12 @@ impl<'a> Parser<'a> { }; let table_name = self.parse_object_name(false)?; let using = if self.parse_keyword(Keyword::USING) { - Some(self.parse_identifier()?) + Some(self.parse_index_type()?) 
} else { None }; - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_order_by_expr)?; - self.expect_token(&Token::RParen)?; + + let columns = self.parse_parenthesized_index_column_list()?; let include = if self.parse_keyword(Keyword::INCLUDE) { self.expect_token(&Token::LParen)?; @@ -6707,17 +7185,16 @@ impl<'a> Parser<'a> { // parse optional column list (schema) let (columns, constraints) = self.parse_columns()?; - let mut comment = if dialect_of!(self is HiveDialect) - && self.parse_keyword(Keyword::COMMENT) - { - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(str) => Some(CommentDef::AfterColumnDefsWithoutEq(str)), - _ => self.expected("comment", next_token)?, - } - } else { - None - }; + let comment_after_column_def = + if dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::COMMENT) { + let next_token = self.next_token(); + match next_token.token { + Token::SingleQuotedString(str) => Some(CommentDef::WithoutEq(str)), + _ => self.expected("comment", next_token)?, + } + } else { + None + }; // SQLite supports `WITHOUT ROWID` at the end of `CREATE TABLE` let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); @@ -6725,39 +7202,8 @@ impl<'a> Parser<'a> { let hive_distribution = self.parse_hive_distribution()?; let clustered_by = self.parse_optional_clustered_by()?; let hive_formats = self.parse_hive_formats()?; - // PostgreSQL supports `WITH ( options )`, before `AS` - let with_options = self.parse_options(Keyword::WITH)?; - let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; - let engine = if self.parse_keyword(Keyword::ENGINE) { - self.expect_token(&Token::Eq)?; - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => { - let name = w.value; - let parameters = if self.peek_token() == Token::LParen { - Some(self.parse_parenthesized_identifiers()?) 
- } else { - None - }; - Some(TableEngine { name, parameters }) - } - _ => self.expected("identifier", next_token)?, - } - } else { - None - }; - - let auto_increment_offset = if self.parse_keyword(Keyword::AUTO_INCREMENT) { - let _ = self.consume_token(&Token::Eq); - let next_token = self.next_token(); - match next_token.token { - Token::Number(s, _) => Some(Self::parse::(s, next_token.span.start)?), - _ => self.expected("literal int", next_token)?, - } - } else { - None - }; + let create_table_config = self.parse_optional_create_table_config()?; // ClickHouse supports `PRIMARY KEY`, before `ORDER BY` // https://clickhouse.com/docs/en/sql-reference/statements/create/table#primary-key @@ -6785,30 +7231,6 @@ impl<'a> Parser<'a> { None }; - let create_table_config = self.parse_optional_create_table_config()?; - - let default_charset = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET]) { - self.expect_token(&Token::Eq)?; - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => Some(w.value), - _ => self.expected("identifier", next_token)?, - } - } else { - None - }; - - let collation = if self.parse_keywords(&[Keyword::COLLATE]) { - self.expect_token(&Token::Eq)?; - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => Some(w.value), - _ => self.expected("identifier", next_token)?, - } - } else { - None - }; - let on_commit = if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT]) { Some(self.parse_create_table_on_commit()?) } else { @@ -6817,13 +7239,6 @@ impl<'a> Parser<'a> { let strict = self.parse_keyword(Keyword::STRICT); - // Excludes Hive dialect here since it has been handled after table column definitions. - if !dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::COMMENT) { - // rewind the COMMENT keyword - self.prev_token(); - comment = self.parse_optional_inline_comment()? 
- }; - // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { Some(self.parse_query()?) @@ -6840,8 +7255,6 @@ impl<'a> Parser<'a> { .temporary(temporary) .columns(columns) .constraints(constraints) - .with_options(with_options) - .table_properties(table_properties) .or_replace(or_replace) .if_not_exists(if_not_exists) .transient(transient) @@ -6852,18 +7265,15 @@ impl<'a> Parser<'a> { .without_rowid(without_rowid) .like(like) .clone_clause(clone) - .engine(engine) - .comment(comment) - .auto_increment_offset(auto_increment_offset) + .comment_after_column_def(comment_after_column_def) .order_by(order_by) - .default_charset(default_charset) - .collation(collation) .on_commit(on_commit) .on_cluster(on_cluster) .clustered_by(clustered_by) .partition_by(create_table_config.partition_by) .cluster_by(create_table_config.cluster_by) - .options(create_table_config.options) + .inherits(create_table_config.inherits) + .table_options(create_table_config.table_options) .primary_key(primary_key) .strict(strict) .build()) @@ -6884,13 +7294,32 @@ impl<'a> Parser<'a> { } } - /// Parse configuration like partitioning, clustering information during the table creation. + /// Parse configuration like inheritance, partitioning, clustering information during the table creation. /// /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2) /// [PostgreSQL](https://www.postgresql.org/docs/current/ddl-partitioning.html) + /// [MySql](https://dev.mysql.com/doc/refman/8.4/en/create-table.html) fn parse_optional_create_table_config( &mut self, ) -> Result { + let mut table_options = CreateTableOptions::None; + + let inherits = if self.parse_keyword(Keyword::INHERITS) { + Some(self.parse_parenthesized_qualified_column_list(IsOptional::Mandatory, false)?) 
+ } else { + None + }; + + // PostgreSQL supports `WITH ( options )`, before `AS` + let with_options = self.parse_options(Keyword::WITH)?; + if !with_options.is_empty() { + table_options = CreateTableOptions::With(with_options) + } + + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; + if !table_properties.is_empty() { + table_options = CreateTableOptions::TableProperties(table_properties); + } let partition_by = if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { @@ -6900,46 +7329,264 @@ impl<'a> Parser<'a> { }; let mut cluster_by = None; - let mut options = None; if dialect_of!(self is BigQueryDialect | GenericDialect) { if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { cluster_by = Some(WrappedCollection::NoWrapping( - self.parse_comma_separated(|p| p.parse_identifier())?, + self.parse_comma_separated(|p| p.parse_expr())?, )); }; if let Token::Word(word) = self.peek_token().token { if word.keyword == Keyword::OPTIONS { - options = Some(self.parse_options(Keyword::OPTIONS)?); + table_options = + CreateTableOptions::Options(self.parse_options(Keyword::OPTIONS)?) 
} }; } + if !dialect_of!(self is HiveDialect) && table_options == CreateTableOptions::None { + let plain_options = self.parse_plain_options()?; + if !plain_options.is_empty() { + table_options = CreateTableOptions::Plain(plain_options) + } + }; + Ok(CreateTableConfiguration { partition_by, cluster_by, - options, + inherits, + table_options, }) } + fn parse_plain_option(&mut self) -> Result, ParserError> { + // Single parameter option + // + if self.parse_keywords(&[Keyword::START, Keyword::TRANSACTION]) { + return Ok(Some(SqlOption::Ident(Ident::new("START TRANSACTION")))); + } + + // Custom option + // + if self.parse_keywords(&[Keyword::COMMENT]) { + let has_eq = self.consume_token(&Token::Eq); + let value = self.next_token(); + + let comment = match (has_eq, value.token) { + (true, Token::SingleQuotedString(s)) => { + Ok(Some(SqlOption::Comment(CommentDef::WithEq(s)))) + } + (false, Token::SingleQuotedString(s)) => { + Ok(Some(SqlOption::Comment(CommentDef::WithoutEq(s)))) + } + (_, token) => { + self.expected("Token::SingleQuotedString", TokenWithSpan::wrap(token)) + } + }; + return comment; + } + + // + // + if self.parse_keywords(&[Keyword::ENGINE]) { + let _ = self.consume_token(&Token::Eq); + let value = self.next_token(); + + let engine = match value.token { + Token::Word(w) => { + let parameters = if self.peek_token() == Token::LParen { + self.parse_parenthesized_identifiers()? + } else { + vec![] + }; + + Ok(Some(SqlOption::NamedParenthesizedList( + NamedParenthesizedList { + key: Ident::new("ENGINE"), + name: Some(Ident::new(w.value)), + values: parameters, + }, + ))) + } + _ => { + return self.expected("Token::Word", value)?; + } + }; + + return engine; + } + + // + if self.parse_keywords(&[Keyword::TABLESPACE]) { + let _ = self.consume_token(&Token::Eq); + let value = self.next_token(); + + let tablespace = match value.token { + Token::Word(Word { value: name, .. 
}) | Token::SingleQuotedString(name) => { + let storage = match self.parse_keyword(Keyword::STORAGE) { + true => { + let _ = self.consume_token(&Token::Eq); + let storage_token = self.next_token(); + match &storage_token.token { + Token::Word(w) => match w.value.to_uppercase().as_str() { + "DISK" => Some(StorageType::Disk), + "MEMORY" => Some(StorageType::Memory), + _ => self + .expected("Storage type (DISK or MEMORY)", storage_token)?, + }, + _ => self.expected("Token::Word", storage_token)?, + } + } + false => None, + }; + + Ok(Some(SqlOption::TableSpace(TablespaceOption { + name, + storage, + }))) + } + _ => { + return self.expected("Token::Word", value)?; + } + }; + + return tablespace; + } + + // + if self.parse_keyword(Keyword::UNION) { + let _ = self.consume_token(&Token::Eq); + let value = self.next_token(); + + match value.token { + Token::LParen => { + let tables: Vec = + self.parse_comma_separated0(Parser::parse_identifier, Token::RParen)?; + self.expect_token(&Token::RParen)?; + + return Ok(Some(SqlOption::NamedParenthesizedList( + NamedParenthesizedList { + key: Ident::new("UNION"), + name: None, + values: tables, + }, + ))); + } + _ => { + return self.expected("Token::LParen", value)?; + } + } + } + + // Key/Value parameter option + let key = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET]) { + Ident::new("DEFAULT CHARSET") + } else if self.parse_keyword(Keyword::CHARSET) { + Ident::new("CHARSET") + } else if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARACTER, Keyword::SET]) { + Ident::new("DEFAULT CHARACTER SET") + } else if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) { + Ident::new("CHARACTER SET") + } else if self.parse_keywords(&[Keyword::DEFAULT, Keyword::COLLATE]) { + Ident::new("DEFAULT COLLATE") + } else if self.parse_keyword(Keyword::COLLATE) { + Ident::new("COLLATE") + } else if self.parse_keywords(&[Keyword::DATA, Keyword::DIRECTORY]) { + Ident::new("DATA DIRECTORY") + } else if 
self.parse_keywords(&[Keyword::INDEX, Keyword::DIRECTORY]) { + Ident::new("INDEX DIRECTORY") + } else if self.parse_keyword(Keyword::KEY_BLOCK_SIZE) { + Ident::new("KEY_BLOCK_SIZE") + } else if self.parse_keyword(Keyword::ROW_FORMAT) { + Ident::new("ROW_FORMAT") + } else if self.parse_keyword(Keyword::PACK_KEYS) { + Ident::new("PACK_KEYS") + } else if self.parse_keyword(Keyword::STATS_AUTO_RECALC) { + Ident::new("STATS_AUTO_RECALC") + } else if self.parse_keyword(Keyword::STATS_PERSISTENT) { + Ident::new("STATS_PERSISTENT") + } else if self.parse_keyword(Keyword::STATS_SAMPLE_PAGES) { + Ident::new("STATS_SAMPLE_PAGES") + } else if self.parse_keyword(Keyword::DELAY_KEY_WRITE) { + Ident::new("DELAY_KEY_WRITE") + } else if self.parse_keyword(Keyword::COMPRESSION) { + Ident::new("COMPRESSION") + } else if self.parse_keyword(Keyword::ENCRYPTION) { + Ident::new("ENCRYPTION") + } else if self.parse_keyword(Keyword::MAX_ROWS) { + Ident::new("MAX_ROWS") + } else if self.parse_keyword(Keyword::MIN_ROWS) { + Ident::new("MIN_ROWS") + } else if self.parse_keyword(Keyword::AUTOEXTEND_SIZE) { + Ident::new("AUTOEXTEND_SIZE") + } else if self.parse_keyword(Keyword::AVG_ROW_LENGTH) { + Ident::new("AVG_ROW_LENGTH") + } else if self.parse_keyword(Keyword::CHECKSUM) { + Ident::new("CHECKSUM") + } else if self.parse_keyword(Keyword::CONNECTION) { + Ident::new("CONNECTION") + } else if self.parse_keyword(Keyword::ENGINE_ATTRIBUTE) { + Ident::new("ENGINE_ATTRIBUTE") + } else if self.parse_keyword(Keyword::PASSWORD) { + Ident::new("PASSWORD") + } else if self.parse_keyword(Keyword::SECONDARY_ENGINE_ATTRIBUTE) { + Ident::new("SECONDARY_ENGINE_ATTRIBUTE") + } else if self.parse_keyword(Keyword::INSERT_METHOD) { + Ident::new("INSERT_METHOD") + } else if self.parse_keyword(Keyword::AUTO_INCREMENT) { + Ident::new("AUTO_INCREMENT") + } else { + return Ok(None); + }; + + let _ = self.consume_token(&Token::Eq); + + let value = match self + .maybe_parse(|parser| parser.parse_value())? 
+ .map(Expr::Value) + { + Some(expr) => expr, + None => Expr::Identifier(self.parse_identifier()?), + }; + + Ok(Some(SqlOption::KeyValue { key, value })) + } + + pub fn parse_plain_options(&mut self) -> Result, ParserError> { + let mut options = Vec::new(); + + while let Some(option) = self.parse_plain_option()? { + options.push(option); + } + + Ok(options) + } + pub fn parse_optional_inline_comment(&mut self) -> Result, ParserError> { let comment = if self.parse_keyword(Keyword::COMMENT) { let has_eq = self.consume_token(&Token::Eq); - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(str) => Some(if has_eq { - CommentDef::WithEq(str) - } else { - CommentDef::WithoutEq(str) - }), - _ => self.expected("comment", next_token)?, - } + let comment = self.parse_comment_value()?; + Some(if has_eq { + CommentDef::WithEq(comment) + } else { + CommentDef::WithoutEq(comment) + }) } else { None }; Ok(comment) } + pub fn parse_comment_value(&mut self) -> Result { + let next_token = self.next_token(); + let value = match next_token.token { + Token::SingleQuotedString(str) => str, + Token::DollarQuotedString(str) => str.value, + _ => self.expected("string literal", next_token)?, + }; + Ok(value) + } + pub fn parse_optional_procedure_parameters( &mut self, ) -> Result>, ParserError> { @@ -6999,9 +7646,22 @@ impl<'a> Parser<'a> { } pub fn parse_procedure_param(&mut self) -> Result { + let mode = if self.parse_keyword(Keyword::IN) { + Some(ArgMode::In) + } else if self.parse_keyword(Keyword::OUT) { + Some(ArgMode::Out) + } else if self.parse_keyword(Keyword::INOUT) { + Some(ArgMode::InOut) + } else { + None + }; let name = self.parse_identifier()?; let data_type = self.parse_data_type()?; - Ok(ProcedureParam { name, data_type }) + Ok(ProcedureParam { + name, + data_type, + mode, + }) } pub fn parse_column_def(&mut self) -> Result { @@ -7075,11 +7735,7 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { 
Ok(Some(ColumnOption::NotNull)) } else if self.parse_keywords(&[Keyword::COMMENT]) { - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(value, ..) => Ok(Some(ColumnOption::Comment(value))), - _ => self.expected("string", next_token), - } + Ok(Some(ColumnOption::Comment(self.parse_comment_value()?))) } else if self.parse_keyword(Keyword::NULL) { Ok(Some(ColumnOption::Null)) } else if self.parse_keyword(Keyword::DEFAULT) { @@ -7192,6 +7848,10 @@ impl<'a> Parser<'a> { && dialect_of!(self is MySqlDialect | SQLiteDialect | DuckDbDialect | GenericDialect) { self.parse_optional_column_option_as() + } else if self.parse_keyword(Keyword::SRID) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + Ok(Some(ColumnOption::Srid(Box::new(self.parse_expr()?)))) } else if self.parse_keyword(Keyword::IDENTITY) && dialect_of!(self is MsSqlDialect | GenericDialect) { @@ -7232,7 +7892,7 @@ impl<'a> Parser<'a> { } pub(crate) fn parse_tag(&mut self) -> Result { - let name = self.parse_identifier()?; + let name = self.parse_object_name(false)?; self.expect_token(&Token::Eq)?; let value = self.parse_literal_string()?; @@ -7440,10 +8100,10 @@ impl<'a> Parser<'a> { let nulls_distinct = self.parse_optional_nulls_distinct()?; // optional index name - let index_name = self.parse_optional_indent()?; + let index_name = self.parse_optional_ident()?; let index_type = self.parse_optional_using_then_index_type()?; - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let columns = self.parse_parenthesized_index_column_list()?; let index_options = self.parse_index_options()?; let characteristics = self.parse_constraint_characteristics()?; Ok(Some(TableConstraint::Unique { @@ -7462,10 +8122,10 @@ impl<'a> Parser<'a> { self.expect_keyword_is(Keyword::KEY)?; // optional index name - let index_name = self.parse_optional_indent()?; + let index_name = self.parse_optional_ident()?; let index_type = 
self.parse_optional_using_then_index_type()?; - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let columns = self.parse_parenthesized_index_column_list()?; let index_options = self.parse_index_options()?; let characteristics = self.parse_constraint_characteristics()?; Ok(Some(TableConstraint::PrimaryKey { @@ -7479,6 +8139,7 @@ impl<'a> Parser<'a> { } Token::Word(w) if w.keyword == Keyword::FOREIGN => { self.expect_keyword_is(Keyword::KEY)?; + let index_name = self.parse_optional_ident()?; let columns = self.parse_parenthesized_column_list(Mandatory, false)?; self.expect_keyword_is(Keyword::REFERENCES)?; let foreign_table = self.parse_object_name(false)?; @@ -7501,6 +8162,7 @@ impl<'a> Parser<'a> { Ok(Some(TableConstraint::ForeignKey { name, + index_name, columns, foreign_table, referred_columns, @@ -7513,7 +8175,20 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let expr = Box::new(self.parse_expr()?); self.expect_token(&Token::RParen)?; - Ok(Some(TableConstraint::Check { name, expr })) + + let enforced = if self.parse_keyword(Keyword::ENFORCED) { + Some(true) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::ENFORCED]) { + Some(false) + } else { + None + }; + + Ok(Some(TableConstraint::Check { + name, + expr, + enforced, + })) } Token::Word(w) if (w.keyword == Keyword::INDEX || w.keyword == Keyword::KEY) @@ -7524,11 +8199,11 @@ impl<'a> Parser<'a> { let name = match self.peek_token().token { Token::Word(word) if word.keyword == Keyword::USING => None, - _ => self.parse_optional_indent()?, + _ => self.parse_optional_ident()?, }; let index_type = self.parse_optional_using_then_index_type()?; - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let columns = self.parse_parenthesized_index_column_list()?; Ok(Some(TableConstraint::Index { display_as_key, @@ -7555,9 +8230,9 @@ impl<'a> Parser<'a> { let index_type_display = self.parse_index_type_display(); - let opt_index_name = 
self.parse_optional_indent()?; + let opt_index_name = self.parse_optional_ident()?; - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let columns = self.parse_parenthesized_index_column_list()?; Ok(Some(TableConstraint::FulltextOrSpatial { fulltext, @@ -7629,16 +8304,30 @@ impl<'a> Parser<'a> { } pub fn parse_index_type(&mut self) -> Result { - if self.parse_keyword(Keyword::BTREE) { - Ok(IndexType::BTree) + Ok(if self.parse_keyword(Keyword::BTREE) { + IndexType::BTree } else if self.parse_keyword(Keyword::HASH) { - Ok(IndexType::Hash) + IndexType::Hash + } else if self.parse_keyword(Keyword::GIN) { + IndexType::GIN + } else if self.parse_keyword(Keyword::GIST) { + IndexType::GiST + } else if self.parse_keyword(Keyword::SPGIST) { + IndexType::SPGiST + } else if self.parse_keyword(Keyword::BRIN) { + IndexType::BRIN + } else if self.parse_keyword(Keyword::BLOOM) { + IndexType::Bloom } else { - self.expected("index type {BTREE | HASH}", self.peek_token()) - } + IndexType::Custom(self.parse_identifier()?) + }) } - /// Parse [USING {BTREE | HASH}] + /// Optionally parse the `USING` keyword, followed by an [IndexType] + /// Example: + /// ```sql + //// USING BTREE (name, age DESC) + /// ``` pub fn parse_optional_using_then_index_type( &mut self, ) -> Result, ParserError> { @@ -7651,7 +8340,7 @@ impl<'a> Parser<'a> { /// Parse `[ident]`, mostly `ident` is name, like: /// `window_name`, `index_name`, ... - pub fn parse_optional_indent(&mut self) -> Result, ParserError> { + pub fn parse_optional_ident(&mut self) -> Result, ParserError> { self.maybe_parse(|parser| parser.parse_identifier()) } @@ -7810,7 +8499,11 @@ impl<'a> Parser<'a> { pub fn parse_alter_table_operation(&mut self) -> Result { let operation = if self.parse_keyword(Keyword::ADD) { if let Some(constraint) = self.parse_optional_table_constraint()? 
{ - AlterTableOperation::AddConstraint(constraint) + let not_valid = self.parse_keywords(&[Keyword::NOT, Keyword::VALID]); + AlterTableOperation::AddConstraint { + constraint, + not_valid, + } } else if dialect_of!(self is ClickHouseDialect|GenericDialect) && self.parse_keyword(Keyword::PROJECTION) { @@ -7971,10 +8664,14 @@ impl<'a> Parser<'a> { name, drop_behavior, } - } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) - && dialect_of!(self is MySqlDialect | GenericDialect) - { + } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { AlterTableOperation::DropPrimaryKey + } else if self.parse_keywords(&[Keyword::FOREIGN, Keyword::KEY]) { + let name = self.parse_identifier()?; + AlterTableOperation::DropForeignKey { name } + } else if self.parse_keyword(Keyword::INDEX) { + let name = self.parse_identifier()?; + AlterTableOperation::DropIndex { name } } else if self.parse_keyword(Keyword::PROJECTION) && dialect_of!(self is ClickHouseDialect|GenericDialect) { @@ -7984,12 +8681,17 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::CLUSTERING, Keyword::KEY]) { AlterTableOperation::DropClusteringKey } else { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let has_column_keyword = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let column_name = self.parse_identifier()?; + let column_names = if self.dialect.supports_comma_separated_drop_column_list() { + self.parse_comma_separated(Parser::parse_identifier)? + } else { + vec![self.parse_identifier()?] 
+ }; let drop_behavior = self.parse_optional_drop_behavior(); AlterTableOperation::DropColumn { - column_name, + has_column_keyword, + column_names, if_exists, drop_behavior, } @@ -8062,16 +8764,10 @@ impl<'a> Parser<'a> { } } else if self.parse_keywords(&[Keyword::DROP, Keyword::DEFAULT]) { AlterColumnOperation::DropDefault {} - } else if self.parse_keywords(&[Keyword::SET, Keyword::DATA, Keyword::TYPE]) - || (is_postgresql && self.parse_keyword(Keyword::TYPE)) - { - let data_type = self.parse_data_type()?; - let using = if is_postgresql && self.parse_keyword(Keyword::USING) { - Some(self.parse_expr()?) - } else { - None - }; - AlterColumnOperation::SetDataType { data_type, using } + } else if self.parse_keywords(&[Keyword::SET, Keyword::DATA, Keyword::TYPE]) { + self.parse_set_data_type(true)? + } else if self.parse_keyword(Keyword::TYPE) { + self.parse_set_data_type(false)? } else if self.parse_keywords(&[Keyword::ADD, Keyword::GENERATED]) { let generated_as = if self.parse_keyword(Keyword::ALWAYS) { Some(GeneratedAs::Always) @@ -8163,6 +8859,24 @@ impl<'a> Parser<'a> { AlterTableOperation::SuspendRecluster } else if self.parse_keywords(&[Keyword::RESUME, Keyword::RECLUSTER]) { AlterTableOperation::ResumeRecluster + } else if self.parse_keyword(Keyword::LOCK) { + let equals = self.consume_token(&Token::Eq); + let lock = match self.parse_one_of_keywords(&[ + Keyword::DEFAULT, + Keyword::EXCLUSIVE, + Keyword::NONE, + Keyword::SHARED, + ]) { + Some(Keyword::DEFAULT) => AlterTableLock::Default, + Some(Keyword::EXCLUSIVE) => AlterTableLock::Exclusive, + Some(Keyword::NONE) => AlterTableLock::None, + Some(Keyword::SHARED) => AlterTableLock::Shared, + _ => self.expected( + "DEFAULT, EXCLUSIVE, NONE or SHARED after LOCK [=]", + self.peek_token(), + )?, + }; + AlterTableOperation::Lock { equals, lock } } else if self.parse_keyword(Keyword::ALGORITHM) { let equals = self.consume_token(&Token::Eq); let algorithm = match self.parse_one_of_keywords(&[ @@ -8185,6 +8899,26 @@ 
impl<'a> Parser<'a> { let equals = self.consume_token(&Token::Eq); let value = self.parse_number_value()?; AlterTableOperation::AutoIncrement { equals, value } + } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::IDENTITY]) { + let identity = if self.parse_keyword(Keyword::NONE) { + ReplicaIdentity::None + } else if self.parse_keyword(Keyword::FULL) { + ReplicaIdentity::Full + } else if self.parse_keyword(Keyword::DEFAULT) { + ReplicaIdentity::Default + } else if self.parse_keywords(&[Keyword::USING, Keyword::INDEX]) { + ReplicaIdentity::Index(self.parse_identifier()?) + } else { + return self.expected( + "NONE, FULL, DEFAULT, or USING INDEX index_name after REPLICA IDENTITY", + self.peek_token(), + ); + }; + + AlterTableOperation::ReplicaIdentity { identity } + } else if self.parse_keywords(&[Keyword::VALIDATE, Keyword::CONSTRAINT]) { + let name = self.parse_identifier()?; + AlterTableOperation::ValidateConstraint { name } } else { let options: Vec = self.parse_options_with_keywords(&[Keyword::SET, Keyword::TBLPROPERTIES])?; @@ -8194,7 +8928,7 @@ impl<'a> Parser<'a> { } } else { return self.expected( - "ADD, RENAME, PARTITION, SWAP, DROP, or SET TBLPROPERTIES after ALTER TABLE", + "ADD, RENAME, PARTITION, SWAP, DROP, REPLICA IDENTITY, or SET TBLPROPERTIES after ALTER TABLE", self.peek_token(), ); } @@ -8202,6 +8936,22 @@ impl<'a> Parser<'a> { Ok(operation) } + fn parse_set_data_type(&mut self, had_set: bool) -> Result { + let data_type = self.parse_data_type()?; + let using = if self.dialect.supports_alter_column_type_using() + && self.parse_keyword(Keyword::USING) + { + Some(self.parse_expr()?) 
+ } else { + None + }; + Ok(AlterColumnOperation::SetDataType { + data_type, + using, + had_set, + }) + } + fn parse_part_or_partition(&mut self) -> Result { let keyword = self.expect_one_of_keywords(&[Keyword::PART, Keyword::PARTITION])?; match keyword { @@ -8221,38 +8971,15 @@ impl<'a> Parser<'a> { Keyword::ROLE, Keyword::POLICY, Keyword::CONNECTOR, + Keyword::ICEBERG, ])?; match object_type { Keyword::VIEW => self.parse_alter_view(), Keyword::TYPE => self.parse_alter_type(), - Keyword::TABLE => { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ] - let table_name = self.parse_object_name(false)?; - let on_cluster = self.parse_optional_on_cluster()?; - let operations = self.parse_comma_separated(Parser::parse_alter_table_operation)?; - - let mut location = None; - if self.parse_keyword(Keyword::LOCATION) { - location = Some(HiveSetLocation { - has_set: false, - location: self.parse_identifier()?, - }); - } else if self.parse_keywords(&[Keyword::SET, Keyword::LOCATION]) { - location = Some(HiveSetLocation { - has_set: true, - location: self.parse_identifier()?, - }); - } - - Ok(Statement::AlterTable { - name: table_name, - if_exists, - only, - operations, - location, - on_cluster, - }) + Keyword::TABLE => self.parse_alter_table(false), + Keyword::ICEBERG => { + self.expect_keyword(Keyword::TABLE)?; + self.parse_alter_table(true) } Keyword::INDEX => { let index_name = self.parse_object_name(false)?; @@ -8280,6 +9007,38 @@ impl<'a> Parser<'a> { } } + /// Parse a [Statement::AlterTable] + pub fn parse_alter_table(&mut self, iceberg: bool) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ] + let table_name = self.parse_object_name(false)?; + let on_cluster = self.parse_optional_on_cluster()?; + let operations = self.parse_comma_separated(Parser::parse_alter_table_operation)?; + + let mut location 
= None; + if self.parse_keyword(Keyword::LOCATION) { + location = Some(HiveSetLocation { + has_set: false, + location: self.parse_identifier()?, + }); + } else if self.parse_keywords(&[Keyword::SET, Keyword::LOCATION]) { + location = Some(HiveSetLocation { + has_set: true, + location: self.parse_identifier()?, + }); + } + + Ok(Statement::AlterTable { + name: table_name, + if_exists, + only, + operations, + location, + on_cluster, + iceberg, + }) + } + pub fn parse_alter_view(&mut self) -> Result { let name = self.parse_object_name(false)?; let columns = self.parse_parenthesized_column_list(Optional, false)?; @@ -8439,6 +9198,14 @@ impl<'a> Parser<'a> { }) } + /// Parse [Statement::Open] + fn parse_open(&mut self) -> Result { + self.expect_keyword(Keyword::OPEN)?; + Ok(Statement::Open(OpenStatement { + cursor_name: self.parse_identifier()?, + })) + } + pub fn parse_close(&mut self) -> Result { let cursor = if self.parse_keyword(Keyword::ALL) { CloseCursor::All @@ -8739,13 +9506,19 @@ impl<'a> Parser<'a> { } } - fn parse_introduced_string_value(&mut self) -> Result { + fn parse_introduced_string_expr(&mut self) -> Result { let next_token = self.next_token(); let span = next_token.span; match next_token.token { - Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), - Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), - Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), + Token::SingleQuotedString(ref s) => Ok(Expr::Value( + Value::SingleQuotedString(s.to_string()).with_span(span), + )), + Token::DoubleQuotedString(ref s) => Ok(Expr::Value( + Value::DoubleQuotedString(s.to_string()).with_span(span), + )), + Token::HexStringLiteral(ref s) => Ok(Expr::Value( + Value::HexStringLiteral(s.to_string()).with_span(span), + )), unexpected => self.expected( "a string value", TokenWithSpan { @@ -8963,6 +9736,11 @@ impl<'a> Parser<'a> { Ok(DataType::BigInt(optional_precision?)) } } + 
Keyword::HUGEINT => Ok(DataType::HugeInt), + Keyword::UBIGINT => Ok(DataType::UBigInt), + Keyword::UHUGEINT => Ok(DataType::UHugeInt), + Keyword::USMALLINT => Ok(DataType::USmallInt), + Keyword::UTINYINT => Ok(DataType::UTinyInt), Keyword::UINT8 => Ok(DataType::UInt8), Keyword::UINT16 => Ok(DataType::UInt16), Keyword::UINT32 => Ok(DataType::UInt32), @@ -9039,6 +9817,7 @@ impl<'a> Parser<'a> { self.parse_optional_precision()?, TimezoneInfo::Tz, )), + Keyword::TIMESTAMP_NTZ => Ok(DataType::TimestampNtz), Keyword::TIME => { let precision = self.parse_optional_precision()?; let tz = if self.parse_keyword(Keyword::WITH) { @@ -9161,8 +9940,14 @@ impl<'a> Parser<'a> { Ok(DataType::AnyType) } Keyword::TABLE => { - let columns = self.parse_returns_table_columns()?; - Ok(DataType::Table(columns)) + // an LParen after the TABLE keyword indicates that table columns are being defined + // whereas no LParen indicates an anonymous table expression will be returned + if self.peek_token() == Token::LParen { + let columns = self.parse_returns_table_columns()?; + Ok(DataType::Table(Some(columns))) + } else { + Ok(DataType::Table(None)) + } } Keyword::SIGNED => { if self.parse_keyword(Keyword::INTEGER) { @@ -9178,6 +9963,12 @@ impl<'a> Parser<'a> { Ok(DataType::Unsigned) } } + Keyword::TSVECTOR if dialect_is!(dialect is PostgreSqlDialect | GenericDialect) => { + Ok(DataType::TsVector) + } + Keyword::TSQUERY if dialect_is!(dialect is PostgreSqlDialect | GenericDialect) => { + Ok(DataType::TsQuery) + } _ => { self.prev_token(); let type_name = self.parse_object_name(false)?; @@ -9203,13 +9994,7 @@ impl<'a> Parser<'a> { } fn parse_returns_table_column(&mut self) -> Result { - let name = self.parse_identifier()?; - let data_type = self.parse_data_type()?; - Ok(ColumnDef { - name, - data_type, - options: Vec::new(), // No constraints expected here - }) + self.parse_column_def() } fn parse_returns_table_columns(&mut self) -> Result, ParserError> { @@ -9246,6 +10031,48 @@ impl<'a> Parser<'a> 
{ Ok(IdentWithAlias { ident, alias }) } + /// Parse `identifier [AS] identifier` where the AS keyword is optional + fn parse_identifier_with_optional_alias(&mut self) -> Result { + let ident = self.parse_identifier()?; + let _after_as = self.parse_keyword(Keyword::AS); + let alias = self.parse_identifier()?; + Ok(IdentWithAlias { ident, alias }) + } + + /// Parse comma-separated list of parenthesized queries for pipe operators + fn parse_pipe_operator_queries(&mut self) -> Result, ParserError> { + self.parse_comma_separated(|parser| { + parser.expect_token(&Token::LParen)?; + let query = parser.parse_query()?; + parser.expect_token(&Token::RParen)?; + Ok(*query) + }) + } + + /// Parse set quantifier for pipe operators that require DISTINCT. E.g. INTERSECT and EXCEPT + fn parse_distinct_required_set_quantifier( + &mut self, + operator_name: &str, + ) -> Result { + let quantifier = self.parse_set_quantifier(&Some(SetOperator::Intersect)); + match quantifier { + SetQuantifier::Distinct | SetQuantifier::DistinctByName => Ok(quantifier), + _ => Err(ParserError::ParserError(format!( + "{operator_name} pipe operator requires DISTINCT modifier", + ))), + } + } + + /// Parse optional identifier alias (with or without AS keyword) + fn parse_identifier_optional_alias(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::AS) { + Ok(Some(self.parse_identifier()?)) + } else { + // Check if the next token is an identifier (implicit alias) + self.maybe_parse(|parser| parser.parse_identifier()) + } + } + /// Optionally parses an alias for a select list item fn maybe_parse_select_item_alias(&mut self) -> Result, ParserError> { fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { @@ -9419,7 +10246,13 @@ impl<'a> Parser<'a> { } if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { self.expect_token(&Token::LParen)?; - let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; + let result = self.parse_comma_separated(|p| { + 
if p.peek_token_ref().token == Token::LParen { + p.parse_tuple(true, true) + } else { + Ok(vec![p.parse_expr()?]) + } + })?; self.expect_token(&Token::RParen)?; modifiers.push(GroupByWithModifier::GroupingSets(Expr::GroupingSets( result, @@ -9462,6 +10295,60 @@ impl<'a> Parser<'a> { } } + fn parse_optional_limit_clause(&mut self) -> Result, ParserError> { + let mut offset = if self.parse_keyword(Keyword::OFFSET) { + Some(self.parse_offset()?) + } else { + None + }; + + let (limit, limit_by) = if self.parse_keyword(Keyword::LIMIT) { + let expr = self.parse_limit()?; + + if self.dialect.supports_limit_comma() + && offset.is_none() + && expr.is_some() // ALL not supported with comma + && self.consume_token(&Token::Comma) + { + let offset = expr.ok_or_else(|| { + ParserError::ParserError( + "Missing offset for LIMIT , ".to_string(), + ) + })?; + return Ok(Some(LimitClause::OffsetCommaLimit { + offset, + limit: self.parse_expr()?, + })); + } + + let limit_by = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::BY) + { + Some(self.parse_comma_separated(Parser::parse_expr)?) + } else { + None + }; + + (Some(expr), limit_by) + } else { + (None, None) + }; + + if offset.is_none() && limit.is_some() && self.parse_keyword(Keyword::OFFSET) { + offset = Some(self.parse_offset()?); + } + + if offset.is_some() || (limit.is_some() && limit != Some(None)) || limit_by.is_some() { + Ok(Some(LimitClause::LimitOffset { + limit: limit.unwrap_or_default(), + offset, + limit_by: limit_by.unwrap_or_default(), + })) + } else { + Ok(None) + } + } + /// Parse a table object for insertion /// e.g. `some_database.some_table` or `FUNCTION some_table_func(...)` pub fn parse_table_object(&mut self) -> Result { @@ -9474,49 +10361,14 @@ impl<'a> Parser<'a> { } } - /// Parse a possibly qualified, possibly quoted identifier, optionally allowing for wildcards, - /// e.g. 
*, *.*, `foo`.*, or "foo"."bar" - fn parse_object_name_with_wildcards( - &mut self, - in_table_clause: bool, - allow_wildcards: bool, - ) -> Result { - let mut idents = vec![]; - - if dialect_of!(self is BigQueryDialect) && in_table_clause { - loop { - let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?; - idents.push(ident); - if !self.consume_token(&Token::Period) && !end_with_period { - break; - } - } - } else { - loop { - let ident = if allow_wildcards && self.peek_token().token == Token::Mul { - let span = self.next_token().span; - Ident { - value: Token::Mul.to_string(), - quote_style: None, - span, - } - } else { - if self.dialect.supports_object_name_double_dot_notation() - && idents.len() == 1 - && self.consume_token(&Token::Period) - { - // Empty string here means default schema - idents.push(Ident::new("")); - } - self.parse_identifier()? - }; - idents.push(ident); - if !self.consume_token(&Token::Period) { - break; - } - } - } - Ok(ObjectName::from(idents)) + /// Parse a possibly qualified, possibly quoted identifier, e.g. + /// `foo` or `myschema."table" + /// + /// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN, + /// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers + /// in this context on BigQuery. + pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result { + self.parse_object_name_inner(in_table_clause, false) } /// Parse a possibly qualified, possibly quoted identifier, e.g. @@ -9525,19 +10377,76 @@ impl<'a> Parser<'a> { /// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN, /// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers /// in this context on BigQuery. 
- pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result { - let ObjectName(mut idents) = - self.parse_object_name_with_wildcards(in_table_clause, false)?; + /// + /// The `allow_wildcards` parameter indicates whether to allow for wildcards in the object name + /// e.g. *, *.*, `foo`.*, or "foo"."bar" + fn parse_object_name_inner( + &mut self, + in_table_clause: bool, + allow_wildcards: bool, + ) -> Result { + let mut parts = vec![]; + if dialect_of!(self is BigQueryDialect) && in_table_clause { + loop { + let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?; + parts.push(ObjectNamePart::Identifier(ident)); + if !self.consume_token(&Token::Period) && !end_with_period { + break; + } + } + } else { + loop { + if allow_wildcards && self.peek_token().token == Token::Mul { + let span = self.next_token().span; + parts.push(ObjectNamePart::Identifier(Ident { + value: Token::Mul.to_string(), + quote_style: None, + span, + })); + } else if dialect_of!(self is BigQueryDialect) && in_table_clause { + let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?; + parts.push(ObjectNamePart::Identifier(ident)); + if !self.consume_token(&Token::Period) && !end_with_period { + break; + } + } else if self.dialect.supports_object_name_double_dot_notation() + && parts.len() == 1 + && matches!(self.peek_token().token, Token::Period) + { + // Empty string here means default schema + parts.push(ObjectNamePart::Identifier(Ident::new(""))); + } else { + let ident = self.parse_identifier()?; + let part = if self + .dialect + .is_identifier_generating_function_name(&ident, &parts) + { + self.expect_token(&Token::LParen)?; + let args: Vec = + self.parse_comma_separated0(Self::parse_function_args, Token::RParen)?; + self.expect_token(&Token::RParen)?; + ObjectNamePart::Function(ObjectNamePartFunction { name: ident, args }) + } else { + ObjectNamePart::Identifier(ident) + }; + parts.push(part); + } + + if !self.consume_token(&Token::Period) 
{ + break; + } + } + } // BigQuery accepts any number of quoted identifiers of a table name. // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_identifiers if dialect_of!(self is BigQueryDialect) - && idents.iter().any(|part| { + && parts.iter().any(|part| { part.as_ident() .is_some_and(|ident| ident.value.contains('.')) }) { - idents = idents + parts = parts .into_iter() .flat_map(|part| match part.as_ident() { Some(ident) => ident @@ -9556,7 +10465,7 @@ impl<'a> Parser<'a> { .collect() } - Ok(ObjectName(idents)) + Ok(ObjectName(parts)) } /// Parse identifiers @@ -9777,17 +10686,7 @@ impl<'a> Parser<'a> { /// Parses a column definition within a view. fn parse_view_column(&mut self) -> Result { let name = self.parse_identifier()?; - let options = if (dialect_of!(self is BigQueryDialect | GenericDialect) - && self.parse_keyword(Keyword::OPTIONS)) - || (dialect_of!(self is SnowflakeDialect | GenericDialect) - && self.parse_keyword(Keyword::COMMENT)) - { - self.prev_token(); - self.parse_optional_column_option()? - .map(|option| vec![option]) - } else { - None - }; + let options = self.parse_view_column_options()?; let data_type = if dialect_of!(self is ClickHouseDialect) { Some(self.parse_data_type()?) } else { @@ -9800,6 +10699,25 @@ impl<'a> Parser<'a> { }) } + fn parse_view_column_options(&mut self) -> Result, ParserError> { + let mut options = Vec::new(); + loop { + let option = self.parse_optional_column_option()?; + if let Some(option) = option { + options.push(option); + } else { + break; + } + } + if options.is_empty() { + Ok(None) + } else if self.dialect.supports_space_separated_column_options() { + Ok(Some(ColumnOptions::SpaceSeparated(options))) + } else { + Ok(Some(ColumnOptions::CommaSeparated(options))) + } + } + /// Parses a parenthesized comma-separated list of unqualified, possibly quoted identifiers. 
/// For example: `(col1, "col 2", ...)` pub fn parse_parenthesized_column_list( @@ -9810,6 +10728,14 @@ impl<'a> Parser<'a> { self.parse_parenthesized_column_list_inner(optional, allow_empty, |p| p.parse_identifier()) } + /// Parses a parenthesized comma-separated list of index columns, which can be arbitrary + /// expressions with ordering information (and an opclass in some dialects). + fn parse_parenthesized_index_column_list(&mut self) -> Result, ParserError> { + self.parse_parenthesized_column_list_inner(Mandatory, false, |p| { + p.parse_create_index_expr() + }) + } + /// Parses a parenthesized comma-separated list of qualified, possibly quoted identifiers. /// For example: `(db1.sc1.tbl1.col1, db1.sc1.tbl1."col 2", ...)` pub fn parse_parenthesized_qualified_column_list( @@ -10022,6 +10948,13 @@ impl<'a> Parser<'a> { Ok(parent_type(inside_type.into())) } + /// Parse a DELETE statement, returning a `Box`ed SetExpr + /// + /// This is used to reduce the size of the stack frames in debug builds + fn parse_delete_setexpr_boxed(&mut self) -> Result, ParserError> { + Ok(Box::new(SetExpr::Delete(self.parse_delete()?))) + } + pub fn parse_delete(&mut self) -> Result { let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. 
@@ -10195,30 +11128,42 @@ impl<'a> Parser<'a> { Ok(Query { with, body: self.parse_insert_setexpr_boxed()?, - limit: None, - limit_by: vec![], order_by: None, - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], } .into()) } else if self.parse_keyword(Keyword::UPDATE) { Ok(Query { with, body: self.parse_update_setexpr_boxed()?, - limit: None, - limit_by: vec![], order_by: None, - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], + } + .into()) + } else if self.parse_keyword(Keyword::DELETE) { + Ok(Query { + with, + body: self.parse_delete_setexpr_boxed()?, + limit_clause: None, + order_by: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + pipe_operators: vec![], } .into()) } else { @@ -10226,40 +11171,7 @@ impl<'a> Parser<'a> { let order_by = self.parse_optional_order_by()?; - let mut limit = None; - let mut offset = None; - - for _x in 0..2 { - if limit.is_none() && self.parse_keyword(Keyword::LIMIT) { - limit = self.parse_limit()? - } - - if offset.is_none() && self.parse_keyword(Keyword::OFFSET) { - offset = Some(self.parse_offset()?) - } - - if self.dialect.supports_limit_comma() - && limit.is_some() - && offset.is_none() - && self.consume_token(&Token::Comma) - { - // MySQL style LIMIT x,y => LIMIT y OFFSET x. - // Check for more details. - offset = Some(Offset { - value: limit.unwrap(), - rows: OffsetRows::None, - }); - limit = Some(self.parse_expr()?); - } - } - - let limit_by = if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.parse_keyword(Keyword::BY) - { - self.parse_comma_separated(Parser::parse_expr)? 
- } else { - vec![] - }; + let limit_clause = self.parse_optional_limit_clause()?; let settings = self.parse_settings()?; @@ -10292,23 +11204,247 @@ impl<'a> Parser<'a> { None }; + let pipe_operators = if self.dialect.supports_pipe_operator() { + self.parse_pipe_operators()? + } else { + Vec::new() + }; + Ok(Query { with, body, order_by, - limit, - limit_by, - offset, + limit_clause, fetch, locks, for_clause, settings, format_clause, + pipe_operators, } .into()) } } + fn parse_pipe_operators(&mut self) -> Result, ParserError> { + let mut pipe_operators = Vec::new(); + + while self.consume_token(&Token::VerticalBarRightAngleBracket) { + let kw = self.expect_one_of_keywords(&[ + Keyword::SELECT, + Keyword::EXTEND, + Keyword::SET, + Keyword::DROP, + Keyword::AS, + Keyword::WHERE, + Keyword::LIMIT, + Keyword::AGGREGATE, + Keyword::ORDER, + Keyword::TABLESAMPLE, + Keyword::RENAME, + Keyword::UNION, + Keyword::INTERSECT, + Keyword::EXCEPT, + Keyword::CALL, + Keyword::PIVOT, + Keyword::UNPIVOT, + Keyword::JOIN, + Keyword::INNER, + Keyword::LEFT, + Keyword::RIGHT, + Keyword::FULL, + Keyword::CROSS, + ])?; + match kw { + Keyword::SELECT => { + let exprs = self.parse_comma_separated(Parser::parse_select_item)?; + pipe_operators.push(PipeOperator::Select { exprs }) + } + Keyword::EXTEND => { + let exprs = self.parse_comma_separated(Parser::parse_select_item)?; + pipe_operators.push(PipeOperator::Extend { exprs }) + } + Keyword::SET => { + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + pipe_operators.push(PipeOperator::Set { assignments }) + } + Keyword::DROP => { + let columns = self.parse_identifiers()?; + pipe_operators.push(PipeOperator::Drop { columns }) + } + Keyword::AS => { + let alias = self.parse_identifier()?; + pipe_operators.push(PipeOperator::As { alias }) + } + Keyword::WHERE => { + let expr = self.parse_expr()?; + pipe_operators.push(PipeOperator::Where { expr }) + } + Keyword::LIMIT => { + let expr = self.parse_expr()?; + let offset 
= if self.parse_keyword(Keyword::OFFSET) { + Some(self.parse_expr()?) + } else { + None + }; + pipe_operators.push(PipeOperator::Limit { expr, offset }) + } + Keyword::AGGREGATE => { + let full_table_exprs = if self.peek_keyword(Keyword::GROUP) { + vec![] + } else { + self.parse_comma_separated(|parser| { + parser.parse_expr_with_alias_and_order_by() + })? + }; + + let group_by_expr = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { + self.parse_comma_separated(|parser| { + parser.parse_expr_with_alias_and_order_by() + })? + } else { + vec![] + }; + + pipe_operators.push(PipeOperator::Aggregate { + full_table_exprs, + group_by_expr, + }) + } + Keyword::ORDER => { + self.expect_one_of_keywords(&[Keyword::BY])?; + let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; + pipe_operators.push(PipeOperator::OrderBy { exprs }) + } + Keyword::TABLESAMPLE => { + let sample = self.parse_table_sample(TableSampleModifier::TableSample)?; + pipe_operators.push(PipeOperator::TableSample { sample }); + } + Keyword::RENAME => { + let mappings = + self.parse_comma_separated(Parser::parse_identifier_with_optional_alias)?; + pipe_operators.push(PipeOperator::Rename { mappings }); + } + Keyword::UNION => { + let set_quantifier = self.parse_set_quantifier(&Some(SetOperator::Union)); + let queries = self.parse_pipe_operator_queries()?; + pipe_operators.push(PipeOperator::Union { + set_quantifier, + queries, + }); + } + Keyword::INTERSECT => { + let set_quantifier = + self.parse_distinct_required_set_quantifier("INTERSECT")?; + let queries = self.parse_pipe_operator_queries()?; + pipe_operators.push(PipeOperator::Intersect { + set_quantifier, + queries, + }); + } + Keyword::EXCEPT => { + let set_quantifier = self.parse_distinct_required_set_quantifier("EXCEPT")?; + let queries = self.parse_pipe_operator_queries()?; + pipe_operators.push(PipeOperator::Except { + set_quantifier, + queries, + }); + } + Keyword::CALL => { + let function_name = 
self.parse_object_name(false)?; + let function_expr = self.parse_function(function_name)?; + if let Expr::Function(function) = function_expr { + let alias = self.parse_identifier_optional_alias()?; + pipe_operators.push(PipeOperator::Call { function, alias }); + } else { + return Err(ParserError::ParserError( + "Expected function call after CALL".to_string(), + )); + } + } + Keyword::PIVOT => { + self.expect_token(&Token::LParen)?; + let aggregate_functions = + self.parse_comma_separated(Self::parse_aliased_function_call)?; + self.expect_keyword_is(Keyword::FOR)?; + let value_column = self.parse_period_separated(|p| p.parse_identifier())?; + self.expect_keyword_is(Keyword::IN)?; + + self.expect_token(&Token::LParen)?; + let value_source = if self.parse_keyword(Keyword::ANY) { + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + PivotValueSource::Any(order_by) + } else if self.peek_sub_query() { + PivotValueSource::Subquery(self.parse_query()?) 
+ } else { + PivotValueSource::List( + self.parse_comma_separated(Self::parse_expr_with_alias)?, + ) + }; + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen)?; + + let alias = self.parse_identifier_optional_alias()?; + + pipe_operators.push(PipeOperator::Pivot { + aggregate_functions, + value_column, + value_source, + alias, + }); + } + Keyword::UNPIVOT => { + self.expect_token(&Token::LParen)?; + let value_column = self.parse_identifier()?; + self.expect_keyword(Keyword::FOR)?; + let name_column = self.parse_identifier()?; + self.expect_keyword(Keyword::IN)?; + + self.expect_token(&Token::LParen)?; + let unpivot_columns = self.parse_comma_separated(Parser::parse_identifier)?; + self.expect_token(&Token::RParen)?; + + self.expect_token(&Token::RParen)?; + + let alias = self.parse_identifier_optional_alias()?; + + pipe_operators.push(PipeOperator::Unpivot { + value_column, + name_column, + unpivot_columns, + alias, + }); + } + Keyword::JOIN + | Keyword::INNER + | Keyword::LEFT + | Keyword::RIGHT + | Keyword::FULL + | Keyword::CROSS => { + self.prev_token(); + let mut joins = self.parse_joins()?; + if joins.len() != 1 { + return Err(ParserError::ParserError( + "Join pipe operator must have a single join".to_string(), + )); + } + let join = joins.swap_remove(0); + pipe_operators.push(PipeOperator::Join(join)) + } + unhandled => { + return Err(ParserError::ParserError(format!( + "`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}" + ))) + } + } + } + Ok(pipe_operators) + } + fn parse_settings(&mut self) -> Result>, ParserError> { let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) && self.parse_keyword(Keyword::SETTINGS) @@ -10316,7 +11452,7 @@ impl<'a> Parser<'a> { let key_values = self.parse_comma_separated(|p| { let key = p.parse_identifier()?; p.expect_token(&Token::Eq)?; - let value = p.parse_value()?.value; + let value = p.parse_expr()?; Ok(Setting { key, value }) })?; Some(key_values) @@ 
-10612,6 +11748,7 @@ impl<'a> Parser<'a> { top: None, top_before_distinct: false, projection: vec![], + exclude: None, into: None, from, lateral_views: vec![], @@ -10634,18 +11771,7 @@ impl<'a> Parser<'a> { } let select_token = self.expect_keyword(Keyword::SELECT)?; - let value_table_mode = - if dialect_of!(self is BigQueryDialect) && self.parse_keyword(Keyword::AS) { - if self.parse_keyword(Keyword::VALUE) { - Some(ValueTableMode::AsValue) - } else if self.parse_keyword(Keyword::STRUCT) { - Some(ValueTableMode::AsStruct) - } else { - self.expected("VALUE or STRUCT", self.peek_token())? - } - } else { - None - }; + let value_table_mode = self.parse_value_table_mode()?; let mut top_before_distinct = false; let mut top = None; @@ -10665,19 +11791,14 @@ impl<'a> Parser<'a> { self.parse_projection()? }; + let exclude = if self.dialect.supports_select_exclude() { + self.parse_optional_select_item_exclude()? + } else { + None + }; + let into = if self.parse_keyword(Keyword::INTO) { - let temporary = self - .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) - .is_some(); - let unlogged = self.parse_keyword(Keyword::UNLOGGED); - let table = self.parse_keyword(Keyword::TABLE); - let name = self.parse_object_name(false)?; - Some(SelectInto { - temporary, - unlogged, - table, - name, - }) + Some(self.parse_select_into()?) } else { None }; @@ -10757,7 +11878,7 @@ impl<'a> Parser<'a> { }; let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? + self.parse_comma_separated(Parser::parse_order_by_expr)? 
} else { vec![] }; @@ -10809,6 +11930,7 @@ impl<'a> Parser<'a> { top, top_before_distinct, projection, + exclude, into, from, lateral_views, @@ -10832,6 +11954,32 @@ impl<'a> Parser<'a> { }) } + fn parse_value_table_mode(&mut self) -> Result, ParserError> { + if !dialect_of!(self is BigQueryDialect) { + return Ok(None); + } + + let mode = if self.parse_keywords(&[Keyword::DISTINCT, Keyword::AS, Keyword::VALUE]) { + Some(ValueTableMode::DistinctAsValue) + } else if self.parse_keywords(&[Keyword::DISTINCT, Keyword::AS, Keyword::STRUCT]) { + Some(ValueTableMode::DistinctAsStruct) + } else if self.parse_keywords(&[Keyword::AS, Keyword::VALUE]) + || self.parse_keywords(&[Keyword::ALL, Keyword::AS, Keyword::VALUE]) + { + Some(ValueTableMode::AsValue) + } else if self.parse_keywords(&[Keyword::AS, Keyword::STRUCT]) + || self.parse_keywords(&[Keyword::ALL, Keyword::AS, Keyword::STRUCT]) + { + Some(ValueTableMode::AsStruct) + } else if self.parse_keyword(Keyword::AS) { + self.expected("VALUE or STRUCT", self.peek_token())? + } else { + None + }; + + Ok(mode) + } + /// Invoke `f` after first setting the parser's `ParserState` to `state`. /// /// Upon return, restores the parser's state to what it started at. @@ -10915,147 +12063,232 @@ impl<'a> Parser<'a> { } /// Parse a `SET ROLE` statement. Expects SET to be consumed already. - fn parse_set_role(&mut self, modifier: Option) -> Result { + fn parse_set_role( + &mut self, + modifier: Option, + ) -> Result { self.expect_keyword_is(Keyword::ROLE)?; - let context_modifier = match modifier { - Some(Keyword::LOCAL) => ContextModifier::Local, - Some(Keyword::SESSION) => ContextModifier::Session, - _ => ContextModifier::None, - }; let role_name = if self.parse_keyword(Keyword::NONE) { None } else { Some(self.parse_identifier()?) 
}; - Ok(Statement::SetRole { - context_modifier, + Ok(Statement::Set(Set::SetRole { + context_modifier: modifier, role_name, - }) + })) } - pub fn parse_set(&mut self) -> Result { + fn parse_set_values( + &mut self, + parenthesized_assignment: bool, + ) -> Result, ParserError> { + let mut values = vec![]; + + if parenthesized_assignment { + self.expect_token(&Token::LParen)?; + } + + loop { + let value = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Ok(expr) = self.parse_expr() { + expr + } else { + self.expected("variable value", self.peek_token())? + }; + + values.push(value); + if self.consume_token(&Token::Comma) { + continue; + } + + if parenthesized_assignment { + self.expect_token(&Token::RParen)?; + } + return Ok(values); + } + } + + fn parse_context_modifier(&mut self) -> Option { let modifier = - self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); - if let Some(Keyword::HIVEVAR) = modifier { - self.expect_token(&Token::Colon)?; - } else if let Some(set_role_stmt) = - self.maybe_parse(|parser| parser.parse_set_role(modifier))? + self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::GLOBAL])?; + + Self::keyword_to_modifier(modifier) + } + + /// Parse a single SET statement assignment `var = expr`. + fn parse_set_assignment(&mut self) -> Result { + let scope = self.parse_context_modifier(); + + let name = if self.dialect.supports_parenthesized_set_variables() + && self.consume_token(&Token::LParen) { + // Parenthesized assignments are handled in the `parse_set` function after + // trying to parse list of assignments using this function. + // If a dialect supports both, and we find a LParen, we early exit from this function. + self.expected("Unparenthesized assignment", self.peek_token())? + } else { + self.parse_object_name(false)? 
+ }; + + if !(self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO)) { + return self.expected("assignment operator", self.peek_token()); + } + + let value = self.parse_expr()?; + + Ok(SetAssignment { scope, name, value }) + } + + fn parse_set(&mut self) -> Result { + let hivevar = self.parse_keyword(Keyword::HIVEVAR); + + // Modifier is either HIVEVAR: or a ContextModifier (LOCAL, SESSION, etc), not both + let scope = if !hivevar { + self.parse_context_modifier() + } else { + None + }; + + if hivevar { + self.expect_token(&Token::Colon)?; + } + + if let Some(set_role_stmt) = self.maybe_parse(|parser| parser.parse_set_role(scope))? { return Ok(set_role_stmt); } - let variables = if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { - OneOrManyWithParens::One(ObjectName::from(vec!["TIMEZONE".into()])) - } else if self.dialect.supports_parenthesized_set_variables() - && self.consume_token(&Token::LParen) + // Handle special cases first + if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) + || self.parse_keyword(Keyword::TIMEZONE) { - let variables = OneOrManyWithParens::Many( - self.parse_comma_separated(|parser: &mut Parser<'a>| parser.parse_identifier())? - .into_iter() - .map(|ident| ObjectName::from(vec![ident])) - .collect(), - ); - self.expect_token(&Token::RParen)?; - variables - } else { - OneOrManyWithParens::One(self.parse_object_name(false)?) 
- }; - - if matches!(&variables, OneOrManyWithParens::One(variable) if variable.to_string().eq_ignore_ascii_case("NAMES") - && dialect_of!(self is MySqlDialect | GenericDialect)) - { - if self.parse_keyword(Keyword::DEFAULT) { - return Ok(Statement::SetNamesDefault {}); + if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + return Ok(Set::SingleAssignment { + scope, + hivevar, + variable: ObjectName::from(vec!["TIMEZONE".into()]), + values: self.parse_set_values(false)?, + } + .into()); + } else { + // A shorthand alias for SET TIME ZONE that doesn't require + // the assignment operator. It's originally PostgreSQL specific, + // but we allow it for all the dialects + return Ok(Set::SetTimeZone { + local: scope == Some(ContextModifier::Local), + value: self.parse_expr()?, + } + .into()); } - - let charset_name = self.parse_literal_string()?; + } else if self.dialect.supports_set_names() && self.parse_keyword(Keyword::NAMES) { + if self.parse_keyword(Keyword::DEFAULT) { + return Ok(Set::SetNamesDefault {}.into()); + } + let charset_name = self.parse_identifier()?; let collation_name = if self.parse_one_of_keywords(&[Keyword::COLLATE]).is_some() { Some(self.parse_literal_string()?) } else { None }; - return Ok(Statement::SetNames { + return Ok(Set::SetNames { charset_name, collation_name, - }); - } - - let parenthesized_assignment = matches!(&variables, OneOrManyWithParens::Many(_)); - - if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - if parenthesized_assignment { - self.expect_token(&Token::LParen)?; } - - let mut values = vec![]; - loop { - let value = if let Some(expr) = self.try_parse_expr_sub_query()? { - expr - } else if let Ok(expr) = self.parse_expr() { - expr - } else { - self.expected("variable value", self.peek_token())? 
- }; - - values.push(value); - if self.consume_token(&Token::Comma) { - continue; - } - - if parenthesized_assignment { - self.expect_token(&Token::RParen)?; - } - return Ok(Statement::SetVariable { - local: modifier == Some(Keyword::LOCAL), - hivevar: Some(Keyword::HIVEVAR) == modifier, - variables, - value: values, - }); - } - } - - let OneOrManyWithParens::One(variable) = variables else { - return self.expected("set variable", self.peek_token()); - }; - - if variable.to_string().eq_ignore_ascii_case("TIMEZONE") { - // for some db (e.g. postgresql), SET TIME ZONE is an alias for SET TIMEZONE [TO|=] - match self.parse_expr() { - Ok(expr) => Ok(Statement::SetTimeZone { - local: modifier == Some(Keyword::LOCAL), - value: expr, - }), - _ => self.expected("timezone value", self.peek_token())?, - } - } else if variable.to_string() == "CHARACTERISTICS" { + .into()); + } else if self.parse_keyword(Keyword::CHARACTERISTICS) { self.expect_keywords(&[Keyword::AS, Keyword::TRANSACTION])?; - Ok(Statement::SetTransaction { + return Ok(Set::SetTransaction { modes: self.parse_transaction_modes()?, snapshot: None, session: true, - }) - } else if variable.to_string() == "TRANSACTION" && modifier.is_none() { + } + .into()); + } else if self.parse_keyword(Keyword::TRANSACTION) { if self.parse_keyword(Keyword::SNAPSHOT) { let snapshot_id = self.parse_value()?.value; - return Ok(Statement::SetTransaction { + return Ok(Set::SetTransaction { modes: vec![], snapshot: Some(snapshot_id), session: false, - }); + } + .into()); } - Ok(Statement::SetTransaction { + return Ok(Set::SetTransaction { modes: self.parse_transaction_modes()?, snapshot: None, session: false, - }) - } else if self.dialect.supports_set_stmt_without_operator() { - self.prev_token(); - self.parse_set_session_params() - } else { - self.expected("equals sign or TO", self.peek_token()) + } + .into()); } + + if self.dialect.supports_comma_separated_set_assignments() { + if scope.is_some() { + self.prev_token(); + } + + if let 
Some(assignments) = self + .maybe_parse(|parser| parser.parse_comma_separated(Parser::parse_set_assignment))? + { + return if assignments.len() > 1 { + Ok(Set::MultipleAssignments { assignments }.into()) + } else { + let SetAssignment { scope, name, value } = + assignments.into_iter().next().ok_or_else(|| { + ParserError::ParserError("Expected at least one assignment".to_string()) + })?; + + Ok(Set::SingleAssignment { + scope, + hivevar, + variable: name, + values: vec![value], + } + .into()) + }; + } + } + + let variables = if self.dialect.supports_parenthesized_set_variables() + && self.consume_token(&Token::LParen) + { + let vars = OneOrManyWithParens::Many( + self.parse_comma_separated(|parser: &mut Parser<'a>| parser.parse_identifier())? + .into_iter() + .map(|ident| ObjectName::from(vec![ident])) + .collect(), + ); + self.expect_token(&Token::RParen)?; + vars + } else { + OneOrManyWithParens::One(self.parse_object_name(false)?) + }; + + if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + let stmt = match variables { + OneOrManyWithParens::One(var) => Set::SingleAssignment { + scope, + hivevar, + variable: var, + values: self.parse_set_values(false)?, + }, + OneOrManyWithParens::Many(vars) => Set::ParenthesizedAssignments { + variables: vars, + values: self.parse_set_values(true)?, + }, + }; + + return Ok(stmt.into()); + } + + if self.dialect.supports_set_stmt_without_operator() { + self.prev_token(); + return self.parse_set_session_params(); + }; + + self.expected("equals sign or TO", self.peek_token()) } pub fn parse_set_session_params(&mut self) -> Result { @@ -11073,15 +12306,20 @@ impl<'a> Parser<'a> { _ => return self.expected("IO, PROFILE, TIME or XML", self.peek_token()), }; let value = self.parse_session_param_value()?; - Ok(Statement::SetSessionParam(SetSessionParamKind::Statistics( - SetSessionParamStatistics { topic, value }, - ))) + Ok( + Set::SetSessionParam(SetSessionParamKind::Statistics(SetSessionParamStatistics { + topic, 
+ value, + })) + .into(), + ) } else if self.parse_keyword(Keyword::IDENTITY_INSERT) { let obj = self.parse_object_name(false)?; let value = self.parse_session_param_value()?; - Ok(Statement::SetSessionParam( - SetSessionParamKind::IdentityInsert(SetSessionParamIdentityInsert { obj, value }), + Ok(Set::SetSessionParam(SetSessionParamKind::IdentityInsert( + SetSessionParamIdentityInsert { obj, value }, )) + .into()) } else if self.parse_keyword(Keyword::OFFSETS) { let keywords = self.parse_comma_separated(|parser| { let next_token = parser.next_token(); @@ -11091,9 +12329,13 @@ impl<'a> Parser<'a> { } })?; let value = self.parse_session_param_value()?; - Ok(Statement::SetSessionParam(SetSessionParamKind::Offsets( - SetSessionParamOffsets { keywords, value }, - ))) + Ok( + Set::SetSessionParam(SetSessionParamKind::Offsets(SetSessionParamOffsets { + keywords, + value, + })) + .into(), + ) } else { let names = self.parse_comma_separated(|parser| { let next_token = parser.next_token(); @@ -11103,9 +12345,13 @@ impl<'a> Parser<'a> { } })?; let value = self.parse_expr()?.to_string(); - Ok(Statement::SetSessionParam(SetSessionParamKind::Generic( - SetSessionParamGeneric { names, value }, - ))) + Ok( + Set::SetSessionParam(SetSessionParamKind::Generic(SetSessionParamGeneric { + names, + value, + })) + .into(), + ) } } @@ -11353,6 +12599,11 @@ impl<'a> Parser<'a> { // Note that for keywords to be properly handled here, they need to be // added to `RESERVED_FOR_TABLE_ALIAS`, otherwise they may be parsed as // a table alias. 
+ let joins = self.parse_joins()?; + Ok(TableWithJoins { relation, joins }) + } + + fn parse_joins(&mut self) -> Result, ParserError> { let mut joins = vec![]; loop { let global = self.parse_keyword(Keyword::GLOBAL); @@ -11476,12 +12727,29 @@ impl<'a> Parser<'a> { Keyword::OUTER => { return self.expected("LEFT, RIGHT, or FULL", self.peek_token()); } + Keyword::STRAIGHT_JOIN => { + let _ = self.next_token(); // consume STRAIGHT_JOIN + JoinOperator::StraightJoin + } _ if natural => { return self.expected("a join type after NATURAL", self.peek_token()); } _ => break, }; - let relation = self.parse_table_factor()?; + let mut relation = self.parse_table_factor()?; + + if !self + .dialect + .supports_left_associative_joins_without_parens() + && self.peek_parens_less_nested_join() + { + let joins = self.parse_joins()?; + relation = TableFactor::NestedJoin { + table_with_joins: Box::new(TableWithJoins { relation, joins }), + alias: None, + }; + } + let join_constraint = self.parse_join_constraint(natural)?; Join { relation, @@ -11491,7 +12759,21 @@ impl<'a> Parser<'a> { }; joins.push(join); } - Ok(TableWithJoins { relation, joins }) + Ok(joins) + } + + fn peek_parens_less_nested_join(&self) -> bool { + matches!( + self.peek_token_ref().token, + Token::Word(Word { + keyword: Keyword::JOIN + | Keyword::INNER + | Keyword::LEFT + | Keyword::RIGHT + | Keyword::FULL, + .. + }) + ) } /// A table name or a parenthesized subquery, followed by optional `[AS] alias` @@ -11602,6 +12884,7 @@ impl<'a> Parser<'a> { | TableFactor::Function { alias, .. } | TableFactor::UNNEST { alias, .. } | TableFactor::JsonTable { alias, .. } + | TableFactor::XmlTable { alias, .. } | TableFactor::OpenJsonTable { alias, .. } | TableFactor::TableFunction { alias, .. } | TableFactor::Pivot { alias, .. 
} @@ -11653,14 +12936,13 @@ impl<'a> Parser<'a> { with: None, body: Box::new(values), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }), alias, }) @@ -11719,6 +13001,9 @@ impl<'a> Parser<'a> { } else if self.parse_keyword_with_tokens(Keyword::OPENJSON, &[Token::LParen]) { self.prev_token(); self.parse_open_json_table_factor() + } else if self.parse_keyword_with_tokens(Keyword::XMLTABLE, &[Token::LParen]) { + self.prev_token(); + self.parse_xml_table_factor() } else { let name = self.parse_object_name(true)?; @@ -11821,7 +13106,13 @@ impl<'a> Parser<'a> { } else { return Ok(None); }; + self.parse_table_sample(modifier).map(Some) + } + fn parse_table_sample( + &mut self, + modifier: TableSampleModifier, + ) -> Result, ParserError> { let name = match self.parse_one_of_keywords(&[ Keyword::BERNOULLI, Keyword::ROW, @@ -11903,14 +13194,14 @@ impl<'a> Parser<'a> { None }; - Ok(Some(Box::new(TableSample { + Ok(Box::new(TableSample { modifier, name, quantity, seed, bucket, offset, - }))) + })) } fn parse_table_sample_seed( @@ -11951,6 +13242,99 @@ impl<'a> Parser<'a> { }) } + fn parse_xml_table_factor(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let namespaces = if self.parse_keyword(Keyword::XMLNAMESPACES) { + self.expect_token(&Token::LParen)?; + let namespaces = self.parse_comma_separated(Parser::parse_xml_namespace_definition)?; + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::Comma)?; + namespaces + } else { + vec![] + }; + let row_expression = self.parse_expr()?; + let passing = self.parse_xml_passing_clause()?; + self.expect_keyword_is(Keyword::COLUMNS)?; + let columns = self.parse_comma_separated(Parser::parse_xml_table_column)?; + self.expect_token(&Token::RParen)?; + let alias = self.maybe_parse_table_alias()?; + Ok(TableFactor::XmlTable { + namespaces, + row_expression, + passing, 
+ columns, + alias, + }) + } + + fn parse_xml_namespace_definition(&mut self) -> Result { + let uri = self.parse_expr()?; + self.expect_keyword_is(Keyword::AS)?; + let name = self.parse_identifier()?; + Ok(XmlNamespaceDefinition { uri, name }) + } + + fn parse_xml_table_column(&mut self) -> Result { + let name = self.parse_identifier()?; + + let option = if self.parse_keyword(Keyword::FOR) { + self.expect_keyword(Keyword::ORDINALITY)?; + XmlTableColumnOption::ForOrdinality + } else { + let r#type = self.parse_data_type()?; + let mut path = None; + let mut default = None; + + if self.parse_keyword(Keyword::PATH) { + path = Some(self.parse_expr()?); + } + + if self.parse_keyword(Keyword::DEFAULT) { + default = Some(self.parse_expr()?); + } + + let not_null = self.parse_keywords(&[Keyword::NOT, Keyword::NULL]); + if !not_null { + // NULL is the default but can be specified explicitly + let _ = self.parse_keyword(Keyword::NULL); + } + + XmlTableColumnOption::NamedInfo { + r#type, + path, + default, + nullable: !not_null, + } + }; + Ok(XmlTableColumn { name, option }) + } + + fn parse_xml_passing_clause(&mut self) -> Result { + let mut arguments = vec![]; + if self.parse_keyword(Keyword::PASSING) { + loop { + let by_value = + self.parse_keyword(Keyword::BY) && self.expect_keyword(Keyword::VALUE).is_ok(); + let expr = self.parse_expr()?; + let alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier()?) 
+ } else { + None + }; + arguments.push(XmlPassingArgument { + expr, + alias, + by_value, + }); + if !self.consume_token(&Token::Comma) { + break; + } + } + } + Ok(XmlPassingClause { arguments }) + } + fn parse_match_recognize(&mut self, table: TableFactor) -> Result { self.expect_token(&Token::LParen)?; @@ -12396,6 +13780,15 @@ impl<'a> Parser<'a> { &mut self, table: TableFactor, ) -> Result { + let null_inclusion = if self.parse_keyword(Keyword::INCLUDE) { + self.expect_keyword_is(Keyword::NULLS)?; + Some(NullInclusion::IncludeNulls) + } else if self.parse_keyword(Keyword::EXCLUDE) { + self.expect_keyword_is(Keyword::NULLS)?; + Some(NullInclusion::ExcludeNulls) + } else { + None + }; self.expect_token(&Token::LParen)?; let value = self.parse_identifier()?; self.expect_keyword_is(Keyword::FOR)?; @@ -12407,6 +13800,7 @@ impl<'a> Parser<'a> { Ok(TableFactor::Unpivot { table: Box::new(table), value, + null_inclusion, name, columns, alias, @@ -12430,7 +13824,7 @@ impl<'a> Parser<'a> { /// Parse a GRANT statement. pub fn parse_grant(&mut self) -> Result { - let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; + let (privileges, objects) = self.parse_grant_deny_revoke_privileges_objects()?; self.expect_keyword_is(Keyword::TO)?; let grantees = self.parse_grantees()?; @@ -12438,16 +13832,35 @@ impl<'a> Parser<'a> { let with_grant_option = self.parse_keywords(&[Keyword::WITH, Keyword::GRANT, Keyword::OPTION]); - let granted_by = self - .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) - .then(|| self.parse_identifier().unwrap()); + let current_grants = + if self.parse_keywords(&[Keyword::COPY, Keyword::CURRENT, Keyword::GRANTS]) { + Some(CurrentGrantsKind::CopyCurrentGrants) + } else if self.parse_keywords(&[Keyword::REVOKE, Keyword::CURRENT, Keyword::GRANTS]) { + Some(CurrentGrantsKind::RevokeCurrentGrants) + } else { + None + }; + + let as_grantor = if self.parse_keywords(&[Keyword::AS]) { + Some(self.parse_identifier()?) 
+ } else { + None + }; + + let granted_by = if self.parse_keywords(&[Keyword::GRANTED, Keyword::BY]) { + Some(self.parse_identifier()?) + } else { + None + }; Ok(Statement::Grant { privileges, objects, grantees, with_grant_option, + as_grantor, granted_by, + current_grants, }) } @@ -12455,7 +13868,7 @@ impl<'a> Parser<'a> { let mut values = vec![]; let mut grantee_type = GranteesType::None; loop { - grantee_type = if self.parse_keyword(Keyword::ROLE) { + let new_grantee_type = if self.parse_keyword(Keyword::ROLE) { GranteesType::Role } else if self.parse_keyword(Keyword::USER) { GranteesType::User @@ -12472,9 +13885,19 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::APPLICATION) { GranteesType::Application } else { - grantee_type // keep from previous iteraton, if not specified + grantee_type.clone() // keep from previous iteraton, if not specified }; + if self + .dialect + .get_reserved_grantees_types() + .contains(&new_grantee_type) + { + self.prev_token(); + } else { + grantee_type = new_grantee_type; + } + let grantee = if grantee_type == GranteesType::Public { Grantee { grantee_type: grantee_type.clone(), @@ -12489,7 +13912,7 @@ impl<'a> Parser<'a> { let ident = self.parse_identifier()?; if let GranteeName::ObjectName(namespace) = name { name = GranteeName::ObjectName(ObjectName::from(vec![Ident::new( - format!("{}:{}", namespace, ident), + format!("{namespace}:{ident}"), )])); }; } @@ -12509,7 +13932,7 @@ impl<'a> Parser<'a> { Ok(values) } - pub fn parse_grant_revoke_privileges_objects( + pub fn parse_grant_deny_revoke_privileges_objects( &mut self, ) -> Result<(Privileges, Option), ParserError> { let privileges = if self.parse_keyword(Keyword::ALL) { @@ -12526,6 +13949,82 @@ impl<'a> Parser<'a> { Some(GrantObjects::AllTablesInSchema { schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, }) + } else if self.parse_keywords(&[ + Keyword::ALL, + Keyword::EXTERNAL, + Keyword::TABLES, + Keyword::IN, + Keyword::SCHEMA, + ]) { + 
Some(GrantObjects::AllExternalTablesInSchema { + schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, + }) + } else if self.parse_keywords(&[ + Keyword::ALL, + Keyword::VIEWS, + Keyword::IN, + Keyword::SCHEMA, + ]) { + Some(GrantObjects::AllViewsInSchema { + schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, + }) + } else if self.parse_keywords(&[ + Keyword::ALL, + Keyword::MATERIALIZED, + Keyword::VIEWS, + Keyword::IN, + Keyword::SCHEMA, + ]) { + Some(GrantObjects::AllMaterializedViewsInSchema { + schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, + }) + } else if self.parse_keywords(&[ + Keyword::FUTURE, + Keyword::SCHEMAS, + Keyword::IN, + Keyword::DATABASE, + ]) { + Some(GrantObjects::FutureSchemasInDatabase { + databases: self.parse_comma_separated(|p| p.parse_object_name(false))?, + }) + } else if self.parse_keywords(&[ + Keyword::FUTURE, + Keyword::TABLES, + Keyword::IN, + Keyword::SCHEMA, + ]) { + Some(GrantObjects::FutureTablesInSchema { + schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, + }) + } else if self.parse_keywords(&[ + Keyword::FUTURE, + Keyword::EXTERNAL, + Keyword::TABLES, + Keyword::IN, + Keyword::SCHEMA, + ]) { + Some(GrantObjects::FutureExternalTablesInSchema { + schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, + }) + } else if self.parse_keywords(&[ + Keyword::FUTURE, + Keyword::VIEWS, + Keyword::IN, + Keyword::SCHEMA, + ]) { + Some(GrantObjects::FutureViewsInSchema { + schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, + }) + } else if self.parse_keywords(&[ + Keyword::FUTURE, + Keyword::MATERIALIZED, + Keyword::VIEWS, + Keyword::IN, + Keyword::SCHEMA, + ]) { + Some(GrantObjects::FutureMaterializedViewsInSchema { + schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, + }) } else if self.parse_keywords(&[ Keyword::ALL, Keyword::SEQUENCES, @@ -12535,11 +14034,39 @@ impl<'a> Parser<'a> { 
Some(GrantObjects::AllSequencesInSchema { schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, }) + } else if self.parse_keywords(&[ + Keyword::FUTURE, + Keyword::SEQUENCES, + Keyword::IN, + Keyword::SCHEMA, + ]) { + Some(GrantObjects::FutureSequencesInSchema { + schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, + }) + } else if self.parse_keywords(&[Keyword::RESOURCE, Keyword::MONITOR]) { + Some(GrantObjects::ResourceMonitors( + self.parse_comma_separated(|p| p.parse_object_name(false))?, + )) + } else if self.parse_keywords(&[Keyword::COMPUTE, Keyword::POOL]) { + Some(GrantObjects::ComputePools( + self.parse_comma_separated(|p| p.parse_object_name(false))?, + )) + } else if self.parse_keywords(&[Keyword::FAILOVER, Keyword::GROUP]) { + Some(GrantObjects::FailoverGroup( + self.parse_comma_separated(|p| p.parse_object_name(false))?, + )) + } else if self.parse_keywords(&[Keyword::REPLICATION, Keyword::GROUP]) { + Some(GrantObjects::ReplicationGroup( + self.parse_comma_separated(|p| p.parse_object_name(false))?, + )) + } else if self.parse_keywords(&[Keyword::EXTERNAL, Keyword::VOLUME]) { + Some(GrantObjects::ExternalVolumes( + self.parse_comma_separated(|p| p.parse_object_name(false))?, + )) } else { let object_type = self.parse_one_of_keywords(&[ Keyword::SEQUENCE, Keyword::DATABASE, - Keyword::DATABASE, Keyword::SCHEMA, Keyword::TABLE, Keyword::VIEW, @@ -12548,9 +14075,13 @@ impl<'a> Parser<'a> { Keyword::VIEW, Keyword::WAREHOUSE, Keyword::INTEGRATION, + Keyword::USER, + Keyword::CONNECTION, + Keyword::PROCEDURE, + Keyword::FUNCTION, ]); let objects = - self.parse_comma_separated(|p| p.parse_object_name_with_wildcards(false, true)); + self.parse_comma_separated(|p| p.parse_object_name_inner(false, true)); match object_type { Some(Keyword::DATABASE) => Some(GrantObjects::Databases(objects?)), Some(Keyword::SCHEMA) => Some(GrantObjects::Schemas(objects?)), @@ -12558,6 +14089,15 @@ impl<'a> Parser<'a> { Some(Keyword::WAREHOUSE) => 
Some(GrantObjects::Warehouses(objects?)), Some(Keyword::INTEGRATION) => Some(GrantObjects::Integrations(objects?)), Some(Keyword::VIEW) => Some(GrantObjects::Views(objects?)), + Some(Keyword::USER) => Some(GrantObjects::Users(objects?)), + Some(Keyword::CONNECTION) => Some(GrantObjects::Connections(objects?)), + kw @ (Some(Keyword::PROCEDURE) | Some(Keyword::FUNCTION)) => { + if let Some(name) = objects?.first() { + self.parse_grant_procedure_or_function(name, &kw)? + } else { + self.expected("procedure or function name", self.peek_token())? + } + } Some(Keyword::TABLE) | None => Some(GrantObjects::Tables(objects?)), _ => unreachable!(), } @@ -12569,6 +14109,31 @@ impl<'a> Parser<'a> { Ok((privileges, objects)) } + fn parse_grant_procedure_or_function( + &mut self, + name: &ObjectName, + kw: &Option, + ) -> Result, ParserError> { + let arg_types = if self.consume_token(&Token::LParen) { + let list = self.parse_comma_separated0(Self::parse_data_type, Token::RParen)?; + self.expect_token(&Token::RParen)?; + list + } else { + vec![] + }; + match kw { + Some(Keyword::PROCEDURE) => Ok(Some(GrantObjects::Procedure { + name: name.clone(), + arg_types, + })), + Some(Keyword::FUNCTION) => Ok(Some(GrantObjects::Function { + name: name.clone(), + arg_types, + })), + _ => self.expected("procedure or function keywords", self.peek_token())?, + } + } + pub fn parse_grant_permission(&mut self) -> Result { fn parse_columns(parser: &mut Parser) -> Result>, ParserError> { let columns = parser.parse_parenthesized_column_list(Optional, false)?; @@ -12630,6 +14195,9 @@ impl<'a> Parser<'a> { Ok(Action::Create { obj_type }) } else if self.parse_keyword(Keyword::DELETE) { Ok(Action::Delete) + } else if self.parse_keyword(Keyword::EXEC) { + let obj_type = self.maybe_parse_action_execute_obj_type(); + Ok(Action::Exec { obj_type }) } else if self.parse_keyword(Keyword::EXECUTE) { let obj_type = self.maybe_parse_action_execute_obj_type(); Ok(Action::Execute { obj_type }) @@ -12643,10 +14211,10 
@@ impl<'a> Parser<'a> { let manage_type = self.parse_action_manage_type()?; Ok(Action::Manage { manage_type }) } else if self.parse_keyword(Keyword::MODIFY) { - let modify_type = self.parse_action_modify_type()?; + let modify_type = self.parse_action_modify_type(); Ok(Action::Modify { modify_type }) } else if self.parse_keyword(Keyword::MONITOR) { - let monitor_type = self.parse_action_monitor_type()?; + let monitor_type = self.parse_action_monitor_type(); Ok(Action::Monitor { monitor_type }) } else if self.parse_keyword(Keyword::OPERATE) { Ok(Action::Operate) @@ -12787,29 +14355,29 @@ impl<'a> Parser<'a> { } } - fn parse_action_modify_type(&mut self) -> Result { + fn parse_action_modify_type(&mut self) -> Option { if self.parse_keywords(&[Keyword::LOG, Keyword::LEVEL]) { - Ok(ActionModifyType::LogLevel) + Some(ActionModifyType::LogLevel) } else if self.parse_keywords(&[Keyword::TRACE, Keyword::LEVEL]) { - Ok(ActionModifyType::TraceLevel) + Some(ActionModifyType::TraceLevel) } else if self.parse_keywords(&[Keyword::SESSION, Keyword::LOG, Keyword::LEVEL]) { - Ok(ActionModifyType::SessionLogLevel) + Some(ActionModifyType::SessionLogLevel) } else if self.parse_keywords(&[Keyword::SESSION, Keyword::TRACE, Keyword::LEVEL]) { - Ok(ActionModifyType::SessionTraceLevel) + Some(ActionModifyType::SessionTraceLevel) } else { - self.expected("GRANT MODIFY type", self.peek_token()) + None } } - fn parse_action_monitor_type(&mut self) -> Result { + fn parse_action_monitor_type(&mut self) -> Option { if self.parse_keyword(Keyword::EXECUTION) { - Ok(ActionMonitorType::Execution) + Some(ActionMonitorType::Execution) } else if self.parse_keyword(Keyword::SECURITY) { - Ok(ActionMonitorType::Security) + Some(ActionMonitorType::Security) } else if self.parse_keyword(Keyword::USAGE) { - Ok(ActionMonitorType::Usage) + Some(ActionMonitorType::Usage) } else { - self.expected("GRANT MONITOR type", self.peek_token()) + None } } @@ -12828,16 +14396,51 @@ impl<'a> Parser<'a> { } } + /// Parse 
[`Statement::Deny`] + pub fn parse_deny(&mut self) -> Result { + self.expect_keyword(Keyword::DENY)?; + + let (privileges, objects) = self.parse_grant_deny_revoke_privileges_objects()?; + let objects = match objects { + Some(o) => o, + None => { + return parser_err!( + "DENY statements must specify an object", + self.peek_token().span.start + ) + } + }; + + self.expect_keyword_is(Keyword::TO)?; + let grantees = self.parse_grantees()?; + let cascade = self.parse_cascade_option(); + let granted_by = if self.parse_keywords(&[Keyword::AS]) { + Some(self.parse_identifier()?) + } else { + None + }; + + Ok(Statement::Deny(DenyStatement { + privileges, + objects, + grantees, + cascade, + granted_by, + })) + } + /// Parse a REVOKE statement pub fn parse_revoke(&mut self) -> Result { - let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; + let (privileges, objects) = self.parse_grant_deny_revoke_privileges_objects()?; self.expect_keyword_is(Keyword::FROM)?; let grantees = self.parse_grantees()?; - let granted_by = self - .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) - .then(|| self.parse_identifier().unwrap()); + let granted_by = if self.parse_keywords(&[Keyword::GRANTED, Keyword::BY]) { + Some(self.parse_identifier()?) + } else { + None + }; let cascade = self.parse_cascade_option(); @@ -13393,6 +14996,13 @@ impl<'a> Parser<'a> { /// Parse a comma-delimited list of projections after SELECT pub fn parse_select_item(&mut self) -> Result { + let prefix = self + .parse_one_of_keywords( + self.dialect + .get_reserved_keywords_for_select_item_operator(), + ) + .map(|keyword| Ident::new(format!("{keyword:?}"))); + match self.parse_wildcard_expr()? 
{ Expr::QualifiedWildcard(prefix, token) => Ok(SelectItem::QualifiedWildcard( SelectItemQualifiedWildcardKind::ObjectName(prefix), @@ -13437,8 +15047,11 @@ impl<'a> Parser<'a> { expr => self .maybe_parse_select_item_alias() .map(|alias| match alias { - Some(alias) => SelectItem::ExprWithAlias { expr, alias }, - None => SelectItem::UnnamedExpr(expr), + Some(alias) => SelectItem::ExprWithAlias { + expr: maybe_prefixed_expr(expr, prefix), + alias, + }, + None => SelectItem::UnnamedExpr(maybe_prefixed_expr(expr, prefix)), }), } } @@ -13455,8 +15068,7 @@ impl<'a> Parser<'a> { } else { None }; - let opt_exclude = if opt_ilike.is_none() - && dialect_of!(self is GenericDialect | DuckDbDialect | SnowflakeDialect) + let opt_exclude = if opt_ilike.is_none() && self.dialect.supports_select_wildcard_exclude() { self.parse_optional_select_item_exclude()? } else { @@ -13631,10 +15243,42 @@ impl<'a> Parser<'a> { } } - /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY) + /// Parse an [OrderByExpr] expression. pub fn parse_order_by_expr(&mut self) -> Result { + self.parse_order_by_expr_inner(false) + .map(|(order_by, _)| order_by) + } + + /// Parse an [IndexColumn]. + pub fn parse_create_index_expr(&mut self) -> Result { + self.parse_order_by_expr_inner(true) + .map(|(column, operator_class)| IndexColumn { + column, + operator_class, + }) + } + + fn parse_order_by_expr_inner( + &mut self, + with_operator_class: bool, + ) -> Result<(OrderByExpr, Option), ParserError> { let expr = self.parse_expr()?; + let operator_class: Option = if with_operator_class { + // We check that if non of the following keywords are present, then we parse an + // identifier as operator class. + if self + .peek_one_of_keywords(&[Keyword::ASC, Keyword::DESC, Keyword::NULLS, Keyword::WITH]) + .is_some() + { + None + } else { + self.maybe_parse(|parser| parser.parse_identifier())? 
+ } + } else { + None + }; + let options = self.parse_order_by_options()?; let with_fill = if dialect_of!(self is ClickHouseDialect | GenericDialect) @@ -13645,11 +15289,14 @@ impl<'a> Parser<'a> { None }; - Ok(OrderByExpr { - expr, - options, - with_fill, - }) + Ok(( + OrderByExpr { + expr, + options, + with_fill, + }, + operator_class, + )) } fn parse_order_by_options(&mut self) -> Result { @@ -13773,7 +15420,8 @@ impl<'a> Parser<'a> { /// Parse a FETCH clause pub fn parse_fetch(&mut self) -> Result { - self.expect_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT])?; + let _ = self.parse_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT]); + let (quantity, percent) = if self .parse_one_of_keywords(&[Keyword::ROW, Keyword::ROWS]) .is_some() @@ -13782,16 +15430,16 @@ impl<'a> Parser<'a> { } else { let quantity = Expr::Value(self.parse_value()?); let percent = self.parse_keyword(Keyword::PERCENT); - self.expect_one_of_keywords(&[Keyword::ROW, Keyword::ROWS])?; + let _ = self.parse_one_of_keywords(&[Keyword::ROW, Keyword::ROWS]); (Some(quantity), percent) }; + let with_ties = if self.parse_keyword(Keyword::ONLY) { false - } else if self.parse_keywords(&[Keyword::WITH, Keyword::TIES]) { - true } else { - return self.expected("one of ONLY or WITH TIES", self.peek_token()); + self.parse_keywords(&[Keyword::WITH, Keyword::TIES]) }; + Ok(Fetch { with_ties, percent, @@ -13854,7 +15502,7 @@ impl<'a> Parser<'a> { transaction: Some(BeginTransactionKind::Transaction), modifier: None, statements: vec![], - exception_statements: None, + exception: None, has_end_keyword: false, }) } @@ -13886,11 +15534,56 @@ impl<'a> Parser<'a> { transaction, modifier, statements: vec![], - exception_statements: None, + exception: None, has_end_keyword: false, }) } + pub fn parse_begin_exception_end(&mut self) -> Result { + let statements = self.parse_statement_list(&[Keyword::EXCEPTION, Keyword::END])?; + + let exception = if self.parse_keyword(Keyword::EXCEPTION) { + let mut when = Vec::new(); + + 
// We can have multiple `WHEN` arms so we consume all cases until `END` + while !self.peek_keyword(Keyword::END) { + self.expect_keyword(Keyword::WHEN)?; + + // Each `WHEN` case can have one or more conditions, e.g. + // WHEN EXCEPTION_1 [OR EXCEPTION_2] THEN + // So we parse identifiers until the `THEN` keyword. + let mut idents = Vec::new(); + + while !self.parse_keyword(Keyword::THEN) { + let ident = self.parse_identifier()?; + idents.push(ident); + + self.maybe_parse(|p| p.expect_keyword(Keyword::OR))?; + } + + let statements = self.parse_statement_list(&[Keyword::WHEN, Keyword::END])?; + + when.push(ExceptionWhen { idents, statements }); + } + + Some(when) + } else { + None + }; + + self.expect_keyword(Keyword::END)?; + + Ok(Statement::StartTransaction { + begin: true, + statements, + exception, + has_end_keyword: true, + transaction: None, + modifier: None, + modes: Default::default(), + }) + } + pub fn parse_end(&mut self) -> Result { let modifier = if !self.dialect.supports_end_transaction_modifier() { None @@ -14114,10 +15807,9 @@ impl<'a> Parser<'a> { pub fn parse_merge_clauses(&mut self) -> Result, ParserError> { let mut clauses = vec![]; loop { - if self.peek_token() == Token::EOF || self.peek_token() == Token::SemiColon { + if !(self.parse_keyword(Keyword::WHEN)) { break; } - self.expect_keyword_is(Keyword::WHEN)?; let mut clause_kind = MergeClauseKind::Matched; if self.parse_keyword(Keyword::NOT) { @@ -14211,6 +15903,34 @@ impl<'a> Parser<'a> { Ok(clauses) } + fn parse_output(&mut self) -> Result { + self.expect_keyword_is(Keyword::OUTPUT)?; + let select_items = self.parse_projection()?; + self.expect_keyword_is(Keyword::INTO)?; + let into_table = self.parse_select_into()?; + + Ok(OutputClause { + select_items, + into_table, + }) + } + + fn parse_select_into(&mut self) -> Result { + let temporary = self + .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) + .is_some(); + let unlogged = self.parse_keyword(Keyword::UNLOGGED); + let table = 
self.parse_keyword(Keyword::TABLE); + let name = self.parse_object_name(false)?; + + Ok(SelectInto { + temporary, + unlogged, + table, + name, + }) + } + pub fn parse_merge(&mut self) -> Result { let into = self.parse_keyword(Keyword::INTO); @@ -14221,6 +15941,11 @@ impl<'a> Parser<'a> { self.expect_keyword_is(Keyword::ON)?; let on = self.parse_expr()?; let clauses = self.parse_merge_clauses()?; + let output = if self.peek_keyword(Keyword::OUTPUT) { + Some(self.parse_output()?) + } else { + None + }; Ok(Statement::Merge { into, @@ -14228,6 +15953,7 @@ impl<'a> Parser<'a> { source, on: Box::new(on), clauses, + output, }) } @@ -14428,6 +16154,49 @@ impl<'a> Parser<'a> { Ok(sequence_options) } + /// Parse a `CREATE SERVER` statement. + /// + /// See [Statement::CreateServer] + pub fn parse_pg_create_server(&mut self) -> Result { + let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = self.parse_object_name(false)?; + + let server_type = if self.parse_keyword(Keyword::TYPE) { + Some(self.parse_identifier()?) + } else { + None + }; + + let version = if self.parse_keyword(Keyword::VERSION) { + Some(self.parse_identifier()?) + } else { + None + }; + + self.expect_keywords(&[Keyword::FOREIGN, Keyword::DATA, Keyword::WRAPPER])?; + let foreign_data_wrapper = self.parse_object_name(false)?; + + let mut options = None; + if self.parse_keyword(Keyword::OPTIONS) { + self.expect_token(&Token::LParen)?; + options = Some(self.parse_comma_separated(|p| { + let key = p.parse_identifier()?; + let value = p.parse_identifier()?; + Ok(CreateServerOption { key, value }) + })?); + self.expect_token(&Token::RParen)?; + } + + Ok(Statement::CreateServer(CreateServerStatement { + name, + if_not_exists: ine, + server_type, + version, + foreign_data_wrapper, + options, + })) + } + /// The index of the first unprocessed token. 
pub fn index(&self) -> usize { self.index @@ -14451,22 +16220,30 @@ impl<'a> Parser<'a> { pub fn parse_create_procedure(&mut self, or_alter: bool) -> Result { let name = self.parse_object_name(false)?; let params = self.parse_optional_procedure_parameters()?; + + let language = if self.parse_keyword(Keyword::LANGUAGE) { + Some(self.parse_identifier()?) + } else { + None + }; + self.expect_keyword_is(Keyword::AS)?; - self.expect_keyword_is(Keyword::BEGIN)?; - let statements = self.parse_statements()?; - self.expect_keyword_is(Keyword::END)?; + + let body = self.parse_conditional_statements(&[Keyword::END])?; + Ok(Statement::CreateProcedure { name, or_alter, params, - body: statements, + language, + body, }) } pub fn parse_window_spec(&mut self) -> Result { let window_name = match self.peek_token().token { Token::Word(word) if word.keyword == Keyword::NoKeyword => { - self.parse_optional_indent()? + self.parse_optional_ident()? } _ => None, }; @@ -14557,9 +16334,9 @@ impl<'a> Parser<'a> { fn parse_parenthesized_identifiers(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(|p| p.parse_identifier())?; + let idents = self.parse_comma_separated0(|p| p.parse_identifier(), Token::RParen)?; self.expect_token(&Token::RParen)?; - Ok(partitions) + Ok(idents) } fn parse_column_position(&mut self) -> Result, ParserError> { @@ -14577,6 +16354,23 @@ impl<'a> Parser<'a> { } } + /// Parse [Statement::Print] + fn parse_print(&mut self) -> Result { + Ok(Statement::Print(PrintStatement { + message: Box::new(self.parse_expr()?), + })) + } + + /// Parse [Statement::Return] + fn parse_return(&mut self) -> Result { + match self.maybe_parse(|p| p.parse_expr())? 
{ + Some(expr) => Ok(Statement::Return(ReturnStatement { + value: Some(ReturnStatementValue::Expr(expr)), + })), + None => Ok(Statement::Return(ReturnStatement { value: None })), + } + } + /// Consume the parser and return its underlying token buffer pub fn into_tokens(self) -> Vec { self.tokens @@ -14715,6 +16509,17 @@ impl<'a> Parser<'a> { } } +fn maybe_prefixed_expr(expr: Expr, prefix: Option) -> Expr { + if let Some(prefix) = prefix { + Expr::Prefixed { + prefix, + value: Box::new(expr), + } + } else { + expr + } +} + impl Word { #[deprecated(since = "0.54.0", note = "please use `into_ident` instead")] pub fn to_ident(&self, span: Span) -> Ident { @@ -15177,6 +16982,20 @@ mod tests { }}; } + fn mk_expected_col(name: &str) -> IndexColumn { + IndexColumn { + column: OrderByExpr { + expr: Expr::Identifier(name.into()), + options: OrderByOptions { + asc: None, + nulls_first: None, + }, + with_fill: None, + }, + operator_class: None, + } + } + let dialect = TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(MySqlDialect {})]); @@ -15187,7 +17006,7 @@ mod tests { display_as_key: false, name: None, index_type: None, - columns: vec![Ident::new("c1")], + columns: vec![mk_expected_col("c1")], } ); @@ -15198,7 +17017,7 @@ mod tests { display_as_key: true, name: None, index_type: None, - columns: vec![Ident::new("c1")], + columns: vec![mk_expected_col("c1")], } ); @@ -15209,7 +17028,7 @@ mod tests { display_as_key: false, name: Some(Ident::with_quote('\'', "index")), index_type: None, - columns: vec![Ident::new("c1"), Ident::new("c2")], + columns: vec![mk_expected_col("c1"), mk_expected_col("c2")], } ); @@ -15220,7 +17039,7 @@ mod tests { display_as_key: false, name: None, index_type: Some(IndexType::BTree), - columns: vec![Ident::new("c1")], + columns: vec![mk_expected_col("c1")], } ); @@ -15231,7 +17050,7 @@ mod tests { display_as_key: false, name: None, index_type: Some(IndexType::Hash), - columns: vec![Ident::new("c1")], + columns: 
vec![mk_expected_col("c1")], } ); @@ -15242,7 +17061,7 @@ mod tests { display_as_key: false, name: Some(Ident::new("idx_name")), index_type: Some(IndexType::BTree), - columns: vec![Ident::new("c1")], + columns: vec![mk_expected_col("c1")], } ); @@ -15253,7 +17072,7 @@ mod tests { display_as_key: false, name: Some(Ident::new("idx_name")), index_type: Some(IndexType::Hash), - columns: vec![Ident::new("c1")], + columns: vec![mk_expected_col("c1")], } ); } diff --git a/src/test_utils.rs b/src/test_utils.rs index 6270ac42..654f2723 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -151,6 +151,8 @@ impl TestedDialects { /// /// 2. re-serializing the result of parsing `sql` produces the same /// `canonical` sql string + /// + /// For multiple statements, use [`statements_parse_to`]. pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement { let mut statements = self.parse_sql_statements(sql).expect(sql); assert_eq!(statements.len(), 1); @@ -166,6 +168,24 @@ impl TestedDialects { only_statement } + /// The same as [`one_statement_parses_to`] but it works for a multiple statements + pub fn statements_parse_to(&self, sql: &str, canonical: &str) -> Vec { + let statements = self.parse_sql_statements(sql).expect(sql); + if !canonical.is_empty() && sql != canonical { + assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements); + } else { + assert_eq!( + sql, + statements + .iter() + .map(|s| s.to_string()) + .collect::>() + .join("; ") + ); + } + statements + } + /// Ensures that `sql` parses as an [`Expr`], and that /// re-serializing the parse result produces canonical pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr { @@ -250,7 +270,7 @@ impl TestedDialects { tokenizer = tokenizer.with_unescape(options.unescape); } let tokens = tokenizer.tokenize().unwrap(); - assert_eq!(expected, tokens, "Tokenized differently for {:?}", dialect); + assert_eq!(expected, tokens, "Tokenized differently for {dialect:?}"); }); } } @@ 
-274,6 +294,11 @@ pub fn all_dialects() -> TestedDialects { ]) } +// Returns all available dialects with the specified parser options +pub fn all_dialects_with_options(options: ParserOptions) -> TestedDialects { + TestedDialects::new_with_options(all_dialects().dialects, options) +} + /// Returns all dialects matching the given predicate. pub fn all_dialects_where(predicate: F) -> TestedDialects where @@ -325,10 +350,12 @@ pub fn alter_table_op_with_name(stmt: Statement, expected_name: &str) -> AlterTa operations, on_cluster: _, location: _, + iceberg, } => { assert_eq!(name.to_string(), expected_name); assert!(!if_exists); assert!(!is_only); + assert!(!iceberg); only(operations) } _ => panic!("Expected ALTER TABLE statement"), @@ -344,6 +371,11 @@ pub fn number(n: &str) -> Value { Value::Number(n.parse().unwrap(), false) } +/// Creates a [Value::SingleQuotedString] +pub fn single_quoted_string(s: impl Into) -> Value { + Value::SingleQuotedString(s.into()) +} + pub fn table_alias(name: impl Into) -> Option { Some(TableAlias { name: Ident::new(name), @@ -426,3 +458,52 @@ pub fn call(function: &str, args: impl IntoIterator) -> Expr { within_group: vec![], }) } + +/// Gets the first index column (mysql calls it a key part) of the first index found in a +/// [`Statement::CreateIndex`], [`Statement::CreateTable`], or [`Statement::AlterTable`]. +pub fn index_column(stmt: Statement) -> Expr { + match stmt { + Statement::CreateIndex(CreateIndex { columns, .. }) => { + columns.first().unwrap().column.expr.clone() + } + Statement::CreateTable(CreateTable { constraints, .. }) => { + match constraints.first().unwrap() { + TableConstraint::Index { columns, .. } => { + columns.first().unwrap().column.expr.clone() + } + TableConstraint::Unique { columns, .. } => { + columns.first().unwrap().column.expr.clone() + } + TableConstraint::PrimaryKey { columns, .. } => { + columns.first().unwrap().column.expr.clone() + } + TableConstraint::FulltextOrSpatial { columns, .. 
} => { + columns.first().unwrap().column.expr.clone() + } + _ => panic!("Expected an index, unique, primary, full text, or spatial constraint (foreign key does not support general key part expressions)"), + } + } + Statement::AlterTable { operations, .. } => match operations.first().unwrap() { + AlterTableOperation::AddConstraint { constraint, .. } => { + match constraint { + TableConstraint::Index { columns, .. } => { + columns.first().unwrap().column.expr.clone() + } + TableConstraint::Unique { columns, .. } => { + columns.first().unwrap().column.expr.clone() + } + TableConstraint::PrimaryKey { columns, .. } => { + columns.first().unwrap().column.expr.clone() + } + TableConstraint::FulltextOrSpatial { + columns, + .. + } => columns.first().unwrap().column.expr.clone(), + _ => panic!("Expected an index, unique, primary, full text, or spatial constraint (foreign key does not support general key part expressions)"), + } + } + _ => panic!("Expected a constraint"), + }, + _ => panic!("Expected CREATE INDEX, ALTER TABLE, or CREATE TABLE, got: {stmt:?}"), + } +} diff --git a/src/tokenizer.rs b/src/tokenizer.rs index bc0f0efe..8382a534 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -246,6 +246,8 @@ pub enum Token { ShiftLeftVerticalBar, /// `|>> PostgreSQL/Redshift geometrical binary operator (Is strictly above?) 
VerticalBarShiftRight, + /// `|> BigQuery pipe operator + VerticalBarRightAngleBracket, /// `#>>`, extracts JSON sub-object at the specified path as text HashLongArrow, /// jsonb @> jsonb -> boolean: Test whether left json contains the right json @@ -359,6 +361,7 @@ impl fmt::Display for Token { Token::AmpersandRightAngleBracket => f.write_str("&>"), Token::AmpersandLeftAngleBracketVerticalBar => f.write_str("&<|"), Token::VerticalBarAmpersandRightAngleBracket => f.write_str("|&>"), + Token::VerticalBarRightAngleBracket => f.write_str("|>"), Token::TwoWayArrow => f.write_str("<->"), Token::LeftAngleBracketCaret => f.write_str("<^"), Token::RightAngleBracketCaret => f.write_str(">^"), @@ -895,7 +898,7 @@ impl<'a> Tokenizer<'a> { }; let mut location = state.location(); - while let Some(token) = self.next_token(&mut state)? { + while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? { let span = location.span_to(state.location()); buf.push(TokenWithSpan { token, span }); @@ -932,7 +935,11 @@ impl<'a> Tokenizer<'a> { } /// Get the next token or return None - fn next_token(&self, chars: &mut State) -> Result, TokenizerError> { + fn next_token( + &self, + chars: &mut State, + prev_token: Option<&Token>, + ) -> Result, TokenizerError> { match chars.peek() { Some(&ch) => match ch { ' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)), @@ -1184,6 +1191,22 @@ impl<'a> Tokenizer<'a> { } // numbers and period '0'..='9' | '.' => { + // special case where if ._ is encountered after a word then that word + // is a table and the _ is the start of the col name. + // if the prev token is not a word, then this is not a valid sql + // word or number. + if ch == '.' 
&& chars.peekable.clone().nth(1) == Some('_') { + if let Some(Token::Word(_)) = prev_token { + chars.next(); + return Ok(Some(Token::Period)); + } + + return self.tokenizer_error( + chars.location(), + "Unexpected character '_'".to_string(), + ); + } + // Some dialects support underscore as number separator // There can only be one at a time and it must be followed by another digit let is_number_separator = |ch: char, next_char: Option| { @@ -1211,17 +1234,29 @@ impl<'a> Tokenizer<'a> { chars.next(); } + // If the dialect supports identifiers that start with a numeric prefix + // and we have now consumed a dot, check if the previous token was a Word. + // If so, what follows is definitely not part of a decimal number and + // we should yield the dot as a dedicated token so compound identifiers + // starting with digits can be parsed correctly. + if s == "." && self.dialect.supports_numeric_prefix() { + if let Some(Token::Word(_)) = prev_token { + return Ok(Some(Token::Period)); + } + } + + // Consume fractional digits. s += &peeking_next_take_while(chars, |ch, next_ch| { ch.is_ascii_digit() || is_number_separator(ch, next_ch) }); - // No number -> Token::Period + // No fraction -> Token::Period if s == "." { return Ok(Some(Token::Period)); } - let mut exponent_part = String::new(); // Parse exponent as number + let mut exponent_part = String::new(); if chars.peek() == Some(&'e') || chars.peek() == Some(&'E') { let mut char_clone = chars.peekable.clone(); exponent_part.push(char_clone.next().unwrap()); @@ -1250,14 +1285,23 @@ impl<'a> Tokenizer<'a> { } } - // mysql dialect supports identifiers that start with a numeric prefix, - // as long as they aren't an exponent number. 
- if self.dialect.supports_numeric_prefix() && exponent_part.is_empty() { - let word = - peeking_take_while(chars, |ch| self.dialect.is_identifier_part(ch)); + // If the dialect supports identifiers that start with a numeric prefix, + // we need to check if the value is in fact an identifier and must thus + // be tokenized as a word. + if self.dialect.supports_numeric_prefix() { + if exponent_part.is_empty() { + // If it is not a number with an exponent, it may be + // an identifier starting with digits. + let word = + peeking_take_while(chars, |ch| self.dialect.is_identifier_part(ch)); - if !word.is_empty() { - s += word.as_str(); + if !word.is_empty() { + s += word.as_str(); + return Ok(Some(Token::make_word(s.as_str(), None))); + } + } else if prev_token == Some(&Token::Period) { + // If the previous token was a period, thus not belonging to a number, + // the value we have is part of an identifier. return Ok(Some(Token::make_word(s.as_str(), None))); } } @@ -1378,6 +1422,9 @@ impl<'a> Tokenizer<'a> { _ => self.start_binop_opt(chars, "|>", None), } } + Some('>') if self.dialect.supports_pipe_operator() => { + self.consume_for_binop(chars, "|>", Token::VerticalBarRightAngleBracket) + } // Bitshift '|' operator _ => self.start_binop(chars, "|", Token::Pipe), } @@ -1704,7 +1751,7 @@ impl<'a> Tokenizer<'a> { (None, Some(tok)) => Ok(Some(tok)), (None, None) => self.tokenizer_error( chars.location(), - format!("Expected a valid binary operator after '{}'", prefix), + format!("Expected a valid binary operator after '{prefix}'"), ), } } @@ -1762,7 +1809,7 @@ impl<'a> Tokenizer<'a> { chars.next(); let mut temp = String::new(); - let end_delimiter = format!("${}$", value); + let end_delimiter = format!("${value}$"); loop { match chars.next() { @@ -2011,8 +2058,13 @@ impl<'a> Tokenizer<'a> { num_consecutive_quotes = 0; if let Some(next) = chars.peek() { - if !self.unescape { - // In no-escape mode, the given query has to be saved completely including backslashes. 
+ if !self.unescape + || (self.dialect.ignores_wildcard_escapes() + && (*next == '%' || *next == '_')) + { + // In no-escape mode, the given query has to be saved completely + // including backslashes. Similarly, with ignore_like_wildcard_escapes, + // the backslash is not stripped. s.push(ch); s.push(*next); chars.next(); // consume next @@ -2350,13 +2402,13 @@ fn take_char_from_hex_digits( location: chars.location(), })?; let digit = next_char.to_digit(16).ok_or_else(|| TokenizerError { - message: format!("Invalid hex digit in escaped unicode string: {}", next_char), + message: format!("Invalid hex digit in escaped unicode string: {next_char}"), location: chars.location(), })?; result = result * 16 + digit; } char::from_u32(result).ok_or_else(|| TokenizerError { - message: format!("Invalid unicode character: {:x}", result), + message: format!("Invalid unicode character: {result:x}"), location: chars.location(), }) } @@ -3452,7 +3504,7 @@ mod tests { } fn check_unescape(s: &str, expected: Option<&str>) { - let s = format!("'{}'", s); + let s = format!("'{s}'"); let mut state = State { peekable: s.chars().peekable(), line: 0, @@ -3585,6 +3637,9 @@ mod tests { (r#"'\\a\\b\'c'"#, r#"\\a\\b\'c"#, r#"\a\b'c"#), (r#"'\'abcd'"#, r#"\'abcd"#, r#"'abcd"#), (r#"'''a''b'"#, r#"''a''b"#, r#"'a'b"#), + (r#"'\q'"#, r#"\q"#, r#"q"#), + (r#"'\%\_'"#, r#"\%\_"#, r#"%_"#), + (r#"'\\%\\_'"#, r#"\\%\\_"#, r#"\%\_"#), ] { let tokens = Tokenizer::new(&dialect, sql) .with_unescape(false) @@ -3618,6 +3673,16 @@ mod tests { compare(expected, tokens); } + + // MySQL special case for LIKE escapes + for (sql, expected) in [(r#"'\%'"#, r#"\%"#), (r#"'\_'"#, r#"\_"#)] { + let dialect = MySqlDialect {}; + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); + + let expected = vec![Token::SingleQuotedString(expected.to_string())]; + + compare(expected, tokens); + } } #[test] @@ -3942,4 +4007,67 @@ mod tests { ], ); } + + #[test] + fn test_tokenize_identifiers_numeric_prefix() { + 
all_dialects_where(|dialect| dialect.supports_numeric_prefix()) + .tokenizes_to("123abc", vec![Token::make_word("123abc", None)]); + + all_dialects_where(|dialect| dialect.supports_numeric_prefix()) + .tokenizes_to("12e34", vec![Token::Number("12e34".to_string(), false)]); + + all_dialects_where(|dialect| dialect.supports_numeric_prefix()).tokenizes_to( + "t.12e34", + vec![ + Token::make_word("t", None), + Token::Period, + Token::make_word("12e34", None), + ], + ); + + all_dialects_where(|dialect| dialect.supports_numeric_prefix()).tokenizes_to( + "t.1two3", + vec![ + Token::make_word("t", None), + Token::Period, + Token::make_word("1two3", None), + ], + ); + } + + #[test] + fn tokenize_period_underscore() { + let sql = String::from("SELECT table._col"); + // a dialect that supports underscores in numeric literals + let dialect = PostgreSqlDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Word(Word { + value: "table".to_string(), + quote_style: None, + keyword: Keyword::TABLE, + }), + Token::Period, + Token::Word(Word { + value: "_col".to_string(), + quote_style: None, + keyword: Keyword::NoKeyword, + }), + ]; + + compare(expected, tokens); + + let sql = String::from("SELECT ._123"); + if let Ok(tokens) = Tokenizer::new(&dialect, &sql).tokenize() { + panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}"); + } + + let sql = String::from("SELECT ._abc"); + if let Ok(tokens) = Tokenizer::new(&dialect, &sql).tokenize() { + panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}"); + } + } } diff --git a/tests/pretty_print.rs b/tests/pretty_print.rs new file mode 100644 index 00000000..f5a9d861 --- /dev/null +++ b/tests/pretty_print.rs @@ -0,0 +1,414 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use sqlparser::dialect::GenericDialect; +use sqlparser::parser::Parser; + +fn prettify(sql: &str) -> String { + let ast = Parser::parse_sql(&GenericDialect {}, sql).unwrap(); + format!("{:#}", ast[0]) +} + +#[test] +fn test_pretty_print_select() { + assert_eq!( + prettify("SELECT a, b, c FROM my_table WHERE x = 1 AND y = 2"), + r#" +SELECT + a, + b, + c +FROM + my_table +WHERE + x = 1 AND y = 2 +"# + .trim() + ); +} + +#[test] +fn test_pretty_print_join() { + assert_eq!( + prettify("SELECT a FROM table1 JOIN table2 ON table1.id = table2.id"), + r#" +SELECT + a +FROM + table1 + JOIN table2 ON table1.id = table2.id +"# + .trim() + ); +} + +#[test] +fn test_pretty_print_subquery() { + assert_eq!( + prettify("SELECT * FROM (SELECT a, b FROM my_table) AS subquery"), + r#" +SELECT + * +FROM + ( + SELECT + a, + b + FROM + my_table + ) AS subquery +"# + .trim() + ); +} + +#[test] +fn test_pretty_print_union() { + assert_eq!( + prettify("SELECT a FROM table1 UNION SELECT b FROM table2"), + r#" +SELECT + a +FROM + table1 +UNION +SELECT + b +FROM + table2 +"# + .trim() + ); +} + +#[test] +fn test_pretty_print_group_by() { + assert_eq!( + prettify("SELECT a, COUNT(*) FROM my_table GROUP BY a HAVING COUNT(*) > 1"), + r#" +SELECT + a, + COUNT(*) +FROM + my_table +GROUP BY + a +HAVING + 
COUNT(*) > 1 +"# + .trim() + ); +} + +#[test] +fn test_pretty_print_cte() { + assert_eq!( + prettify("WITH cte AS (SELECT a, b FROM my_table) SELECT * FROM cte"), + r#" +WITH cte AS ( + SELECT + a, + b + FROM + my_table +) +SELECT + * +FROM + cte +"# + .trim() + ); +} + +#[test] +fn test_pretty_print_case_when() { + assert_eq!( + prettify("SELECT CASE WHEN x > 0 THEN 'positive' WHEN x < 0 THEN 'negative' ELSE 'zero' END FROM my_table"), + r#" +SELECT + CASE + WHEN x > 0 THEN + 'positive' + WHEN x < 0 THEN + 'negative' + ELSE + 'zero' + END +FROM + my_table +"#.trim() + ); +} + +#[test] +fn test_pretty_print_window_function() { + assert_eq!( + prettify("SELECT id, value, ROW_NUMBER() OVER (PARTITION BY category ORDER BY value DESC) as rank FROM my_table"), + r#" +SELECT + id, + value, + ROW_NUMBER() OVER ( + PARTITION BY category + ORDER BY value DESC + ) AS rank +FROM + my_table +"#.trim() + ); +} + +#[test] +fn test_pretty_print_multiline_string() { + assert_eq!( + prettify("SELECT 'multiline\nstring' AS str"), + r#" +SELECT + 'multiline +string' AS str +"# + .trim(), + "A literal string with a newline should be kept as is. The contents of the string should not be indented." 
+ ); +} + +#[test] +fn test_pretty_print_insert_values() { + assert_eq!( + prettify("INSERT INTO my_table (a, b, c) VALUES (1, 2, 3), (4, 5, 6)"), + r#" +INSERT INTO my_table (a, b, c) +VALUES + (1, 2, 3), + (4, 5, 6) +"# + .trim() + ); +} + +#[test] +fn test_pretty_print_insert_select() { + assert_eq!( + prettify("INSERT INTO my_table (a, b) SELECT x, y FROM source_table RETURNING a AS id"), + r#" +INSERT INTO my_table (a, b) +SELECT + x, + y +FROM + source_table +RETURNING + a AS id +"# + .trim() + ); +} + +#[test] +fn test_pretty_print_update() { + assert_eq!( + prettify("UPDATE my_table SET a = 1, b = 2 WHERE x > 0 RETURNING id, name"), + r#" +UPDATE my_table +SET + a = 1, + b = 2 +WHERE + x > 0 +RETURNING + id, + name +"# + .trim() + ); +} + +#[test] +fn test_pretty_print_delete() { + assert_eq!( + prettify("DELETE FROM my_table WHERE x > 0 RETURNING id, name"), + r#" +DELETE FROM + my_table +WHERE + x > 0 +RETURNING + id, + name +"# + .trim() + ); + + assert_eq!( + prettify("DELETE table1, table2"), + r#" +DELETE + table1, + table2 +"# + .trim() + ); +} + +#[test] +fn test_pretty_print_create_table() { + assert_eq!( + prettify("CREATE TABLE my_table (id INT PRIMARY KEY, name VARCHAR(255) NOT NULL, CONSTRAINT fk_other FOREIGN KEY (id) REFERENCES other_table(id))"), + r#" +CREATE TABLE my_table ( + id INT PRIMARY KEY, + name VARCHAR(255) NOT NULL, + CONSTRAINT fk_other FOREIGN KEY (id) REFERENCES other_table(id) +) +"# + .trim() + ); +} + +#[test] +fn test_pretty_print_create_view() { + assert_eq!( + prettify("CREATE VIEW my_view AS SELECT a, b FROM my_table WHERE x > 0"), + r#" +CREATE VIEW my_view AS +SELECT + a, + b +FROM + my_table +WHERE + x > 0 +"# + .trim() + ); +} + +#[test] +#[ignore = "https://github.com/apache/datafusion-sqlparser-rs/issues/1850"] +fn test_pretty_print_create_function() { + assert_eq!( + prettify("CREATE FUNCTION my_func() RETURNS INT BEGIN SELECT COUNT(*) INTO @count FROM my_table; RETURN @count; END"), + r#" +CREATE FUNCTION 
my_func() RETURNS INT +BEGIN + SELECT COUNT(*) INTO @count FROM my_table; + RETURN @count; +END +"# + .trim() + ); +} + +#[test] +#[ignore = "https://github.com/apache/datafusion-sqlparser-rs/issues/1850"] +fn test_pretty_print_json_table() { + assert_eq!( + prettify("SELECT * FROM JSON_TABLE(@json, '$[*]' COLUMNS (id INT PATH '$.id', name VARCHAR(255) PATH '$.name')) AS jt"), + r#" +SELECT + * +FROM + JSON_TABLE( + @json, + '$[*]' COLUMNS ( + id INT PATH '$.id', + name VARCHAR(255) PATH '$.name' + ) + ) AS jt +"# + .trim() + ); +} + +#[test] +#[ignore = "https://github.com/apache/datafusion-sqlparser-rs/issues/1850"] +fn test_pretty_print_transaction_blocks() { + assert_eq!( + prettify("BEGIN; UPDATE my_table SET x = 1; COMMIT;"), + r#" +BEGIN; +UPDATE my_table SET x = 1; +COMMIT; +"# + .trim() + ); +} + +#[test] +#[ignore = "https://github.com/apache/datafusion-sqlparser-rs/issues/1850"] +fn test_pretty_print_control_flow() { + assert_eq!( + prettify("IF x > 0 THEN SELECT 'positive'; ELSE SELECT 'negative'; END IF;"), + r#" +IF x > 0 THEN + SELECT 'positive'; +ELSE + SELECT 'negative'; +END IF; +"# + .trim() + ); +} + +#[test] +#[ignore = "https://github.com/apache/datafusion-sqlparser-rs/issues/1850"] +fn test_pretty_print_merge() { + assert_eq!( + prettify("MERGE INTO target_table t USING source_table s ON t.id = s.id WHEN MATCHED THEN UPDATE SET t.value = s.value WHEN NOT MATCHED THEN INSERT (id, value) VALUES (s.id, s.value)"), + r#" +MERGE INTO target_table t +USING source_table s ON t.id = s.id +WHEN MATCHED THEN + UPDATE SET t.value = s.value +WHEN NOT MATCHED THEN + INSERT (id, value) VALUES (s.id, s.value) +"# + .trim() + ); +} + +#[test] +#[ignore = "https://github.com/apache/datafusion-sqlparser-rs/issues/1850"] +fn test_pretty_print_create_index() { + assert_eq!( + prettify("CREATE INDEX idx_name ON my_table (column1, column2)"), + r#" +CREATE INDEX idx_name +ON my_table (column1, column2) +"# + .trim() + ); +} + +#[test] +#[ignore = 
"https://github.com/apache/datafusion-sqlparser-rs/issues/1850"] +fn test_pretty_print_explain() { + assert_eq!( + prettify("EXPLAIN ANALYZE SELECT * FROM my_table WHERE x > 0"), + r#" +EXPLAIN ANALYZE +SELECT + * +FROM + my_table +WHERE + x > 0 +"# + .trim() + ); +} diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 3037d4ae..2ba54d3e 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -261,10 +261,10 @@ fn parse_at_at_identifier() { #[test] fn parse_begin() { - let sql = r#"BEGIN SELECT 1; EXCEPTION WHEN ERROR THEN SELECT 2; END"#; + let sql = r#"BEGIN SELECT 1; EXCEPTION WHEN ERROR THEN SELECT 2; RAISE USING MESSAGE = FORMAT('ERR: %s', 'Bad'); END"#; let Statement::StartTransaction { statements, - exception_statements, + exception, has_end_keyword, .. } = bigquery().verified_stmt(sql) @@ -272,7 +272,10 @@ fn parse_begin() { unreachable!(); }; assert_eq!(1, statements.len()); - assert_eq!(1, exception_statements.unwrap().len()); + assert!(exception.is_some()); + + let exception = exception.unwrap(); + assert_eq!(1, exception.len()); assert!(has_end_keyword); bigquery().verified_stmt( @@ -352,14 +355,16 @@ fn parse_create_view_with_options() { ViewColumnDef { name: Ident::new("age"), data_type: None, - options: Some(vec![ColumnOption::Options(vec![SqlOption::KeyValue { - key: Ident::new("description"), - value: Expr::Value( - Value::DoubleQuotedString("field age".to_string()).with_span( - Span::new(Location::new(1, 42), Location::new(1, 52)) - ) - ), - }])]), + options: Some(ColumnOptions::CommaSeparated(vec![ColumnOption::Options( + vec![SqlOption::KeyValue { + key: Ident::new("description"), + value: Expr::Value( + Value::DoubleQuotedString("field age".to_string()).with_span( + Span::new(Location::new(1, 42), Location::new(1, 52)) + ) + ), + }] + )])), }, ], columns @@ -484,7 +489,7 @@ fn parse_create_table_with_options() { columns, partition_by, cluster_by, - options, + table_options, .. 
}) => { assert_eq!( @@ -536,10 +541,10 @@ fn parse_create_table_with_options() { ( Some(Box::new(Expr::Identifier(Ident::new("_PARTITIONDATE")))), Some(WrappedCollection::NoWrapping(vec![ - Ident::new("userid"), - Ident::new("age"), + Expr::Identifier(Ident::new("userid")), + Expr::Identifier(Ident::new("age")), ])), - Some(vec![ + CreateTableOptions::Options(vec![ SqlOption::KeyValue { key: Ident::new("partition_expiration_days"), value: Expr::Value( @@ -561,7 +566,7 @@ fn parse_create_table_with_options() { }, ]) ), - (partition_by, cluster_by, options) + (partition_by, cluster_by, table_options) ) } _ => unreachable!(), @@ -601,11 +606,13 @@ fn parse_nested_data_types() { field_name: Some("a".into()), field_type: DataType::Array(ArrayElemTypeDef::AngleBracket( Box::new(DataType::Int64,) - )) + )), + options: None, }, StructField { field_name: Some("b".into()), - field_type: DataType::Bytes(Some(42)) + field_type: DataType::Bytes(Some(42)), + options: None, }, ], StructBracketKind::AngleBrackets @@ -619,6 +626,7 @@ fn parse_nested_data_types() { vec![StructField { field_name: None, field_type: DataType::Int64, + options: None, }], StructBracketKind::AngleBrackets ), @@ -632,35 +640,6 @@ fn parse_nested_data_types() { } } -#[test] -fn parse_invalid_brackets() { - let sql = "SELECT STRUCT>(NULL)"; - assert_eq!( - bigquery_and_generic() - .parse_sql_statements(sql) - .unwrap_err(), - ParserError::ParserError("unmatched > in STRUCT literal".to_string()) - ); - - let sql = "SELECT STRUCT>>(NULL)"; - assert_eq!( - bigquery_and_generic() - .parse_sql_statements(sql) - .unwrap_err(), - ParserError::ParserError("Expected: (, found: >".to_string()) - ); - - let sql = "CREATE TABLE table (x STRUCT>>)"; - assert_eq!( - bigquery_and_generic() - .parse_sql_statements(sql) - .unwrap_err(), - ParserError::ParserError( - "Expected: ',' or ')' after column definition, found: >".to_string() - ) - ); -} - #[test] fn parse_tuple_struct_literal() { // tuple syntax: 
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#tuple_syntax @@ -771,6 +750,7 @@ fn parse_typed_struct_syntax_bigquery() { fields: vec![StructField { field_name: None, field_type: DataType::Int64, + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -799,7 +779,8 @@ fn parse_typed_struct_syntax_bigquery() { quote_style: None, span: Span::empty(), }), - field_type: DataType::Int64 + field_type: DataType::Int64, + options: None, }, StructField { field_name: Some(Ident { @@ -807,7 +788,8 @@ fn parse_typed_struct_syntax_bigquery() { quote_style: None, span: Span::empty(), }), - field_type: DataType::String(None) + field_type: DataType::String(None), + options: None, }, ] }, @@ -825,17 +807,20 @@ fn parse_typed_struct_syntax_bigquery() { field_name: Some("arr".into()), field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( DataType::Float64 - ))) + ))), + options: None, }, StructField { field_name: Some("str".into()), field_type: DataType::Struct( vec![StructField { field_name: None, - field_type: DataType::Bool + field_type: DataType::Bool, + options: None, }], StructBracketKind::AngleBrackets - ) + ), + options: None, }, ] }, @@ -858,13 +843,15 @@ fn parse_typed_struct_syntax_bigquery() { field_type: DataType::Struct( Default::default(), StructBracketKind::AngleBrackets - ) + ), + options: None, }, StructField { field_name: Some("y".into()), field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( DataType::Struct(Default::default(), StructBracketKind::AngleBrackets) - ))) + ))), + options: None, }, ] }, @@ -879,7 +866,8 @@ fn parse_typed_struct_syntax_bigquery() { values: vec![Expr::Value(Value::Boolean(true).with_empty_span())], fields: vec![StructField { field_name: None, - field_type: DataType::Bool + field_type: DataType::Bool, + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -891,7 +879,8 @@ fn parse_typed_struct_syntax_bigquery() { )], fields: vec![StructField { field_name: 
None, - field_type: DataType::Bytes(Some(42)) + field_type: DataType::Bytes(Some(42)), + options: None, }] }, expr_from_projection(&select.projection[1]) @@ -907,7 +896,8 @@ fn parse_typed_struct_syntax_bigquery() { )], fields: vec![StructField { field_name: None, - field_type: DataType::Date + field_type: DataType::Date, + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -916,11 +906,15 @@ fn parse_typed_struct_syntax_bigquery() { &Expr::Struct { values: vec![Expr::TypedString { data_type: DataType::Datetime(None), - value: Value::SingleQuotedString("1999-01-01 01:23:34.45".into()) + value: ValueWithSpan { + value: Value::SingleQuotedString("1999-01-01 01:23:34.45".into()), + span: Span::empty(), + }, }], fields: vec![StructField { field_name: None, - field_type: DataType::Datetime(None) + field_type: DataType::Datetime(None), + options: None, }] }, expr_from_projection(&select.projection[1]) @@ -930,7 +924,8 @@ fn parse_typed_struct_syntax_bigquery() { values: vec![Expr::value(number("5.0"))], fields: vec![StructField { field_name: None, - field_type: DataType::Float64 + field_type: DataType::Float64, + options: None, }] }, expr_from_projection(&select.projection[2]) @@ -940,7 +935,8 @@ fn parse_typed_struct_syntax_bigquery() { values: vec![Expr::value(number("1"))], fields: vec![StructField { field_name: None, - field_type: DataType::Int64 + field_type: DataType::Int64, + options: None, }] }, expr_from_projection(&select.projection[3]) @@ -962,7 +958,8 @@ fn parse_typed_struct_syntax_bigquery() { })], fields: vec![StructField { field_name: None, - field_type: DataType::Interval + field_type: DataType::Interval, + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -971,13 +968,17 @@ fn parse_typed_struct_syntax_bigquery() { &Expr::Struct { values: vec![Expr::TypedString { data_type: DataType::JSON, - value: Value::SingleQuotedString( - r#"{"class" : {"students" : [{"name" : "Jane"}]}}"#.into() - ) + value: ValueWithSpan { + 
value: Value::SingleQuotedString( + r#"{"class" : {"students" : [{"name" : "Jane"}]}}"#.into() + ), + span: Span::empty(), + } }], fields: vec![StructField { field_name: None, - field_type: DataType::JSON + field_type: DataType::JSON, + options: None, }] }, expr_from_projection(&select.projection[1]) @@ -993,7 +994,8 @@ fn parse_typed_struct_syntax_bigquery() { )], fields: vec![StructField { field_name: None, - field_type: DataType::String(Some(42)) + field_type: DataType::String(Some(42)), + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -1002,11 +1004,17 @@ fn parse_typed_struct_syntax_bigquery() { &Expr::Struct { values: vec![Expr::TypedString { data_type: DataType::Timestamp(None, TimezoneInfo::None), - value: Value::SingleQuotedString("2008-12-25 15:30:00 America/Los_Angeles".into()) + value: ValueWithSpan { + value: Value::SingleQuotedString( + "2008-12-25 15:30:00 America/Los_Angeles".into() + ), + span: Span::empty(), + }, }], fields: vec![StructField { field_name: None, - field_type: DataType::Timestamp(None, TimezoneInfo::None) + field_type: DataType::Timestamp(None, TimezoneInfo::None), + options: None, }] }, expr_from_projection(&select.projection[1]) @@ -1016,11 +1024,15 @@ fn parse_typed_struct_syntax_bigquery() { &Expr::Struct { values: vec![Expr::TypedString { data_type: DataType::Time(None, TimezoneInfo::None), - value: Value::SingleQuotedString("15:30:00".into()) + value: ValueWithSpan { + value: Value::SingleQuotedString("15:30:00".into()), + span: Span::empty(), + } }], fields: vec![StructField { field_name: None, - field_type: DataType::Time(None, TimezoneInfo::None) + field_type: DataType::Time(None, TimezoneInfo::None), + options: None, }] }, expr_from_projection(&select.projection[2]) @@ -1033,11 +1045,15 @@ fn parse_typed_struct_syntax_bigquery() { &Expr::Struct { values: vec![Expr::TypedString { data_type: DataType::Numeric(ExactNumberInfo::None), - value: Value::SingleQuotedString("1".into()) + value: ValueWithSpan { 
+ value: Value::SingleQuotedString("1".into()), + span: Span::empty(), + } }], fields: vec![StructField { field_name: None, - field_type: DataType::Numeric(ExactNumberInfo::None) + field_type: DataType::Numeric(ExactNumberInfo::None), + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -1046,11 +1062,15 @@ fn parse_typed_struct_syntax_bigquery() { &Expr::Struct { values: vec![Expr::TypedString { data_type: DataType::BigNumeric(ExactNumberInfo::None), - value: Value::SingleQuotedString("1".into()) + value: ValueWithSpan { + value: Value::SingleQuotedString("1".into()), + span: Span::empty(), + } }], fields: vec![StructField { field_name: None, - field_type: DataType::BigNumeric(ExactNumberInfo::None) + field_type: DataType::BigNumeric(ExactNumberInfo::None), + options: None, }] }, expr_from_projection(&select.projection[1]) @@ -1067,10 +1087,12 @@ fn parse_typed_struct_syntax_bigquery() { StructField { field_name: Some("key".into()), field_type: DataType::Int64, + options: None, }, StructField { field_name: Some("value".into()), field_type: DataType::Int64, + options: None, }, ] }, @@ -1092,6 +1114,7 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { fields: vec![StructField { field_name: None, field_type: DataType::Int64, + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -1120,7 +1143,8 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { quote_style: None, span: Span::empty(), }), - field_type: DataType::Int64 + field_type: DataType::Int64, + options: None, }, StructField { field_name: Some(Ident { @@ -1128,7 +1152,8 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { quote_style: None, span: Span::empty(), }), - field_type: DataType::String(None) + field_type: DataType::String(None), + options: None, }, ] }, @@ -1151,13 +1176,15 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { field_type: DataType::Struct( Default::default(), StructBracketKind::AngleBrackets - ) + ), + options: None, }, StructField { 
field_name: Some("y".into()), field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( DataType::Struct(Default::default(), StructBracketKind::AngleBrackets) - ))) + ))), + options: None, }, ] }, @@ -1172,7 +1199,8 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { values: vec![Expr::Value(Value::Boolean(true).with_empty_span())], fields: vec![StructField { field_name: None, - field_type: DataType::Bool + field_type: DataType::Bool, + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -1184,7 +1212,8 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { )], fields: vec![StructField { field_name: None, - field_type: DataType::Bytes(Some(42)) + field_type: DataType::Bytes(Some(42)), + options: None, }] }, expr_from_projection(&select.projection[1]) @@ -1200,7 +1229,8 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { )], fields: vec![StructField { field_name: None, - field_type: DataType::Date + field_type: DataType::Date, + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -1209,11 +1239,15 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { &Expr::Struct { values: vec![Expr::TypedString { data_type: DataType::Datetime(None), - value: Value::SingleQuotedString("1999-01-01 01:23:34.45".into()) + value: ValueWithSpan { + value: Value::SingleQuotedString("1999-01-01 01:23:34.45".into()), + span: Span::empty(), + } }], fields: vec![StructField { field_name: None, - field_type: DataType::Datetime(None) + field_type: DataType::Datetime(None), + options: None, }] }, expr_from_projection(&select.projection[1]) @@ -1223,7 +1257,8 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { values: vec![Expr::value(number("5.0"))], fields: vec![StructField { field_name: None, - field_type: DataType::Float64 + field_type: DataType::Float64, + options: None, }] }, expr_from_projection(&select.projection[2]) @@ -1233,7 +1268,8 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { values: 
vec![Expr::value(number("1"))], fields: vec![StructField { field_name: None, - field_type: DataType::Int64 + field_type: DataType::Int64, + options: None, }] }, expr_from_projection(&select.projection[3]) @@ -1255,7 +1291,8 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { })], fields: vec![StructField { field_name: None, - field_type: DataType::Interval + field_type: DataType::Interval, + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -1264,13 +1301,17 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { &Expr::Struct { values: vec![Expr::TypedString { data_type: DataType::JSON, - value: Value::SingleQuotedString( - r#"{"class" : {"students" : [{"name" : "Jane"}]}}"#.into() - ) + value: ValueWithSpan { + value: Value::SingleQuotedString( + r#"{"class" : {"students" : [{"name" : "Jane"}]}}"#.into() + ), + span: Span::empty(), + } }], fields: vec![StructField { field_name: None, - field_type: DataType::JSON + field_type: DataType::JSON, + options: None, }] }, expr_from_projection(&select.projection[1]) @@ -1286,7 +1327,8 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { )], fields: vec![StructField { field_name: None, - field_type: DataType::String(Some(42)) + field_type: DataType::String(Some(42)), + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -1295,11 +1337,17 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { &Expr::Struct { values: vec![Expr::TypedString { data_type: DataType::Timestamp(None, TimezoneInfo::None), - value: Value::SingleQuotedString("2008-12-25 15:30:00 America/Los_Angeles".into()) + value: ValueWithSpan { + value: Value::SingleQuotedString( + "2008-12-25 15:30:00 America/Los_Angeles".into() + ), + span: Span::empty(), + } }], fields: vec![StructField { field_name: None, - field_type: DataType::Timestamp(None, TimezoneInfo::None) + field_type: DataType::Timestamp(None, TimezoneInfo::None), + options: None, }] }, expr_from_projection(&select.projection[1]) @@ -1309,11 +1357,15 @@ 
fn parse_typed_struct_syntax_bigquery_and_generic() { &Expr::Struct { values: vec![Expr::TypedString { data_type: DataType::Time(None, TimezoneInfo::None), - value: Value::SingleQuotedString("15:30:00".into()) + value: ValueWithSpan { + value: Value::SingleQuotedString("15:30:00".into()), + span: Span::empty(), + } }], fields: vec![StructField { field_name: None, - field_type: DataType::Time(None, TimezoneInfo::None) + field_type: DataType::Time(None, TimezoneInfo::None), + options: None, }] }, expr_from_projection(&select.projection[2]) @@ -1326,11 +1378,15 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { &Expr::Struct { values: vec![Expr::TypedString { data_type: DataType::Numeric(ExactNumberInfo::None), - value: Value::SingleQuotedString("1".into()) + value: ValueWithSpan { + value: Value::SingleQuotedString("1".into()), + span: Span::empty(), + } }], fields: vec![StructField { field_name: None, - field_type: DataType::Numeric(ExactNumberInfo::None) + field_type: DataType::Numeric(ExactNumberInfo::None), + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -1339,11 +1395,15 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { &Expr::Struct { values: vec![Expr::TypedString { data_type: DataType::BigNumeric(ExactNumberInfo::None), - value: Value::SingleQuotedString("1".into()) + value: ValueWithSpan { + value: Value::SingleQuotedString("1".into()), + span: Span::empty(), + } }], fields: vec![StructField { field_name: None, - field_type: DataType::BigNumeric(ExactNumberInfo::None) + field_type: DataType::BigNumeric(ExactNumberInfo::None), + options: None, }] }, expr_from_projection(&select.projection[1]) @@ -1360,7 +1420,8 @@ fn parse_typed_struct_with_field_name_bigquery() { values: vec![Expr::value(number("5"))], fields: vec![StructField { field_name: Some(Ident::from("x")), - field_type: DataType::Int64 + field_type: DataType::Int64, + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -1372,7 +1433,8 @@ fn 
parse_typed_struct_with_field_name_bigquery() { )], fields: vec![StructField { field_name: Some(Ident::from("y")), - field_type: DataType::String(None) + field_type: DataType::String(None), + options: None, }] }, expr_from_projection(&select.projection[1]) @@ -1387,11 +1449,13 @@ fn parse_typed_struct_with_field_name_bigquery() { fields: vec![ StructField { field_name: Some(Ident::from("x")), - field_type: DataType::Int64 + field_type: DataType::Int64, + options: None, }, StructField { field_name: Some(Ident::from("y")), - field_type: DataType::Int64 + field_type: DataType::Int64, + options: None, } ] }, @@ -1409,7 +1473,8 @@ fn parse_typed_struct_with_field_name_bigquery_and_generic() { values: vec![Expr::value(number("5"))], fields: vec![StructField { field_name: Some(Ident::from("x")), - field_type: DataType::Int64 + field_type: DataType::Int64, + options: None, }] }, expr_from_projection(&select.projection[0]) @@ -1421,7 +1486,8 @@ fn parse_typed_struct_with_field_name_bigquery_and_generic() { )], fields: vec![StructField { field_name: Some(Ident::from("y")), - field_type: DataType::String(None) + field_type: DataType::String(None), + options: None, }] }, expr_from_projection(&select.projection[1]) @@ -1436,11 +1502,13 @@ fn parse_typed_struct_with_field_name_bigquery_and_generic() { fields: vec![ StructField { field_name: Some(Ident::from("x")), - field_type: DataType::Int64 + field_type: DataType::Int64, + options: None, }, StructField { field_name: Some(Ident::from("y")), - field_type: DataType::Int64 + field_type: DataType::Int64, + options: None, } ] }, @@ -1735,6 +1803,7 @@ fn parse_merge() { }, ], }; + match bigquery_and_generic().verified_stmt(sql) { Statement::Merge { into, @@ -1742,6 +1811,7 @@ fn parse_merge() { source, on, clauses, + .. 
} => { assert!(!into); assert_eq!( @@ -2132,6 +2202,7 @@ fn test_bigquery_create_function() { assert_eq!( stmt, Statement::CreateFunction(CreateFunction { + or_alter: false, or_replace: true, temporary: true, if_not_exists: false, @@ -2310,16 +2381,46 @@ fn bigquery_select_expr_star() { #[test] fn test_select_as_struct() { - bigquery().verified_only_select("SELECT * FROM (SELECT AS VALUE STRUCT(123 AS a, false AS b))"); + for (sql, parse_to) in [ + ( + "SELECT * FROM (SELECT AS STRUCT STRUCT(123 AS a, false AS b))", + "SELECT * FROM (SELECT AS STRUCT STRUCT(123 AS a, false AS b))", + ), + ( + "SELECT * FROM (SELECT DISTINCT AS STRUCT STRUCT(123 AS a, false AS b))", + "SELECT * FROM (SELECT DISTINCT AS STRUCT STRUCT(123 AS a, false AS b))", + ), + ( + "SELECT * FROM (SELECT ALL AS STRUCT STRUCT(123 AS a, false AS b))", + "SELECT * FROM (SELECT AS STRUCT STRUCT(123 AS a, false AS b))", + ), + ] { + bigquery().one_statement_parses_to(sql, parse_to); + } + let select = bigquery().verified_only_select("SELECT AS STRUCT 1 AS a, 2 AS b"); assert_eq!(Some(ValueTableMode::AsStruct), select.value_table_mode); } #[test] fn test_select_as_value() { - bigquery().verified_only_select( - "SELECT * FROM (SELECT AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", - ); + for (sql, parse_to) in [ + ( + "SELECT * FROM (SELECT AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", + "SELECT * FROM (SELECT AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", + ), + ( + "SELECT * FROM (SELECT DISTINCT AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", + "SELECT * FROM (SELECT DISTINCT AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", + ), + ( + "SELECT * FROM (SELECT ALL AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", + "SELECT * FROM (SELECT AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", + ), + ] { + bigquery().one_statement_parses_to(sql, parse_to); + } + let select = bigquery().verified_only_select("SELECT 
AS VALUE STRUCT(1 AS a, 2 AS b) AS xyz"); assert_eq!(Some(ValueTableMode::AsValue), select.value_table_mode); } @@ -2332,7 +2433,10 @@ fn test_triple_quote_typed_strings() { assert_eq!( Expr::TypedString { data_type: DataType::JSON, - value: Value::TripleDoubleQuotedString(r#"{"foo":"bar's"}"#.into()) + value: ValueWithSpan { + value: Value::TripleDoubleQuotedString(r#"{"foo":"bar's"}"#.into()), + span: Span::empty(), + } }, expr ); @@ -2374,3 +2478,91 @@ fn test_any_type() { fn test_any_type_dont_break_custom_type() { bigquery_and_generic().verified_stmt("CREATE TABLE foo (x ANY)"); } + +#[test] +fn test_struct_field_options() { + bigquery().verified_stmt(concat!( + "CREATE TABLE my_table (", + "f0 STRUCT, ", + "f1 STRUCT<", + "a STRING OPTIONS(description = 'This is a string', type = 'string'), ", + "b INT64", + "> OPTIONS(description = 'This is a struct field')", + ")", + )); +} + +#[test] +fn test_struct_trailing_and_nested_bracket() { + bigquery().verified_stmt(concat!( + "CREATE TABLE my_table (", + "f0 STRING, ", + "f1 STRUCT>, ", + "f2 STRING", + ")", + )); + + // More complex nested structs + bigquery().verified_stmt(concat!( + "CREATE TABLE my_table (", + "f0 STRING, ", + "f1 STRUCT>>, ", + "f2 STRUCT>>>, ", + "f3 STRUCT>", + ")", + )); + + // Bad case with missing closing bracket + assert_eq!( + ParserError::ParserError("Expected: >, found: )".to_owned()), + bigquery() + .parse_sql_statements("CREATE TABLE my_table(f1 STRUCT after parsing data type STRUCT)".to_owned() + ), + bigquery() + .parse_sql_statements("CREATE TABLE my_table(f1 STRUCT>)") + .unwrap_err() + ); + + // Base case with redundant closing bracket in nested struct + assert_eq!( + ParserError::ParserError( + "Expected: ',' or ')' after column definition, found: >".to_owned() + ), + bigquery() + .parse_sql_statements("CREATE TABLE my_table(f1 STRUCT>>, c INT64)") + .unwrap_err() + ); + + let sql = "SELECT STRUCT>(NULL)"; + assert_eq!( + bigquery_and_generic() + .parse_sql_statements(sql) + 
.unwrap_err(), + ParserError::ParserError("unmatched > in STRUCT literal".to_string()) + ); + + let sql = "SELECT STRUCT>>(NULL)"; + assert_eq!( + bigquery_and_generic() + .parse_sql_statements(sql) + .unwrap_err(), + ParserError::ParserError("Expected: (, found: >".to_string()) + ); + + let sql = "CREATE TABLE table (x STRUCT>>)"; + assert_eq!( + bigquery_and_generic() + .parse_sql_statements(sql) + .unwrap_err(), + ParserError::ParserError( + "Expected: ',' or ')' after column definition, found: >".to_string() + ) + ); +} diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 72a64a48..9e5b6ce8 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -28,7 +28,7 @@ use test_utils::*; use sqlparser::ast::Expr::{BinaryOp, Identifier}; use sqlparser::ast::SelectItem::UnnamedExpr; use sqlparser::ast::TableFactor::Table; -use sqlparser::ast::Value::Number; +use sqlparser::ast::Value::Boolean; use sqlparser::ast::*; use sqlparser::dialect::ClickHouseDialect; use sqlparser::dialect::GenericDialect; @@ -60,6 +60,7 @@ fn parse_map_access_expr() { ), })], })], + exclude: None, into: None, from: vec![TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident::new("foos")])), @@ -219,10 +220,14 @@ fn parse_delimited_identifiers() { #[test] fn parse_create_table() { - clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY ("x")"#); - clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY "x""#); + clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE = MergeTree ORDER BY ("x")"#); + clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE = MergeTree ORDER BY "x""#); clickhouse().verified_stmt( - r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY "x" AS SELECT * FROM "t" WHERE true"#, + r#"CREATE TABLE "x" ("a" "int") ENGINE = MergeTree ORDER BY "x" AS SELECT * FROM "t" WHERE true"#, + ); + clickhouse().one_statement_parses_to( 
+ "CREATE TABLE x (a int) ENGINE = MergeTree() ORDER BY a", + "CREATE TABLE x (a INT) ENGINE = MergeTree ORDER BY a", ); } @@ -589,7 +594,7 @@ fn parse_clickhouse_data_types() { #[test] fn parse_create_table_with_nullable() { - let sql = r#"CREATE TABLE table (k UInt8, `a` Nullable(String), `b` Nullable(DateTime64(9, 'UTC')), c Nullable(DateTime64(9)), d Date32 NULL) ENGINE=MergeTree ORDER BY (`k`)"#; + let sql = r#"CREATE TABLE table (k UInt8, `a` Nullable(String), `b` Nullable(DateTime64(9, 'UTC')), c Nullable(DateTime64(9)), d Date32 NULL) ENGINE = MergeTree ORDER BY (`k`)"#; // ClickHouse has a case-sensitive definition of data type, but canonical representation is not let canonical_sql = sql.replace("String", "STRING"); @@ -669,11 +674,13 @@ fn parse_create_table_with_nested_data_types() { DataType::Tuple(vec![ StructField { field_name: None, - field_type: DataType::FixedString(128) + field_type: DataType::FixedString(128), + options: None, }, StructField { field_name: None, - field_type: DataType::Int128 + field_type: DataType::Int128, + options: None, } ]) ))), @@ -685,12 +692,14 @@ fn parse_create_table_with_nested_data_types() { StructField { field_name: Some("a".into()), field_type: DataType::Datetime64(9, None), + options: None, }, StructField { field_name: Some("b".into()), field_type: DataType::Array(ArrayElemTypeDef::Parenthesis( Box::new(DataType::Uuid) - )) + )), + options: None, }, ]), options: vec![], @@ -714,14 +723,14 @@ fn parse_create_table_with_nested_data_types() { fn parse_create_table_with_primary_key() { match clickhouse_and_generic().verified_stmt(concat!( r#"CREATE TABLE db.table (`i` INT, `k` INT)"#, - " ENGINE=SharedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')", + " ENGINE = SharedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')", " PRIMARY KEY tuple(i)", " ORDER BY tuple(i)", )) { Statement::CreateTable(CreateTable { name, columns, - engine, + table_options, primary_key, order_by, .. 
@@ -742,16 +751,23 @@ fn parse_create_table_with_primary_key() { ], columns ); - assert_eq!( - engine, - Some(TableEngine { - name: "SharedMergeTree".to_string(), - parameters: Some(vec![ + + let plain_options = match table_options { + CreateTableOptions::Plain(options) => options, + _ => unreachable!(), + }; + + assert!(plain_options.contains(&SqlOption::NamedParenthesizedList( + NamedParenthesizedList { + key: Ident::new("ENGINE"), + name: Some(Ident::new("SharedMergeTree")), + values: vec![ Ident::with_quote('\'', "/clickhouse/tables/{uuid}/{shard}"), Ident::with_quote('\'', "{replica}"), - ]), - }) - ); + ] + } + ))); + fn assert_function(actual: &Function, name: &str, arg: &str) -> bool { assert_eq!(actual.name, ObjectName::from(vec![Ident::new(name)])); assert_eq!( @@ -798,7 +814,7 @@ fn parse_create_table_with_variant_default_expressions() { " b DATETIME EPHEMERAL now(),", " c DATETIME EPHEMERAL,", " d STRING ALIAS toString(c)", - ") ENGINE=MergeTree" + ") ENGINE = MergeTree" ); match clickhouse_and_generic().verified_stmt(sql) { Statement::CreateTable(CreateTable { columns, .. 
}) => { @@ -903,7 +919,7 @@ fn parse_create_view_with_fields_data_types() { }]), vec![] )), - options: None + options: None, }, ViewColumnDef { name: "f".into(), @@ -915,7 +931,7 @@ fn parse_create_view_with_fields_data_types() { }]), vec![] )), - options: None + options: None, }, ] ); @@ -944,42 +960,113 @@ fn parse_limit_by() { clickhouse_and_generic().verified_stmt( r#"SELECT * FROM default.last_asset_runs_mv ORDER BY created_at DESC LIMIT 1 BY asset, toStartOfDay(created_at)"#, ); + clickhouse_and_generic().parse_sql_statements( + r#"SELECT * FROM default.last_asset_runs_mv ORDER BY created_at DESC BY asset, toStartOfDay(created_at)"#, + ).expect_err("BY without LIMIT"); + clickhouse_and_generic() + .parse_sql_statements("SELECT * FROM T OFFSET 5 BY foo") + .expect_err("BY with OFFSET but without LIMIT"); } #[test] fn parse_settings_in_query() { - match clickhouse_and_generic() - .verified_stmt(r#"SELECT * FROM t SETTINGS max_threads = 1, max_block_size = 10000"#) - { - Statement::Query(query) => { - assert_eq!( - query.settings, - Some(vec![ - Setting { - key: Ident::new("max_threads"), - value: Number("1".parse().unwrap(), false) - }, - Setting { - key: Ident::new("max_block_size"), - value: Number("10000".parse().unwrap(), false) - }, - ]) - ); + fn check_settings(sql: &str, expected: Vec) { + match clickhouse_and_generic().verified_stmt(sql) { + Statement::Query(q) => { + assert_eq!(q.settings, Some(expected)); + } + _ => unreachable!(), } - _ => unreachable!(), + } + + for (sql, expected_settings) in [ + ( + r#"SELECT * FROM t SETTINGS max_threads = 1, max_block_size = 10000"#, + vec![ + Setting { + key: Ident::new("max_threads"), + value: Expr::value(number("1")), + }, + Setting { + key: Ident::new("max_block_size"), + value: Expr::value(number("10000")), + }, + ], + ), + ( + r#"SELECT * FROM t SETTINGS additional_table_filters = {'table_1': 'x != 2'}"#, + vec![Setting { + key: Ident::new("additional_table_filters"), + value: 
Expr::Dictionary(vec![DictionaryField { + key: Ident::with_quote('\'', "table_1"), + value: Expr::value(single_quoted_string("x != 2")).into(), + }]), + }], + ), + ( + r#"SELECT * FROM t SETTINGS additional_result_filter = 'x != 2', query_plan_optimize_lazy_materialization = false"#, + vec![ + Setting { + key: Ident::new("additional_result_filter"), + value: Expr::value(single_quoted_string("x != 2")), + }, + Setting { + key: Ident::new("query_plan_optimize_lazy_materialization"), + value: Expr::value(Boolean(false)), + }, + ], + ), + ] { + check_settings(sql, expected_settings); } let invalid_cases = vec![ - "SELECT * FROM t SETTINGS a", - "SELECT * FROM t SETTINGS a=", - "SELECT * FROM t SETTINGS a=1, b", - "SELECT * FROM t SETTINGS a=1, b=", - "SELECT * FROM t SETTINGS a=1, b=c", + ("SELECT * FROM t SETTINGS a", "Expected: =, found: EOF"), + ( + "SELECT * FROM t SETTINGS a=", + "Expected: an expression, found: EOF", + ), + ("SELECT * FROM t SETTINGS a=1, b", "Expected: =, found: EOF"), + ( + "SELECT * FROM t SETTINGS a=1, b=", + "Expected: an expression, found: EOF", + ), + ( + "SELECT * FROM t SETTINGS a = {", + "Expected: identifier, found: EOF", + ), + ( + "SELECT * FROM t SETTINGS a = {'b'", + "Expected: :, found: EOF", + ), + ( + "SELECT * FROM t SETTINGS a = {'b': ", + "Expected: an expression, found: EOF", + ), + ( + "SELECT * FROM t SETTINGS a = {'b': 'c',}", + "Expected: identifier, found: }", + ), + ( + "SELECT * FROM t SETTINGS a = {'b': 'c', 'd'}", + "Expected: :, found: }", + ), + ( + "SELECT * FROM t SETTINGS a = {'b': 'c', 'd': }", + "Expected: an expression, found: }", + ), + ( + "SELECT * FROM t SETTINGS a = {ANY(b)}", + "Expected: :, found: (", + ), ]; - for sql in invalid_cases { - clickhouse_and_generic() - .parse_sql_statements(sql) - .expect_err("Expected: SETTINGS key = value, found: "); + for (sql, error_msg) in invalid_cases { + assert_eq!( + clickhouse_and_generic() + .parse_sql_statements(sql) + .unwrap_err(), + 
ParserError(error_msg.to_string()) + ); } } #[test] @@ -1107,7 +1194,14 @@ fn parse_select_order_by_with_fill_interpolate() { }, select.order_by.expect("ORDER BY expected") ); - assert_eq!(Some(Expr::value(number("2"))), select.limit); + assert_eq!( + select.limit_clause, + Some(LimitClause::LimitOffset { + limit: Some(Expr::value(number("2"))), + offset: None, + limit_by: vec![] + }) + ); } #[test] @@ -1321,7 +1415,7 @@ fn parse_use() { for object_name in &valid_object_names { // Test single identifier without quotes assert_eq!( - clickhouse().verified_stmt(&format!("USE {}", object_name)), + clickhouse().verified_stmt(&format!("USE {object_name}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::new( object_name.to_string() )]))) @@ -1329,7 +1423,7 @@ fn parse_use() { for "e in "e_styles { // Test single identifier with different type of quotes assert_eq!( - clickhouse().verified_stmt(&format!("USE {0}{1}{0}", quote, object_name)), + clickhouse().verified_stmt(&format!("USE {quote}{object_name}{quote}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::with_quote( quote, object_name.to_string(), @@ -1343,7 +1437,7 @@ fn parse_use() { fn test_query_with_format_clause() { let format_options = vec!["TabSeparated", "JSONCompact", "NULL"]; for format in &format_options { - let sql = format!("SELECT * FROM t FORMAT {}", format); + let sql = format!("SELECT * FROM t FORMAT {format}"); match clickhouse_and_generic().verified_stmt(&sql) { Statement::Query(query) => { if *format == "NULL" { @@ -1526,11 +1620,11 @@ fn parse_select_table_function_settings() { settings: Some(vec![ Setting { key: "s0".into(), - value: Value::Number("3".parse().unwrap(), false), + value: Expr::value(number("3")), }, Setting { key: "s1".into(), - value: Value::SingleQuotedString("s".into()), + value: Expr::value(single_quoted_string("s")), }, ]), }, @@ -1551,11 +1645,11 @@ fn parse_select_table_function_settings() { settings: Some(vec![ Setting { key: "s0".into(), - value: 
Value::Number("3".parse().unwrap(), false), + value: Expr::value(number("3")), }, Setting { key: "s1".into(), - value: Value::SingleQuotedString("s".into()), + value: Expr::value(single_quoted_string("s")), }, ]), }, @@ -1565,7 +1659,6 @@ fn parse_select_table_function_settings() { "SELECT * FROM t(SETTINGS a=)", "SELECT * FROM t(SETTINGS a=1, b)", "SELECT * FROM t(SETTINGS a=1, b=)", - "SELECT * FROM t(SETTINGS a=1, b=c)", ]; for sql in invalid_cases { clickhouse_and_generic() diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 0a68d31e..15144479 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -40,8 +40,9 @@ use sqlparser::parser::{Parser, ParserError, ParserOptions}; use sqlparser::tokenizer::Tokenizer; use sqlparser::tokenizer::{Location, Span}; use test_utils::{ - all_dialects, all_dialects_where, alter_table_op, assert_eq_vec, call, expr_from_projection, - join, number, only, table, table_alias, table_from_name, TestedDialects, + all_dialects, all_dialects_where, all_dialects_with_options, alter_table_op, assert_eq_vec, + call, expr_from_projection, join, number, only, table, table_alias, table_from_name, + TestedDialects, }; #[macro_use] @@ -459,6 +460,7 @@ fn parse_update_set_from() { SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))), SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("id"))), ], + exclude: None, into: None, from: vec![TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident::new("t1")])), @@ -483,14 +485,13 @@ fn parse_update_set_from() { flavor: SelectFlavor::Standard, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }), alias: Some(TableAlias { name: Ident::new("t2"), @@ -900,7 +901,12 @@ fn parse_simple_select() { assert!(select.distinct.is_none()); assert_eq!(3, select.projection.len()); let select = 
verified_query(sql); - assert_eq!(Some(Expr::value(number("5"))), select.limit); + let expected_limit_clause = LimitClause::LimitOffset { + limit: Some(Expr::value(number("5"))), + offset: None, + limit_by: vec![], + }; + assert_eq!(Some(expected_limit_clause), select.limit_clause); } #[test] @@ -908,14 +914,31 @@ fn parse_limit() { verified_stmt("SELECT * FROM user LIMIT 1"); } +#[test] +fn parse_invalid_limit_by() { + all_dialects() + .parse_sql_statements("SELECT * FROM user BY name") + .expect_err("BY without LIMIT"); +} + #[test] fn parse_limit_is_not_an_alias() { // In dialects supporting LIMIT it shouldn't be parsed as a table alias let ast = verified_query("SELECT id FROM customer LIMIT 1"); - assert_eq!(Some(Expr::value(number("1"))), ast.limit); + let expected_limit_clause = LimitClause::LimitOffset { + limit: Some(Expr::value(number("1"))), + offset: None, + limit_by: vec![], + }; + assert_eq!(Some(expected_limit_clause), ast.limit_clause); let ast = verified_query("SELECT 1 LIMIT 5"); - assert_eq!(Some(Expr::value(number("5"))), ast.limit); + let expected_limit_clause = LimitClause::LimitOffset { + limit: Some(Expr::value(number("5"))), + offset: None, + limit_by: vec![], + }; + assert_eq!(Some(expected_limit_clause), ast.limit_clause); } #[test] @@ -1211,7 +1234,6 @@ fn parse_select_expr_star() { "SELECT 2. 
* 3 FROM T", ); dialects.verified_only_select("SELECT myfunc().* FROM T"); - dialects.verified_only_select("SELECT myfunc().* EXCEPT (foo) FROM T"); // Invalid let res = dialects.parse_sql_statements("SELECT foo.*.* FROM T"); @@ -1219,6 +1241,11 @@ fn parse_select_expr_star() { ParserError::ParserError("Expected: end of statement, found: .".to_string()), res.unwrap_err() ); + + let dialects = all_dialects_where(|d| { + d.supports_select_expr_star() && d.supports_select_wildcard_except() + }); + dialects.verified_only_select("SELECT myfunc().* EXCEPT (foo) FROM T"); } #[test] @@ -2019,7 +2046,7 @@ fn parse_ilike() { pattern: Box::new(Expr::Value( (Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some('^'.to_string()), + escape_char: Some(Value::SingleQuotedString('^'.to_string())), any: false, }, select.selection.unwrap() @@ -2083,7 +2110,7 @@ fn parse_like() { pattern: Box::new(Expr::Value( (Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some('^'.to_string()), + escape_char: Some(Value::SingleQuotedString('^'.to_string())), any: false, }, select.selection.unwrap() @@ -2146,7 +2173,24 @@ fn parse_similar_to() { pattern: Box::new(Expr::Value( (Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some('^'.to_string()), + escape_char: Some(Value::SingleQuotedString('^'.to_string())), + }, + select.selection.unwrap() + ); + + let sql = &format!( + "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE NULL", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + Expr::SimilarTo { + expr: Box::new(Expr::Identifier(Ident::new("name"))), + negated, + pattern: Box::new(Expr::Value( + (Value::SingleQuotedString("%a".to_string())).with_empty_span() + )), + escape_char: Some(Value::Null), }, select.selection.unwrap() ); @@ -2164,7 +2208,7 @@ fn parse_similar_to() { pattern: Box::new(Expr::Value( 
(Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some('^'.to_string()), + escape_char: Some(Value::SingleQuotedString('^'.to_string())), })), select.selection.unwrap() ); @@ -2211,6 +2255,22 @@ fn parse_in_subquery() { ); } +#[test] +fn parse_in_union() { + let sql = "SELECT * FROM customers WHERE segment IN ((SELECT segm FROM bar) UNION (SELECT segm FROM bar2))"; + let select = verified_only_select(sql); + assert_eq!( + Expr::InSubquery { + expr: Box::new(Expr::Identifier(Ident::new("segment"))), + subquery: Box::new(verified_query( + "(SELECT segm FROM bar) UNION (SELECT segm FROM bar2)" + )), + negated: false, + }, + select.selection.unwrap() + ); +} + #[test] fn parse_in_unnest() { fn chk(negated: bool) { @@ -2493,7 +2553,12 @@ fn parse_select_order_by_limit() { ]), select.order_by.expect("ORDER BY expected").kind ); - assert_eq!(Some(Expr::value(number("2"))), select.limit); + let expected_limit_clause = LimitClause::LimitOffset { + limit: Some(Expr::value(number("2"))), + offset: None, + limit_by: vec![], + }; + assert_eq!(Some(expected_limit_clause), select.limit_clause); } #[test] @@ -2654,7 +2719,12 @@ fn parse_select_order_by_nulls_order() { ]), select.order_by.expect("ORDER BY expeccted").kind ); - assert_eq!(Some(Expr::value(number("2"))), select.limit); + let expected_limit_clause = LimitClause::LimitOffset { + limit: Some(Expr::value(number("2"))), + offset: None, + limit_by: vec![], + }; + assert_eq!(Some(expected_limit_clause), select.limit_clause); } #[test] @@ -2777,6 +2847,38 @@ fn parse_group_by_special_grouping_sets() { } } +#[test] +fn parse_group_by_grouping_sets_single_values() { + let sql = "SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ((a, b), a, (b), c, ())"; + let canonical = + "SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ((a, b), (a), (b), (c), ())"; + match all_dialects().one_statement_parses_to(sql, canonical) { + Statement::Query(query) => { + let group_by = 
&query.body.as_select().unwrap().group_by; + assert_eq!( + group_by, + &GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("a")), + Expr::Identifier(Ident::new("b")) + ], + vec![GroupByWithModifier::GroupingSets(Expr::GroupingSets(vec![ + vec![ + Expr::Identifier(Ident::new("a")), + Expr::Identifier(Ident::new("b")) + ], + vec![Expr::Identifier(Ident::new("a"))], + vec![Expr::Identifier(Ident::new("b"))], + vec![Expr::Identifier(Ident::new("c"))], + vec![] + ]))] + ) + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_select_having() { let sql = "SELECT foo FROM bar GROUP BY foo HAVING COUNT(*) > 1"; @@ -2864,6 +2966,14 @@ fn parse_limit_accepts_all() { "SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT ALL", "SELECT id, fname, lname FROM customer WHERE id = 1", ); + one_statement_parses_to( + "SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT ALL OFFSET 1", + "SELECT id, fname, lname FROM customer WHERE id = 1 OFFSET 1", + ); + one_statement_parses_to( + "SELECT id, fname, lname FROM customer WHERE id = 1 OFFSET 1 LIMIT ALL", + "SELECT id, fname, lname FROM customer WHERE id = 1 OFFSET 1", + ); } #[test] @@ -3476,7 +3586,7 @@ fn test_double_value() { for (input, expected) in test_cases { for (i, expr) in input.iter().enumerate() { if let Statement::Query(query) = - dialects.one_statement_parses_to(&format!("SELECT {}", expr), "") + dialects.one_statement_parses_to(&format!("SELECT {expr}"), "") { if let SetExpr::Select(select) = *query.body { assert_eq!(expected[i], select.projection[0]); @@ -3604,7 +3714,7 @@ fn parse_create_table() { name, columns, constraints, - with_options, + table_options, if_not_exists: false, external: false, file_format: None, @@ -3706,6 +3816,7 @@ fn parse_create_table() { vec![ TableConstraint::ForeignKey { name: Some("fkey".into()), + index_name: None, columns: vec!["lat".into()], foreign_table: ObjectName::from(vec!["othertable3".into()]), referred_columns: vec!["lat".into()], @@ -3715,6 +3826,7 @@ fn 
parse_create_table() { }, TableConstraint::ForeignKey { name: Some("fkey2".into()), + index_name: None, columns: vec!["lat".into()], foreign_table: ObjectName::from(vec!["othertable4".into()]), referred_columns: vec!["lat".into()], @@ -3724,6 +3836,7 @@ fn parse_create_table() { }, TableConstraint::ForeignKey { name: None, + index_name: None, columns: vec!["lat".into()], foreign_table: ObjectName::from(vec!["othertable4".into()]), referred_columns: vec!["lat".into()], @@ -3733,6 +3846,7 @@ fn parse_create_table() { }, TableConstraint::ForeignKey { name: None, + index_name: None, columns: vec!["lng".into()], foreign_table: ObjectName::from(vec!["othertable4".into()]), referred_columns: vec!["longitude".into()], @@ -3742,7 +3856,7 @@ fn parse_create_table() { }, ] ); - assert_eq!(with_options, vec![]); + assert_eq!(table_options, CreateTableOptions::None); } _ => unreachable!(), } @@ -3787,7 +3901,7 @@ fn parse_create_table_with_constraint_characteristics() { name, columns, constraints, - with_options, + table_options, if_not_exists: false, external: false, file_format: None, @@ -3829,6 +3943,7 @@ fn parse_create_table_with_constraint_characteristics() { vec![ TableConstraint::ForeignKey { name: Some("fkey".into()), + index_name: None, columns: vec!["lat".into()], foreign_table: ObjectName::from(vec!["othertable3".into()]), referred_columns: vec!["lat".into()], @@ -3842,6 +3957,7 @@ fn parse_create_table_with_constraint_characteristics() { }, TableConstraint::ForeignKey { name: Some("fkey2".into()), + index_name: None, columns: vec!["lat".into()], foreign_table: ObjectName::from(vec!["othertable4".into()]), referred_columns: vec!["lat".into()], @@ -3855,6 +3971,7 @@ fn parse_create_table_with_constraint_characteristics() { }, TableConstraint::ForeignKey { name: None, + index_name: None, columns: vec!["lat".into()], foreign_table: ObjectName::from(vec!["othertable4".into()]), referred_columns: vec!["lat".into()], @@ -3868,6 +3985,7 @@ fn 
parse_create_table_with_constraint_characteristics() { }, TableConstraint::ForeignKey { name: None, + index_name: None, columns: vec!["lng".into()], foreign_table: ObjectName::from(vec!["othertable4".into()]), referred_columns: vec!["longitude".into()], @@ -3881,7 +3999,7 @@ fn parse_create_table_with_constraint_characteristics() { }, ] ); - assert_eq!(with_options, vec![]); + assert_eq!(table_options, CreateTableOptions::None); } _ => unreachable!(), } @@ -3928,13 +4046,13 @@ fn parse_create_table_column_constraint_characteristics() { syntax }; - let sql = format!("CREATE TABLE t (a int UNIQUE {})", syntax); + let sql = format!("CREATE TABLE t (a int UNIQUE {syntax})"); let expected_clause = if syntax.is_empty() { String::new() } else { format!(" {syntax}") }; - let expected = format!("CREATE TABLE t (a INT UNIQUE{})", expected_clause); + let expected = format!("CREATE TABLE t (a INT UNIQUE{expected_clause})"); let ast = one_statement_parses_to(&sql, &expected); let expected_value = if deferrable.is_some() || initially.is_some() || enforced.is_some() { @@ -4170,6 +4288,14 @@ fn parse_create_schema() { } _ => unreachable!(), } + + verified_stmt(r#"CREATE SCHEMA a.b.c OPTIONS(key1 = 'value1', key2 = 'value2')"#); + verified_stmt(r#"CREATE SCHEMA IF NOT EXISTS a OPTIONS(key1 = 'value1')"#); + verified_stmt(r#"CREATE SCHEMA IF NOT EXISTS a OPTIONS()"#); + verified_stmt(r#"CREATE SCHEMA IF NOT EXISTS a DEFAULT COLLATE 'und:ci' OPTIONS()"#); + verified_stmt(r#"CREATE SCHEMA a.b.c WITH (key1 = 'value1', key2 = 'value2')"#); + verified_stmt(r#"CREATE SCHEMA IF NOT EXISTS a WITH (key1 = 'value1')"#); + verified_stmt(r#"CREATE SCHEMA IF NOT EXISTS a WITH ()"#); } #[test] @@ -4247,14 +4373,13 @@ fn parse_create_table_as_table() { schema_name: None, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }); match 
verified_stmt(sql1) { @@ -4274,14 +4399,13 @@ fn parse_create_table_as_table() { schema_name: Some("schema_name".to_string()), }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }); match verified_stmt(sql2) { @@ -4365,7 +4489,11 @@ fn parse_create_table_with_options() { let sql = "CREATE TABLE t (c INT) WITH (foo = 'bar', a = 123)"; match generic.verified_stmt(sql) { - Statement::CreateTable(CreateTable { with_options, .. }) => { + Statement::CreateTable(CreateTable { table_options, .. }) => { + let with_options = match table_options { + CreateTableOptions::With(options) => options, + _ => unreachable!(), + }; assert_eq!( vec![ SqlOption::KeyValue { @@ -4426,7 +4554,7 @@ fn parse_create_external_table() { name, columns, constraints, - with_options, + table_options, if_not_exists, external, file_format, @@ -4469,7 +4597,7 @@ fn parse_create_external_table() { assert_eq!(FileFormat::TEXTFILE, file_format.unwrap()); assert_eq!("/tmp/example.csv", location.unwrap()); - assert_eq!(with_options, vec![]); + assert_eq!(table_options, CreateTableOptions::None); assert!(!if_not_exists); } _ => unreachable!(), @@ -4494,7 +4622,7 @@ fn parse_create_or_replace_external_table() { name, columns, constraints, - with_options, + table_options, if_not_exists, external, file_format, @@ -4523,7 +4651,7 @@ fn parse_create_or_replace_external_table() { assert_eq!(FileFormat::TEXTFILE, file_format.unwrap()); assert_eq!("/tmp/example.csv", location.unwrap()); - assert_eq!(with_options, vec![]); + assert_eq!(table_options, CreateTableOptions::None); assert!(!if_not_exists); assert!(or_replace); } @@ -4851,7 +4979,7 @@ fn parse_alter_table_constraints() { match alter_table_op(verified_stmt(&format!( "ALTER TABLE tab ADD {constraint_text}" ))) { - AlterTableOperation::AddConstraint(constraint) => { + AlterTableOperation::AddConstraint { 
constraint, .. } => { assert_eq!(constraint_text, constraint.to_string()); } _ => unreachable!(), @@ -4866,22 +4994,26 @@ fn parse_alter_table_drop_column() { check_one("DROP COLUMN IF EXISTS is_active CASCADE"); check_one("DROP COLUMN IF EXISTS is_active RESTRICT"); one_statement_parses_to( - "ALTER TABLE tab DROP IF EXISTS is_active CASCADE", + "ALTER TABLE tab DROP COLUMN IF EXISTS is_active CASCADE", "ALTER TABLE tab DROP COLUMN IF EXISTS is_active CASCADE", ); one_statement_parses_to( "ALTER TABLE tab DROP is_active CASCADE", - "ALTER TABLE tab DROP COLUMN is_active CASCADE", + "ALTER TABLE tab DROP is_active CASCADE", ); + let dialects = all_dialects_where(|d| d.supports_comma_separated_drop_column_list()); + dialects.verified_stmt("ALTER TABLE tbl DROP COLUMN c1, c2, c3"); + fn check_one(constraint_text: &str) { match alter_table_op(verified_stmt(&format!("ALTER TABLE tab {constraint_text}"))) { AlterTableOperation::DropColumn { - column_name, + has_column_keyword: true, + column_names, if_exists, drop_behavior, } => { - assert_eq!("is_active", column_name.to_string()); + assert_eq!("is_active", column_names.first().unwrap().to_string()); assert!(if_exists); match drop_behavior { None => assert!(constraint_text.ends_with(" is_active")), @@ -4951,22 +5083,21 @@ fn parse_alter_table_alter_column_type() { AlterColumnOperation::SetDataType { data_type: DataType::Text, using: None, + had_set: true, } ); } _ => unreachable!(), } + verified_stmt(&format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT")); - let dialect = TestedDialects::new(vec![Box::new(GenericDialect {})]); + let dialects = all_dialects_where(|d| d.supports_alter_column_type_using()); + dialects.verified_stmt(&format!( + "{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'" + )); - let res = - dialect.parse_sql_statements(&format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT")); - assert_eq!( - ParserError::ParserError("Expected: SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE 
after ALTER COLUMN, found: TYPE".to_string()), - res.unwrap_err() - ); - - let res = dialect.parse_sql_statements(&format!( + let dialects = all_dialects_except(|d| d.supports_alter_column_type_using()); + let res = dialects.parse_sql_statements(&format!( "{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'" )); assert_eq!( @@ -5419,7 +5550,8 @@ fn parse_named_window_functions() { WINDOW w AS (PARTITION BY x), win AS (ORDER BY y)"; supported_dialects.verified_stmt(sql); - let select = verified_only_select(sql); + let select = all_dialects_except(|d| d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d))) + .verified_only_select(sql); const EXPECTED_PROJ_QTY: usize = 2; assert_eq!(EXPECTED_PROJ_QTY, select.projection.len()); @@ -5449,6 +5581,7 @@ fn parse_named_window_functions() { #[test] fn parse_window_clause() { + let dialects = all_dialects_except(|d| d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d))); let sql = "SELECT * \ FROM mytable \ WINDOW \ @@ -5461,10 +5594,14 @@ fn parse_window_clause() { window7 AS (window1 ROWS UNBOUNDED PRECEDING), \ window8 AS (window1 PARTITION BY a ORDER BY b ROWS UNBOUNDED PRECEDING) \ ORDER BY C3"; - verified_only_select(sql); + dialects.verified_only_select(sql); let sql = "SELECT * from mytable WINDOW window1 AS window2"; - let dialects = all_dialects_except(|d| d.is::() || d.is::()); + let dialects = all_dialects_except(|d| { + d.is::() + || d.is::() + || d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d)) + }); let res = dialects.parse_sql_statements(sql); assert_eq!( ParserError::ParserError("Expected: (, found: window2".to_string()), @@ -5474,6 +5611,7 @@ fn parse_window_clause() { #[test] fn test_parse_named_window() { + let dialects = all_dialects_except(|d| d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d))); let sql = "SELECT \ MIN(c12) OVER window1 AS min1, \ MAX(c12) OVER window2 AS max1 \ @@ -5481,7 +5619,7 @@ fn test_parse_named_window() { WINDOW window1 AS (ORDER BY C12), \ window2 
AS (PARTITION BY C11) \ ORDER BY C3"; - let actual_select_only = verified_only_select(sql); + let actual_select_only = dialects.verified_only_select(sql); let expected = Select { select_token: AttachedToken::empty(), distinct: None, @@ -5559,6 +5697,7 @@ fn test_parse_named_window() { }, }, ], + exclude: None, into: None, from: vec![TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident { @@ -5630,6 +5769,10 @@ fn test_parse_named_window() { #[test] fn parse_window_and_qualify_clause() { + let dialects = all_dialects_except(|d| { + d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d)) + || d.is_table_alias(&Keyword::QUALIFY, &mut Parser::new(d)) + }); let sql = "SELECT \ MIN(c12) OVER window1 AS min1 \ FROM aggregate_test_100 \ @@ -5637,7 +5780,7 @@ fn parse_window_and_qualify_clause() { WINDOW window1 AS (ORDER BY C12), \ window2 AS (PARTITION BY C11) \ ORDER BY C3"; - verified_only_select(sql); + dialects.verified_only_select(sql); let sql = "SELECT \ MIN(c12) OVER window1 AS min1 \ @@ -5646,7 +5789,7 @@ fn parse_window_and_qualify_clause() { window2 AS (PARTITION BY C11) \ QUALIFY ROW_NUMBER() OVER my_window \ ORDER BY C3"; - verified_only_select(sql); + dialects.verified_only_select(sql); } #[test] @@ -5746,7 +5889,10 @@ fn parse_literal_date() { assert_eq!( &Expr::TypedString { data_type: DataType::Date, - value: Value::SingleQuotedString("1999-01-01".into()), + value: ValueWithSpan { + value: Value::SingleQuotedString("1999-01-01".into()), + span: Span::empty(), + } }, expr_from_projection(only(&select.projection)), ); @@ -5759,7 +5905,10 @@ fn parse_literal_time() { assert_eq!( &Expr::TypedString { data_type: DataType::Time(None, TimezoneInfo::None), - value: Value::SingleQuotedString("01:23:34".into()), + value: ValueWithSpan { + value: Value::SingleQuotedString("01:23:34".into()), + span: Span::empty(), + }, }, expr_from_projection(only(&select.projection)), ); @@ -5772,7 +5921,10 @@ fn parse_literal_datetime() { assert_eq!( 
&Expr::TypedString { data_type: DataType::Datetime(None), - value: Value::SingleQuotedString("1999-01-01 01:23:34.45".into()), + value: ValueWithSpan { + value: Value::SingleQuotedString("1999-01-01 01:23:34.45".into()), + span: Span::empty(), + }, }, expr_from_projection(only(&select.projection)), ); @@ -5785,7 +5937,10 @@ fn parse_literal_timestamp_without_time_zone() { assert_eq!( &Expr::TypedString { data_type: DataType::Timestamp(None, TimezoneInfo::None), - value: Value::SingleQuotedString("1999-01-01 01:23:34".into()), + value: ValueWithSpan { + value: Value::SingleQuotedString("1999-01-01 01:23:34".into()), + span: Span::empty(), + }, }, expr_from_projection(only(&select.projection)), ); @@ -5800,7 +5955,10 @@ fn parse_literal_timestamp_with_time_zone() { assert_eq!( &Expr::TypedString { data_type: DataType::Timestamp(None, TimezoneInfo::Tz), - value: Value::SingleQuotedString("1999-01-01 01:23:34Z".into()), + value: ValueWithSpan { + value: Value::SingleQuotedString("1999-01-01 01:23:34Z".into()), + span: Span::empty(), + }, }, expr_from_projection(only(&select.projection)), ); @@ -6196,6 +6354,7 @@ fn parse_interval_and_or_xor() { quote_style: None, span: Span::empty(), }))], + exclude: None, into: None, from: vec![TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident { @@ -6273,14 +6432,13 @@ fn parse_interval_and_or_xor() { flavor: SelectFlavor::Standard, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }))]; assert_eq!(actual_ast, expected_ast); @@ -6373,8 +6531,9 @@ fn parse_json_keyword() { assert_eq!( &Expr::TypedString { data_type: DataType::JSON, - value: Value::SingleQuotedString( - r#"{ + value: ValueWithSpan { + value: Value::SingleQuotedString( + r#"{ "id": 10, "type": "fruit", "name": "apple", @@ -6394,8 +6553,10 @@ fn parse_json_keyword() { ] } }"# - .to_string() - ) + 
.to_string() + ), + span: Span::empty(), + } }, expr_from_projection(only(&select.projection)), ); @@ -6407,7 +6568,10 @@ fn parse_typed_strings() { assert_eq!( Expr::TypedString { data_type: DataType::JSON, - value: Value::SingleQuotedString(r#"{"foo":"bar"}"#.into()) + value: ValueWithSpan { + value: Value::SingleQuotedString(r#"{"foo":"bar"}"#.into()), + span: Span::empty(), + } }, expr ); @@ -6425,7 +6589,10 @@ fn parse_bignumeric_keyword() { assert_eq!( &Expr::TypedString { data_type: DataType::BigNumeric(ExactNumberInfo::None), - value: Value::SingleQuotedString(r#"0"#.into()) + value: ValueWithSpan { + value: Value::SingleQuotedString(r#"0"#.into()), + span: Span::empty(), + } }, expr_from_projection(only(&select.projection)), ); @@ -6436,7 +6603,10 @@ fn parse_bignumeric_keyword() { assert_eq!( &Expr::TypedString { data_type: DataType::BigNumeric(ExactNumberInfo::None), - value: Value::SingleQuotedString(r#"123456"#.into()) + value: ValueWithSpan { + value: Value::SingleQuotedString(r#"123456"#.into()), + span: Span::empty(), + } }, expr_from_projection(only(&select.projection)), ); @@ -6447,7 +6617,10 @@ fn parse_bignumeric_keyword() { assert_eq!( &Expr::TypedString { data_type: DataType::BigNumeric(ExactNumberInfo::None), - value: Value::SingleQuotedString(r#"-3.14"#.into()) + value: ValueWithSpan { + value: Value::SingleQuotedString(r#"-3.14"#.into()), + span: Span::empty(), + } }, expr_from_projection(only(&select.projection)), ); @@ -6458,7 +6631,10 @@ fn parse_bignumeric_keyword() { assert_eq!( &Expr::TypedString { data_type: DataType::BigNumeric(ExactNumberInfo::None), - value: Value::SingleQuotedString(r#"-0.54321"#.into()) + value: ValueWithSpan { + value: Value::SingleQuotedString(r#"-0.54321"#.into()), + span: Span::empty(), + } }, expr_from_projection(only(&select.projection)), ); @@ -6469,7 +6645,10 @@ fn parse_bignumeric_keyword() { assert_eq!( &Expr::TypedString { data_type: DataType::BigNumeric(ExactNumberInfo::None), - value: 
Value::SingleQuotedString(r#"1.23456e05"#.into()) + value: ValueWithSpan { + value: Value::SingleQuotedString(r#"1.23456e05"#.into()), + span: Span::empty(), + } }, expr_from_projection(only(&select.projection)), ); @@ -6480,7 +6659,10 @@ fn parse_bignumeric_keyword() { assert_eq!( &Expr::TypedString { data_type: DataType::BigNumeric(ExactNumberInfo::None), - value: Value::SingleQuotedString(r#"-9.876e-3"#.into()) + value: ValueWithSpan { + value: Value::SingleQuotedString(r#"-9.876e-3"#.into()), + span: Span::empty(), + } }, expr_from_projection(only(&select.projection)), ); @@ -6769,6 +6951,8 @@ fn parse_searched_case_expr() { let select = verified_only_select(sql); assert_eq!( &Case { + case_token: AttachedToken::empty(), + end_token: AttachedToken::empty(), operand: None, conditions: vec![ CaseWhen { @@ -6808,6 +6992,8 @@ fn parse_simple_case_expr() { use self::Expr::{Case, Identifier}; assert_eq!( &Case { + case_token: AttachedToken::empty(), + end_token: AttachedToken::empty(), operand: Some(Box::new(Identifier(Ident::new("foo")))), conditions: vec![CaseWhen { condition: Expr::value(number("1")), @@ -7272,7 +7458,8 @@ fn parse_join_syntax_variants() { "SELECT c1 FROM t1 FULL JOIN t2 USING(c1)", ); - let res = parse_sql_statements("SELECT * FROM a OUTER JOIN b ON 1"); + let dialects = all_dialects_except(|d| d.is_table_alias(&Keyword::OUTER, &mut Parser::new(d))); + let res = dialects.parse_sql_statements("SELECT * FROM a OUTER JOIN b ON 1"); assert_eq!( ParserError::ParserError("Expected: APPLY, found: JOIN".to_string()), res.unwrap_err() @@ -7383,6 +7570,33 @@ fn parse_recursive_cte() { assert_eq!(with.cte_tables.first().unwrap(), &expected); } +#[test] +fn parse_cte_in_data_modification_statements() { + match verified_stmt("WITH x AS (SELECT 1) UPDATE t SET bar = (SELECT * FROM x)") { + Statement::Query(query) => { + assert_eq!(query.with.unwrap().to_string(), "WITH x AS (SELECT 1)"); + assert!(matches!(*query.body, SetExpr::Update(_))); + } + other => 
panic!("Expected: UPDATE, got: {other:?}"), + } + + match verified_stmt("WITH t (x) AS (SELECT 9) DELETE FROM q WHERE id IN (SELECT x FROM t)") { + Statement::Query(query) => { + assert_eq!(query.with.unwrap().to_string(), "WITH t (x) AS (SELECT 9)"); + assert!(matches!(*query.body, SetExpr::Delete(_))); + } + other => panic!("Expected: DELETE, got: {other:?}"), + } + + match verified_stmt("WITH x AS (SELECT 42) INSERT INTO t SELECT foo FROM x") { + Statement::Query(query) => { + assert_eq!(query.with.unwrap().to_string(), "WITH x AS (SELECT 42)"); + assert!(matches!(*query.body, SetExpr::Insert(_))); + } + other => panic!("Expected: INSERT, got: {other:?}"), + } +} + #[test] fn parse_derived_tables() { let sql = "SELECT a.x, b.y FROM (SELECT x FROM foo) AS a CROSS JOIN (SELECT y FROM bar) AS b"; @@ -7543,6 +7757,9 @@ fn parse_substring() { verified_stmt("SELECT SUBSTRING('1', 1, 3)"); verified_stmt("SELECT SUBSTRING('1', 1)"); verified_stmt("SELECT SUBSTRING('1' FOR 3)"); + verified_stmt("SELECT SUBSTRING('foo' FROM 1 FOR 2) FROM t"); + verified_stmt("SELECT SUBSTR('foo' FROM 1 FOR 2) FROM t"); + verified_stmt("SELECT SUBSTR('foo', 1, 2) FROM t"); } #[test] @@ -7623,7 +7840,6 @@ fn parse_trim() { Box::new(MySqlDialect {}), //Box::new(BigQueryDialect {}), Box::new(SQLiteDialect {}), - Box::new(DuckDbDialect {}), ]); assert_eq!( @@ -7759,6 +7975,7 @@ fn parse_create_view() { let sql = "CREATE VIEW myschema.myview AS SELECT foo FROM bar"; match verified_stmt(sql) { Statement::CreateView { + or_alter, name, columns, query, @@ -7773,6 +7990,7 @@ fn parse_create_view() { to, params, } => { + assert_eq!(or_alter, false); assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); assert_eq!("SELECT foo FROM bar", query.to_string()); @@ -7789,6 +8007,8 @@ fn parse_create_view() { } _ => unreachable!(), } + + let _ = verified_stmt("CREATE OR ALTER VIEW v AS SELECT 1"); } #[test] @@ -7823,6 +8043,7 @@ fn parse_create_view_with_columns() { // match 
all_dialects().verified_stmt(sql) { match all_dialects_except(|d| d.is::()).verified_stmt(sql) { Statement::CreateView { + or_alter, name, columns, or_replace, @@ -7837,6 +8058,7 @@ fn parse_create_view_with_columns() { to, params, } => { + assert_eq!(or_alter, false); assert_eq!("v", name.to_string()); assert_eq!( columns, @@ -7845,7 +8067,7 @@ fn parse_create_view_with_columns() { .map(|name| ViewColumnDef { name, data_type: None, - options: None + options: None, }) .collect::>() ); @@ -7870,6 +8092,7 @@ fn parse_create_view_temporary() { let sql = "CREATE TEMPORARY VIEW myschema.myview AS SELECT foo FROM bar"; match verified_stmt(sql) { Statement::CreateView { + or_alter, name, columns, query, @@ -7884,6 +8107,7 @@ fn parse_create_view_temporary() { to, params, } => { + assert_eq!(or_alter, false); assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); assert_eq!("SELECT foo FROM bar", query.to_string()); @@ -7907,6 +8131,7 @@ fn parse_create_or_replace_view() { let sql = "CREATE OR REPLACE VIEW v AS SELECT 1"; match verified_stmt(sql) { Statement::CreateView { + or_alter, name, columns, or_replace, @@ -7921,6 +8146,7 @@ fn parse_create_or_replace_view() { to, params, } => { + assert_eq!(or_alter, false); assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); assert_eq!(options, CreateTableOptions::None); @@ -7948,6 +8174,7 @@ fn parse_create_or_replace_materialized_view() { let sql = "CREATE OR REPLACE MATERIALIZED VIEW v AS SELECT 1"; match verified_stmt(sql) { Statement::CreateView { + or_alter, name, columns, or_replace, @@ -7962,6 +8189,7 @@ fn parse_create_or_replace_materialized_view() { to, params, } => { + assert_eq!(or_alter, false); assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); assert_eq!(options, CreateTableOptions::None); @@ -7985,6 +8213,7 @@ fn parse_create_materialized_view() { let sql = "CREATE MATERIALIZED VIEW myschema.myview AS SELECT foo FROM bar"; match verified_stmt(sql) { 
Statement::CreateView { + or_alter, name, or_replace, columns, @@ -7999,6 +8228,7 @@ fn parse_create_materialized_view() { to, params, } => { + assert_eq!(or_alter, false); assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); assert_eq!("SELECT foo FROM bar", query.to_string()); @@ -8022,6 +8252,7 @@ fn parse_create_materialized_view_with_cluster_by() { let sql = "CREATE MATERIALIZED VIEW myschema.myview CLUSTER BY (foo) AS SELECT foo FROM bar"; match verified_stmt(sql) { Statement::CreateView { + or_alter, name, or_replace, columns, @@ -8036,6 +8267,7 @@ fn parse_create_materialized_view_with_cluster_by() { to, params, } => { + assert_eq!(or_alter, false); assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); assert_eq!("SELECT foo FROM bar", query.to_string()); @@ -8130,6 +8362,9 @@ fn parse_drop_view() { } _ => unreachable!(), } + + verified_stmt("DROP MATERIALIZED VIEW a.b.c"); + verified_stmt("DROP MATERIALIZED VIEW IF EXISTS a.b.c"); } #[test] @@ -8148,55 +8383,65 @@ fn parse_offset() { let dialects = all_dialects_where(|d| !d.is_column_alias(&Keyword::OFFSET, &mut Parser::new(d))); - let expect = Some(Offset { - value: Expr::value(number("2")), - rows: OffsetRows::Rows, + let expected_limit_clause = &Some(LimitClause::LimitOffset { + limit: None, + offset: Some(Offset { + value: Expr::value(number("2")), + rows: OffsetRows::Rows, + }), + limit_by: vec![], }); let ast = dialects.verified_query("SELECT foo FROM bar OFFSET 2 ROWS"); - assert_eq!(ast.offset, expect); + assert_eq!(&ast.limit_clause, expected_limit_clause); let ast = dialects.verified_query("SELECT foo FROM bar WHERE foo = 4 OFFSET 2 ROWS"); - assert_eq!(ast.offset, expect); + assert_eq!(&ast.limit_clause, expected_limit_clause); let ast = dialects.verified_query("SELECT foo FROM bar ORDER BY baz OFFSET 2 ROWS"); - assert_eq!(ast.offset, expect); + assert_eq!(&ast.limit_clause, expected_limit_clause); let ast = 
dialects.verified_query("SELECT foo FROM bar WHERE foo = 4 ORDER BY baz OFFSET 2 ROWS"); - assert_eq!(ast.offset, expect); + assert_eq!(&ast.limit_clause, expected_limit_clause); let ast = dialects.verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 ROWS) OFFSET 2 ROWS"); - assert_eq!(ast.offset, expect); + assert_eq!(&ast.limit_clause, expected_limit_clause); match *ast.body { SetExpr::Select(s) => match only(s.from).relation { TableFactor::Derived { subquery, .. } => { - assert_eq!(subquery.offset, expect); + assert_eq!(&subquery.limit_clause, expected_limit_clause); } _ => panic!("Test broke"), }, _ => panic!("Test broke"), } - let ast = dialects.verified_query("SELECT 'foo' OFFSET 0 ROWS"); - assert_eq!( - ast.offset, - Some(Offset { + let expected_limit_clause = LimitClause::LimitOffset { + limit: None, + offset: Some(Offset { value: Expr::value(number("0")), rows: OffsetRows::Rows, - }) - ); - let ast = dialects.verified_query("SELECT 'foo' OFFSET 1 ROW"); - assert_eq!( - ast.offset, - Some(Offset { + }), + limit_by: vec![], + }; + let ast = dialects.verified_query("SELECT 'foo' OFFSET 0 ROWS"); + assert_eq!(ast.limit_clause, Some(expected_limit_clause)); + let expected_limit_clause = LimitClause::LimitOffset { + limit: None, + offset: Some(Offset { value: Expr::value(number("1")), rows: OffsetRows::Row, - }) - ); - let ast = dialects.verified_query("SELECT 'foo' OFFSET 1"); - assert_eq!( - ast.offset, - Some(Offset { - value: Expr::value(number("1")), + }), + limit_by: vec![], + }; + let ast = dialects.verified_query("SELECT 'foo' OFFSET 1 ROW"); + assert_eq!(ast.limit_clause, Some(expected_limit_clause)); + let expected_limit_clause = LimitClause::LimitOffset { + limit: None, + offset: Some(Offset { + value: Expr::value(number("2")), rows: OffsetRows::None, - }) - ); + }), + limit_by: vec![], + }; + let ast = dialects.verified_query("SELECT 'foo' OFFSET 2"); + assert_eq!(ast.limit_clause, Some(expected_limit_clause)); } #[test] @@ -8246,13 +8491,15 
@@ fn parse_fetch() { let ast = verified_query( "SELECT foo FROM bar WHERE foo = 4 ORDER BY baz OFFSET 2 ROWS FETCH FIRST 2 ROWS ONLY", ); - assert_eq!( - ast.offset, - Some(Offset { + let expected_limit_clause = Some(LimitClause::LimitOffset { + limit: None, + offset: Some(Offset { value: Expr::value(number("2")), rows: OffsetRows::Rows, - }) - ); + }), + limit_by: vec![], + }); + assert_eq!(ast.limit_clause, expected_limit_clause); assert_eq!(ast.fetch, fetch_first_two_rows_only); let ast = verified_query( "SELECT foo FROM (SELECT * FROM bar FETCH FIRST 2 ROWS ONLY) FETCH FIRST 2 ROWS ONLY", @@ -8268,24 +8515,20 @@ fn parse_fetch() { _ => panic!("Test broke"), } let ast = verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 ROWS FETCH FIRST 2 ROWS ONLY) OFFSET 2 ROWS FETCH FIRST 2 ROWS ONLY"); - assert_eq!( - ast.offset, - Some(Offset { + let expected_limit_clause = &Some(LimitClause::LimitOffset { + limit: None, + offset: Some(Offset { value: Expr::value(number("2")), rows: OffsetRows::Rows, - }) - ); + }), + limit_by: vec![], + }); + assert_eq!(&ast.limit_clause, expected_limit_clause); assert_eq!(ast.fetch, fetch_first_two_rows_only); match *ast.body { SetExpr::Select(s) => match only(s.from).relation { TableFactor::Derived { subquery, .. 
} => { - assert_eq!( - subquery.offset, - Some(Offset { - value: Expr::value(number("2")), - rows: OffsetRows::Rows, - }) - ); + assert_eq!(&subquery.limit_clause, expected_limit_clause); assert_eq!(subquery.fetch, fetch_first_two_rows_only); } _ => panic!("Test broke"), @@ -8381,6 +8624,7 @@ fn lateral_function() { distinct: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())], + exclude: None, top_before_distinct: false, into: None, from: vec![TableWithJoins { @@ -8428,8 +8672,11 @@ fn lateral_function() { #[test] fn parse_start_transaction() { let dialects = all_dialects_except(|d| - // BigQuery does not support this syntax - d.is::()); + // BigQuery and Snowflake do not support this syntax + // + // BigQuery: + // Snowflake: + d.is::() || d.is::()); match dialects .verified_stmt("START TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE") { @@ -8528,11 +8775,11 @@ fn parse_set_transaction() { // TRANSACTION, so no need to duplicate the tests here. We just do a quick // sanity check. 
match verified_stmt("SET TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE") { - Statement::SetTransaction { + Statement::Set(Set::SetTransaction { modes, session, snapshot, - } => { + }) => { assert_eq!( modes, vec![ @@ -8551,20 +8798,17 @@ fn parse_set_transaction() { #[test] fn parse_set_variable() { match verified_stmt("SET SOMETHING = '1'") { - Statement::SetVariable { - local, + Statement::Set(Set::SingleAssignment { + scope, hivevar, - variables, - value, - } => { - assert!(!local); + variable, + values, + }) => { + assert_eq!(scope, None); assert!(!hivevar); + assert_eq!(variable, ObjectName::from(vec!["SOMETHING".into()])); assert_eq!( - variables, - OneOrManyWithParens::One(ObjectName::from(vec!["SOMETHING".into()])) - ); - assert_eq!( - value, + values, vec![Expr::Value( (Value::SingleQuotedString("1".into())).with_empty_span() )] @@ -8573,27 +8817,40 @@ fn parse_set_variable() { _ => unreachable!(), } + match verified_stmt("SET GLOBAL VARIABLE = 'Value'") { + Statement::Set(Set::SingleAssignment { + scope, + hivevar, + variable, + values, + }) => { + assert_eq!(scope, Some(ContextModifier::Global)); + assert!(!hivevar); + assert_eq!(variable, ObjectName::from(vec!["VARIABLE".into()])); + assert_eq!( + values, + vec![Expr::Value( + (Value::SingleQuotedString("Value".into())).with_empty_span() + )] + ); + } + _ => unreachable!(), + } + let multi_variable_dialects = all_dialects_where(|d| d.supports_parenthesized_set_variables()); let sql = r#"SET (a, b, c) = (1, 2, 3)"#; match multi_variable_dialects.verified_stmt(sql) { - Statement::SetVariable { - local, - hivevar, - variables, - value, - } => { - assert!(!local); - assert!(!hivevar); + Statement::Set(Set::ParenthesizedAssignments { variables, values }) => { assert_eq!( variables, - OneOrManyWithParens::Many(vec![ + vec![ ObjectName::from(vec!["a".into()]), ObjectName::from(vec!["b".into()]), ObjectName::from(vec!["c".into()]), - ]) + ] ); assert_eq!( - value, + values, vec![ 
Expr::value(number("1")), Expr::value(number("2")), @@ -8653,20 +8910,17 @@ fn parse_set_variable() { #[test] fn parse_set_role_as_variable() { match verified_stmt("SET role = 'foobar'") { - Statement::SetVariable { - local, + Statement::Set(Set::SingleAssignment { + scope, hivevar, - variables, - value, - } => { - assert!(!local); + variable, + values, + }) => { + assert_eq!(scope, None); assert!(!hivevar); + assert_eq!(variable, ObjectName::from(vec!["role".into()])); assert_eq!( - variables, - OneOrManyWithParens::One(ObjectName::from(vec!["role".into()])) - ); - assert_eq!( - value, + values, vec![Expr::Value( (Value::SingleQuotedString("foobar".into())).with_empty_span() )] @@ -8703,20 +8957,17 @@ fn parse_double_colon_cast_at_timezone() { #[test] fn parse_set_time_zone() { match verified_stmt("SET TIMEZONE = 'UTC'") { - Statement::SetVariable { - local, + Statement::Set(Set::SingleAssignment { + scope, hivevar, - variables: variable, - value, - } => { - assert!(!local); + variable, + values, + }) => { + assert_eq!(scope, None); assert!(!hivevar); + assert_eq!(variable, ObjectName::from(vec!["TIMEZONE".into()])); assert_eq!( - variable, - OneOrManyWithParens::One(ObjectName::from(vec!["TIMEZONE".into()])) - ); - assert_eq!( - value, + values, vec![Expr::Value( (Value::SingleQuotedString("UTC".into())).with_empty_span() )] @@ -8728,20 +8979,6 @@ fn parse_set_time_zone() { one_statement_parses_to("SET TIME ZONE TO 'UTC'", "SET TIMEZONE = 'UTC'"); } -#[test] -fn parse_set_time_zone_alias() { - match verified_stmt("SET TIME ZONE 'UTC'") { - Statement::SetTimeZone { local, value } => { - assert!(!local); - assert_eq!( - value, - Expr::Value((Value::SingleQuotedString("UTC".into())).with_empty_span()) - ); - } - _ => unreachable!(), - } -} - #[test] fn parse_commit() { match verified_stmt("COMMIT") { @@ -8842,22 +9079,28 @@ fn ensure_multiple_dialects_are_tested() { #[test] fn parse_create_index() { let sql = "CREATE UNIQUE INDEX IF NOT EXISTS idx_name ON 
test(name,age DESC)"; - let indexed_columns = vec![ - OrderByExpr { - expr: Expr::Identifier(Ident::new("name")), - options: OrderByOptions { - asc: None, - nulls_first: None, + let indexed_columns: Vec = vec![ + IndexColumn { + operator_class: None, + column: OrderByExpr { + expr: Expr::Identifier(Ident::new("name")), + with_fill: None, + options: OrderByOptions { + asc: None, + nulls_first: None, + }, }, - with_fill: None, }, - OrderByExpr { - expr: Expr::Identifier(Ident::new("age")), - options: OrderByOptions { - asc: Some(false), - nulls_first: None, + IndexColumn { + operator_class: None, + column: OrderByExpr { + expr: Expr::Identifier(Ident::new("age")), + with_fill: None, + options: OrderByOptions { + asc: Some(false), + nulls_first: None, + }, }, - with_fill: None, }, ]; match verified_stmt(sql) { @@ -8881,23 +9124,29 @@ fn parse_create_index() { #[test] fn test_create_index_with_using_function() { - let sql = "CREATE UNIQUE INDEX IF NOT EXISTS idx_name ON test USING btree (name,age DESC)"; - let indexed_columns = vec![ - OrderByExpr { - expr: Expr::Identifier(Ident::new("name")), - options: OrderByOptions { - asc: None, - nulls_first: None, + let sql = "CREATE UNIQUE INDEX IF NOT EXISTS idx_name ON test USING BTREE (name,age DESC)"; + let indexed_columns: Vec = vec![ + IndexColumn { + operator_class: None, + column: OrderByExpr { + expr: Expr::Identifier(Ident::new("name")), + with_fill: None, + options: OrderByOptions { + asc: None, + nulls_first: None, + }, }, - with_fill: None, }, - OrderByExpr { - expr: Expr::Identifier(Ident::new("age")), - options: OrderByOptions { - asc: Some(false), - nulls_first: None, + IndexColumn { + operator_class: None, + column: OrderByExpr { + expr: Expr::Identifier(Ident::new("age")), + with_fill: None, + options: OrderByOptions { + asc: Some(false), + nulls_first: None, + }, }, - with_fill: None, }, ]; match verified_stmt(sql) { @@ -8916,7 +9165,7 @@ fn test_create_index_with_using_function() { }) => { 
assert_eq!("idx_name", name.to_string()); assert_eq!("test", table_name.to_string()); - assert_eq!("btree", using.unwrap().to_string()); + assert_eq!("BTREE", using.unwrap().to_string()); assert_eq!(indexed_columns, columns); assert!(unique); assert!(!concurrently); @@ -8931,13 +9180,16 @@ fn test_create_index_with_using_function() { #[test] fn test_create_index_with_with_clause() { let sql = "CREATE UNIQUE INDEX title_idx ON films(title) WITH (fillfactor = 70, single_param)"; - let indexed_columns = vec![OrderByExpr { - expr: Expr::Identifier(Ident::new("title")), - options: OrderByOptions { - asc: None, - nulls_first: None, + let indexed_columns: Vec = vec![IndexColumn { + column: OrderByExpr { + expr: Expr::Identifier(Ident::new("title")), + options: OrderByOptions { + asc: None, + nulls_first: None, + }, + with_fill: None, }, - with_fill: None, + operator_class: None, }]; let with_parameters = vec![ Expr::BinaryOp { @@ -9203,13 +9455,58 @@ fn parse_grant() { verified_stmt("GRANT SELECT ON ALL TABLES IN SCHEMA db1.sc1 TO APPLICATION role1"); verified_stmt("GRANT SELECT ON ALL TABLES IN SCHEMA db1.sc1 TO APPLICATION ROLE role1"); verified_stmt("GRANT SELECT ON ALL TABLES IN SCHEMA db1.sc1 TO SHARE share1"); + verified_stmt("GRANT SELECT ON ALL VIEWS IN SCHEMA db1.sc1 TO ROLE role1"); + verified_stmt("GRANT SELECT ON ALL MATERIALIZED VIEWS IN SCHEMA db1.sc1 TO ROLE role1"); + verified_stmt("GRANT SELECT ON ALL EXTERNAL TABLES IN SCHEMA db1.sc1 TO ROLE role1"); verified_stmt("GRANT USAGE ON SCHEMA sc1 TO a:b"); verified_stmt("GRANT USAGE ON SCHEMA sc1 TO GROUP group1"); verified_stmt("GRANT OWNERSHIP ON ALL TABLES IN SCHEMA DEV_STAS_ROGOZHIN TO ROLE ANALYST"); + verified_stmt("GRANT OWNERSHIP ON ALL TABLES IN SCHEMA DEV_STAS_ROGOZHIN TO ROLE ANALYST COPY CURRENT GRANTS"); + verified_stmt("GRANT OWNERSHIP ON ALL TABLES IN SCHEMA DEV_STAS_ROGOZHIN TO ROLE ANALYST REVOKE CURRENT GRANTS"); verified_stmt("GRANT USAGE ON DATABASE db1 TO ROLE role1"); verified_stmt("GRANT 
USAGE ON WAREHOUSE wh1 TO ROLE role1"); verified_stmt("GRANT OWNERSHIP ON INTEGRATION int1 TO ROLE role1"); verified_stmt("GRANT SELECT ON VIEW view1 TO ROLE role1"); + verified_stmt("GRANT EXEC ON my_sp TO runner"); + verified_stmt("GRANT UPDATE ON my_table TO updater_role AS dbo"); + all_dialects_where(|d| d.identifier_quote_style("none") == Some('[')) + .verified_stmt("GRANT SELECT ON [my_table] TO [public]"); + verified_stmt("GRANT SELECT ON FUTURE SCHEMAS IN DATABASE db1 TO ROLE role1"); + verified_stmt("GRANT SELECT ON FUTURE TABLES IN SCHEMA db1.sc1 TO ROLE role1"); + verified_stmt("GRANT SELECT ON FUTURE EXTERNAL TABLES IN SCHEMA db1.sc1 TO ROLE role1"); + verified_stmt("GRANT SELECT ON FUTURE VIEWS IN SCHEMA db1.sc1 TO ROLE role1"); + verified_stmt("GRANT SELECT ON FUTURE MATERIALIZED VIEWS IN SCHEMA db1.sc1 TO ROLE role1"); + verified_stmt("GRANT SELECT ON FUTURE SEQUENCES IN SCHEMA db1.sc1 TO ROLE role1"); + verified_stmt("GRANT USAGE ON PROCEDURE db1.sc1.foo(INT) TO ROLE role1"); + verified_stmt("GRANT USAGE ON FUNCTION db1.sc1.foo(INT) TO ROLE role1"); +} + +#[test] +fn parse_deny() { + let sql = "DENY INSERT, DELETE ON users TO analyst CASCADE AS admin"; + match verified_stmt(sql) { + Statement::Deny(deny) => { + assert_eq!( + Privileges::Actions(vec![Action::Insert { columns: None }, Action::Delete]), + deny.privileges + ); + assert_eq!( + &GrantObjects::Tables(vec![ObjectName::from(vec![Ident::new("users")])]), + &deny.objects + ); + assert_eq_vec(&["analyst"], &deny.grantees); + assert_eq!(Some(CascadeOption::Cascade), deny.cascade); + assert_eq!(Some(Ident::from("admin")), deny.granted_by); + } + _ => unreachable!(), + } + + verified_stmt("DENY SELECT, INSERT, UPDATE, DELETE ON db1.sc1 TO role1, role2"); + verified_stmt("DENY ALL ON db1.sc1 TO role1"); + verified_stmt("DENY EXEC ON my_sp TO runner"); + + all_dialects_where(|d| d.identifier_quote_style("none") == Some('[')) + .verified_stmt("DENY SELECT ON [my_table] TO [public]"); } #[test] @@ 
-9276,6 +9573,7 @@ fn parse_merge() { source, on, clauses, + .. }, Statement::Merge { into: no_into, @@ -9283,6 +9581,7 @@ fn parse_merge() { source: source_no_into, on: on_no_into, clauses: clauses_no_into, + .. }, ) => { assert!(into); @@ -9322,6 +9621,7 @@ fn parse_merge() { projection: vec![SelectItem::Wildcard( WildcardAdditionalOptions::default() )], + exclude: None, into: None, from: vec![TableWithJoins { relation: table_from_name(ObjectName::from(vec![ @@ -9346,14 +9646,13 @@ fn parse_merge() { flavor: SelectFlavor::Standard, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }), alias: Some(TableAlias { name: Ident { @@ -9477,6 +9776,19 @@ fn parse_merge() { verified_stmt(sql); } +#[test] +fn test_merge_with_output() { + let sql = "MERGE INTO target_table USING source_table \ + ON target_table.id = source_table.oooid \ + WHEN MATCHED THEN \ + UPDATE SET target_table.description = source_table.description \ + WHEN NOT MATCHED THEN \ + INSERT (ID, description) VALUES (source_table.id, source_table.description) \ + OUTPUT inserted.* INTO log_target"; + + verified_stmt(sql); +} + #[test] fn test_merge_into_using_table() { let sql = "MERGE INTO target_table USING source_table \ @@ -9666,21 +9978,18 @@ fn test_placeholder() { }) ); - let sql = "SELECT * FROM student LIMIT $1 OFFSET $2"; - let ast = dialects.verified_query(sql); - assert_eq!( - ast.limit, - Some(Expr::Value( - (Value::Placeholder("$1".into())).with_empty_span() - )) - ); - assert_eq!( - ast.offset, - Some(Offset { + let ast = dialects.verified_query("SELECT * FROM student LIMIT $1 OFFSET $2"); + let expected_limit_clause = LimitClause::LimitOffset { + limit: Some(Expr::Value( + (Value::Placeholder("$1".into())).with_empty_span(), + )), + offset: Some(Offset { value: Expr::Value((Value::Placeholder("$2".into())).with_empty_span()), rows: 
OffsetRows::None, }), - ); + limit_by: vec![], + }; + assert_eq!(ast.limit_clause, Some(expected_limit_clause)); let dialects = TestedDialects::new(vec![ Box::new(GenericDialect {}), @@ -9760,40 +10069,34 @@ fn verified_expr(query: &str) -> Expr { #[test] fn parse_offset_and_limit() { let sql = "SELECT foo FROM bar LIMIT 1 OFFSET 2"; - let expect = Some(Offset { - value: Expr::value(number("2")), - rows: OffsetRows::None, + let expected_limit_clause = Some(LimitClause::LimitOffset { + limit: Some(Expr::value(number("1"))), + offset: Some(Offset { + value: Expr::value(number("2")), + rows: OffsetRows::None, + }), + limit_by: vec![], }); let ast = verified_query(sql); - assert_eq!(ast.offset, expect); - assert_eq!(ast.limit, Some(Expr::value(number("1")))); + assert_eq!(ast.limit_clause, expected_limit_clause); // different order is OK one_statement_parses_to("SELECT foo FROM bar OFFSET 2 LIMIT 1", sql); // mysql syntax is ok for some dialects - TestedDialects::new(vec![ - Box::new(GenericDialect {}), - Box::new(MySqlDialect {}), - Box::new(SQLiteDialect {}), - Box::new(ClickHouseDialect {}), - ]) - .one_statement_parses_to("SELECT foo FROM bar LIMIT 2, 1", sql); + all_dialects_where(|d| d.supports_limit_comma()) + .verified_query("SELECT foo FROM bar LIMIT 2, 1"); // expressions are allowed let sql = "SELECT foo FROM bar LIMIT 1 + 2 OFFSET 3 * 4"; let ast = verified_query(sql); - assert_eq!( - ast.limit, - Some(Expr::BinaryOp { + let expected_limit_clause = LimitClause::LimitOffset { + limit: Some(Expr::BinaryOp { left: Box::new(Expr::value(number("1"))), op: BinaryOperator::Plus, right: Box::new(Expr::value(number("2"))), }), - ); - assert_eq!( - ast.offset, - Some(Offset { + offset: Some(Offset { value: Expr::BinaryOp { left: Box::new(Expr::value(number("3"))), op: BinaryOperator::Multiply, @@ -9801,7 +10104,12 @@ fn parse_offset_and_limit() { }, rows: OffsetRows::None, }), - ); + limit_by: vec![], + }; + assert_eq!(ast.limit_clause, Some(expected_limit_clause),); 
+ + // OFFSET without LIMIT + verified_stmt("SELECT foo FROM bar OFFSET 2"); // Can't repeat OFFSET / LIMIT let res = parse_sql_statements("SELECT foo FROM bar OFFSET 2 OFFSET 2"); @@ -9826,7 +10134,7 @@ fn parse_offset_and_limit() { #[test] fn parse_time_functions() { fn test_time_function(func_name: &'static str) { - let sql = format!("SELECT {}()", func_name); + let sql = format!("SELECT {func_name}()"); let select = verified_only_select(&sql); let select_localtime_func_call_ast = Function { name: ObjectName::from(vec![Ident::new(func_name)]), @@ -9848,7 +10156,7 @@ fn parse_time_functions() { ); // Validating Parenthesis - let sql_without_parens = format!("SELECT {}", func_name); + let sql_without_parens = format!("SELECT {func_name}"); let mut ast_without_parens = select_localtime_func_call_ast; ast_without_parens.args = FunctionArguments::None; assert_eq!( @@ -10387,15 +10695,8 @@ fn parse_with_recursion_limit() { #[test] fn parse_escaped_string_with_unescape() { - fn assert_mysql_query_value(sql: &str, quoted: &str) { - let stmt = TestedDialects::new(vec![ - Box::new(MySqlDialect {}), - Box::new(BigQueryDialect {}), - Box::new(SnowflakeDialect {}), - ]) - .one_statement_parses_to(sql, ""); - - match stmt { + fn assert_mysql_query_value(dialects: &TestedDialects, sql: &str, quoted: &str) { + match dialects.one_statement_parses_to(sql, "") { Statement::Query(query) => match *query.body { SetExpr::Select(value) => { let expr = expr_from_projection(only(&value.projection)); @@ -10411,17 +10712,38 @@ fn parse_escaped_string_with_unescape() { _ => unreachable!(), }; } + + let escaping_dialects = + &all_dialects_where(|dialect| dialect.supports_string_literal_backslash_escape()); + let no_wildcard_exception = &all_dialects_where(|dialect| { + dialect.supports_string_literal_backslash_escape() && !dialect.ignores_wildcard_escapes() + }); + let with_wildcard_exception = &all_dialects_where(|dialect| { + dialect.supports_string_literal_backslash_escape() && 
dialect.ignores_wildcard_escapes() + }); + let sql = r"SELECT 'I\'m fine'"; - assert_mysql_query_value(sql, "I'm fine"); + assert_mysql_query_value(escaping_dialects, sql, "I'm fine"); let sql = r#"SELECT 'I''m fine'"#; - assert_mysql_query_value(sql, "I'm fine"); + assert_mysql_query_value(escaping_dialects, sql, "I'm fine"); let sql = r#"SELECT 'I\"m fine'"#; - assert_mysql_query_value(sql, "I\"m fine"); + assert_mysql_query_value(escaping_dialects, sql, "I\"m fine"); let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \h \ '"; - assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} \u{7} h "); + assert_mysql_query_value( + no_wildcard_exception, + sql, + "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} \u{7} h ", + ); + + // check MySQL doesn't remove backslash from escaped LIKE wildcards + assert_mysql_query_value( + with_wildcard_exception, + sql, + "Testing: \0 \\ \\% \\_ \u{8} \n \r \t \u{1a} \u{7} h ", + ); } #[test] @@ -10568,49 +10890,47 @@ fn parse_unpivot_table() { "SELECT * FROM sales AS s ", "UNPIVOT(quantity FOR quarter IN (Q1, Q2, Q3, Q4)) AS u (product, quarter, quantity)" ); - - pretty_assertions::assert_eq!( - verified_only_select(sql).from[0].relation, - Unpivot { - table: Box::new(TableFactor::Table { - name: ObjectName::from(vec![Ident::new("sales")]), - alias: Some(TableAlias { - name: Ident::new("s"), - columns: vec![] - }), - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - sample: None, - index_hints: vec![], - }), - value: Ident { - value: "quantity".to_string(), - quote_style: None, - span: Span::empty() - }, - - name: Ident { - value: "quarter".to_string(), - quote_style: None, - span: Span::empty() - }, - columns: ["Q1", "Q2", "Q3", "Q4"] - .into_iter() - .map(Ident::new) - .collect(), + let base_unpivot = Unpivot { + table: Box::new(TableFactor::Table { + name: ObjectName::from(vec![Ident::new("sales")]), alias: Some(TableAlias { - name: 
Ident::new("u"), - columns: ["product", "quarter", "quantity"] - .into_iter() - .map(TableAliasColumnDef::from_name) - .collect(), + name: Ident::new("s"), + columns: vec![], }), - } - ); + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + index_hints: vec![], + }), + null_inclusion: None, + value: Ident { + value: "quantity".to_string(), + quote_style: None, + span: Span::empty(), + }, + + name: Ident { + value: "quarter".to_string(), + quote_style: None, + span: Span::empty(), + }, + columns: ["Q1", "Q2", "Q3", "Q4"] + .into_iter() + .map(Ident::new) + .collect(), + alias: Some(TableAlias { + name: Ident::new("u"), + columns: ["product", "quarter", "quantity"] + .into_iter() + .map(TableAliasColumnDef::from_name) + .collect(), + }), + }; + pretty_assertions::assert_eq!(verified_only_select(sql).from[0].relation, base_unpivot); assert_eq!(verified_stmt(sql).to_string(), sql); let sql_without_aliases = concat!( @@ -10630,6 +10950,38 @@ fn parse_unpivot_table() { verified_stmt(sql_without_aliases).to_string(), sql_without_aliases ); + + let sql_unpivot_exclude_nulls = concat!( + "SELECT * FROM sales AS s ", + "UNPIVOT EXCLUDE NULLS (quantity FOR quarter IN (Q1, Q2, Q3, Q4)) AS u (product, quarter, quantity)" + ); + + if let Unpivot { null_inclusion, .. } = + &verified_only_select(sql_unpivot_exclude_nulls).from[0].relation + { + assert_eq!(*null_inclusion, Some(NullInclusion::ExcludeNulls)); + } + + assert_eq!( + verified_stmt(sql_unpivot_exclude_nulls).to_string(), + sql_unpivot_exclude_nulls + ); + + let sql_unpivot_include_nulls = concat!( + "SELECT * FROM sales AS s ", + "UNPIVOT INCLUDE NULLS (quantity FOR quarter IN (Q1, Q2, Q3, Q4)) AS u (product, quarter, quantity)" + ); + + if let Unpivot { null_inclusion, .. 
} = + &verified_only_select(sql_unpivot_include_nulls).from[0].relation + { + assert_eq!(*null_inclusion, Some(NullInclusion::IncludeNulls)); + } + + assert_eq!( + verified_stmt(sql_unpivot_include_nulls).to_string(), + sql_unpivot_include_nulls + ); } #[test] @@ -10726,6 +11078,7 @@ fn parse_pivot_unpivot_table() { sample: None, index_hints: vec![], }), + null_inclusion: None, value: Ident { value: "population".to_string(), quote_style: None, @@ -10804,10 +11157,17 @@ fn parse_non_latin_identifiers() { Box::new(RedshiftSqlDialect {}), Box::new(MySqlDialect {}), ]); - supported_dialects.verified_stmt("SELECT a.説明 FROM test.public.inter01 AS a"); supported_dialects.verified_stmt("SELECT a.説明 FROM inter01 AS a, inter01_transactions AS b WHERE a.説明 = b.取引 GROUP BY a.説明"); supported_dialects.verified_stmt("SELECT 説明, hühnervögel, garçon, Москва, 東京 FROM inter01"); + + let supported_dialects = TestedDialects::new(vec![ + Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), + Box::new(PostgreSqlDialect {}), + Box::new(MsSqlDialect {}), + Box::new(MySqlDialect {}), + ]); assert!(supported_dialects .parse_sql_statements("SELECT 💝 FROM table1") .is_err()); @@ -10856,7 +11216,7 @@ fn parse_trailing_comma() { trailing_commas.verified_stmt(r#"SELECT "from" FROM "from""#); // doesn't allow any trailing commas - let trailing_commas = TestedDialects::new(vec![Box::new(GenericDialect {})]); + let trailing_commas = TestedDialects::new(vec![Box::new(PostgreSqlDialect {})]); assert_eq!( trailing_commas @@ -11180,6 +11540,7 @@ fn parse_unload() { top: None, top_before_distinct: false, projection: vec![UnnamedExpr(Expr::Identifier(Ident::new("cola"))),], + exclude: None, into: None, from: vec![TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident::new("tab")])), @@ -11201,15 +11562,14 @@ fn parse_unload() { flavor: SelectFlavor::Standard, }))), with: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], 
for_clause: None, order_by: None, settings: None, format_clause: None, + pipe_operators: vec![], }), to: Ident { value: "s3://...".to_string(), @@ -11280,7 +11640,9 @@ fn test_parse_inline_comment() { // [Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable) match all_dialects_except(|d| d.is::()).verified_stmt(sql) { Statement::CreateTable(CreateTable { - columns, comment, .. + columns, + table_options, + .. }) => { assert_eq!( columns, @@ -11294,8 +11656,10 @@ fn test_parse_inline_comment() { }] ); assert_eq!( - comment.unwrap(), - CommentDef::WithEq("comment with equal".to_string()) + table_options, + CreateTableOptions::Plain(vec![SqlOption::Comment(CommentDef::WithEq( + "comment with equal".to_string() + ))]) ); } _ => unreachable!(), @@ -11377,6 +11741,7 @@ fn parse_connect_by() { SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("manager_id"))), SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), ], + exclude: None, from: vec![TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident::new("employees")])), joins: vec![], @@ -11458,6 +11823,7 @@ fn parse_connect_by() { SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("manager_id"))), SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), ], + exclude: None, from: vec![TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident::new("employees")])), joins: vec![], @@ -11524,6 +11890,20 @@ fn parse_connect_by() { #[test] fn test_selective_aggregation() { + let testing_dialects = all_dialects_where(|d| d.supports_filter_during_aggregation()); + let expected_dialects: Vec> = vec![ + Box::new(PostgreSqlDialect {}), + Box::new(DatabricksDialect {}), + Box::new(HiveDialect {}), + Box::new(SQLiteDialect {}), + Box::new(DuckDbDialect {}), + Box::new(GenericDialect {}), + ]; + assert_eq!(testing_dialects.dialects.len(), expected_dialects.len()); + expected_dialects + .into_iter() + .for_each(|d| 
assert!(d.supports_filter_during_aggregation())); + let sql = concat!( "SELECT ", "ARRAY_AGG(name) FILTER (WHERE name IS NOT NULL), ", @@ -11531,9 +11911,7 @@ fn test_selective_aggregation() { "FROM region" ); assert_eq!( - all_dialects_where(|d| d.supports_filter_during_aggregation()) - .verified_only_select(sql) - .projection, + testing_dialects.verified_only_select(sql).projection, vec![ SelectItem::UnnamedExpr(Expr::Function(Function { name: ObjectName::from(vec![Ident::new("ARRAY_AGG")]), @@ -11611,6 +11989,44 @@ fn test_group_by_grouping_sets() { ); } +#[test] +fn test_xmltable() { + all_dialects() + .verified_only_select("SELECT * FROM XMLTABLE('/root' PASSING data COLUMNS element TEXT)"); + + // Minimal meaningful working example: returns a single row with a single column named y containing the value z + all_dialects().verified_only_select( + "SELECT y FROM XMLTABLE('/X' PASSING 'z' COLUMNS y TEXT)", + ); + + // Test using subqueries + all_dialects().verified_only_select("SELECT y FROM XMLTABLE((SELECT '/X') PASSING (SELECT CAST('z' AS xml)) COLUMNS y TEXT PATH (SELECT 'y'))"); + + // NOT NULL + all_dialects().verified_only_select( + "SELECT y FROM XMLTABLE('/X' PASSING '' COLUMNS y TEXT NOT NULL)", + ); + + all_dialects().verified_only_select("SELECT * FROM XMLTABLE('/root/row' PASSING xmldata COLUMNS id INT PATH '@id', name TEXT PATH 'name/text()', value FLOAT PATH 'value')"); + + all_dialects().verified_only_select("SELECT * FROM XMLTABLE('//ROWS/ROW' PASSING data COLUMNS row_num FOR ORDINALITY, id INT PATH '@id', name TEXT PATH 'NAME' DEFAULT 'unnamed')"); + + // Example from https://www.postgresql.org/docs/15/functions-xml.html#FUNCTIONS-XML-PROCESSING + all_dialects().verified_only_select( + "SELECT xmltable.* FROM xmldata, XMLTABLE('//ROWS/ROW' PASSING data COLUMNS id INT PATH '@id', ordinality FOR ORDINALITY, \"COUNTRY_NAME\" TEXT, country_id TEXT PATH 'COUNTRY_ID', size_sq_km FLOAT PATH 'SIZE[@unit = \"sq_km\"]', size_other TEXT PATH 
'concat(SIZE[@unit!=\"sq_km\"], \" \", SIZE[@unit!=\"sq_km\"]/@unit)', premier_name TEXT PATH 'PREMIER_NAME' DEFAULT 'not specified')" + ); + + // Example from DB2 docs without explicit PASSING clause: https://www.ibm.com/docs/en/db2/12.1.0?topic=xquery-simple-column-name-passing-xmlexists-xmlquery-xmltable + all_dialects().verified_only_select( + "SELECT X.* FROM T1, XMLTABLE('$CUSTLIST/customers/customerinfo' COLUMNS \"Cid\" BIGINT PATH '@Cid', \"Info\" XML PATH 'document{.}', \"History\" XML PATH 'NULL') AS X" + ); + + // Example from PostgreSQL with XMLNAMESPACES + all_dialects().verified_only_select( + "SELECT xmltable.* FROM XMLTABLE(XMLNAMESPACES('http://example.com/myns' AS x, 'http://example.com/b' AS \"B\"), '/x:example/x:item' PASSING (SELECT data FROM xmldata) COLUMNS foo INT PATH '@foo', bar INT PATH '@B:bar')" + ); +} + #[test] fn test_match_recognize() { use MatchRecognizePattern::*; @@ -12270,21 +12686,6 @@ fn parse_select_wildcard_with_except() { ); } -#[test] -fn parse_auto_increment_too_large() { - let dialect = GenericDialect {}; - let u64_max = u64::MAX; - let sql = - format!("CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) AUTO_INCREMENT=1{u64_max}"); - - let res = Parser::new(&dialect) - .try_with_sql(&sql) - .expect("tokenize to work") - .parse_statements(); - - assert!(res.is_err(), "{res:?}"); -} - #[test] fn test_group_by_nothing() { let Select { group_by, .. 
} = all_dialects_where(|d| d.supports_group_by_expr()) @@ -12356,6 +12757,7 @@ fn test_extract_seconds_ok() { format: None, }), })], + exclude: None, into: None, from: vec![], lateral_views: vec![], @@ -12374,14 +12776,13 @@ fn test_extract_seconds_ok() { flavor: SelectFlavor::Standard, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }))]; assert_eq!(actual_ast, expected_ast); @@ -14004,11 +14405,10 @@ fn test_table_sample() { #[test] fn overflow() { - let expr = std::iter::repeat("1") - .take(1000) + let expr = std::iter::repeat_n("1", 1000) .collect::>() .join(" + "); - let sql = format!("SELECT {}", expr); + let sql = format!("SELECT {expr}"); let mut statements = Parser::parse_sql(&GenericDialect {}, sql.as_str()).unwrap(); let statement = statements.pop().unwrap(); @@ -14101,7 +14501,7 @@ fn test_visit_order() { let sql = "SELECT CASE a WHEN 1 THEN 2 WHEN 3 THEN 4 ELSE 5 END"; let stmt = verified_stmt(sql); let mut visited = vec![]; - sqlparser::ast::visit_expressions(&stmt, |expr| { + let _ = sqlparser::ast::visit_expressions(&stmt, |expr| { visited.push(expr.to_string()); core::ops::ControlFlow::<()>::Continue(()) }); @@ -14120,6 +14520,221 @@ fn test_visit_order() { ); } +#[test] +fn parse_case_statement() { + let sql = "CASE 1 WHEN 2 THEN SELECT 1; SELECT 2; ELSE SELECT 3; END CASE"; + let Statement::Case(stmt) = verified_stmt(sql) else { + unreachable!() + }; + + assert_eq!(Some(Expr::value(number("1"))), stmt.match_expr); + assert_eq!( + Some(Expr::value(number("2"))), + stmt.when_blocks[0].condition + ); + assert_eq!(2, stmt.when_blocks[0].statements().len()); + assert_eq!(1, stmt.else_block.unwrap().statements().len()); + + verified_stmt(concat!( + "CASE 1", + " WHEN a THEN", + " SELECT 1; SELECT 2; SELECT 3;", + " WHEN b THEN", + " SELECT 4; SELECT 5;", + " ELSE", + " SELECT 7; SELECT 8;", + " END 
CASE" + )); + verified_stmt(concat!( + "CASE 1", + " WHEN a THEN", + " SELECT 1; SELECT 2; SELECT 3;", + " WHEN b THEN", + " SELECT 4; SELECT 5;", + " END CASE" + )); + verified_stmt(concat!( + "CASE 1", + " WHEN a THEN", + " SELECT 1; SELECT 2; SELECT 3;", + " END CASE" + )); + verified_stmt(concat!( + "CASE 1", + " WHEN a THEN", + " SELECT 1; SELECT 2; SELECT 3;", + " END" + )); + + assert_eq!( + ParserError::ParserError("Expected: THEN, found: END".to_string()), + parse_sql_statements("CASE 1 WHEN a END").unwrap_err() + ); + assert_eq!( + ParserError::ParserError("Expected: WHEN, found: ELSE".to_string()), + parse_sql_statements("CASE 1 ELSE SELECT 1; END").unwrap_err() + ); +} + +#[test] +fn test_case_statement_span() { + let sql = "CASE 1 WHEN 2 THEN SELECT 1; SELECT 2; ELSE SELECT 3; END CASE"; + let mut parser = Parser::new(&GenericDialect {}).try_with_sql(sql).unwrap(); + assert_eq!( + parser.parse_statement().unwrap().span(), + Span::new(Location::new(1, 1), Location::new(1, sql.len() as u64 + 1)) + ); +} + +#[test] +fn parse_if_statement() { + let dialects = all_dialects_except(|d| d.is::()); + + let sql = "IF 1 THEN SELECT 1; ELSEIF 2 THEN SELECT 2; ELSE SELECT 3; END IF"; + let Statement::If(IfStatement { + if_block, + elseif_blocks, + else_block, + .. 
+ }) = dialects.verified_stmt(sql) + else { + unreachable!() + }; + assert_eq!(Some(Expr::value(number("1"))), if_block.condition); + assert_eq!(Some(Expr::value(number("2"))), elseif_blocks[0].condition); + assert_eq!(1, else_block.unwrap().statements().len()); + + dialects.verified_stmt(concat!( + "IF 1 THEN", + " SELECT 1;", + " SELECT 2;", + " SELECT 3;", + " ELSEIF 2 THEN", + " SELECT 4;", + " SELECT 5;", + " ELSEIF 3 THEN", + " SELECT 6;", + " SELECT 7;", + " ELSE", + " SELECT 8;", + " SELECT 9;", + " END IF" + )); + dialects.verified_stmt(concat!( + "IF 1 THEN", + " SELECT 1;", + " SELECT 2;", + " ELSE", + " SELECT 3;", + " SELECT 4;", + " END IF" + )); + dialects.verified_stmt(concat!( + "IF 1 THEN", + " SELECT 1;", + " SELECT 2;", + " SELECT 3;", + " ELSEIF 2 THEN", + " SELECT 3;", + " SELECT 4;", + " END IF" + )); + dialects.verified_stmt(concat!("IF 1 THEN", " SELECT 1;", " SELECT 2;", " END IF")); + dialects.verified_stmt(concat!( + "IF (1) THEN", + " SELECT 1;", + " SELECT 2;", + " END IF" + )); + dialects.verified_stmt("IF 1 THEN END IF"); + dialects.verified_stmt("IF 1 THEN SELECT 1; ELSEIF 1 THEN END IF"); + + assert_eq!( + ParserError::ParserError("Expected: IF, found: EOF".to_string()), + dialects + .parse_sql_statements("IF 1 THEN SELECT 1; ELSEIF 1 THEN SELECT 2; END") + .unwrap_err() + ); +} + +#[test] +fn test_if_statement_span() { + let sql = "IF 1=1 THEN SELECT 1; ELSEIF 1=2 THEN SELECT 2; ELSE SELECT 3; END IF"; + let mut parser = Parser::new(&GenericDialect {}).try_with_sql(sql).unwrap(); + assert_eq!( + parser.parse_statement().unwrap().span(), + Span::new(Location::new(1, 1), Location::new(1, sql.len() as u64 + 1)) + ); +} + +#[test] +fn test_if_statement_multiline_span() { + let sql_line1 = "IF 1 = 1 THEN SELECT 1;"; + let sql_line2 = "ELSEIF 1 = 2 THEN SELECT 2;"; + let sql_line3 = "ELSE SELECT 3;"; + let sql_line4 = "END IF"; + let sql = [sql_line1, sql_line2, sql_line3, sql_line4].join("\n"); + let mut parser = 
Parser::new(&GenericDialect {}).try_with_sql(&sql).unwrap(); + assert_eq!( + parser.parse_statement().unwrap().span(), + Span::new( + Location::new(1, 1), + Location::new(4, sql_line4.len() as u64 + 1) + ) + ); +} + +#[test] +fn test_conditional_statement_span() { + let sql = "IF 1=1 THEN SELECT 1; ELSEIF 1=2 THEN SELECT 2; ELSE SELECT 3; END IF"; + let mut parser = Parser::new(&GenericDialect {}).try_with_sql(sql).unwrap(); + match parser.parse_statement().unwrap() { + Statement::If(IfStatement { + if_block, + elseif_blocks, + else_block, + .. + }) => { + assert_eq!( + Span::new(Location::new(1, 1), Location::new(1, 21)), + if_block.span() + ); + assert_eq!( + Span::new(Location::new(1, 23), Location::new(1, 47)), + elseif_blocks[0].span() + ); + assert_eq!( + Span::new(Location::new(1, 49), Location::new(1, 62)), + else_block.unwrap().span() + ); + } + stmt => panic!("Unexpected statement: {stmt:?}"), + } +} + +#[test] +fn parse_raise_statement() { + let sql = "RAISE USING MESSAGE = 42"; + let Statement::Raise(stmt) = verified_stmt(sql) else { + unreachable!() + }; + assert_eq!( + Some(RaiseStatementValue::UsingMessage(Expr::value(number("42")))), + stmt.value + ); + + verified_stmt("RAISE USING MESSAGE = 'error'"); + verified_stmt("RAISE myerror"); + verified_stmt("RAISE 42"); + verified_stmt("RAISE using"); + verified_stmt("RAISE"); + + assert_eq!( + ParserError::ParserError("Expected: =, found: error".to_string()), + parse_sql_statements("RAISE USING MESSAGE error").unwrap_err() + ); +} + #[test] fn test_lambdas() { let dialects = all_dialects_where(|d| d.supports_lambda_functions()); @@ -14149,6 +14764,8 @@ fn test_lambdas() { Expr::Lambda(LambdaFunction { params: OneOrManyWithParens::Many(vec![Ident::new("p1"), Ident::new("p2")]), body: Box::new(Expr::Case { + case_token: AttachedToken::empty(), + end_token: AttachedToken::empty(), operand: None, conditions: vec![ CaseWhen { @@ -14213,6 +14830,7 @@ fn test_select_from_first() { distinct: None, top: None, 
projection, + exclude: None, top_before_distinct: false, into: None, from: vec![TableWithJoins { @@ -14239,14 +14857,13 @@ fn test_select_from_first() { flavor, }))), order_by: None, - limit: None, - offset: None, + limit_clause: None, fetch: None, locks: vec![], - limit_by: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }; assert_eq!(expected, ast); assert_eq!(ast.to_string(), q); @@ -14306,13 +14923,16 @@ fn test_geometric_unary_operators() { } #[test] -fn test_geomtery_type() { +fn test_geometry_type() { let sql = "point '1,2'"; assert_eq!( all_dialects_where(|d| d.supports_geometric_types()).verified_expr(sql), Expr::TypedString { data_type: DataType::GeometricType(GeometricTypeKind::Point), - value: Value::SingleQuotedString("1,2".to_string()), + value: ValueWithSpan { + value: Value::SingleQuotedString("1,2".to_string()), + span: Span::empty(), + }, } ); @@ -14321,7 +14941,10 @@ fn test_geomtery_type() { all_dialects_where(|d| d.supports_geometric_types()).verified_expr(sql), Expr::TypedString { data_type: DataType::GeometricType(GeometricTypeKind::Line), - value: Value::SingleQuotedString("1,2,3,4".to_string()), + value: ValueWithSpan { + value: Value::SingleQuotedString("1,2,3,4".to_string()), + span: Span::empty(), + }, } ); @@ -14330,7 +14953,10 @@ fn test_geomtery_type() { all_dialects_where(|d| d.supports_geometric_types()).verified_expr(sql), Expr::TypedString { data_type: DataType::GeometricType(GeometricTypeKind::GeometricPath), - value: Value::SingleQuotedString("1,2,3,4".to_string()), + value: ValueWithSpan { + value: Value::SingleQuotedString("1,2,3,4".to_string()), + span: Span::empty(), + }, } ); let sql = "box '1,2,3,4'"; @@ -14338,7 +14964,10 @@ fn test_geomtery_type() { all_dialects_where(|d| d.supports_geometric_types()).verified_expr(sql), Expr::TypedString { data_type: DataType::GeometricType(GeometricTypeKind::GeometricBox), - value: Value::SingleQuotedString("1,2,3,4".to_string()), + value: 
ValueWithSpan { + value: Value::SingleQuotedString("1,2,3,4".to_string()), + span: Span::empty(), + }, } ); @@ -14347,7 +14976,10 @@ fn test_geomtery_type() { all_dialects_where(|d| d.supports_geometric_types()).verified_expr(sql), Expr::TypedString { data_type: DataType::GeometricType(GeometricTypeKind::Circle), - value: Value::SingleQuotedString("1,2,3".to_string()), + value: ValueWithSpan { + value: Value::SingleQuotedString("1,2,3".to_string()), + span: Span::empty(), + }, } ); @@ -14356,7 +14988,10 @@ fn test_geomtery_type() { all_dialects_where(|d| d.supports_geometric_types()).verified_expr(sql), Expr::TypedString { data_type: DataType::GeometricType(GeometricTypeKind::Polygon), - value: Value::SingleQuotedString("1,2,3,4".to_string()), + value: ValueWithSpan { + value: Value::SingleQuotedString("1,2,3,4".to_string()), + span: Span::empty(), + }, } ); let sql = "lseg '1,2,3,4'"; @@ -14364,7 +14999,10 @@ fn test_geomtery_type() { all_dialects_where(|d| d.supports_geometric_types()).verified_expr(sql), Expr::TypedString { data_type: DataType::GeometricType(GeometricTypeKind::LineSegment), - value: Value::SingleQuotedString("1,2,3,4".to_string()), + value: ValueWithSpan { + value: Value::SingleQuotedString("1,2,3,4".to_string()), + span: Span::empty(), + }, } ); } @@ -14617,3 +15255,881 @@ fn parse_array_type_def_with_brackets() { dialects.verified_stmt("SELECT x::INT[]"); dialects.verified_stmt("SELECT STRING_TO_ARRAY('1,2,3', ',')::INT[3]"); } + +#[test] +fn parse_set_names() { + let dialects = all_dialects_where(|d| d.supports_set_names()); + dialects.verified_stmt("SET NAMES 'UTF8'"); + dialects.verified_stmt("SET NAMES 'utf8'"); + dialects.verified_stmt("SET NAMES UTF8 COLLATE bogus"); +} + +#[test] +fn parse_pipeline_operator() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + + // select pipe operator + dialects.verified_stmt("SELECT * FROM users |> SELECT id"); + dialects.verified_stmt("SELECT * FROM users |> SELECT id, name"); + 
dialects.verified_query_with_canonical( + "SELECT * FROM users |> SELECT id user_id", + "SELECT * FROM users |> SELECT id AS user_id", + ); + dialects.verified_stmt("SELECT * FROM users |> SELECT id AS user_id"); + + // extend pipe operator + dialects.verified_stmt("SELECT * FROM users |> EXTEND id + 1 AS new_id"); + dialects.verified_stmt("SELECT * FROM users |> EXTEND id AS new_id, name AS new_name"); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> EXTEND id user_id", + "SELECT * FROM users |> EXTEND id AS user_id", + ); + + // set pipe operator + dialects.verified_stmt("SELECT * FROM users |> SET id = id + 1"); + dialects.verified_stmt("SELECT * FROM users |> SET id = id + 1, name = name + ' Doe'"); + + // drop pipe operator + dialects.verified_stmt("SELECT * FROM users |> DROP id"); + dialects.verified_stmt("SELECT * FROM users |> DROP id, name"); + + // as pipe operator + dialects.verified_stmt("SELECT * FROM users |> AS new_users"); + + // limit pipe operator + dialects.verified_stmt("SELECT * FROM users |> LIMIT 10"); + dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 OFFSET 5"); + dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 |> LIMIT 5"); + dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 |> WHERE true"); + + // where pipe operator + dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1"); + dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1 AND name = 'John'"); + dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1 OR name = 'John'"); + + // aggregate pipe operator full table + dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*)"); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> AGGREGATE COUNT(*) total_users", + "SELECT * FROM users |> AGGREGATE COUNT(*) AS total_users", + ); + dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*) AS total_users"); + dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*), MIN(id)"); + + // aggregate 
pipe operator with grouping + dialects.verified_stmt( + "SELECT * FROM users |> AGGREGATE SUM(o_totalprice) AS price, COUNT(*) AS cnt GROUP BY EXTRACT(YEAR FROM o_orderdate) AS year", + ); + dialects.verified_stmt( + "SELECT * FROM users |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate) AS year", + ); + dialects + .verified_stmt("SELECT * FROM users |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate)"); + dialects.verified_stmt("SELECT * FROM users |> AGGREGATE GROUP BY a, b"); + dialects.verified_stmt("SELECT * FROM users |> AGGREGATE SUM(c) GROUP BY a, b"); + dialects.verified_stmt("SELECT * FROM users |> AGGREGATE SUM(c) ASC"); + + // order by pipe operator + dialects.verified_stmt("SELECT * FROM users |> ORDER BY id ASC"); + dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC"); + dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC"); + + // tablesample pipe operator + dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE BERNOULLI (50)"); + dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50 PERCENT)"); + dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); + + // rename pipe operator + dialects.verified_stmt("SELECT * FROM users |> RENAME old_name AS new_name"); + dialects.verified_stmt("SELECT * FROM users |> RENAME id AS user_id, name AS user_name"); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> RENAME id user_id", + "SELECT * FROM users |> RENAME id AS user_id", + ); + + // union pipe operator + dialects.verified_stmt("SELECT * FROM users |> UNION ALL (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> UNION (SELECT * FROM admins)"); + + // union pipe operator with multiple queries + dialects.verified_stmt( + "SELECT * FROM users |> UNION ALL (SELECT * FROM admins), (SELECT * FROM guests)", + ); + dialects.verified_stmt("SELECT * 
FROM users |> UNION DISTINCT (SELECT * FROM admins), (SELECT * FROM guests), (SELECT * FROM employees)"); + dialects.verified_stmt( + "SELECT * FROM users |> UNION (SELECT * FROM admins), (SELECT * FROM guests)", + ); + + // union pipe operator with BY NAME modifier + dialects.verified_stmt("SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> UNION ALL BY NAME (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT BY NAME (SELECT * FROM admins)"); + + // union pipe operator with BY NAME and multiple queries + dialects.verified_stmt( + "SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins), (SELECT * FROM guests)", + ); + + // intersect pipe operator (BigQuery requires DISTINCT modifier for INTERSECT) + dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins)"); + + // intersect pipe operator with BY NAME modifier + dialects + .verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins)"); + + // intersect pipe operator with multiple queries + dialects.verified_stmt( + "SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", + ); + + // intersect pipe operator with BY NAME and multiple queries + dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); + + // except pipe operator (BigQuery requires DISTINCT modifier for EXCEPT) + dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins)"); + + // except pipe operator with BY NAME modifier + dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins)"); + + // except pipe operator with multiple queries + dialects.verified_stmt( + "SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", + ); + + // except pipe operator with BY NAME and multiple queries + 
dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); + + // call pipe operator + dialects.verified_stmt("SELECT * FROM users |> CALL my_function()"); + dialects.verified_stmt("SELECT * FROM users |> CALL process_data(5, 'test')"); + dialects.verified_stmt( + "SELECT * FROM users |> CALL namespace.function_name(col1, col2, 'literal')", + ); + + // call pipe operator with complex arguments + dialects.verified_stmt("SELECT * FROM users |> CALL transform_data(col1 + col2)"); + dialects.verified_stmt("SELECT * FROM users |> CALL analyze_data('param1', 100, true)"); + + // call pipe operator with aliases + dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) AS al"); + dialects.verified_stmt("SELECT * FROM users |> CALL process_data(5) AS result_table"); + dialects.verified_stmt("SELECT * FROM users |> CALL namespace.func() AS my_alias"); + + // multiple call pipe operators in sequence + dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) |> CALL tvf2(arg2, arg3)"); + dialects.verified_stmt( + "SELECT * FROM data |> CALL transform(col1) |> CALL validate() |> CALL process(param)", + ); + + // multiple call pipe operators with aliases + dialects.verified_stmt( + "SELECT * FROM input_table |> CALL tvf1(arg1) AS step1 |> CALL tvf2(arg2) AS step2", + ); + dialects.verified_stmt( + "SELECT * FROM data |> CALL preprocess() AS clean_data |> CALL analyze(mode) AS results", + ); + + // call pipe operators mixed with other pipe operators + dialects.verified_stmt( + "SELECT * FROM users |> CALL transform() |> WHERE status = 'active' |> CALL process(param)", + ); + dialects.verified_stmt( + "SELECT * FROM data |> CALL preprocess() AS clean |> SELECT col1, col2 |> CALL validate()", + ); + + // pivot pipe operator + dialects.verified_stmt( + "SELECT * FROM monthly_sales |> PIVOT(SUM(amount) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))", + ); + dialects.verified_stmt("SELECT * FROM 
sales_data |> PIVOT(AVG(revenue) FOR region IN ('North', 'South', 'East', 'West'))"); + + // pivot pipe operator with multiple aggregate functions + dialects.verified_stmt("SELECT * FROM data |> PIVOT(SUM(sales) AS total_sales, COUNT(*) AS num_transactions FOR month IN ('Jan', 'Feb', 'Mar'))"); + + // pivot pipe operator with compound column names + dialects.verified_stmt("SELECT * FROM sales |> PIVOT(SUM(amount) FOR product.category IN ('Electronics', 'Clothing'))"); + + // pivot pipe operator mixed with other pipe operators + dialects.verified_stmt("SELECT * FROM sales_data |> WHERE year = 2023 |> PIVOT(SUM(revenue) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))"); + + // pivot pipe operator with aliases + dialects.verified_stmt("SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales"); + dialects.verified_stmt("SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category"); + dialects.verified_stmt("SELECT * FROM sales |> PIVOT(COUNT(*) AS transactions, SUM(amount) AS total FOR region IN ('North', 'South')) AS regional_summary"); + + // pivot pipe operator with implicit aliases (without AS keyword) + dialects.verified_query_with_canonical( + "SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) quarterly_sales", + "SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales", + ); + dialects.verified_query_with_canonical( + "SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) avg_by_category", + "SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category", + ); + + // unpivot pipe operator basic usage + dialects + .verified_stmt("SELECT * FROM sales |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); + dialects.verified_stmt("SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C))"); + dialects.verified_stmt( + "SELECT * FROM metrics |> UNPIVOT(measurement FOR metric_type IN 
(cpu, memory, disk))", + ); + + // unpivot pipe operator with multiple columns + dialects.verified_stmt("SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (jan, feb, mar, apr, may, jun))"); + dialects.verified_stmt( + "SELECT * FROM report |> UNPIVOT(score FOR subject IN (math, science, english, history))", + ); + + // unpivot pipe operator mixed with other pipe operators + dialects.verified_stmt("SELECT * FROM sales_data |> WHERE year = 2023 |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); + + // unpivot pipe operator with aliases + dialects.verified_stmt("SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales"); + dialects.verified_stmt( + "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", + ); + dialects.verified_stmt("SELECT * FROM metrics |> UNPIVOT(measurement FOR metric_type IN (cpu, memory)) AS metric_measurements"); + + // unpivot pipe operator with implicit aliases (without AS keyword) + dialects.verified_query_with_canonical( + "SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) unpivoted_sales", + "SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales", + ); + dialects.verified_query_with_canonical( + "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) transformed_data", + "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", + ); + + // many pipes + dialects.verified_stmt( + "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", + ); + + // join pipe operator - INNER JOIN + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM users |> INNER JOIN orders ON users.id = 
orders.user_id"); + + // join pipe operator - LEFT JOIN + dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt( + "SELECT * FROM users |> LEFT OUTER JOIN orders ON users.id = orders.user_id", + ); + + // join pipe operator - RIGHT JOIN + dialects.verified_stmt("SELECT * FROM users |> RIGHT JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt( + "SELECT * FROM users |> RIGHT OUTER JOIN orders ON users.id = orders.user_id", + ); + + // join pipe operator - FULL JOIN + dialects.verified_stmt("SELECT * FROM users |> FULL JOIN orders ON users.id = orders.user_id"); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> FULL OUTER JOIN orders ON users.id = orders.user_id", + "SELECT * FROM users |> FULL JOIN orders ON users.id = orders.user_id", + ); + + // join pipe operator - CROSS JOIN + dialects.verified_stmt("SELECT * FROM users |> CROSS JOIN orders"); + + // join pipe operator with USING + dialects.verified_query_with_canonical( + "SELECT * FROM users |> JOIN orders USING (user_id)", + "SELECT * FROM users |> JOIN orders USING(user_id)", + ); + dialects.verified_query_with_canonical( + "SELECT * FROM users |> LEFT JOIN orders USING (user_id, order_date)", + "SELECT * FROM users |> LEFT JOIN orders USING(user_id, order_date)", + ); + + // join pipe operator with alias + dialects.verified_query_with_canonical( + "SELECT * FROM users |> JOIN orders o ON users.id = o.user_id", + "SELECT * FROM users |> JOIN orders AS o ON users.id = o.user_id", + ); + dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders AS o ON users.id = o.user_id"); + + // join pipe operator with complex ON condition + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id AND orders.status = 'active'"); + dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id AND orders.amount > 100"); + + // multiple join pipe operators + 
dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> JOIN products ON orders.product_id = products.id"); + dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id |> RIGHT JOIN products ON orders.product_id = products.id"); + + // join pipe operator with other pipe operators + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> WHERE orders.amount > 100"); + dialects.verified_stmt("SELECT * FROM users |> WHERE users.active = true |> LEFT JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> SELECT users.name, orders.amount"); +} + +#[test] +fn parse_pipeline_operator_negative_tests() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + + // Test that plain EXCEPT without DISTINCT fails + assert_eq!( + ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements("SELECT * FROM users |> EXCEPT (SELECT * FROM admins)") + .unwrap_err() + ); + + // Test that EXCEPT ALL fails + assert_eq!( + ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements("SELECT * FROM users |> EXCEPT ALL (SELECT * FROM admins)") + .unwrap_err() + ); + + // Test that EXCEPT BY NAME without DISTINCT fails + assert_eq!( + ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements("SELECT * FROM users |> EXCEPT BY NAME (SELECT * FROM admins)") + .unwrap_err() + ); + + // Test that EXCEPT ALL BY NAME fails + assert_eq!( + ParserError::ParserError("EXCEPT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements( + "SELECT * FROM users |> EXCEPT ALL BY NAME (SELECT * FROM admins)" + ) + .unwrap_err() + ); + + // Test that plain INTERSECT without DISTINCT fails + 
assert_eq!( + ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements("SELECT * FROM users |> INTERSECT (SELECT * FROM admins)") + .unwrap_err() + ); + + // Test that INTERSECT ALL fails + assert_eq!( + ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements("SELECT * FROM users |> INTERSECT ALL (SELECT * FROM admins)") + .unwrap_err() + ); + + // Test that INTERSECT BY NAME without DISTINCT fails + assert_eq!( + ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements("SELECT * FROM users |> INTERSECT BY NAME (SELECT * FROM admins)") + .unwrap_err() + ); + + // Test that INTERSECT ALL BY NAME fails + assert_eq!( + ParserError::ParserError("INTERSECT pipe operator requires DISTINCT modifier".to_string()), + dialects + .parse_sql_statements( + "SELECT * FROM users |> INTERSECT ALL BY NAME (SELECT * FROM admins)" + ) + .unwrap_err() + ); + + // Test that CALL without function name fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL") + .is_err()); + + // Test that CALL without parentheses fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL my_function") + .is_err()); + + // Test that CALL with invalid function syntax fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL 123invalid") + .is_err()); + + // Test that CALL with malformed arguments fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL my_function(,)") + .is_err()); + + // Test that CALL with invalid alias syntax fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CALL my_function() AS") + .is_err()); + + // Test that PIVOT without parentheses fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT SUM(amount) FOR month IN ('Jan')") + .is_err()); + + // Test 
that PIVOT without FOR keyword fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) month IN ('Jan'))") + .is_err()); + + // Test that PIVOT without IN keyword fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month ('Jan'))") + .is_err()); + + // Test that PIVOT with empty IN list fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month IN ())") + .is_err()); + + // Test that PIVOT with invalid alias syntax fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> PIVOT(SUM(amount) FOR month IN ('Jan')) AS") + .is_err()); + + // Test UNPIVOT negative cases + + // Test that UNPIVOT without parentheses fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT value FOR name IN col1, col2") + .is_err()); + + // Test that UNPIVOT without FOR keyword fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value name IN (col1, col2))") + .is_err()); + + // Test that UNPIVOT without IN keyword fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name (col1, col2))") + .is_err()); + + // Test that UNPIVOT with missing value column fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(FOR name IN (col1, col2))") + .is_err()); + + // Test that UNPIVOT with missing name column fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR IN (col1, col2))") + .is_err()); + + // Test that UNPIVOT with empty IN list fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN ())") + .is_err()); + + // Test that UNPIVOT with invalid alias syntax fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN (col1, col2)) AS") + .is_err()); + + // Test that UNPIVOT with missing closing parenthesis fails + assert!(dialects + 
.parse_sql_statements("SELECT * FROM users |> UNPIVOT(value FOR name IN (col1, col2)") + .is_err()); + + // Test that JOIN without table name fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN ON users.id = orders.user_id") + .is_err()); + + // Test that CROSS JOIN with ON condition fails + assert!(dialects + .parse_sql_statements( + "SELECT * FROM users |> CROSS JOIN orders ON users.id = orders.user_id" + ) + .is_err()); + + // Test that CROSS JOIN with USING condition fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> CROSS JOIN orders USING (user_id)") + .is_err()); + + // Test that JOIN with empty USING list fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN orders USING ()") + .is_err()); + + // Test that JOIN with malformed ON condition fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN orders ON") + .is_err()); + + // Test that JOIN with invalid USING syntax fails + assert!(dialects + .parse_sql_statements("SELECT * FROM users |> JOIN orders USING user_id") + .is_err()); +} + +#[test] +fn parse_multiple_set_statements() -> Result<(), ParserError> { + let dialects = all_dialects_where(|d| d.supports_comma_separated_set_assignments()); + let stmt = dialects.verified_stmt("SET @a = 1, b = 2"); + + match stmt { + Statement::Set(Set::MultipleAssignments { assignments }) => { + assert_eq!( + assignments, + vec![ + SetAssignment { + scope: None, + name: ObjectName::from(vec!["@a".into()]), + value: Expr::value(number("1")) + }, + SetAssignment { + scope: None, + name: ObjectName::from(vec!["b".into()]), + value: Expr::value(number("2")) + } + ] + ); + } + _ => panic!("Expected SetVariable with 2 variables and 2 values"), + }; + + let stmt = dialects.verified_stmt("SET GLOBAL @a = 1, SESSION b = 2, LOCAL c = 3, d = 4"); + + match stmt { + Statement::Set(Set::MultipleAssignments { assignments }) => { + assert_eq!( + assignments, + vec![ + SetAssignment { + scope: 
Some(ContextModifier::Global), + name: ObjectName::from(vec!["@a".into()]), + value: Expr::value(number("1")) + }, + SetAssignment { + scope: Some(ContextModifier::Session), + name: ObjectName::from(vec!["b".into()]), + value: Expr::value(number("2")) + }, + SetAssignment { + scope: Some(ContextModifier::Local), + name: ObjectName::from(vec!["c".into()]), + value: Expr::value(number("3")) + }, + SetAssignment { + scope: None, + name: ObjectName::from(vec!["d".into()]), + value: Expr::value(number("4")) + } + ] + ); + } + _ => panic!("Expected MultipleAssignments with 4 scoped variables and 4 values"), + }; + + Ok(()) +} + +#[test] +fn parse_set_time_zone_alias() { + match all_dialects().verified_stmt("SET TIME ZONE 'UTC'") { + Statement::Set(Set::SetTimeZone { local, value }) => { + assert!(!local); + assert_eq!( + value, + Expr::Value((Value::SingleQuotedString("UTC".into())).with_empty_span()) + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_return() { + let stmt = all_dialects().verified_stmt("RETURN"); + assert_eq!(stmt, Statement::Return(ReturnStatement { value: None })); + + let _ = all_dialects().verified_stmt("RETURN 1"); +} + +#[test] +fn parse_subquery_limit() { + let _ = all_dialects().verified_stmt("SELECT t1_id, t1_name FROM t1 WHERE t1_id IN (SELECT t2_id FROM t2 WHERE t1_name = t2_name LIMIT 10)"); +} + +#[test] +fn test_open() { + let open_cursor = "OPEN Employee_Cursor"; + let stmt = all_dialects().verified_stmt(open_cursor); + assert_eq!( + stmt, + Statement::Open(OpenStatement { + cursor_name: Ident::new("Employee_Cursor"), + }) + ); +} + +#[test] +fn parse_truncate_only() { + let truncate = all_dialects().verified_stmt("TRUNCATE TABLE employee, ONLY dept"); + + let table_names = vec![ + TruncateTableTarget { + name: ObjectName::from(vec![Ident::new("employee")]), + only: false, + }, + TruncateTableTarget { + name: ObjectName::from(vec![Ident::new("dept")]), + only: true, + }, + ]; + + assert_eq!( + Statement::Truncate { + table_names, 
+ partitions: None, + table: true, + identity: None, + cascade: None, + on_cluster: None, + }, + truncate + ); +} + +#[test] +fn check_enforced() { + all_dialects().verified_stmt( + "CREATE TABLE t (a INT, b INT, c INT, CHECK (a > 0) NOT ENFORCED, CHECK (b > 0) ENFORCED, CHECK (c > 0))", + ); +} + +#[test] +fn join_precedence() { + all_dialects_except(|d| !d.supports_left_associative_joins_without_parens()) + .verified_query_with_canonical( + "SELECT * + FROM t1 + NATURAL JOIN t5 + INNER JOIN t0 ON (t0.v1 + t5.v0) > 0 + WHERE t0.v1 = t1.v0", + // canonical string without parentheses + "SELECT * FROM t1 NATURAL JOIN t5 INNER JOIN t0 ON (t0.v1 + t5.v0) > 0 WHERE t0.v1 = t1.v0", + ); + all_dialects_except(|d| d.supports_left_associative_joins_without_parens()).verified_query_with_canonical( + "SELECT * + FROM t1 + NATURAL JOIN t5 + INNER JOIN t0 ON (t0.v1 + t5.v0) > 0 + WHERE t0.v1 = t1.v0", + // canonical string with parentheses + "SELECT * FROM t1 NATURAL JOIN (t5 INNER JOIN t0 ON (t0.v1 + t5.v0) > 0) WHERE t0.v1 = t1.v0", + ); +} + +#[test] +fn parse_create_procedure_with_language() { + let sql = r#"CREATE PROCEDURE test_proc LANGUAGE sql AS BEGIN SELECT 1; END"#; + match verified_stmt(sql) { + Statement::CreateProcedure { + or_alter, + name, + params, + language, + .. + } => { + assert_eq!(or_alter, false); + assert_eq!(name.to_string(), "test_proc"); + assert_eq!(params, Some(vec![])); + assert_eq!( + language, + Some(Ident { + value: "sql".into(), + quote_style: None, + span: Span { + start: Location::empty(), + end: Location::empty() + } + }) + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_procedure_with_parameter_modes() { + let sql = r#"CREATE PROCEDURE test_proc (IN a INTEGER, OUT b TEXT, INOUT c TIMESTAMP, d BOOL) AS BEGIN SELECT 1; END"#; + match verified_stmt(sql) { + Statement::CreateProcedure { + or_alter, + name, + params, + .. 
+ } => { + assert_eq!(or_alter, false); + assert_eq!(name.to_string(), "test_proc"); + let fake_span = Span { + start: Location { line: 0, column: 0 }, + end: Location { line: 0, column: 0 }, + }; + assert_eq!( + params, + Some(vec![ + ProcedureParam { + name: Ident { + value: "a".into(), + quote_style: None, + span: fake_span, + }, + data_type: DataType::Integer(None), + mode: Some(ArgMode::In) + }, + ProcedureParam { + name: Ident { + value: "b".into(), + quote_style: None, + span: fake_span, + }, + data_type: DataType::Text, + mode: Some(ArgMode::Out) + }, + ProcedureParam { + name: Ident { + value: "c".into(), + quote_style: None, + span: fake_span, + }, + data_type: DataType::Timestamp(None, TimezoneInfo::None), + mode: Some(ArgMode::InOut) + }, + ProcedureParam { + name: Ident { + value: "d".into(), + quote_style: None, + span: fake_span, + }, + data_type: DataType::Bool, + mode: None + }, + ]) + ); + } + _ => unreachable!(), + } +} + +#[test] +fn test_select_exclude() { + let dialects = all_dialects_where(|d| d.supports_select_wildcard_exclude()); + match &dialects + .verified_only_select("SELECT * EXCLUDE c1 FROM test") + .projection[0] + { + SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude, .. }) => { + assert_eq!( + *opt_exclude, + Some(ExcludeSelectItem::Single(Ident::new("c1"))) + ); + } + _ => unreachable!(), + } + match &dialects + .verified_only_select("SELECT * EXCLUDE (c1, c2) FROM test") + .projection[0] + { + SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude, .. }) => { + assert_eq!( + *opt_exclude, + Some(ExcludeSelectItem::Multiple(vec![ + Ident::new("c1"), + Ident::new("c2") + ])) + ); + } + _ => unreachable!(), + } + let select = dialects.verified_only_select("SELECT * EXCLUDE c1, c2 FROM test"); + match &select.projection[0] { + SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude, .. 
}) => { + assert_eq!( + *opt_exclude, + Some(ExcludeSelectItem::Single(Ident::new("c1"))) + ); + } + _ => unreachable!(), + } + match &select.projection[1] { + SelectItem::UnnamedExpr(Expr::Identifier(ident)) => { + assert_eq!(*ident, Ident::new("c2")); + } + _ => unreachable!(), + } + + let dialects = all_dialects_where(|d| d.supports_select_exclude()); + let select = dialects.verified_only_select("SELECT *, c1 EXCLUDE c1 FROM test"); + match &select.projection[0] { + SelectItem::Wildcard(additional_options) => { + assert_eq!(*additional_options, WildcardAdditionalOptions::default()); + } + _ => unreachable!(), + } + assert_eq!( + select.exclude, + Some(ExcludeSelectItem::Single(Ident::new("c1"))) + ); + + let dialects = all_dialects_where(|d| { + d.supports_select_wildcard_exclude() && !d.supports_select_exclude() + }); + let select = dialects.verified_only_select("SELECT * EXCLUDE c1 FROM test"); + match &select.projection[0] { + SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude, .. 
}) => { + assert_eq!( + *opt_exclude, + Some(ExcludeSelectItem::Single(Ident::new("c1"))) + ); + } + _ => unreachable!(), + } + + // Dialects that only support the wildcard form and accept EXCLUDE as an implicit alias + // will fail when encountered with the `c2` ident + let dialects = all_dialects_where(|d| { + d.supports_select_wildcard_exclude() + && !d.supports_select_exclude() + && d.is_column_alias(&Keyword::EXCLUDE, &mut Parser::new(d)) + }); + assert_eq!( + dialects + .parse_sql_statements("SELECT *, c1 EXCLUDE c2 FROM test") + .err() + .unwrap(), + ParserError::ParserError("Expected: end of statement, found: c2".to_string()) + ); + + // Dialects that only support the wildcard form and do not accept EXCLUDE as an implicit alias + // will fail when encountered with the `EXCLUDE` keyword + let dialects = all_dialects_where(|d| { + d.supports_select_wildcard_exclude() + && !d.supports_select_exclude() + && !d.is_column_alias(&Keyword::EXCLUDE, &mut Parser::new(d)) + }); + assert_eq!( + dialects + .parse_sql_statements("SELECT *, c1 EXCLUDE c2 FROM test") + .err() + .unwrap(), + ParserError::ParserError("Expected: end of statement, found: EXCLUDE".to_string()) + ); +} + +#[test] +fn test_no_semicolon_required_between_statements() { + let sql = r#" +SELECT * FROM tbl1 +SELECT * FROM tbl2 + "#; + + let dialects = all_dialects_with_options(ParserOptions { + trailing_commas: false, + unescape: true, + require_semicolon_stmt_delimiter: false, + }); + let stmts = dialects.parse_sql_statements(sql).unwrap(); + assert_eq!(stmts.len(), 2); + assert!(stmts.iter().all(|s| matches!(s, Statement::Query { .. }))); +} diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 3b36d7a1..a27e0699 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -15,9 +15,11 @@ // specific language governing permissions and limitations // under the License. 
+use sqlparser::ast::helpers::attached_token::AttachedToken; use sqlparser::ast::*; use sqlparser::dialect::{DatabricksDialect, GenericDialect}; use sqlparser::parser::ParserError; +use sqlparser::tokenizer::Span; use test_utils::*; #[macro_use] @@ -108,6 +110,8 @@ fn test_databricks_lambdas() { Expr::Lambda(LambdaFunction { params: OneOrManyWithParens::Many(vec![Ident::new("p1"), Ident::new("p2")]), body: Box::new(Expr::Case { + case_token: AttachedToken::empty(), + end_token: AttachedToken::empty(), operand: None, conditions: vec![ CaseWhen { @@ -210,7 +214,7 @@ fn parse_use() { for object_name in &valid_object_names { // Test single identifier without quotes assert_eq!( - databricks().verified_stmt(&format!("USE {}", object_name)), + databricks().verified_stmt(&format!("USE {object_name}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::new( object_name.to_string() )]))) @@ -218,7 +222,7 @@ fn parse_use() { for "e in "e_styles { // Test single identifier with different type of quotes assert_eq!( - databricks().verified_stmt(&format!("USE {0}{1}{0}", quote, object_name)), + databricks().verified_stmt(&format!("USE {quote}{object_name}{quote}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::with_quote( quote, object_name.to_string(), @@ -230,21 +234,21 @@ fn parse_use() { for "e in "e_styles { // Test single identifier with keyword and different type of quotes assert_eq!( - databricks().verified_stmt(&format!("USE CATALOG {0}my_catalog{0}", quote)), + databricks().verified_stmt(&format!("USE CATALOG {quote}my_catalog{quote}")), Statement::Use(Use::Catalog(ObjectName::from(vec![Ident::with_quote( quote, "my_catalog".to_string(), )]))) ); assert_eq!( - databricks().verified_stmt(&format!("USE DATABASE {0}my_database{0}", quote)), + databricks().verified_stmt(&format!("USE DATABASE {quote}my_database{quote}")), Statement::Use(Use::Database(ObjectName::from(vec![Ident::with_quote( quote, "my_database".to_string(), )]))) ); assert_eq!( - 
databricks().verified_stmt(&format!("USE SCHEMA {0}my_schema{0}", quote)), + databricks().verified_stmt(&format!("USE SCHEMA {quote}my_schema{quote}")), Statement::Use(Use::Schema(ObjectName::from(vec![Ident::with_quote( quote, "my_schema".to_string(), @@ -317,3 +321,46 @@ fn parse_databricks_struct_function() { }) ); } + +#[test] +fn data_type_timestamp_ntz() { + // Literal + assert_eq!( + databricks().verified_expr("TIMESTAMP_NTZ '2025-03-29T18:52:00'"), + Expr::TypedString { + data_type: DataType::TimestampNtz, + value: ValueWithSpan { + value: Value::SingleQuotedString("2025-03-29T18:52:00".to_owned()), + span: Span::empty(), + } + } + ); + + // Cast + assert_eq!( + databricks().verified_expr("(created_at)::TIMESTAMP_NTZ"), + Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(Expr::Nested(Box::new(Expr::Identifier( + "created_at".into() + )))), + data_type: DataType::TimestampNtz, + format: None + } + ); + + // Column definition + match databricks().verified_stmt("CREATE TABLE foo (x TIMESTAMP_NTZ)") { + Statement::CreateTable(CreateTable { columns, .. 
}) => { + assert_eq!( + columns, + vec![ColumnDef { + name: "x".into(), + data_type: DataType::TimestampNtz, + options: vec![], + }] + ); + } + s => panic!("Unexpected statement: {s:?}"), + } +} diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index bed02428..fe14b7ba 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -24,6 +24,7 @@ use test_utils::*; use sqlparser::ast::*; use sqlparser::dialect::{DuckDbDialect, GenericDialect}; +use sqlparser::parser::ParserError; fn duckdb() -> TestedDialects { TestedDialects::new(vec![Box::new(DuckDbDialect {})]) @@ -44,10 +45,12 @@ fn test_struct() { StructField { field_name: Some(Ident::new("v")), field_type: DataType::Varchar(None), + options: None, }, StructField { field_name: Some(Ident::new("i")), field_type: DataType::Integer(None), + options: None, }, ], StructBracketKind::Parentheses, @@ -84,6 +87,7 @@ fn test_struct() { StructField { field_name: Some(Ident::new("v")), field_type: DataType::Varchar(None), + options: None, }, StructField { field_name: Some(Ident::new("s")), @@ -92,14 +96,17 @@ fn test_struct() { StructField { field_name: Some(Ident::new("a1")), field_type: DataType::Integer(None), + options: None, }, StructField { field_name: Some(Ident::new("a2")), field_type: DataType::Varchar(None), + options: None, }, ], StructBracketKind::Parentheses, ), + options: None, }, ], StructBracketKind::Parentheses, @@ -262,6 +269,7 @@ fn test_select_union_by_name() { distinct: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())], + exclude: None, top_before_distinct: false, into: None, from: vec![TableWithJoins { @@ -292,6 +300,7 @@ fn test_select_union_by_name() { distinct: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())], + exclude: None, top_before_distinct: false, into: None, from: vec![TableWithJoins { @@ -352,6 +361,32 @@ fn test_duckdb_load_extension() { ); } +#[test] +fn 
test_duckdb_specific_int_types() { + let duckdb_dtypes = vec![ + ("UTINYINT", DataType::UTinyInt), + ("USMALLINT", DataType::USmallInt), + ("UBIGINT", DataType::UBigInt), + ("UHUGEINT", DataType::UHugeInt), + ("HUGEINT", DataType::HugeInt), + ]; + for (dtype_string, data_type) in duckdb_dtypes { + let sql = format!("SELECT 123::{dtype_string}"); + let select = duckdb().verified_only_select(&sql); + assert_eq!( + &Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(Expr::Value( + Value::Number("123".parse().unwrap(), false).with_empty_span() + )), + data_type: data_type.clone(), + format: None, + }, + expr_from_projection(&select.projection[0]) + ); + } +} + #[test] fn test_duckdb_struct_literal() { //struct literal syntax https://duckdb.org/docs/sql/data_types/struct#creating-structs @@ -709,19 +744,13 @@ fn test_duckdb_union_datatype() { storage: Default::default(), location: Default::default() }), - table_properties: Default::default(), - with_options: Default::default(), file_format: Default::default(), location: Default::default(), query: Default::default(), without_rowid: Default::default(), like: Default::default(), clone: Default::default(), - engine: Default::default(), comment: Default::default(), - auto_increment_offset: Default::default(), - default_charset: Default::default(), - collation: Default::default(), on_commit: Default::default(), on_cluster: Default::default(), primary_key: Default::default(), @@ -729,7 +758,7 @@ fn test_duckdb_union_datatype() { partition_by: Default::default(), cluster_by: Default::default(), clustered_by: Default::default(), - options: Default::default(), + inherits: Default::default(), strict: Default::default(), copy_grants: Default::default(), enable_schema_evolution: Default::default(), @@ -745,6 +774,7 @@ fn test_duckdb_union_datatype() { catalog: Default::default(), catalog_sync: Default::default(), storage_serialization_policy: Default::default(), + table_options: CreateTableOptions::None }), stmt ); @@ 
-765,7 +795,7 @@ fn parse_use() { for object_name in &valid_object_names { // Test single identifier without quotes assert_eq!( - duckdb().verified_stmt(&format!("USE {}", object_name)), + duckdb().verified_stmt(&format!("USE {object_name}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::new( object_name.to_string() )]))) @@ -773,7 +803,7 @@ fn parse_use() { for &quote in &quote_styles { // Test single identifier with different type of quotes assert_eq!( - duckdb().verified_stmt(&format!("USE {0}{1}{0}", quote, object_name)), + duckdb().verified_stmt(&format!("USE {quote}{object_name}{quote}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::with_quote( quote, object_name.to_string(), @@ -785,7 +815,9 @@ fn parse_use() { for &quote in &quote_styles { // Test double identifier with different type of quotes assert_eq!( - duckdb().verified_stmt(&format!("USE {0}CATALOG{0}.{0}my_schema{0}", quote)), + duckdb().verified_stmt(&format!( + "USE {quote}CATALOG{quote}.{quote}my_schema{quote}" + )), Statement::Use(Use::Object(ObjectName::from(vec![ Ident::with_quote(quote, "CATALOG"), Ident::with_quote(quote, "my_schema") @@ -801,3 +833,32 @@ fn parse_use() { ]))) ); } + +#[test] +fn test_duckdb_trim() { + let real_sql = r#"SELECT customer_id, TRIM(item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; + assert_eq!(duckdb().verified_stmt(real_sql).to_string(), real_sql); + + let sql_only_select = "SELECT TRIM('xyz', 'a')"; + let select = duckdb().verified_only_select(sql_only_select); + assert_eq!( + &Expr::Trim { + expr: Box::new(Expr::Value( + Value::SingleQuotedString("xyz".to_owned()).with_empty_span() + )), + trim_where: None, + trim_what: None, + trim_characters: Some(vec![Expr::Value( + Value::SingleQuotedString("a".to_owned()).with_empty_span() + )]), + }, + expr_from_projection(only(&select.projection)) + ); + + // missing comma separation + let error_sql = "SELECT TRIM('xyz' 'a')"; + assert_eq!( + ParserError::ParserError("Expected: ), 
found: 'a'".to_owned()), + duckdb().parse_sql_statements(error_sql).unwrap_err() + ); +} diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index d7f3c014..56a72ec8 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -22,11 +22,10 @@ use sqlparser::ast::{ ClusteredBy, CommentDef, CreateFunction, CreateFunctionBody, CreateFunctionUsing, CreateTable, - Expr, Function, FunctionArgumentList, FunctionArguments, Ident, ObjectName, - OneOrManyWithParens, OrderByExpr, OrderByOptions, SelectItem, Statement, TableFactor, - UnaryOperator, Use, Value, + Expr, Function, FunctionArgumentList, FunctionArguments, Ident, ObjectName, OrderByExpr, + OrderByOptions, SelectItem, Set, Statement, TableFactor, UnaryOperator, Use, Value, }; -use sqlparser::dialect::{GenericDialect, HiveDialect, MsSqlDialect}; +use sqlparser::dialect::{AnsiDialect, GenericDialect, HiveDialect}; use sqlparser::parser::ParserError; use sqlparser::test_utils::*; @@ -92,7 +91,7 @@ fn parse_msck() { } #[test] -fn parse_set() { +fn parse_set_hivevar() { let set = "SET HIVEVAR:name = a, b, c_d"; hive().verified_stmt(set); } @@ -134,9 +133,7 @@ fn create_table_with_comment() { Statement::CreateTable(CreateTable { comment, .. 
}) => { assert_eq!( comment, - Some(CommentDef::AfterColumnDefsWithoutEq( - "table comment".to_string() - )) + Some(CommentDef::WithoutEq("table comment".to_string())) ) } _ => unreachable!(), @@ -344,6 +341,9 @@ fn lateral_view() { fn sort_by() { let sort_by = "SELECT * FROM db.table SORT BY a"; hive().verified_stmt(sort_by); + + let sort_by_with_direction = "SELECT * FROM db.table SORT BY a, b DESC"; + hive().verified_stmt(sort_by_with_direction); } #[test] @@ -369,20 +369,20 @@ fn from_cte() { fn set_statement_with_minus() { assert_eq!( hive().verified_stmt("SET hive.tez.java.opts = -Xmx4g"), - Statement::SetVariable { - local: false, + Statement::Set(Set::SingleAssignment { + scope: None, hivevar: false, - variables: OneOrManyWithParens::One(ObjectName::from(vec![ + variable: ObjectName::from(vec![ Ident::new("hive"), Ident::new("tez"), Ident::new("java"), Ident::new("opts") - ])), - value: vec![Expr::UnaryOp { + ]), + values: vec![Expr::UnaryOp { op: UnaryOperator::Minus, expr: Box::new(Expr::Identifier(Ident::new("Xmx4g"))) }], - } + }) ); assert_eq!( @@ -424,7 +424,7 @@ fn parse_create_function() { } // Test error in dialect that doesn't support parsing CREATE FUNCTION - let unsupported_dialects = TestedDialects::new(vec![Box::new(MsSqlDialect {})]); + let unsupported_dialects = TestedDialects::new(vec![Box::new(AnsiDialect {})]); assert_eq!( unsupported_dialects.parse_sql_statements(sql).unwrap_err(), @@ -524,7 +524,7 @@ fn parse_use() { for object_name in &valid_object_names { // Test single identifier without quotes assert_eq!( - hive().verified_stmt(&format!("USE {}", object_name)), + hive().verified_stmt(&format!("USE {object_name}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::new( object_name.to_string() )]))) @@ -532,7 +532,7 @@ fn parse_use() { for &quote in &quote_styles { // Test single identifier with different type of quotes assert_eq!( - hive().verified_stmt(&format!("USE {}{}{}", quote, object_name, quote)), + 
hive().verified_stmt(&format!("USE {quote}{object_name}{quote}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::with_quote( quote, object_name.to_string(), diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index ec565e50..50c6448d 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -23,7 +23,8 @@ mod test_utils; use helpers::attached_token::AttachedToken; -use sqlparser::tokenizer::Span; +use sqlparser::keywords::Keyword; +use sqlparser::tokenizer::{Location, Span, Token, TokenWithSpan, Word}; use test_utils::*; use sqlparser::ast::DataType::{Int, Text, Varbinary}; @@ -31,7 +32,7 @@ use sqlparser::ast::DeclareAssignment::MsSqlAssignment; use sqlparser::ast::Value::SingleQuotedString; use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, MsSqlDialect}; -use sqlparser::parser::ParserError; +use sqlparser::parser::{Parser, ParserError, ParserOptions}; #[test] fn parse_mssql_identifiers() { @@ -99,49 +100,53 @@ fn parse_mssql_delimited_identifiers() { #[test] fn parse_create_procedure() { - let sql = "CREATE OR ALTER PROCEDURE test (@foo INT, @bar VARCHAR(256)) AS BEGIN SELECT 1 END"; + let sql = "CREATE OR ALTER PROCEDURE test (@foo INT, @bar VARCHAR(256)) AS BEGIN SELECT 1; END"; assert_eq!( ms().verified_stmt(sql), Statement::CreateProcedure { or_alter: true, - body: vec![Statement::Query(Box::new(Query { - with: None, - limit: None, - limit_by: vec![], - offset: None, - fetch: None, - locks: vec![], - for_clause: None, - order_by: None, - settings: None, - format_clause: None, - body: Box::new(SetExpr::Select(Box::new(Select { - select_token: AttachedToken::empty(), - distinct: None, - top: None, - top_before_distinct: false, - projection: vec![SelectItem::UnnamedExpr(Expr::Value( - (number("1")).with_empty_span() - ))], - into: None, - from: vec![], - lateral_views: vec![], - prewhere: None, - selection: None, - group_by: GroupByExpr::Expressions(vec![], vec![]), - cluster_by: vec![], - distribute_by: vec![], 
- sort_by: vec![], - having: None, - named_window: vec![], - window_before_qualify: false, - qualify: None, - value_table_mode: None, - connect_by: None, - flavor: SelectFlavor::Standard, - }))) - }))], + body: ConditionalStatements::BeginEnd(BeginEndStatements { + begin_token: AttachedToken::empty(), + statements: vec![Statement::Query(Box::new(Query { + with: None, + limit_clause: None, + fetch: None, + locks: vec![], + for_clause: None, + order_by: None, + settings: None, + format_clause: None, + pipe_operators: vec![], + body: Box::new(SetExpr::Select(Box::new(Select { + select_token: AttachedToken::empty(), + distinct: None, + top: None, + top_before_distinct: false, + projection: vec![SelectItem::UnnamedExpr(Expr::Value( + (number("1")).with_empty_span() + ))], + exclude: None, + into: None, + from: vec![], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + window_before_qualify: false, + qualify: None, + value_table_mode: None, + connect_by: None, + flavor: SelectFlavor::Standard, + }))) + }))], + end_token: AttachedToken::empty(), + }), params: Some(vec![ ProcedureParam { name: Ident { @@ -149,7 +154,8 @@ fn parse_create_procedure() { quote_style: None, span: Span::empty(), }, - data_type: DataType::Int(None) + data_type: DataType::Int(None), + mode: None, }, ProcedureParam { name: Ident { @@ -160,33 +166,259 @@ fn parse_create_procedure() { data_type: DataType::Varchar(Some(CharacterLength::IntegerLength { length: 256, unit: None - })) + })), + mode: None, } ]), name: ObjectName::from(vec![Ident { value: "test".into(), quote_style: None, span: Span::empty(), - }]) + }]), + language: None, } ) } #[test] fn parse_mssql_create_procedure() { - let _ = ms_and_generic().verified_stmt("CREATE OR ALTER PROCEDURE foo AS BEGIN SELECT 1 END"); - let _ = ms_and_generic().verified_stmt("CREATE 
PROCEDURE foo AS BEGIN SELECT 1 END"); + let _ = ms_and_generic().verified_stmt("CREATE OR ALTER PROCEDURE foo AS SELECT 1;"); + let _ = ms_and_generic().verified_stmt("CREATE OR ALTER PROCEDURE foo AS BEGIN SELECT 1; END"); + let _ = ms_and_generic().verified_stmt("CREATE PROCEDURE foo AS BEGIN SELECT 1; END"); let _ = ms().verified_stmt( - "CREATE PROCEDURE foo AS BEGIN SELECT [myColumn] FROM [myschema].[mytable] END", + "CREATE PROCEDURE foo AS BEGIN SELECT [myColumn] FROM [myschema].[mytable]; END", ); let _ = ms_and_generic().verified_stmt( - "CREATE PROCEDURE foo (@CustomerName NVARCHAR(50)) AS BEGIN SELECT * FROM DEV END", + "CREATE PROCEDURE foo (@CustomerName NVARCHAR(50)) AS BEGIN SELECT * FROM DEV; END", ); - let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN UPDATE bar SET col = 'test' END"); + let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN UPDATE bar SET col = 'test'; END"); // Test a statement with END in it - let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN SELECT [foo], CASE WHEN [foo] IS NULL THEN 'empty' ELSE 'notempty' END AS [foo] END"); + let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN SELECT [foo], CASE WHEN [foo] IS NULL THEN 'empty' ELSE 'notempty' END AS [foo]; END"); // Multiple statements - let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN UPDATE bar SET col = 'test'; SELECT [foo] FROM BAR WHERE [FOO] > 10 END"); + let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN UPDATE bar SET col = 'test'; SELECT [foo] FROM BAR WHERE [FOO] > 10; END"); +} + +#[test] +fn parse_create_function() { + let return_expression_function = "CREATE FUNCTION some_scalar_udf(@foo INT, @bar VARCHAR(256)) RETURNS INT AS BEGIN RETURN 1; END"; + assert_eq!( + ms().verified_stmt(return_expression_function), + sqlparser::ast::Statement::CreateFunction(CreateFunction { + or_alter: false, + or_replace: false, + temporary: false, + if_not_exists: false, + name: 
ObjectName::from(vec![Ident::new("some_scalar_udf")]), + args: Some(vec![ + OperateFunctionArg { + mode: None, + name: Some(Ident::new("@foo")), + data_type: DataType::Int(None), + default_expr: None, + }, + OperateFunctionArg { + mode: None, + name: Some(Ident::new("@bar")), + data_type: DataType::Varchar(Some(CharacterLength::IntegerLength { + length: 256, + unit: None + })), + default_expr: None, + }, + ]), + return_type: Some(DataType::Int(None)), + function_body: Some(CreateFunctionBody::AsBeginEnd(BeginEndStatements { + begin_token: AttachedToken::empty(), + statements: vec![Statement::Return(ReturnStatement { + value: Some(ReturnStatementValue::Expr(Expr::Value( + (number("1")).with_empty_span() + ))), + })], + end_token: AttachedToken::empty(), + })), + behavior: None, + called_on_null: None, + parallel: None, + using: None, + language: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }), + ); + + let multi_statement_function = "\ + CREATE FUNCTION some_scalar_udf(@foo INT, @bar VARCHAR(256)) \ + RETURNS INT \ + AS \ + BEGIN \ + SET @foo = @foo + 1; \ + RETURN @foo; \ + END\ + "; + let _ = ms().verified_stmt(multi_statement_function); + + let multi_statement_function_without_as = multi_statement_function.replace(" AS", ""); + let _ = ms().one_statement_parses_to( + &multi_statement_function_without_as, + multi_statement_function, + ); + + let create_function_with_conditional = "\ + CREATE FUNCTION some_scalar_udf() \ + RETURNS INT \ + AS \ + BEGIN \ + IF 1 = 2 \ + BEGIN \ + RETURN 1; \ + END; \ + RETURN 0; \ + END\ + "; + let _ = ms().verified_stmt(create_function_with_conditional); + + let create_or_alter_function = "\ + CREATE OR ALTER FUNCTION some_scalar_udf(@foo INT, @bar VARCHAR(256)) \ + RETURNS INT \ + AS \ + BEGIN \ + SET @foo = @foo + 1; \ + RETURN @foo; \ + END\ + "; + let _ = ms().verified_stmt(create_or_alter_function); + + let create_function_with_return_expression = "\ + CREATE FUNCTION some_scalar_udf(@foo 
INT, @bar VARCHAR(256)) \ + RETURNS INT \ + AS \ + BEGIN \ + RETURN CONVERT(INT, 1) + 2; \ + END\ + "; + let _ = ms().verified_stmt(create_function_with_return_expression); + + let create_inline_table_value_function = "\ + CREATE FUNCTION some_inline_tvf(@foo INT, @bar VARCHAR(256)) \ + RETURNS TABLE \ + AS \ + RETURN (SELECT 1 AS col_1)\ + "; + let _ = ms().verified_stmt(create_inline_table_value_function); + + let create_inline_table_value_function_without_parentheses = "\ + CREATE FUNCTION some_inline_tvf(@foo INT, @bar VARCHAR(256)) \ + RETURNS TABLE \ + AS \ + RETURN SELECT 1 AS col_1\ + "; + let _ = ms().verified_stmt(create_inline_table_value_function_without_parentheses); + + let create_inline_table_value_function_without_as = + create_inline_table_value_function.replace(" AS", ""); + let _ = ms().one_statement_parses_to( + &create_inline_table_value_function_without_as, + create_inline_table_value_function, + ); + + let create_multi_statement_table_value_function = "\ + CREATE FUNCTION some_multi_statement_tvf(@foo INT, @bar VARCHAR(256)) \ + RETURNS @t TABLE (col_1 INT) \ + AS \ + BEGIN \ + INSERT INTO @t SELECT 1; \ + RETURN; \ + END\ + "; + let _ = ms().verified_stmt(create_multi_statement_table_value_function); + + let create_multi_statement_table_value_function_without_as = + create_multi_statement_table_value_function.replace(" AS", ""); + let _ = ms().one_statement_parses_to( + &create_multi_statement_table_value_function_without_as, + create_multi_statement_table_value_function, + ); + + let create_multi_statement_table_value_function_with_constraints = "\ + CREATE FUNCTION some_multi_statement_tvf(@foo INT, @bar VARCHAR(256)) \ + RETURNS @t TABLE (col_1 INT NOT NULL) \ + AS \ + BEGIN \ + INSERT INTO @t SELECT 1; \ + RETURN @t; \ + END\ + "; + let _ = ms().verified_stmt(create_multi_statement_table_value_function_with_constraints); + + let create_multi_statement_tvf_without_table_definition = "\ + CREATE FUNCTION incorrect_tvf(@foo INT, @bar 
VARCHAR(256)) \ + RETURNS @t TABLE () + AS \ + BEGIN \ + INSERT INTO @t SELECT 1; \ + RETURN @t; \ + END\ + "; + assert_eq!( + ParserError::ParserError("Unparsable function body".to_owned()), + ms().parse_sql_statements(create_multi_statement_tvf_without_table_definition) + .unwrap_err() + ); + + let create_inline_tvf_without_subquery_or_bare_select = "\ + CREATE FUNCTION incorrect_tvf(@foo INT, @bar VARCHAR(256)) \ + RETURNS TABLE + AS \ + RETURN 'hi'\ + "; + assert_eq!( + ParserError::ParserError( + "Expected a subquery (or bare SELECT statement) after RETURN".to_owned() + ), + ms().parse_sql_statements(create_inline_tvf_without_subquery_or_bare_select) + .unwrap_err() + ); +} + +#[test] +fn parse_create_function_parameter_default_values() { + let single_default_sql = + "CREATE FUNCTION test_func(@param1 INT = 42) RETURNS INT AS BEGIN RETURN @param1; END"; + assert_eq!( + ms().verified_stmt(single_default_sql), + Statement::CreateFunction(CreateFunction { + or_alter: false, + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName::from(vec![Ident::new("test_func")]), + args: Some(vec![OperateFunctionArg { + mode: None, + name: Some(Ident::new("@param1")), + data_type: DataType::Int(None), + default_expr: Some(Expr::Value((number("42")).with_empty_span())), + },]), + return_type: Some(DataType::Int(None)), + function_body: Some(CreateFunctionBody::AsBeginEnd(BeginEndStatements { + begin_token: AttachedToken::empty(), + statements: vec![Statement::Return(ReturnStatement { + value: Some(ReturnStatementValue::Expr(Expr::Identifier(Ident::new( + "@param1" + )))), + })], + end_token: AttachedToken::empty(), + })), + behavior: None, + called_on_null: None, + parallel: None, + using: None, + language: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }), + ); } #[test] @@ -1135,7 +1367,9 @@ fn parse_substring_in_select() { (number("1")).with_empty_span() ))), special: true, + shorthand: false, })], + exclude: 
None, into: None, from: vec![TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident { @@ -1161,14 +1395,13 @@ fn parse_substring_in_select() { flavor: SelectFlavor::Standard, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }), query ); @@ -1254,25 +1487,25 @@ fn parse_mssql_declare() { for_query: None }] }, - Statement::SetVariable { - local: false, + Statement::Set(Set::SingleAssignment { + scope: None, hivevar: false, - variables: OneOrManyWithParens::One(ObjectName::from(vec![Ident::new("@bar")])), - value: vec![Expr::Value( + variable: ObjectName::from(vec![Ident::new("@bar")]), + values: vec![Expr::Value( (Value::Number("2".parse().unwrap(), false)).with_empty_span() )], - }, + }), Statement::Query(Box::new(Query { with: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, order_by: None, settings: None, format_clause: None, + pipe_operators: vec![], + body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), distinct: None, @@ -1285,6 +1518,7 @@ fn parse_mssql_declare() { (Value::Number("4".parse().unwrap(), false)).with_empty_span() )), })], + exclude: None, into: None, from: vec![], lateral_views: vec![], @@ -1306,6 +1540,89 @@ fn parse_mssql_declare() { ], ast ); + + let declare_cursor_for_select = + "DECLARE vend_cursor CURSOR FOR SELECT * FROM Purchasing.Vendor"; + let _ = ms().verified_stmt(declare_cursor_for_select); +} + +#[test] +fn test_mssql_cursor() { + let full_cursor_usage = "\ + DECLARE Employee_Cursor CURSOR FOR \ + SELECT LastName, FirstName \ + FROM AdventureWorks2022.HumanResources.vEmployee \ + WHERE LastName LIKE 'B%'; \ + \ + OPEN Employee_Cursor; \ + \ + FETCH NEXT FROM Employee_Cursor; \ + \ + WHILE @@FETCH_STATUS = 0 \ + BEGIN \ + FETCH NEXT FROM Employee_Cursor; \ + 
END; \ + \ + CLOSE Employee_Cursor; \ + DEALLOCATE Employee_Cursor\ + "; + let _ = ms().statements_parse_to(full_cursor_usage, ""); +} + +#[test] +fn test_mssql_while_statement() { + let while_single_statement = "WHILE 1 = 0 PRINT 'Hello World';"; + let stmt = ms().verified_stmt(while_single_statement); + assert_eq!( + stmt, + Statement::While(sqlparser::ast::WhileStatement { + while_block: ConditionalStatementBlock { + start_token: AttachedToken(TokenWithSpan { + token: Token::Word(Word { + value: "WHILE".to_string(), + quote_style: None, + keyword: Keyword::WHILE + }), + span: Span::empty() + }), + condition: Some(Expr::BinaryOp { + left: Box::new(Expr::Value( + (Value::Number("1".parse().unwrap(), false)).with_empty_span() + )), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value( + (Value::Number("0".parse().unwrap(), false)).with_empty_span() + )), + }), + then_token: None, + conditional_statements: ConditionalStatements::Sequence { + statements: vec![Statement::Print(PrintStatement { + message: Box::new(Expr::Value( + (Value::SingleQuotedString("Hello World".to_string())) + .with_empty_span() + )), + })], + } + } + }) + ); + + let while_begin_end = "\ + WHILE @@FETCH_STATUS = 0 \ + BEGIN \ + FETCH NEXT FROM Employee_Cursor; \ + END\ + "; + let _ = ms().verified_stmt(while_begin_end); + + let while_begin_end_multiple_statements = "\ + WHILE @@FETCH_STATUS = 0 \ + BEGIN \ + FETCH NEXT FROM Employee_Cursor; \ + PRINT 'Hello World'; \ + END\ + "; + let _ = ms().verified_stmt(while_begin_end_multiple_statements); } #[test] @@ -1359,7 +1676,7 @@ fn parse_use() { for object_name in &valid_object_names { // Test single identifier without quotes assert_eq!( - ms().verified_stmt(&format!("USE {}", object_name)), + ms().verified_stmt(&format!("USE {object_name}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::new( object_name.to_string() )]))) @@ -1367,7 +1684,7 @@ fn parse_use() { for &quote in &quote_styles { // Test single identifier with different type of 
quotes assert_eq!( - ms().verified_stmt(&format!("USE {}{}{}", quote, object_name, quote)), + ms().verified_stmt(&format!("USE {quote}{object_name}{quote}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::with_quote( quote, object_name.to_string(), @@ -1556,7 +1873,6 @@ fn parse_create_table_with_valid_options() { span: Span::empty(), }, data_type: Int(None,), - options: vec![], }, ColumnDef { @@ -1566,7 +1882,6 @@ fn parse_create_table_with_valid_options() { span: Span::empty(), }, data_type: Int(None,), - options: vec![], }, ], @@ -1578,19 +1893,13 @@ fn parse_create_table_with_valid_options() { storage: None, location: None, },), - table_properties: vec![], - with_options, file_format: None, location: None, query: None, without_rowid: false, like: None, clone: None, - engine: None, comment: None, - auto_increment_offset: None, - default_charset: None, - collation: None, on_commit: None, on_cluster: None, primary_key: None, @@ -1598,7 +1907,7 @@ fn parse_create_table_with_valid_options() { partition_by: None, cluster_by: None, clustered_by: None, - options: None, + inherits: None, strict: false, iceberg: false, copy_grants: false, @@ -1615,11 +1924,28 @@ fn parse_create_table_with_valid_options() { catalog: None, catalog_sync: None, storage_serialization_policy: None, + table_options: CreateTableOptions::With(with_options) }) ); } } +#[test] +fn parse_nested_slash_star_comment() { + let sql = r#" + select + /* + comment level 1 + /* + comment level 2 + */ + */ + 1; + "#; + let canonical = "SELECT 1"; + ms().one_statement_parses_to(sql, canonical); +} + #[test] fn parse_create_table_with_invalid_options() { let invalid_cases = vec![ @@ -1732,19 +2058,13 @@ fn parse_create_table_with_identity_column() { storage: None, location: None, },), - table_properties: vec![], - with_options: vec![], file_format: None, location: None, query: None, without_rowid: false, like: None, clone: None, - engine: None, comment: None, - auto_increment_offset: None, - 
default_charset: None, - collation: None, on_commit: None, on_cluster: None, primary_key: None, @@ -1752,7 +2072,7 @@ fn parse_create_table_with_identity_column() { partition_by: None, cluster_by: None, clustered_by: None, - options: None, + inherits: None, strict: false, copy_grants: false, enable_schema_evolution: None, @@ -1768,6 +2088,7 @@ fn parse_create_table_with_identity_column() { catalog: None, catalog_sync: None, storage_serialization_policy: None, + table_options: CreateTableOptions::None }), ); } @@ -1846,6 +2167,104 @@ fn parse_mssql_set_session_value() { ms().verified_stmt("SET ANSI_NULLS, ANSI_PADDING ON"); } +#[test] +fn parse_mssql_if_else() { + // Simple statements and blocks + ms().verified_stmt("IF 1 = 1 SELECT '1'; ELSE SELECT '2';"); + ms().verified_stmt("IF 1 = 1 BEGIN SET @A = 1; END ELSE SET @A = 2;"); + ms().verified_stmt( + "IF DATENAME(weekday, GETDATE()) IN (N'Saturday', N'Sunday') SELECT 'Weekend'; ELSE SELECT 'Weekday';" + ); + ms().verified_stmt( + "IF (SELECT COUNT(*) FROM a.b WHERE c LIKE 'x%') > 1 SELECT 'yes'; ELSE SELECT 'No';", + ); + + // Multiple statements + let stmts = ms() + .parse_sql_statements("DECLARE @A INT; IF 1=1 BEGIN SET @A = 1 END ELSE SET @A = 2") + .unwrap(); + match &stmts[..] { + [Statement::Declare { .. 
}, Statement::If(stmt)] => { + assert_eq!( + stmt.to_string(), + "IF 1 = 1 BEGIN SET @A = 1; END ELSE SET @A = 2;" + ); + } + _ => panic!("Unexpected statements: {stmts:?}"), + } +} + +#[test] +fn test_mssql_if_else_span() { + let sql = "IF 1 = 1 SELECT '1' ELSE SELECT '2'"; + let mut parser = Parser::new(&MsSqlDialect {}).try_with_sql(sql).unwrap(); + assert_eq!( + parser.parse_statement().unwrap().span(), + Span::new(Location::new(1, 1), Location::new(1, sql.len() as u64 + 1)) + ); +} + +#[test] +fn test_mssql_if_else_multiline_span() { + let sql_line1 = "IF 1 = 1"; + let sql_line2 = "SELECT '1'"; + let sql_line3 = "ELSE SELECT '2'"; + let sql = [sql_line1, sql_line2, sql_line3].join("\n"); + let mut parser = Parser::new(&MsSqlDialect {}).try_with_sql(&sql).unwrap(); + assert_eq!( + parser.parse_statement().unwrap().span(), + Span::new( + Location::new(1, 1), + Location::new(3, sql_line3.len() as u64 + 1) + ) + ); +} + +#[test] +fn test_mssql_if_statements_span() { + // Simple statements + let mut sql = "IF 1 = 1 SELECT '1' ELSE SELECT '2'"; + let mut parser = Parser::new(&MsSqlDialect {}).try_with_sql(sql).unwrap(); + match parser.parse_statement().unwrap() { + Statement::If(IfStatement { + if_block, + else_block: Some(else_block), + .. + }) => { + assert_eq!( + if_block.span(), + Span::new(Location::new(1, 1), Location::new(1, 20)) + ); + assert_eq!( + else_block.span(), + Span::new(Location::new(1, 21), Location::new(1, 36)) + ); + } + stmt => panic!("Unexpected statement: {stmt:?}"), + } + + // Blocks + sql = "IF 1 = 1 BEGIN SET @A = 1; END ELSE BEGIN SET @A = 2 END"; + parser = Parser::new(&MsSqlDialect {}).try_with_sql(sql).unwrap(); + match parser.parse_statement().unwrap() { + Statement::If(IfStatement { + if_block, + else_block: Some(else_block), + .. 
+ }) => { + assert_eq!( + if_block.span(), + Span::new(Location::new(1, 1), Location::new(1, 31)) + ); + assert_eq!( + else_block.span(), + Span::new(Location::new(1, 32), Location::new(1, 57)) + ); + } + stmt => panic!("Unexpected statement: {stmt:?}"), + } +} + #[test] fn parse_mssql_varbinary_max_length() { let sql = "CREATE TABLE example (var_binary_col VARBINARY(MAX))"; @@ -1899,9 +2318,192 @@ fn parse_mssql_varbinary_max_length() { } } +#[test] +fn parse_mssql_table_identifier_with_default_schema() { + ms().verified_stmt("SELECT * FROM mydatabase..MyTable"); +} + fn ms() -> TestedDialects { TestedDialects::new(vec![Box::new(MsSqlDialect {})]) } + +// MS SQL dialect with support for optional semi-colon statement delimiters +fn tsql() -> TestedDialects { + TestedDialects::new_with_options( + vec![Box::new(MsSqlDialect {})], + ParserOptions { + trailing_commas: false, + unescape: true, + require_semicolon_stmt_delimiter: false, + }, + ) +} + fn ms_and_generic() -> TestedDialects { TestedDialects::new(vec![Box::new(MsSqlDialect {}), Box::new(GenericDialect {})]) } + +#[test] +fn parse_mssql_merge_with_output() { + let stmt = "MERGE dso.products AS t \ + USING dsi.products AS \ + s ON s.ProductID = t.ProductID \ + WHEN MATCHED AND \ + NOT (t.ProductName = s.ProductName OR (ISNULL(t.ProductName, s.ProductName) IS NULL)) \ + THEN UPDATE SET t.ProductName = s.ProductName \ + WHEN NOT MATCHED BY TARGET \ + THEN INSERT (ProductID, ProductName) \ + VALUES (s.ProductID, s.ProductName) \ + WHEN NOT MATCHED BY SOURCE THEN DELETE \ + OUTPUT $action, deleted.ProductID INTO dsi.temp_products"; + ms_and_generic().verified_stmt(stmt); +} + +#[test] +fn parse_create_trigger() { + let create_trigger = "\ + CREATE OR ALTER TRIGGER reminder1 \ + ON Sales.Customer \ + AFTER INSERT, UPDATE \ + AS RAISERROR('Notify Customer Relations', 16, 10);\ + "; + let create_stmt = ms().verified_stmt(create_trigger); + assert_eq!( + create_stmt, + Statement::CreateTrigger { + or_alter: true, + 
or_replace: false, + is_constraint: false, + name: ObjectName::from(vec![Ident::new("reminder1")]), + period: TriggerPeriod::After, + events: vec![TriggerEvent::Insert, TriggerEvent::Update(vec![]),], + table_name: ObjectName::from(vec![Ident::new("Sales"), Ident::new("Customer")]), + referenced_table_name: None, + referencing: vec![], + trigger_object: TriggerObject::Statement, + include_each: false, + condition: None, + exec_body: None, + statements: Some(ConditionalStatements::Sequence { + statements: vec![Statement::RaisError { + message: Box::new(Expr::Value( + (Value::SingleQuotedString("Notify Customer Relations".to_string())) + .with_empty_span() + )), + severity: Box::new(Expr::Value( + (Value::Number("16".parse().unwrap(), false)).with_empty_span() + )), + state: Box::new(Expr::Value( + (Value::Number("10".parse().unwrap(), false)).with_empty_span() + )), + arguments: vec![], + options: vec![], + }], + }), + characteristics: None, + } + ); + + let multi_statement_as_trigger = "\ + CREATE TRIGGER some_trigger ON some_table FOR INSERT \ + AS \ + DECLARE @var INT; \ + RAISERROR('Trigger fired', 10, 1);\ + "; + let _ = ms().verified_stmt(multi_statement_as_trigger); + + let multi_statement_trigger = "\ + CREATE TRIGGER some_trigger ON some_table FOR INSERT \ + AS \ + BEGIN \ + DECLARE @var INT; \ + RAISERROR('Trigger fired', 10, 1); \ + END\ + "; + let _ = ms().verified_stmt(multi_statement_trigger); + + let create_trigger_with_return = "\ + CREATE TRIGGER some_trigger ON some_table FOR INSERT \ + AS \ + BEGIN \ + RETURN; \ + END\ + "; + let _ = ms().verified_stmt(create_trigger_with_return); + + let create_trigger_with_return = "\ + CREATE TRIGGER some_trigger ON some_table FOR INSERT \ + AS \ + BEGIN \ + RETURN; \ + END\ + "; + let _ = ms().verified_stmt(create_trigger_with_return); + + let create_trigger_with_conditional = "\ + CREATE TRIGGER some_trigger ON some_table FOR INSERT \ + AS \ + BEGIN \ + IF 1 = 2 \ + BEGIN \ + RAISERROR('Trigger fired', 10, 
1); \ + END; \ + RETURN; \ + END\ + "; + let _ = ms().verified_stmt(create_trigger_with_conditional); +} + +#[test] +fn parse_drop_trigger() { + let sql_drop_trigger = "DROP TRIGGER emp_stamp;"; + let drop_stmt = ms().one_statement_parses_to(sql_drop_trigger, ""); + assert_eq!( + drop_stmt, + Statement::DropTrigger { + if_exists: false, + trigger_name: ObjectName::from(vec![Ident::new("emp_stamp")]), + table_name: None, + option: None, + } + ); +} + +#[test] +fn parse_print() { + let print_string_literal = "PRINT 'Hello, world!'"; + let print_stmt = ms().verified_stmt(print_string_literal); + assert_eq!( + print_stmt, + Statement::Print(PrintStatement { + message: Box::new(Expr::Value( + (Value::SingleQuotedString("Hello, world!".to_string())).with_empty_span() + )), + }) + ); + + let _ = ms().verified_stmt("PRINT N'Hello, ⛄️!'"); + let _ = ms().verified_stmt("PRINT @my_variable"); +} + +#[test] +fn parse_mssql_grant() { + ms().verified_stmt("GRANT SELECT ON my_table TO public, db_admin"); +} + +#[test] +fn parse_mssql_deny() { + ms().verified_stmt("DENY SELECT ON my_table TO public, db_admin"); +} + +#[test] +fn test_tsql_no_semicolon_delimiter() { + let sql = r#" +DECLARE @X AS NVARCHAR(MAX)='x' +DECLARE @Y AS NVARCHAR(MAX)='y' + "#; + + let stmts = tsql().parse_sql_statements(sql).unwrap(); + assert_eq!(stmts.len(), 2); + assert!(stmts.iter().all(|s| matches!(s, Statement::Declare { .. 
}))); +} diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 15f79b4c..9068ed9c 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -593,7 +593,7 @@ fn parse_use() { for object_name in &valid_object_names { // Test single identifier without quotes assert_eq!( - mysql_and_generic().verified_stmt(&format!("USE {}", object_name)), + mysql_and_generic().verified_stmt(&format!("USE {object_name}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::new( object_name.to_string() )]))) @@ -601,8 +601,7 @@ fn parse_use() { for "e in "e_styles { // Test single identifier with different type of quotes assert_eq!( - mysql_and_generic() - .verified_stmt(&format!("USE {}{}{}", quote, object_name, quote)), + mysql_and_generic().verified_stmt(&format!("USE {quote}{object_name}{quote}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::with_quote( quote, object_name.to_string(), @@ -617,12 +616,12 @@ fn parse_set_variables() { mysql_and_generic().verified_stmt("SET sql_mode = CONCAT(@@sql_mode, ',STRICT_TRANS_TABLES')"); assert_eq!( mysql_and_generic().verified_stmt("SET LOCAL autocommit = 1"), - Statement::SetVariable { - local: true, + Statement::Set(Set::SingleAssignment { + scope: Some(ContextModifier::Local), hivevar: false, - variables: OneOrManyWithParens::One(ObjectName::from(vec!["autocommit".into()])), - value: vec![Expr::value(number("1"))], - } + variable: ObjectName::from(vec!["autocommit".into()]), + values: vec![Expr::value(number("1"))], + }) ); } @@ -670,6 +669,20 @@ fn table_constraint_unique_primary_ctor( characteristics: Option, unique_index_type_display: Option, ) -> TableConstraint { + let columns = columns + .into_iter() + .map(|ident| IndexColumn { + column: OrderByExpr { + expr: Expr::Identifier(ident), + options: OrderByOptions { + asc: None, + nulls_first: None, + }, + with_fill: None, + }, + operator_class: None, + }) + .collect(); match unique_index_type_display { Some(index_type_display) => 
TableConstraint::Unique { name, @@ -795,6 +808,67 @@ fn parse_create_table_primary_and_unique_key_with_index_options() { } } +#[test] +fn parse_prefix_key_part() { + let expected = vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::value( + number("10"), + )))]; + for sql in [ + "CREATE INDEX idx_index ON t(textcol(10))", + "ALTER TABLE tab ADD INDEX idx_index (textcol(10))", + "ALTER TABLE tab ADD PRIMARY KEY (textcol(10))", + "ALTER TABLE tab ADD UNIQUE KEY (textcol(10))", + "ALTER TABLE tab ADD UNIQUE KEY (textcol(10))", + "ALTER TABLE tab ADD FULLTEXT INDEX (textcol(10))", + "CREATE TABLE t (textcol TEXT, INDEX idx_index (textcol(10)))", + ] { + match index_column(mysql_and_generic().verified_stmt(sql)) { + Expr::Function(Function { + name, + args: FunctionArguments::List(FunctionArgumentList { args, .. }), + .. + }) => { + assert_eq!(name.to_string(), "textcol"); + assert_eq!(args, expected); + } + expr => panic!("unexpected expression {expr} for {sql}"), + } + } +} + +#[test] +fn test_functional_key_part() { + assert_eq!( + index_column( + mysql_and_generic() + .verified_stmt("CREATE INDEX idx_index ON t((col COLLATE utf8mb4_bin) DESC)") + ), + Expr::Nested(Box::new(Expr::Collate { + expr: Box::new(Expr::Identifier("col".into())), + collation: ObjectName(vec![sqlparser::ast::ObjectNamePart::Identifier( + Ident::new("utf8mb4_bin") + )]), + })) + ); + assert_eq!( + index_column(mysql_and_generic().verified_stmt( + r#"CREATE TABLE t (jsoncol JSON, PRIMARY KEY ((CAST(col ->> '$.id' AS UNSIGNED)) ASC))"# + )), + Expr::Nested(Box::new(Expr::Cast { + kind: CastKind::Cast, + expr: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col"))), + op: BinaryOperator::LongArrow, + right: Box::new(Expr::Value( + Value::SingleQuotedString("$.id".to_string()).with_empty_span() + )), + }), + data_type: DataType::Unsigned, + format: None, + })), + ); +} + #[test] fn parse_create_table_primary_and_unique_key_with_index_type() { let sqls = ["UNIQUE", 
"PRIMARY KEY"].map(|key_ty| { @@ -848,9 +922,23 @@ fn parse_create_table_comment() { for sql in [without_equal, with_equal] { match mysql().verified_stmt(sql) { - Statement::CreateTable(CreateTable { name, comment, .. }) => { + Statement::CreateTable(CreateTable { + name, + table_options, + .. + }) => { assert_eq!(name.to_string(), "foo"); - assert_eq!(comment.expect("Should exist").to_string(), "baz"); + + let plain_options = match table_options { + CreateTableOptions::Plain(options) => options, + _ => unreachable!(), + }; + let comment = match plain_options.first().unwrap() { + SqlOption::Comment(CommentDef::WithEq(c)) + | SqlOption::Comment(CommentDef::WithoutEq(c)) => c, + _ => unreachable!(), + }; + assert_eq!(comment, "baz"); } _ => unreachable!(), } @@ -859,29 +947,226 @@ fn parse_create_table_comment() { #[test] fn parse_create_table_auto_increment_offset() { - let canonical = - "CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) ENGINE=InnoDB AUTO_INCREMENT 123"; - let with_equal = - "CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) ENGINE=InnoDB AUTO_INCREMENT=123"; + let sql = + "CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) ENGINE = InnoDB AUTO_INCREMENT = 123"; - for sql in [canonical, with_equal] { - match mysql().one_statement_parses_to(sql, canonical) { + match mysql().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + table_options, + .. 
+ }) => { + assert_eq!(name.to_string(), "foo"); + + let plain_options = match table_options { + CreateTableOptions::Plain(options) => options, + _ => unreachable!(), + }; + + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("AUTO_INCREMENT"), + value: Expr::Value(test_utils::number("123").with_empty_span()) + })); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_table_multiple_options_order_independent() { + let sql1 = "CREATE TABLE mytable (id INT) ENGINE=InnoDB ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=8 COMMENT='abc'"; + let sql2 = "CREATE TABLE mytable (id INT) KEY_BLOCK_SIZE=8 COMMENT='abc' ENGINE=InnoDB ROW_FORMAT=DYNAMIC"; + let sql3 = "CREATE TABLE mytable (id INT) ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=8 COMMENT='abc' ENGINE=InnoDB"; + + for sql in [sql1, sql2, sql3] { + match mysql().parse_sql_statements(sql).unwrap().pop().unwrap() { Statement::CreateTable(CreateTable { name, - auto_increment_offset, + table_options, .. }) => { - assert_eq!(name.to_string(), "foo"); - assert_eq!( - auto_increment_offset.expect("Should exist").to_string(), - "123" - ); + assert_eq!(name.to_string(), "mytable"); + + let plain_options = match table_options { + CreateTableOptions::Plain(options) => options, + _ => unreachable!(), + }; + + assert!(plain_options.contains(&SqlOption::NamedParenthesizedList( + NamedParenthesizedList { + key: Ident::new("ENGINE"), + name: Some(Ident::new("InnoDB")), + values: vec![] + } + ))); + + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("KEY_BLOCK_SIZE"), + value: Expr::Value(test_utils::number("8").with_empty_span()) + })); + + assert!(plain_options + .contains(&SqlOption::Comment(CommentDef::WithEq("abc".to_owned())))); + + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("ROW_FORMAT"), + value: Expr::Identifier(Ident::new("DYNAMIC".to_owned())) + })); } _ => unreachable!(), } } } +#[test] +fn parse_create_table_with_all_table_options() { + let sql = + "CREATE TABLE 
foo (bar INT NOT NULL AUTO_INCREMENT) ENGINE = InnoDB AUTO_INCREMENT = 123 DEFAULT CHARSET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci INSERT_METHOD = FIRST KEY_BLOCK_SIZE = 8 ROW_FORMAT = DYNAMIC DATA DIRECTORY = '/var/lib/mysql/data' INDEX DIRECTORY = '/var/lib/mysql/index' PACK_KEYS = 1 STATS_AUTO_RECALC = 1 STATS_PERSISTENT = 0 STATS_SAMPLE_PAGES = 128 DELAY_KEY_WRITE = 1 COMPRESSION = 'ZLIB' ENCRYPTION = 'Y' MAX_ROWS = 10000 MIN_ROWS = 10 AUTOEXTEND_SIZE = 64 AVG_ROW_LENGTH = 128 CHECKSUM = 1 CONNECTION = 'mysql://localhost' ENGINE_ATTRIBUTE = 'primary' PASSWORD = 'secure_password' SECONDARY_ENGINE_ATTRIBUTE = 'secondary_attr' START TRANSACTION TABLESPACE my_tablespace STORAGE DISK UNION = (table1, table2, table3)"; + + match mysql().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + table_options, + .. + }) => { + assert_eq!(name, vec![Ident::new("foo".to_owned())].into()); + + let plain_options = match table_options { + CreateTableOptions::Plain(options) => options, + _ => unreachable!(), + }; + + assert!(plain_options.contains(&SqlOption::NamedParenthesizedList( + NamedParenthesizedList { + key: Ident::new("ENGINE"), + name: Some(Ident::new("InnoDB")), + values: vec![] + } + ))); + + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("COLLATE"), + value: Expr::Identifier(Ident::new("utf8mb4_0900_ai_ci".to_owned())) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("DEFAULT CHARSET"), + value: Expr::Identifier(Ident::new("utf8mb4".to_owned())) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("AUTO_INCREMENT"), + value: Expr::value(test_utils::number("123")) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("KEY_BLOCK_SIZE"), + value: Expr::value(test_utils::number("8")) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("ROW_FORMAT"), + value: Expr::Identifier(Ident::new("DYNAMIC".to_owned())) + })); + 
assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("PACK_KEYS"), + value: Expr::value(test_utils::number("1")) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("STATS_AUTO_RECALC"), + value: Expr::value(test_utils::number("1")) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("STATS_PERSISTENT"), + value: Expr::value(test_utils::number("0")) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("STATS_SAMPLE_PAGES"), + value: Expr::value(test_utils::number("128")) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("STATS_SAMPLE_PAGES"), + value: Expr::value(test_utils::number("128")) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("INSERT_METHOD"), + value: Expr::Identifier(Ident::new("FIRST".to_owned())) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("COMPRESSION"), + value: Expr::value(Value::SingleQuotedString("ZLIB".to_owned())) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("ENCRYPTION"), + value: Expr::value(Value::SingleQuotedString("Y".to_owned())) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("MAX_ROWS"), + value: Expr::value(test_utils::number("10000")) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("MIN_ROWS"), + value: Expr::value(test_utils::number("10")) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("AUTOEXTEND_SIZE"), + value: Expr::value(test_utils::number("64")) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("AVG_ROW_LENGTH"), + value: Expr::value(test_utils::number("128")) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("CHECKSUM"), + value: Expr::value(test_utils::number("1")) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + 
key: Ident::new("CONNECTION"), + value: Expr::value(Value::SingleQuotedString("mysql://localhost".to_owned())) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("ENGINE_ATTRIBUTE"), + value: Expr::value(Value::SingleQuotedString("primary".to_owned())) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("PASSWORD"), + value: Expr::value(Value::SingleQuotedString("secure_password".to_owned())) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("SECONDARY_ENGINE_ATTRIBUTE"), + value: Expr::value(Value::SingleQuotedString("secondary_attr".to_owned())) + })); + assert!(plain_options.contains(&SqlOption::Ident(Ident::new( + "START TRANSACTION".to_owned() + )))); + assert!( + plain_options.contains(&SqlOption::TableSpace(TablespaceOption { + name: "my_tablespace".to_string(), + storage: Some(StorageType::Disk), + })) + ); + + assert!(plain_options.contains(&SqlOption::NamedParenthesizedList( + NamedParenthesizedList { + key: Ident::new("UNION"), + name: None, + values: vec![ + Ident::new("table1".to_string()), + Ident::new("table2".to_string()), + Ident::new("table3".to_string()) + ] + } + ))); + + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("DATA DIRECTORY"), + value: Expr::value(Value::SingleQuotedString("/var/lib/mysql/data".to_owned())) + })); + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("INDEX DIRECTORY"), + value: Expr::value(Value::SingleQuotedString("/var/lib/mysql/index".to_owned())) + })); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_table_set_enum() { let sql = "CREATE TABLE foo (bar SET('a', 'b'), baz ENUM('a', 'b'))"; @@ -916,13 +1201,12 @@ fn parse_create_table_set_enum() { #[test] fn parse_create_table_engine_default_charset() { - let sql = "CREATE TABLE foo (id INT(11)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3"; + let sql = "CREATE TABLE foo (id INT(11)) ENGINE = InnoDB DEFAULT CHARSET = utf8mb3"; 
match mysql().verified_stmt(sql) { Statement::CreateTable(CreateTable { name, columns, - engine, - default_charset, + table_options, .. }) => { assert_eq!(name.to_string(), "foo"); @@ -934,14 +1218,24 @@ fn parse_create_table_engine_default_charset() { },], columns ); - assert_eq!( - engine, - Some(TableEngine { - name: "InnoDB".to_string(), - parameters: None - }) - ); - assert_eq!(default_charset, Some("utf8mb3".to_string())); + + let plain_options = match table_options { + CreateTableOptions::Plain(options) => options, + _ => unreachable!(), + }; + + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("DEFAULT CHARSET"), + value: Expr::Identifier(Ident::new("utf8mb3".to_owned())) + })); + + assert!(plain_options.contains(&SqlOption::NamedParenthesizedList( + NamedParenthesizedList { + key: Ident::new("ENGINE"), + name: Some(Ident::new("InnoDB")), + values: vec![] + } + ))); } _ => unreachable!(), } @@ -949,12 +1243,12 @@ fn parse_create_table_engine_default_charset() { #[test] fn parse_create_table_collate() { - let sql = "CREATE TABLE foo (id INT(11)) COLLATE=utf8mb4_0900_ai_ci"; + let sql = "CREATE TABLE foo (id INT(11)) COLLATE = utf8mb4_0900_ai_ci"; match mysql().verified_stmt(sql) { Statement::CreateTable(CreateTable { name, columns, - collation, + table_options, .. 
}) => { assert_eq!(name.to_string(), "foo"); @@ -966,7 +1260,16 @@ fn parse_create_table_collate() { },], columns ); - assert_eq!(collation, Some("utf8mb4_0900_ai_ci".to_string())); + + let plain_options = match table_options { + CreateTableOptions::Plain(options) => options, + _ => unreachable!(), + }; + + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("COLLATE"), + value: Expr::Identifier(Ident::new("utf8mb4_0900_ai_ci".to_owned())) + })); } _ => unreachable!(), } @@ -974,16 +1277,26 @@ fn parse_create_table_collate() { #[test] fn parse_create_table_both_options_and_as_query() { - let sql = "CREATE TABLE foo (id INT(11)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb4_0900_ai_ci AS SELECT 1"; + let sql = "CREATE TABLE foo (id INT(11)) ENGINE = InnoDB DEFAULT CHARSET = utf8mb3 COLLATE = utf8mb4_0900_ai_ci AS SELECT 1"; match mysql_and_generic().verified_stmt(sql) { Statement::CreateTable(CreateTable { name, - collation, query, + table_options, .. }) => { assert_eq!(name.to_string(), "foo"); - assert_eq!(collation, Some("utf8mb4_0900_ai_ci".to_string())); + + let plain_options = match table_options { + CreateTableOptions::Plain(options) => options, + _ => unreachable!(), + }; + + assert!(plain_options.contains(&SqlOption::KeyValue { + key: Ident::new("COLLATE"), + value: Expr::Identifier(Ident::new("utf8mb4_0900_ai_ci".to_owned())) + })); + assert_eq!( query.unwrap().body.as_select().unwrap().projection, vec![SelectItem::UnnamedExpr(Expr::Value( @@ -994,7 +1307,8 @@ fn parse_create_table_both_options_and_as_query() { _ => unreachable!(), } - let sql = r"CREATE TABLE foo (id INT(11)) ENGINE=InnoDB AS SELECT 1 DEFAULT CHARSET=utf8mb3"; + let sql = + r"CREATE TABLE foo (id INT(11)) ENGINE = InnoDB AS SELECT 1 DEFAULT CHARSET = utf8mb3"; assert!(matches!( mysql_and_generic().parse_sql_statements(sql), Err(ParserError::ParserError(_)) @@ -1089,6 +1403,7 @@ fn parse_escaped_quote_identifiers_with_escape() { quote_style: Some('`'), span: 
Span::empty(), }))], + exclude: None, into: None, from: vec![], lateral_views: vec![], @@ -1107,14 +1422,13 @@ fn parse_escaped_quote_identifiers_with_escape() { flavor: SelectFlavor::Standard, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })) ); } @@ -1128,6 +1442,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { ParserOptions { trailing_commas: false, unescape: false, + require_semicolon_stmt_delimiter: true, } ) .verified_stmt(sql), @@ -1143,6 +1458,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { quote_style: Some('`'), span: Span::empty(), }))], + exclude: None, into: None, from: vec![], lateral_views: vec![], @@ -1161,14 +1477,13 @@ fn parse_escaped_quote_identifiers_with_no_escape() { flavor: SelectFlavor::Standard, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })) ); } @@ -1191,6 +1506,7 @@ fn parse_escaped_backticks_with_escape() { quote_style: Some('`'), span: Span::empty(), }))], + exclude: None, into: None, from: vec![], lateral_views: vec![], @@ -1209,14 +1525,13 @@ fn parse_escaped_backticks_with_escape() { flavor: SelectFlavor::Standard, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })) ); } @@ -1243,6 +1558,7 @@ fn parse_escaped_backticks_with_no_escape() { quote_style: Some('`'), span: Span::empty(), }))], + exclude: None, into: None, from: vec![], lateral_views: vec![], @@ -1261,14 +1577,13 @@ fn parse_escaped_backticks_with_no_escape() { flavor: SelectFlavor::Standard, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, 
fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })) ); } @@ -1438,14 +1753,13 @@ fn parse_simple_insert() { ] })), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1488,14 +1802,13 @@ fn parse_ignore_insert() { ]] })), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1538,14 +1851,13 @@ fn parse_priority_insert() { ]] })), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1585,14 +1897,13 @@ fn parse_priority_insert() { ]] })), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1634,14 +1945,13 @@ fn parse_insert_as() { )]] })), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1698,14 +2008,13 @@ fn parse_insert_as() { ]] })), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1749,14 +2058,13 @@ fn parse_replace_insert() { ]] })), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: 
vec![], })), source ); @@ -1791,14 +2099,13 @@ fn parse_empty_row_insert() { rows: vec![vec![], vec![]] })), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1857,14 +2164,13 @@ fn parse_insert_with_on_duplicate_update() { ]] })), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), source ); @@ -1924,6 +2230,7 @@ fn parse_select_with_numeric_prefix_column_name() { projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident::new( "123col_$@123abc" )))], + exclude: None, into: None, from: vec![TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident::with_quote( @@ -1952,6 +2259,128 @@ fn parse_select_with_numeric_prefix_column_name() { } } +#[test] +fn parse_qualified_identifiers_with_numeric_prefix() { + // Case 1: Qualified column name that starts with digits. + match mysql().verified_stmt("SELECT t.15to29 FROM my_table AS t") { + Statement::Query(q) => match *q.body { + SetExpr::Select(s) => match s.projection.last() { + Some(SelectItem::UnnamedExpr(Expr::CompoundIdentifier(parts))) => { + assert_eq!(&[Ident::new("t"), Ident::new("15to29")], &parts[..]); + } + proj => panic!("Unexpected projection: {proj:?}"), + }, + body => panic!("Unexpected statement body: {body:?}"), + }, + stmt => panic!("Unexpected statement: {stmt:?}"), + } + + // Case 2: Qualified column name that starts with digits and on its own represents a number. 
+ match mysql().verified_stmt("SELECT t.15e29 FROM my_table AS t") { + Statement::Query(q) => match *q.body { + SetExpr::Select(s) => match s.projection.last() { + Some(SelectItem::UnnamedExpr(Expr::CompoundIdentifier(parts))) => { + assert_eq!(&[Ident::new("t"), Ident::new("15e29")], &parts[..]); + } + proj => panic!("Unexpected projection: {proj:?}"), + }, + body => panic!("Unexpected statement body: {body:?}"), + }, + stmt => panic!("Unexpected statement: {stmt:?}"), + } + + // Case 3: Unqualified, the same token is parsed as a number. + match mysql() + .parse_sql_statements("SELECT 15e29 FROM my_table") + .unwrap() + .pop() + { + Some(Statement::Query(q)) => match *q.body { + SetExpr::Select(s) => match s.projection.last() { + Some(SelectItem::UnnamedExpr(Expr::Value(ValueWithSpan { value, .. }))) => { + assert_eq!(&number("15e29"), value); + } + proj => panic!("Unexpected projection: {proj:?}"), + }, + body => panic!("Unexpected statement body: {body:?}"), + }, + stmt => panic!("Unexpected statement: {stmt:?}"), + } + + // Case 4: Quoted simple identifier. + match mysql().verified_stmt("SELECT `15e29` FROM my_table") { + Statement::Query(q) => match *q.body { + SetExpr::Select(s) => match s.projection.last() { + Some(SelectItem::UnnamedExpr(Expr::Identifier(name))) => { + assert_eq!(&Ident::with_quote('`', "15e29"), name); + } + proj => panic!("Unexpected projection: {proj:?}"), + }, + body => panic!("Unexpected statement body: {body:?}"), + }, + stmt => panic!("Unexpected statement: {stmt:?}"), + } + + // Case 5: Quoted compound identifier. + match mysql().verified_stmt("SELECT t.`15e29` FROM my_table AS t") { + Statement::Query(q) => match *q.body { + SetExpr::Select(s) => match s.projection.last() { + Some(SelectItem::UnnamedExpr(Expr::CompoundIdentifier(parts))) => { + assert_eq!( + &[Ident::new("t"), Ident::with_quote('`', "15e29")], + &parts[..] 
+ ); + } + proj => panic!("Unexpected projection: {proj:?}"), + }, + body => panic!("Unexpected statement body: {body:?}"), + }, + stmt => panic!("Unexpected statement: {stmt:?}"), + } + + // Case 6: Multi-level compound identifiers. + match mysql().verified_stmt("SELECT 1db.1table.1column") { + Statement::Query(q) => match *q.body { + SetExpr::Select(s) => match s.projection.last() { + Some(SelectItem::UnnamedExpr(Expr::CompoundIdentifier(parts))) => { + assert_eq!( + &[ + Ident::new("1db"), + Ident::new("1table"), + Ident::new("1column") + ], + &parts[..] + ); + } + proj => panic!("Unexpected projection: {proj:?}"), + }, + body => panic!("Unexpected statement body: {body:?}"), + }, + stmt => panic!("Unexpected statement: {stmt:?}"), + } + + // Case 7: Multi-level compound quoted identifiers. + match mysql().verified_stmt("SELECT `1`.`2`.`3`") { + Statement::Query(q) => match *q.body { + SetExpr::Select(s) => match s.projection.last() { + Some(SelectItem::UnnamedExpr(Expr::CompoundIdentifier(parts))) => { + assert_eq!( + &[ + Ident::with_quote('`', "1"), + Ident::with_quote('`', "2"), + Ident::with_quote('`', "3") + ], + &parts[..] 
+ ); + } + proj => panic!("Unexpected projection: {proj:?}"), + }, + body => panic!("Unexpected statement body: {body:?}"), + }, + stmt => panic!("Unexpected statement: {stmt:?}"), + } +} + // Don't run with bigdecimal as it fails like this on rust beta: // // 'parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column' @@ -1969,7 +2398,6 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { q.body, Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - distinct: None, top: None, top_before_distinct: false, @@ -1977,6 +2405,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { SelectItem::UnnamedExpr(Expr::value(number("123e4"))), SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("123col_$@123abc"))) ], + exclude: None, into: None, from: vec![TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident::with_quote( @@ -2158,11 +2587,13 @@ fn parse_alter_table_add_column() { if_exists, only, operations, + iceberg, location: _, on_cluster: _, } => { assert_eq!(name.to_string(), "tab"); assert!(!if_exists); + assert!(!iceberg); assert!(!only); assert_eq!( operations, @@ -2187,8 +2618,7 @@ fn parse_alter_table_add_column() { if_exists, only, operations, - location: _, - on_cluster: _, + .. } => { assert_eq!(name.to_string(), "tab"); assert!(!if_exists); @@ -2225,8 +2655,7 @@ fn parse_alter_table_add_columns() { if_exists, only, operations, - location: _, - on_cluster: _, + .. 
} => { assert_eq!(name.to_string(), "tab"); assert!(!if_exists); @@ -2273,6 +2702,16 @@ fn parse_alter_table_drop_primary_key() { ); } +#[test] +fn parse_alter_table_drop_foreign_key() { + assert_matches!( + alter_table_op( + mysql_and_generic().verified_stmt("ALTER TABLE tab DROP FOREIGN KEY foo_ibfk_1") + ), + AlterTableOperation::DropForeignKey { name } if name.value == "foo_ibfk_1" + ); +} + #[test] fn parse_alter_table_change_column() { let expected_name = ObjectName::from(vec![Ident::new("orders")]); @@ -2442,7 +2881,8 @@ fn parse_alter_table_with_algorithm() { operations, vec![ AlterTableOperation::DropColumn { - column_name: Ident::new("password_digest"), + has_column_keyword: true, + column_names: vec![Ident::new("password_digest")], if_exists: false, drop_behavior: None, }, @@ -2470,6 +2910,53 @@ fn parse_alter_table_with_algorithm() { mysql_and_generic().verified_stmt("ALTER TABLE `users` ALGORITHM = COPY"); } +#[test] +fn parse_alter_table_with_lock() { + let sql = "ALTER TABLE tab LOCK = SHARED"; + let expected_operation = AlterTableOperation::Lock { + equals: true, + lock: AlterTableLock::Shared, + }; + let operation = alter_table_op(mysql_and_generic().verified_stmt(sql)); + assert_eq!(expected_operation, operation); + + let sql = + "ALTER TABLE users DROP COLUMN password_digest, LOCK = EXCLUSIVE, RENAME COLUMN name TO username"; + let stmt = mysql_and_generic().verified_stmt(sql); + match stmt { + Statement::AlterTable { operations, .. 
} => { + assert_eq!( + operations, + vec![ + AlterTableOperation::DropColumn { + has_column_keyword: true, + column_names: vec![Ident::new("password_digest")], + if_exists: false, + drop_behavior: None, + }, + AlterTableOperation::Lock { + equals: true, + lock: AlterTableLock::Exclusive, + }, + AlterTableOperation::RenameColumn { + old_column_name: Ident::new("name"), + new_column_name: Ident::new("username") + }, + ] + ) + } + _ => panic!("Unexpected statement {stmt}"), + } + mysql_and_generic().verified_stmt("ALTER TABLE `users` LOCK DEFAULT"); + mysql_and_generic().verified_stmt("ALTER TABLE `users` LOCK SHARED"); + mysql_and_generic().verified_stmt("ALTER TABLE `users` LOCK NONE"); + mysql_and_generic().verified_stmt("ALTER TABLE `users` LOCK EXCLUSIVE"); + mysql_and_generic().verified_stmt("ALTER TABLE `users` LOCK = DEFAULT"); + mysql_and_generic().verified_stmt("ALTER TABLE `users` LOCK = SHARED"); + mysql_and_generic().verified_stmt("ALTER TABLE `users` LOCK = NONE"); + mysql_and_generic().verified_stmt("ALTER TABLE `users` LOCK = EXCLUSIVE"); +} + #[test] fn parse_alter_table_auto_increment() { let sql = "ALTER TABLE tab AUTO_INCREMENT = 42"; @@ -2560,7 +3047,9 @@ fn parse_substring_in_select() { (number("1")).with_empty_span() ))), special: true, + shorthand: false, })], + exclude: None, into: None, from: vec![TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident { @@ -2586,14 +3075,13 @@ fn parse_substring_in_select() { flavor: SelectFlavor::Standard, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], }), query ); @@ -2627,6 +3115,17 @@ fn parse_rlike_and_regexp() { } } +#[test] +fn parse_like_with_escape() { + // verify backslash is not stripped for escaped wildcards + mysql().verified_only_select(r#"SELECT 'a\%c' LIKE 'a\%c'"#); + mysql().verified_only_select(r#"SELECT 'a\_c' LIKE 
'a\_c'"#); + mysql().verified_only_select(r#"SELECT '%\_\%' LIKE '%\_\%'"#); + mysql().verified_only_select(r#"SELECT '\_\%' LIKE CONCAT('\_', '\%')"#); + mysql().verified_only_select(r#"SELECT 'a%c' LIKE 'a$%c' ESCAPE '$'"#); + mysql().verified_only_select(r#"SELECT 'a_c' LIKE 'a#_c' ESCAPE '#'"#); +} + #[test] fn parse_kill() { let stmt = mysql_and_generic().verified_stmt("KILL CONNECTION 5"); @@ -2684,19 +3183,19 @@ fn parse_set_names() { let stmt = mysql_and_generic().verified_stmt("SET NAMES utf8mb4"); assert_eq!( stmt, - Statement::SetNames { - charset_name: "utf8mb4".to_string(), + Statement::Set(Set::SetNames { + charset_name: "utf8mb4".into(), collation_name: None, - } + }) ); let stmt = mysql_and_generic().verified_stmt("SET NAMES utf8mb4 COLLATE bogus"); assert_eq!( stmt, - Statement::SetNames { - charset_name: "utf8mb4".to_string(), + Statement::Set(Set::SetNames { + charset_name: "utf8mb4".into(), collation_name: Some("bogus".to_string()), - } + }) ); let stmt = mysql_and_generic() @@ -2704,22 +3203,20 @@ fn parse_set_names() { .unwrap(); assert_eq!( stmt, - vec![Statement::SetNames { - charset_name: "utf8mb4".to_string(), + vec![Statement::Set(Set::SetNames { + charset_name: "utf8mb4".into(), collation_name: Some("bogus".to_string()), - }] + })] ); let stmt = mysql_and_generic().verified_stmt("SET NAMES DEFAULT"); - assert_eq!(stmt, Statement::SetNamesDefault {}); + assert_eq!(stmt, Statement::Set(Set::SetNamesDefault {})); } #[test] fn parse_limit_my_sql_syntax() { - mysql_and_generic().one_statement_parses_to( - "SELECT id, fname, lname FROM customer LIMIT 5, 10", - "SELECT id, fname, lname FROM customer LIMIT 10 OFFSET 5", - ); + mysql_and_generic().verified_stmt("SELECT id, fname, lname FROM customer LIMIT 10 OFFSET 5"); + mysql_and_generic().verified_stmt("SELECT id, fname, lname FROM customer LIMIT 5, 10"); mysql_and_generic().verified_stmt("SELECT * FROM user LIMIT ? 
OFFSET ?"); } @@ -2860,10 +3357,14 @@ fn parse_hex_string_introducer() { distinct: None, top: None, top_before_distinct: false, - projection: vec![SelectItem::UnnamedExpr(Expr::IntroducedString { - introducer: "_latin1".to_string(), - value: Value::HexStringLiteral("4D7953514C".to_string()) + projection: vec![SelectItem::UnnamedExpr(Expr::Prefixed { + prefix: Ident::from("_latin1"), + value: Expr::Value( + Value::HexStringLiteral("4D7953514C".to_string()).with_empty_span() + ) + .into(), })], + exclude: None, from: vec![], lateral_views: vec![], prewhere: None, @@ -2882,14 +3383,13 @@ fn parse_hex_string_introducer() { flavor: SelectFlavor::Standard, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })) ) } @@ -3122,7 +3622,9 @@ fn parse_grant() { objects, grantees, with_grant_option, + as_grantor: _, granted_by, + current_grants: _, } = stmt { assert_eq!( @@ -3328,6 +3830,11 @@ fn parse_begin_without_transaction() { mysql().verified_stmt("BEGIN"); } +#[test] +fn parse_geometric_types_srid_option() { + mysql_and_generic().verified_stmt("CREATE TABLE t (a geometry SRID 4326)"); +} + #[test] fn parse_double_precision() { mysql().verified_stmt("CREATE TABLE foo (bar DOUBLE)"); @@ -3363,6 +3870,7 @@ fn parse_create_trigger() { assert_eq!( create_stmt, Statement::CreateTrigger { + or_alter: false, or_replace: false, is_constraint: false, name: ObjectName::from(vec![Ident::new("emp_stamp")]), @@ -3374,13 +3882,14 @@ fn parse_create_trigger() { trigger_object: TriggerObject::Row, include_each: true, condition: None, - exec_body: TriggerExecBody { + exec_body: Some(TriggerExecBody { exec_type: TriggerExecBodyType::Function, func_desc: FunctionDesc { name: ObjectName::from(vec![Ident::new("emp_stamp")]), args: None, } - }, + }), + statements: None, characteristics: None, } ); @@ -3418,3 +3927,219 @@ fn 
parse_cast_integers() { .run_parser_method("CAST(foo AS UNSIGNED INTEGER(3))", |p| p.parse_expr()) .expect_err("CAST doesn't allow display width"); } + +#[test] +fn parse_match_against_with_alias() { + let sql = "SELECT tbl.ProjectID FROM surveys.tbl1 AS tbl WHERE MATCH (tbl.ReferenceID) AGAINST ('AAA' IN BOOLEAN MODE)"; + match mysql().verified_stmt(sql) { + Statement::Query(query) => match *query.body { + SetExpr::Select(select) => match select.selection { + Some(Expr::MatchAgainst { + columns, + match_value, + opt_search_modifier, + }) => { + assert_eq!( + columns, + vec![ObjectName::from(vec![ + Ident::new("tbl"), + Ident::new("ReferenceID") + ])] + ); + assert_eq!(match_value, Value::SingleQuotedString("AAA".to_owned())); + assert_eq!(opt_search_modifier, Some(SearchModifier::InBooleanMode)); + } + _ => unreachable!(), + }, + _ => unreachable!(), + }, + _ => unreachable!(), + } +} + +#[test] +fn test_variable_assignment_using_colon_equal() { + let sql_select = "SELECT @price := price, @tax := price * 0.1 FROM products WHERE id = 1"; + let stmt = mysql().verified_stmt(sql_select); + match stmt { + Statement::Query(query) => { + let select = query.body.as_select().unwrap(); + + assert_eq!( + select.projection, + vec![ + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "@price".to_string(), + quote_style: None, + span: Span::empty(), + })), + op: BinaryOperator::Assignment, + right: Box::new(Expr::Identifier(Ident { + value: "price".to_string(), + quote_style: None, + span: Span::empty(), + })), + }), + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "@tax".to_string(), + quote_style: None, + span: Span::empty(), + })), + op: BinaryOperator::Assignment, + right: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "price".to_string(), + quote_style: None, + span: Span::empty(), + })), + op: BinaryOperator::Multiply, + right: Box::new(Expr::Value( + 
(test_utils::number("0.1")).with_empty_span() + )), + }), + }), + ] + ); + + assert_eq!( + select.selection, + Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "id".to_string(), + quote_style: None, + span: Span::empty(), + })), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value((test_utils::number("1")).with_empty_span())), + }) + ); + } + _ => panic!("Unexpected statement {stmt}"), + } + + let sql_update = + "UPDATE products SET price = @new_price := price * 1.1 WHERE category = 'Books'"; + let stmt = mysql().verified_stmt(sql_update); + + match stmt { + Statement::Update { assignments, .. } => { + assert_eq!( + assignments, + vec![Assignment { + target: AssignmentTarget::ColumnName(ObjectName(vec![ + ObjectNamePart::Identifier(Ident { + value: "price".to_string(), + quote_style: None, + span: Span::empty(), + }) + ])), + value: Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "@new_price".to_string(), + quote_style: None, + span: Span::empty(), + })), + op: BinaryOperator::Assignment, + right: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "price".to_string(), + quote_style: None, + span: Span::empty(), + })), + op: BinaryOperator::Multiply, + right: Box::new(Expr::Value( + (test_utils::number("1.1")).with_empty_span() + )), + }), + }, + }] + ) + } + _ => panic!("Unexpected statement {stmt}"), + } +} + +#[test] +fn parse_straight_join() { + mysql().verified_stmt( + "SELECT a.*, b.* FROM table_a AS a STRAIGHT_JOIN table_b AS b ON a.b_id = b.id", + ); + // Without table alias + mysql() + .verified_stmt("SELECT a.*, b.* FROM table_a STRAIGHT_JOIN table_b AS b ON a.b_id = b.id"); +} + +#[test] +fn mysql_foreign_key_with_index_name() { + mysql().verified_stmt( + "CREATE TABLE orders (customer_id INT, INDEX idx_customer (customer_id), CONSTRAINT fk_customer FOREIGN KEY idx_customer (customer_id) REFERENCES customers(id))", + ); +} + +#[test] +fn parse_drop_index() { + let sql = "DROP INDEX 
idx_name ON table_name"; + match mysql().verified_stmt(sql) { + Statement::Drop { + object_type, + if_exists, + names, + cascade, + restrict, + purge, + temporary, + table, + } => { + assert!(!if_exists); + assert_eq!(ObjectType::Index, object_type); + assert_eq!( + vec!["idx_name"], + names.iter().map(ToString::to_string).collect::>() + ); + assert!(!cascade); + assert!(!restrict); + assert!(!purge); + assert!(!temporary); + assert!(table.is_some()); + assert_eq!("table_name", table.unwrap().to_string()); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_alter_table_drop_index() { + assert_matches!( + alter_table_op( + mysql_and_generic().verified_stmt("ALTER TABLE tab DROP INDEX idx_index") + ), + AlterTableOperation::DropIndex { name } if name.value == "idx_index" + ); +} + +#[test] +fn parse_json_member_of() { + mysql().verified_stmt(r#"SELECT 17 MEMBER OF('[23, "abc", 17, "ab", 10]')"#); + let sql = r#"SELECT 'ab' MEMBER OF('[23, "abc", 17, "ab", 10]')"#; + let stmt = mysql().verified_stmt(sql); + match stmt { + Statement::Query(query) => { + let select = query.body.as_select().unwrap(); + assert_eq!( + select.projection, + vec![SelectItem::UnnamedExpr(Expr::MemberOf(MemberOf { + value: Box::new(Expr::Value( + Value::SingleQuotedString("ab".to_string()).into() + )), + array: Box::new(Expr::Value( + Value::SingleQuotedString(r#"[23, "abc", 17, "ab", 10]"#.to_string()) + .into() + )), + }))] + ); + } + _ => panic!("Unexpected statement {stmt}"), + } +} diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 7508218f..0d1d138c 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -21,6 +21,7 @@ #[macro_use] mod test_utils; + use helpers::attached_token::AttachedToken; use sqlparser::tokenizer::Span; use test_utils::*; @@ -348,7 +349,7 @@ fn parse_create_table_with_defaults() { name, columns, constraints, - with_options, + table_options, if_not_exists: false, external: false, file_format: None, @@ -485,6 +486,11 @@ 
fn parse_create_table_with_defaults() { ] ); assert!(constraints.is_empty()); + + let with_options = match table_options { + CreateTableOptions::With(options) => options, + _ => unreachable!(), + }; assert_eq!( with_options, vec![ @@ -600,9 +606,10 @@ fn parse_alter_table_constraints_unique_nulls_distinct() { .verified_stmt("ALTER TABLE t ADD CONSTRAINT b UNIQUE NULLS NOT DISTINCT (c)") { Statement::AlterTable { operations, .. } => match &operations[0] { - AlterTableOperation::AddConstraint(TableConstraint::Unique { - nulls_distinct, .. - }) => { + AlterTableOperation::AddConstraint { + constraint: TableConstraint::Unique { nulls_distinct, .. }, + .. + } => { assert_eq!(nulls_distinct, &NullsDistinctOption::NotDistinct) } _ => unreachable!(), @@ -758,10 +765,7 @@ fn parse_drop_extension() { #[test] fn parse_alter_table_alter_column() { - pg().one_statement_parses_to( - "ALTER TABLE tab ALTER COLUMN is_active TYPE TEXT USING 'text'", - "ALTER TABLE tab ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'", - ); + pg().verified_stmt("ALTER TABLE tab ALTER COLUMN is_active TYPE TEXT USING 'text'"); match alter_table_op( pg().verified_stmt( @@ -777,6 +781,7 @@ fn parse_alter_table_alter_column() { AlterColumnOperation::SetDataType { data_type: DataType::Text, using: Some(using_expr), + had_set: true, } ); } @@ -828,8 +833,7 @@ fn parse_alter_table_add_columns() { if_exists, only, operations, - location: _, - on_cluster: _, + .. } => { assert_eq!(name.to_string(), "tab"); assert!(if_exists); @@ -909,8 +913,7 @@ fn parse_alter_table_owner_to() { if_exists: _, only: _, operations, - location: _, - on_cluster: _, + .. } => { assert_eq!(name.to_string(), "tab"); assert_eq!( @@ -988,6 +991,7 @@ fn parse_create_schema_if_not_exists() { Statement::CreateSchema { if_not_exists: true, schema_name, + .. 
} => assert_eq!("schema_name", schema_name.to_string()), _ => unreachable!(), } @@ -1301,6 +1305,7 @@ fn parse_copy_to() { }, } ], + exclude: None, into: None, from: vec![], lateral_views: vec![], @@ -1319,14 +1324,13 @@ fn parse_copy_to() { flavor: SelectFlavor::Standard, }))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), to: true, target: CopyTarget::File { @@ -1432,81 +1436,77 @@ fn parse_set() { let stmt = pg_and_generic().verified_stmt("SET a = b"); assert_eq!( stmt, - Statement::SetVariable { - local: false, + Statement::Set(Set::SingleAssignment { + scope: None, hivevar: false, - variables: OneOrManyWithParens::One(ObjectName::from(vec![Ident::new("a")])), - value: vec![Expr::Identifier(Ident { + variable: ObjectName::from(vec![Ident::new("a")]), + values: vec![Expr::Identifier(Ident { value: "b".into(), quote_style: None, span: Span::empty(), })], - } + }) ); let stmt = pg_and_generic().verified_stmt("SET a = 'b'"); assert_eq!( stmt, - Statement::SetVariable { - local: false, + Statement::Set(Set::SingleAssignment { + scope: None, hivevar: false, - variables: OneOrManyWithParens::One(ObjectName::from(vec![Ident::new("a")])), - value: vec![Expr::Value( + variable: ObjectName::from(vec![Ident::new("a")]), + values: vec![Expr::Value( (Value::SingleQuotedString("b".into())).with_empty_span() )], - } + }) ); let stmt = pg_and_generic().verified_stmt("SET a = 0"); assert_eq!( stmt, - Statement::SetVariable { - local: false, + Statement::Set(Set::SingleAssignment { + scope: None, hivevar: false, - variables: OneOrManyWithParens::One(ObjectName::from(vec![Ident::new("a")])), - value: vec![Expr::value(number("0"))], - } + variable: ObjectName::from(vec![Ident::new("a")]), + values: vec![Expr::value(number("0"))], + }) ); let stmt = pg_and_generic().verified_stmt("SET a = DEFAULT"); assert_eq!( stmt, - 
Statement::SetVariable { - local: false, + Statement::Set(Set::SingleAssignment { + scope: None, hivevar: false, - variables: OneOrManyWithParens::One(ObjectName::from(vec![Ident::new("a")])), - value: vec![Expr::Identifier(Ident::new("DEFAULT"))], - } + variable: ObjectName::from(vec![Ident::new("a")]), + values: vec![Expr::Identifier(Ident::new("DEFAULT"))], + }) ); let stmt = pg_and_generic().verified_stmt("SET LOCAL a = b"); assert_eq!( stmt, - Statement::SetVariable { - local: true, + Statement::Set(Set::SingleAssignment { + scope: Some(ContextModifier::Local), hivevar: false, - variables: OneOrManyWithParens::One(ObjectName::from(vec![Ident::new("a")])), - value: vec![Expr::Identifier("b".into())], - } + variable: ObjectName::from(vec![Ident::new("a")]), + values: vec![Expr::Identifier("b".into())], + }) ); let stmt = pg_and_generic().verified_stmt("SET a.b.c = b"); assert_eq!( stmt, - Statement::SetVariable { - local: false, + Statement::Set(Set::SingleAssignment { + scope: None, hivevar: false, - variables: OneOrManyWithParens::One(ObjectName::from(vec![ - Ident::new("a"), - Ident::new("b"), - Ident::new("c") - ])), - value: vec![Expr::Identifier(Ident { + variable: ObjectName::from(vec![Ident::new("a"), Ident::new("b"), Ident::new("c")]), + values: vec![Expr::Identifier(Ident { value: "b".into(), quote_style: None, span: Span::empty(), })], - } + }) ); let stmt = pg_and_generic().one_statement_parses_to( @@ -1515,22 +1515,21 @@ fn parse_set() { ); assert_eq!( stmt, - Statement::SetVariable { - local: false, + Statement::Set(Set::SingleAssignment { + scope: None, hivevar: false, - variables: OneOrManyWithParens::One(ObjectName::from(vec![ + variable: ObjectName::from(vec![ Ident::new("hive"), Ident::new("tez"), Ident::new("auto"), Ident::new("reducer"), Ident::new("parallelism") - ])), - value: vec![Expr::Value((Value::Boolean(false)).with_empty_span())], - } + ]), + values: vec![Expr::Value((Value::Boolean(false)).with_empty_span())], + }) ); 
pg_and_generic().one_statement_parses_to("SET a TO b", "SET a = b"); - pg_and_generic().one_statement_parses_to("SET SESSION a = b", "SET a = b"); assert_eq!( pg_and_generic().parse_sql_statements("SET"), @@ -1560,10 +1559,10 @@ fn parse_set_role() { let stmt = pg_and_generic().verified_stmt(query); assert_eq!( stmt, - Statement::SetRole { - context_modifier: ContextModifier::Session, + Statement::Set(Set::SetRole { + context_modifier: Some(ContextModifier::Session), role_name: None, - } + }) ); assert_eq!(query, stmt.to_string()); @@ -1571,14 +1570,14 @@ fn parse_set_role() { let stmt = pg_and_generic().verified_stmt(query); assert_eq!( stmt, - Statement::SetRole { - context_modifier: ContextModifier::Local, + Statement::Set(Set::SetRole { + context_modifier: Some(ContextModifier::Local), role_name: Some(Ident { value: "rolename".to_string(), quote_style: Some('\"'), span: Span::empty(), }), - } + }) ); assert_eq!(query, stmt.to_string()); @@ -1586,14 +1585,14 @@ fn parse_set_role() { let stmt = pg_and_generic().verified_stmt(query); assert_eq!( stmt, - Statement::SetRole { - context_modifier: ContextModifier::None, + Statement::Set(Set::SetRole { + context_modifier: None, role_name: Some(Ident { value: "rolename".to_string(), quote_style: Some('\''), span: Span::empty(), }), - } + }) ); assert_eq!(query, stmt.to_string()); } @@ -2509,6 +2508,271 @@ fn parse_create_anonymous_index() { } } +#[test] +/// Test to verify the correctness of parsing the `CREATE INDEX` statement with optional operator classes. +/// +/// # Implementative details +/// +/// At this time, since the parser library is not intended to take care of the semantics of the SQL statements, +/// there is no way to verify the correctness of the operator classes, nor whether they are valid for the given +/// index type. This test is only intended to verify that the parser can correctly parse the statement. For this +/// reason, the test includes a `totally_not_valid` operator class. 
+fn parse_create_indices_with_operator_classes() { + let indices = [ + IndexType::GIN, + IndexType::GiST, + IndexType::SPGiST, + IndexType::Custom("CustomIndexType".into()), + ]; + let operator_classes: [Option; 4] = [ + None, + Some("gin_trgm_ops".into()), + Some("gist_trgm_ops".into()), + Some("totally_not_valid".into()), + ]; + + for expected_index_type in indices { + for expected_operator_class in &operator_classes { + let single_column_sql_statement = format!( + "CREATE INDEX the_index_name ON users USING {expected_index_type} (concat_users_name(first_name, last_name){})", + expected_operator_class.as_ref().map(|oc| format!(" {oc}")) + .unwrap_or_default() + ); + let multi_column_sql_statement = format!( + "CREATE INDEX the_index_name ON users USING {expected_index_type} (column_name,concat_users_name(first_name, last_name){})", + expected_operator_class.as_ref().map(|oc| format!(" {oc}")) + .unwrap_or_default() + ); + + let expected_function_column = IndexColumn { + column: OrderByExpr { + expr: Expr::Function(Function { + name: ObjectName(vec![ObjectNamePart::Identifier(Ident { + value: "concat_users_name".to_owned(), + quote_style: None, + span: Span::empty(), + })]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![ + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + Ident { + value: "first_name".to_owned(), + quote_style: None, + span: Span::empty(), + }, + ))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + Ident { + value: "last_name".to_owned(), + quote_style: None, + span: Span::empty(), + }, + ))), + ], + clauses: vec![], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + }), + options: OrderByOptions { + asc: None, + nulls_first: None, + }, + with_fill: None, + }, + operator_class: expected_operator_class.clone(), + }; + + match 
pg().verified_stmt(&single_column_sql_statement) { + Statement::CreateIndex(CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using: Some(using), + columns, + unique: false, + concurrently: false, + if_not_exists: false, + include, + nulls_distinct: None, + with, + predicate: None, + }) => { + assert_eq_vec(&["the_index_name"], &name); + assert_eq_vec(&["users"], &table_name); + assert_eq!(expected_index_type, using); + assert_eq!(expected_function_column, columns[0],); + assert!(include.is_empty()); + assert!(with.is_empty()); + } + _ => unreachable!(), + } + + match pg().verified_stmt(&multi_column_sql_statement) { + Statement::CreateIndex(CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using: Some(using), + columns, + unique: false, + concurrently: false, + if_not_exists: false, + include, + nulls_distinct: None, + with, + predicate: None, + }) => { + assert_eq_vec(&["the_index_name"], &name); + assert_eq_vec(&["users"], &table_name); + assert_eq!(expected_index_type, using); + assert_eq!( + IndexColumn { + column: OrderByExpr { + expr: Expr::Identifier(Ident { + value: "column_name".to_owned(), + quote_style: None, + span: Span::empty() + }), + options: OrderByOptions { + asc: None, + nulls_first: None, + }, + with_fill: None, + }, + operator_class: None + }, + columns[0], + ); + assert_eq!(expected_function_column, columns[1],); + assert!(include.is_empty()); + assert!(with.is_empty()); + } + _ => unreachable!(), + } + } + } +} + +#[test] +fn parse_create_bloom() { + let sql = + "CREATE INDEX bloomidx ON tbloom USING BLOOM (i1,i2,i3) WITH (length = 80, col1 = 2, col2 = 2, col3 = 4)"; + match pg().verified_stmt(sql) { + Statement::CreateIndex(CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using: Some(using), + columns, + unique: false, + concurrently: false, + if_not_exists: false, + include, + nulls_distinct: None, + with, + predicate: None, + }) => 
{ + assert_eq_vec(&["bloomidx"], &name); + assert_eq_vec(&["tbloom"], &table_name); + assert_eq!(IndexType::Bloom, using); + assert_eq_vec(&["i1", "i2", "i3"], &columns); + assert!(include.is_empty()); + assert_eq!( + vec![ + Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("length"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(number("80").into())), + }, + Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col1"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(number("2").into())), + }, + Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col2"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(number("2").into())), + }, + Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col3"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(number("4").into())), + }, + ], + with + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_brin() { + let sql = "CREATE INDEX brin_sensor_data_recorded_at ON sensor_data USING BRIN (recorded_at)"; + match pg().verified_stmt(sql) { + Statement::CreateIndex(CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using: Some(using), + columns, + unique: false, + concurrently: false, + if_not_exists: false, + include, + nulls_distinct: None, + with, + predicate: None, + }) => { + assert_eq_vec(&["brin_sensor_data_recorded_at"], &name); + assert_eq_vec(&["sensor_data"], &table_name); + assert_eq!(IndexType::BRIN, using); + assert_eq_vec(&["recorded_at"], &columns); + assert!(include.is_empty()); + assert!(with.is_empty()); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_table_with_inherits() { + let single_inheritance_sql = + "CREATE TABLE child_table (child_column INT) INHERITS (public.parent_table)"; + match pg().verified_stmt(single_inheritance_sql) { + Statement::CreateTable(CreateTable { + inherits: Some(inherits), + .. 
+ }) => { + assert_eq_vec(&["public", "parent_table"], &inherits[0].0); + } + _ => unreachable!(), + } + + let double_inheritance_sql = "CREATE TABLE child_table (child_column INT) INHERITS (public.parent_table, pg_catalog.pg_settings)"; + match pg().verified_stmt(double_inheritance_sql) { + Statement::CreateTable(CreateTable { + inherits: Some(inherits), + .. + }) => { + assert_eq_vec(&["public", "parent_table"], &inherits[0].0); + assert_eq_vec(&["pg_catalog", "pg_settings"], &inherits[1].0); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_table_with_empty_inherits_fails() { + assert!(matches!( + pg().parse_sql_statements("CREATE TABLE child_table (child_column INT) INHERITS ()"), + Err(ParserError::ParserError(_)) + )); +} + #[test] fn parse_create_index_concurrently() { let sql = "CREATE INDEX CONCURRENTLY IF NOT EXISTS my_index ON my_table(col1,col2)"; @@ -2685,6 +2949,7 @@ fn parse_array_subquery_expr() { projection: vec![SelectItem::UnnamedExpr(Expr::Value( (number("1")).with_empty_span() ))], + exclude: None, into: None, from: vec![], lateral_views: vec![], @@ -2710,6 +2975,7 @@ fn parse_array_subquery_expr() { projection: vec![SelectItem::UnnamedExpr(Expr::Value( (number("2")).with_empty_span() ))], + exclude: None, into: None, from: vec![], lateral_views: vec![], @@ -2729,14 +2995,13 @@ fn parse_array_subquery_expr() { }))), }), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), filter: None, null_treatment: None, @@ -2752,16 +3017,16 @@ fn test_transaction_statement() { let statement = pg().verified_stmt("SET TRANSACTION SNAPSHOT '000003A1-1'"); assert_eq!( statement, - Statement::SetTransaction { + Statement::Set(Set::SetTransaction { modes: vec![], snapshot: Some(Value::SingleQuotedString(String::from("000003A1-1"))), session: false - } + }) ); let statement = pg().verified_stmt("SET 
SESSION CHARACTERISTICS AS TRANSACTION READ ONLY, READ WRITE, ISOLATION LEVEL SERIALIZABLE"); assert_eq!( statement, - Statement::SetTransaction { + Statement::Set(Set::SetTransaction { modes: vec![ TransactionMode::AccessMode(TransactionAccessMode::ReadOnly), TransactionMode::AccessMode(TransactionAccessMode::ReadWrite), @@ -2769,7 +3034,7 @@ fn test_transaction_statement() { ], snapshot: None, session: true - } + }) ); } @@ -3010,7 +3275,7 @@ fn test_fn_arg_with_value_operator() { assert!(matches!( &args[..], &[FunctionArg::ExprNamed { operator: FunctionArgOperator::Value, .. }] - ), "Invalid function argument: {:?}", args); + ), "Invalid function argument: {args:?}"); } other => panic!("Expected: JSON_OBJECT('name' VALUE 'value') to be parsed as a function, but got {other:?}"), } @@ -3841,12 +4106,223 @@ fn parse_update_in_with_subquery() { pg_and_generic().verified_stmt(r#"WITH "result" AS (UPDATE "Hero" SET "name" = 'Captain America', "number_of_movies" = "number_of_movies" + 1 WHERE "secret_identity" = 'Sam Wilson' RETURNING "id", "name", "secret_identity", "number_of_movies") SELECT * FROM "result""#); } +#[test] +fn parser_create_function_with_args() { + let sql1 = r#"CREATE OR REPLACE FUNCTION check_strings_different(str1 VARCHAR, str2 VARCHAR) RETURNS BOOLEAN LANGUAGE plpgsql AS $$ +BEGIN + IF str1 <> str2 THEN + RETURN TRUE; + ELSE + RETURN FALSE; + END IF; +END; +$$"#; + + assert_eq!( + pg_and_generic().verified_stmt(sql1), + Statement::CreateFunction(CreateFunction { + or_alter: false, + or_replace: true, + temporary: false, + name: ObjectName::from(vec![Ident::new("check_strings_different")]), + args: Some(vec![ + OperateFunctionArg::with_name( + "str1", + DataType::Varchar(None), + ), + OperateFunctionArg::with_name( + "str2", + DataType::Varchar(None), + ), + ]), + return_type: Some(DataType::Boolean), + language: Some("plpgsql".into()), + behavior: None, + called_on_null: None, + parallel: None, + function_body: 
Some(CreateFunctionBody::AsBeforeOptions(Expr::Value( + (Value::DollarQuotedString(DollarQuotedString {value: "\nBEGIN\n IF str1 <> str2 THEN\n RETURN TRUE;\n ELSE\n RETURN FALSE;\n END IF;\nEND;\n".to_owned(), tag: None})).with_empty_span() + ))), + if_not_exists: false, + using: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }) + ); + + let sql2 = r#"CREATE OR REPLACE FUNCTION check_not_zero(int1 INT) RETURNS BOOLEAN LANGUAGE plpgsql AS $$ +BEGIN + IF int1 <> 0 THEN + RETURN TRUE; + ELSE + RETURN FALSE; + END IF; +END; +$$"#; + assert_eq!( + pg_and_generic().verified_stmt(sql2), + Statement::CreateFunction(CreateFunction { + or_alter: false, + or_replace: true, + temporary: false, + name: ObjectName::from(vec![Ident::new("check_not_zero")]), + args: Some(vec![ + OperateFunctionArg::with_name( + "int1", + DataType::Int(None) + ) + ]), + return_type: Some(DataType::Boolean), + language: Some("plpgsql".into()), + behavior: None, + called_on_null: None, + parallel: None, + function_body: Some(CreateFunctionBody::AsBeforeOptions(Expr::Value( + (Value::DollarQuotedString(DollarQuotedString {value: "\nBEGIN\n IF int1 <> 0 THEN\n RETURN TRUE;\n ELSE\n RETURN FALSE;\n END IF;\nEND;\n".to_owned(), tag: None})).with_empty_span() + ))), + if_not_exists: false, + using: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }) + ); + + let sql3 = r#"CREATE OR REPLACE FUNCTION check_values_different(a INT, b INT) RETURNS BOOLEAN LANGUAGE plpgsql AS $$ +BEGIN + IF a <> b THEN + RETURN TRUE; + ELSE + RETURN FALSE; + END IF; +END; +$$"#; + assert_eq!( + pg_and_generic().verified_stmt(sql3), + Statement::CreateFunction(CreateFunction { + or_alter: false, + or_replace: true, + temporary: false, + name: ObjectName::from(vec![Ident::new("check_values_different")]), + args: Some(vec![ + OperateFunctionArg::with_name( + "a", + DataType::Int(None) + ), + OperateFunctionArg::with_name( + "b", + DataType::Int(None) + ), + ]), 
+ return_type: Some(DataType::Boolean), + language: Some("plpgsql".into()), + behavior: None, + called_on_null: None, + parallel: None, + function_body: Some(CreateFunctionBody::AsBeforeOptions(Expr::Value( + (Value::DollarQuotedString(DollarQuotedString {value: "\nBEGIN\n IF a <> b THEN\n RETURN TRUE;\n ELSE\n RETURN FALSE;\n END IF;\nEND;\n".to_owned(), tag: None})).with_empty_span() + ))), + if_not_exists: false, + using: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }) + ); + + let sql4 = r#"CREATE OR REPLACE FUNCTION check_values_different(int1 INT, int2 INT) RETURNS BOOLEAN LANGUAGE plpgsql AS $$ +BEGIN + IF int1 <> int2 THEN + RETURN TRUE; + ELSE + RETURN FALSE; + END IF; +END; +$$"#; + assert_eq!( + pg_and_generic().verified_stmt(sql4), + Statement::CreateFunction(CreateFunction { + or_alter: false, + or_replace: true, + temporary: false, + name: ObjectName::from(vec![Ident::new("check_values_different")]), + args: Some(vec![ + OperateFunctionArg::with_name( + "int1", + DataType::Int(None) + ), + OperateFunctionArg::with_name( + "int2", + DataType::Int(None) + ), + ]), + return_type: Some(DataType::Boolean), + language: Some("plpgsql".into()), + behavior: None, + called_on_null: None, + parallel: None, + function_body: Some(CreateFunctionBody::AsBeforeOptions(Expr::Value( + (Value::DollarQuotedString(DollarQuotedString {value: "\nBEGIN\n IF int1 <> int2 THEN\n RETURN TRUE;\n ELSE\n RETURN FALSE;\n END IF;\nEND;\n".to_owned(), tag: None})).with_empty_span() + ))), + if_not_exists: false, + using: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }) + ); + + let sql5 = r#"CREATE OR REPLACE FUNCTION foo(a TIMESTAMP WITH TIME ZONE, b VARCHAR) RETURNS BOOLEAN LANGUAGE plpgsql AS $$ + BEGIN + RETURN TRUE; + END; + $$"#; + assert_eq!( + pg_and_generic().verified_stmt(sql5), + Statement::CreateFunction(CreateFunction { + or_alter: false, + or_replace: true, + temporary: false, + name: 
ObjectName::from(vec![Ident::new("foo")]), + args: Some(vec![ + OperateFunctionArg::with_name( + "a", + DataType::Timestamp(None, TimezoneInfo::WithTimeZone) + ), + OperateFunctionArg::with_name("b", DataType::Varchar(None)), + ]), + return_type: Some(DataType::Boolean), + language: Some("plpgsql".into()), + behavior: None, + called_on_null: None, + parallel: None, + function_body: Some(CreateFunctionBody::AsBeforeOptions(Expr::Value( + (Value::DollarQuotedString(DollarQuotedString { + value: "\n BEGIN\n RETURN TRUE;\n END;\n ".to_owned(), + tag: None + })) + .with_empty_span() + ))), + if_not_exists: false, + using: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }) + ); + + let incorrect_sql = "CREATE FUNCTION add(function(struct int64), b INTEGER) RETURNS INTEGER LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE AS 'select $1 + $2;'"; + assert!(pg().parse_sql_statements(incorrect_sql).is_err(),); +} + #[test] fn parse_create_function() { let sql = "CREATE FUNCTION add(INTEGER, INTEGER) RETURNS INTEGER LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE AS 'select $1 + $2;'"; assert_eq!( pg_and_generic().verified_stmt(sql), Statement::CreateFunction(CreateFunction { + or_alter: false, or_replace: false, temporary: false, name: ObjectName::from(vec![Ident::new("add")]), @@ -3981,6 +4457,66 @@ fn parse_drop_function() { ); } +#[test] +fn parse_drop_domain() { + let sql = "DROP DOMAIN IF EXISTS jpeg_domain"; + assert_eq!( + pg().verified_stmt(sql), + Statement::DropDomain(DropDomain { + if_exists: true, + name: ObjectName::from(vec![Ident { + value: "jpeg_domain".to_string(), + quote_style: None, + span: Span::empty(), + }]), + drop_behavior: None + }) + ); + + let sql = "DROP DOMAIN jpeg_domain"; + assert_eq!( + pg().verified_stmt(sql), + Statement::DropDomain(DropDomain { + if_exists: false, + name: ObjectName::from(vec![Ident { + value: "jpeg_domain".to_string(), + quote_style: None, + span: Span::empty(), + }]), + drop_behavior: None + }) + 
); + + let sql = "DROP DOMAIN IF EXISTS jpeg_domain CASCADE"; + assert_eq!( + pg().verified_stmt(sql), + Statement::DropDomain(DropDomain { + if_exists: true, + name: ObjectName::from(vec![Ident { + value: "jpeg_domain".to_string(), + quote_style: None, + span: Span::empty(), + }]), + drop_behavior: Some(DropBehavior::Cascade) + }) + ); + + let sql = "DROP DOMAIN IF EXISTS jpeg_domain RESTRICT"; + + assert_eq!( + pg().verified_stmt(sql), + Statement::DropDomain(DropDomain { + if_exists: true, + name: ObjectName::from(vec![Ident { + value: "jpeg_domain".to_string(), + quote_style: None, + span: Span::empty(), + }]), + drop_behavior: Some(DropBehavior::Restrict) + }) + ); +} + #[test] fn parse_drop_procedure() { let sql = "DROP PROCEDURE IF EXISTS test_proc"; @@ -4254,13 +4790,13 @@ fn parse_truncate() { let table_name = ObjectName::from(vec![Ident::new("db"), Ident::new("table_name")]); let table_names = vec![TruncateTableTarget { name: table_name.clone(), + only: false, }]; assert_eq!( Statement::Truncate { table_names, partitions: None, table: false, - only: false, identity: None, cascade: None, on_cluster: None, @@ -4277,6 +4813,7 @@ fn parse_truncate_with_options() { let table_name = ObjectName::from(vec![Ident::new("db"), Ident::new("table_name")]); let table_names = vec![TruncateTableTarget { name: table_name.clone(), + only: true, }]; assert_eq!( @@ -4284,7 +4821,6 @@ fn parse_truncate_with_options() { table_names, partitions: None, table: true, - only: true, identity: Some(TruncateIdentityOption::Restart), cascade: Some(CascadeOption::Cascade), on_cluster: None, @@ -4305,9 +4841,11 @@ fn parse_truncate_with_table_list() { let table_names = vec![ TruncateTableTarget { name: table_name_a.clone(), + only: false, }, TruncateTableTarget { name: table_name_b.clone(), + only: false, }, ]; @@ -4316,7 +4854,6 @@ fn parse_truncate_with_table_list() { table_names, partitions: None, table: true, - only: false, identity: Some(TruncateIdentityOption::Restart), cascade: 
Some(CascadeOption::Cascade), on_cluster: None, @@ -4348,7 +4885,6 @@ fn parse_create_table_with_alias() { name, columns, constraints, - with_options: _with_options, if_not_exists: false, external: false, file_format: None, @@ -4521,14 +5057,13 @@ fn test_simple_postgres_insert_with_alias() { ]] })), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), assignments: vec![], partitioned: None, @@ -4594,14 +5129,13 @@ fn test_simple_postgres_insert_with_alias() { ]] })), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), assignments: vec![], partitioned: None, @@ -4665,14 +5199,13 @@ fn test_simple_insert_with_quoted_alias() { ]] })), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, fetch: None, locks: vec![], for_clause: None, settings: None, format_clause: None, + pipe_operators: vec![], })), assignments: vec![], partitioned: None, @@ -4727,7 +5260,10 @@ fn parse_at_time_zone() { left: Box::new(Expr::AtTimeZone { timestamp: Box::new(Expr::TypedString { data_type: DataType::Timestamp(None, TimezoneInfo::None), - value: Value::SingleQuotedString("2001-09-28 01:00".to_string()), + value: ValueWithSpan { + value: Value::SingleQuotedString("2001-09-28 01:00".to_string()), + span: Span::empty(), + }, }), time_zone: Box::new(Expr::Cast { kind: CastKind::DoubleColon, @@ -4761,7 +5297,11 @@ fn parse_at_time_zone() { fn parse_create_table_with_options() { let sql = "CREATE TABLE t (c INT) WITH (foo = 'bar', a = 123)"; match pg().verified_stmt(sql) { - Statement::CreateTable(CreateTable { with_options, .. }) => { + Statement::CreateTable(CreateTable { table_options, .. 
}) => { + let with_options = match table_options { + CreateTableOptions::With(options) => options, + _ => unreachable!(), + }; assert_eq!( vec![ SqlOption::KeyValue { @@ -4828,10 +5368,109 @@ fn test_escaped_string_literal() { } } +#[test] +fn parse_create_domain() { + let sql1 = "CREATE DOMAIN my_domain AS INTEGER CHECK (VALUE > 0)"; + let expected = Statement::CreateDomain(CreateDomain { + name: ObjectName::from(vec![Ident::new("my_domain")]), + data_type: DataType::Integer(None), + collation: None, + default: None, + constraints: vec![TableConstraint::Check { + name: None, + expr: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("VALUE"))), + op: BinaryOperator::Gt, + right: Box::new(Expr::Value(test_utils::number("0").into())), + }), + enforced: None, + }], + }); + + assert_eq!(pg().verified_stmt(sql1), expected); + + let sql2 = "CREATE DOMAIN my_domain AS INTEGER COLLATE \"en_US\" CHECK (VALUE > 0)"; + let expected = Statement::CreateDomain(CreateDomain { + name: ObjectName::from(vec![Ident::new("my_domain")]), + data_type: DataType::Integer(None), + collation: Some(Ident::with_quote('"', "en_US")), + default: None, + constraints: vec![TableConstraint::Check { + name: None, + expr: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("VALUE"))), + op: BinaryOperator::Gt, + right: Box::new(Expr::Value(test_utils::number("0").into())), + }), + enforced: None, + }], + }); + + assert_eq!(pg().verified_stmt(sql2), expected); + + let sql3 = "CREATE DOMAIN my_domain AS INTEGER DEFAULT 1 CHECK (VALUE > 0)"; + let expected = Statement::CreateDomain(CreateDomain { + name: ObjectName::from(vec![Ident::new("my_domain")]), + data_type: DataType::Integer(None), + collation: None, + default: Some(Expr::Value(test_utils::number("1").into())), + constraints: vec![TableConstraint::Check { + name: None, + expr: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("VALUE"))), + op: BinaryOperator::Gt, + right: 
Box::new(Expr::Value(test_utils::number("0").into())), + }), + enforced: None, + }], + }); + + assert_eq!(pg().verified_stmt(sql3), expected); + + let sql4 = "CREATE DOMAIN my_domain AS INTEGER COLLATE \"en_US\" DEFAULT 1 CHECK (VALUE > 0)"; + let expected = Statement::CreateDomain(CreateDomain { + name: ObjectName::from(vec![Ident::new("my_domain")]), + data_type: DataType::Integer(None), + collation: Some(Ident::with_quote('"', "en_US")), + default: Some(Expr::Value(test_utils::number("1").into())), + constraints: vec![TableConstraint::Check { + name: None, + expr: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("VALUE"))), + op: BinaryOperator::Gt, + right: Box::new(Expr::Value(test_utils::number("0").into())), + }), + enforced: None, + }], + }); + + assert_eq!(pg().verified_stmt(sql4), expected); + + let sql5 = "CREATE DOMAIN my_domain AS INTEGER CONSTRAINT my_constraint CHECK (VALUE > 0)"; + let expected = Statement::CreateDomain(CreateDomain { + name: ObjectName::from(vec![Ident::new("my_domain")]), + data_type: DataType::Integer(None), + collation: None, + default: None, + constraints: vec![TableConstraint::Check { + name: Some(Ident::new("my_constraint")), + expr: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("VALUE"))), + op: BinaryOperator::Gt, + right: Box::new(Expr::Value(test_utils::number("0").into())), + }), + enforced: None, + }], + }); + + assert_eq!(pg().verified_stmt(sql5), expected); +} + #[test] fn parse_create_simple_before_insert_trigger() { let sql = "CREATE TRIGGER check_insert BEFORE INSERT ON accounts FOR EACH ROW EXECUTE FUNCTION check_account_insert"; let expected = Statement::CreateTrigger { + or_alter: false, or_replace: false, is_constraint: false, name: ObjectName::from(vec![Ident::new("check_insert")]), @@ -4843,13 +5482,14 @@ fn parse_create_simple_before_insert_trigger() { trigger_object: TriggerObject::Row, include_each: true, condition: None, - exec_body: TriggerExecBody { + 
exec_body: Some(TriggerExecBody { exec_type: TriggerExecBodyType::Function, func_desc: FunctionDesc { name: ObjectName::from(vec![Ident::new("check_account_insert")]), args: None, }, - }, + }), + statements: None, characteristics: None, }; @@ -4860,6 +5500,7 @@ fn parse_create_simple_before_insert_trigger() { fn parse_create_after_update_trigger_with_condition() { let sql = "CREATE TRIGGER check_update AFTER UPDATE ON accounts FOR EACH ROW WHEN (NEW.balance > 10000) EXECUTE FUNCTION check_account_update"; let expected = Statement::CreateTrigger { + or_alter: false, or_replace: false, is_constraint: false, name: ObjectName::from(vec![Ident::new("check_update")]), @@ -4878,13 +5519,14 @@ fn parse_create_after_update_trigger_with_condition() { op: BinaryOperator::Gt, right: Box::new(Expr::value(number("10000"))), }))), - exec_body: TriggerExecBody { + exec_body: Some(TriggerExecBody { exec_type: TriggerExecBodyType::Function, func_desc: FunctionDesc { name: ObjectName::from(vec![Ident::new("check_account_update")]), args: None, }, - }, + }), + statements: None, characteristics: None, }; @@ -4895,6 +5537,7 @@ fn parse_create_after_update_trigger_with_condition() { fn parse_create_instead_of_delete_trigger() { let sql = "CREATE TRIGGER check_delete INSTEAD OF DELETE ON accounts FOR EACH ROW EXECUTE FUNCTION check_account_deletes"; let expected = Statement::CreateTrigger { + or_alter: false, or_replace: false, is_constraint: false, name: ObjectName::from(vec![Ident::new("check_delete")]), @@ -4906,13 +5549,14 @@ fn parse_create_instead_of_delete_trigger() { trigger_object: TriggerObject::Row, include_each: true, condition: None, - exec_body: TriggerExecBody { + exec_body: Some(TriggerExecBody { exec_type: TriggerExecBodyType::Function, func_desc: FunctionDesc { name: ObjectName::from(vec![Ident::new("check_account_deletes")]), args: None, }, - }, + }), + statements: None, characteristics: None, }; @@ -4923,6 +5567,7 @@ fn parse_create_instead_of_delete_trigger() { fn 
parse_create_trigger_with_multiple_events_and_deferrable() { let sql = "CREATE CONSTRAINT TRIGGER check_multiple_events BEFORE INSERT OR UPDATE OR DELETE ON accounts DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION check_account_changes"; let expected = Statement::CreateTrigger { + or_alter: false, or_replace: false, is_constraint: true, name: ObjectName::from(vec![Ident::new("check_multiple_events")]), @@ -4938,13 +5583,14 @@ fn parse_create_trigger_with_multiple_events_and_deferrable() { trigger_object: TriggerObject::Row, include_each: true, condition: None, - exec_body: TriggerExecBody { + exec_body: Some(TriggerExecBody { exec_type: TriggerExecBodyType::Function, func_desc: FunctionDesc { name: ObjectName::from(vec![Ident::new("check_account_changes")]), args: None, }, - }, + }), + statements: None, characteristics: Some(ConstraintCharacteristics { deferrable: Some(true), initially: Some(DeferrableInitial::Deferred), @@ -4959,6 +5605,7 @@ fn parse_create_trigger_with_multiple_events_and_deferrable() { fn parse_create_trigger_with_referencing() { let sql = "CREATE TRIGGER check_referencing BEFORE INSERT ON accounts REFERENCING NEW TABLE AS new_accounts OLD TABLE AS old_accounts FOR EACH ROW EXECUTE FUNCTION check_account_referencing"; let expected = Statement::CreateTrigger { + or_alter: false, or_replace: false, is_constraint: false, name: ObjectName::from(vec![Ident::new("check_referencing")]), @@ -4981,13 +5628,14 @@ fn parse_create_trigger_with_referencing() { trigger_object: TriggerObject::Row, include_each: true, condition: None, - exec_body: TriggerExecBody { + exec_body: Some(TriggerExecBody { exec_type: TriggerExecBodyType::Function, func_desc: FunctionDesc { name: ObjectName::from(vec![Ident::new("check_account_referencing")]), args: None, }, - }, + }), + statements: None, characteristics: None, }; @@ -5007,7 +5655,7 @@ fn parse_create_trigger_invalid_cases() { ), ( "CREATE TRIGGER check_update TOMORROW UPDATE ON accounts EXECUTE FUNCTION 
check_account_update", - "Expected: one of BEFORE or AFTER or INSTEAD, found: TOMORROW" + "Expected: one of FOR or BEFORE or AFTER or INSTEAD, found: TOMORROW" ), ( "CREATE TRIGGER check_update BEFORE SAVE ON accounts EXECUTE FUNCTION check_account_update", @@ -5036,7 +5684,7 @@ fn parse_drop_trigger() { "DROP TRIGGER{} check_update ON table_name{}", if if_exists { " IF EXISTS" } else { "" }, option - .map(|o| format!(" {}", o)) + .map(|o| format!(" {o}")) .unwrap_or_else(|| "".to_string()) ); assert_eq!( @@ -5130,8 +5778,7 @@ fn parse_trigger_related_functions() { // Now we parse the statements and check if they are parsed correctly. let mut statements = pg() .parse_sql_statements(&format!( - "{}{}{}{}", - sql_table_creation, sql_create_function, sql_create_trigger, sql_drop_trigger + "{sql_table_creation}{sql_create_function}{sql_create_trigger}{sql_drop_trigger}" )) .unwrap(); @@ -5189,19 +5836,13 @@ fn parse_trigger_related_functions() { storage: None, location: None }), - table_properties: vec![], - with_options: vec![], file_format: None, location: None, query: None, without_rowid: false, like: None, clone: None, - engine: None, comment: None, - auto_increment_offset: None, - default_charset: None, - collation: None, on_commit: None, on_cluster: None, primary_key: None, @@ -5209,7 +5850,7 @@ fn parse_trigger_related_functions() { partition_by: None, cluster_by: None, clustered_by: None, - options: None, + inherits: None, strict: false, copy_grants: false, enable_schema_evolution: None, @@ -5225,6 +5866,7 @@ fn parse_trigger_related_functions() { catalog: None, catalog_sync: None, storage_serialization_policy: None, + table_options: CreateTableOptions::None } ); @@ -5233,6 +5875,7 @@ fn parse_trigger_related_functions() { assert_eq!( create_function, Statement::CreateFunction(CreateFunction { + or_alter: false, or_replace: false, temporary: false, if_not_exists: false, @@ -5269,6 +5912,7 @@ fn parse_trigger_related_functions() { assert_eq!( create_trigger, 
Statement::CreateTrigger { + or_alter: false, or_replace: false, is_constraint: false, name: ObjectName::from(vec![Ident::new("emp_stamp")]), @@ -5280,13 +5924,14 @@ fn parse_trigger_related_functions() { trigger_object: TriggerObject::Row, include_each: true, condition: None, - exec_body: TriggerExecBody { + exec_body: Some(TriggerExecBody { exec_type: TriggerExecBodyType::Function, func_desc: FunctionDesc { name: ObjectName::from(vec![Ident::new("emp_stamp")]), args: None, } - }, + }), + statements: None, characteristics: None } ); @@ -5533,3 +6178,180 @@ fn parse_varbit_datatype() { _ => unreachable!(), } } + +#[test] +fn parse_alter_table_replica_identity() { + match pg_and_generic().verified_stmt("ALTER TABLE foo REPLICA IDENTITY FULL") { + Statement::AlterTable { operations, .. } => { + assert_eq!( + operations, + vec![AlterTableOperation::ReplicaIdentity { + identity: ReplicaIdentity::Full + }] + ); + } + _ => unreachable!(), + } + + match pg_and_generic().verified_stmt("ALTER TABLE foo REPLICA IDENTITY USING INDEX foo_idx") { + Statement::AlterTable { operations, .. } => { + assert_eq!( + operations, + vec![AlterTableOperation::ReplicaIdentity { + identity: ReplicaIdentity::Index("foo_idx".into()) + }] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_ts_datatypes() { + match pg_and_generic().verified_stmt("CREATE TABLE foo (x TSVECTOR)") { + Statement::CreateTable(CreateTable { columns, .. }) => { + assert_eq!( + columns, + vec![ColumnDef { + name: "x".into(), + data_type: DataType::TsVector, + options: vec![], + }] + ); + } + _ => unreachable!(), + } + + match pg_and_generic().verified_stmt("CREATE TABLE foo (x TSQUERY)") { + Statement::CreateTable(CreateTable { columns, .. 
}) => { + assert_eq!( + columns, + vec![ColumnDef { + name: "x".into(), + data_type: DataType::TsQuery, + options: vec![], + }] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_alter_table_constraint_not_valid() { + match pg_and_generic().verified_stmt( + "ALTER TABLE foo ADD CONSTRAINT bar FOREIGN KEY (baz) REFERENCES other(ref) NOT VALID", + ) { + Statement::AlterTable { operations, .. } => { + assert_eq!( + operations, + vec![AlterTableOperation::AddConstraint { + constraint: TableConstraint::ForeignKey { + name: Some("bar".into()), + index_name: None, + columns: vec!["baz".into()], + foreign_table: ObjectName::from(vec!["other".into()]), + referred_columns: vec!["ref".into()], + on_delete: None, + on_update: None, + characteristics: None, + }, + not_valid: true, + }] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_alter_table_validate_constraint() { + match pg_and_generic().verified_stmt("ALTER TABLE foo VALIDATE CONSTRAINT bar") { + Statement::AlterTable { operations, .. 
} => { + assert_eq!( + operations, + vec![AlterTableOperation::ValidateConstraint { name: "bar".into() }] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_server() { + let test_cases = vec![ + ( + "CREATE SERVER myserver FOREIGN DATA WRAPPER postgres_fdw", + CreateServerStatement { + name: ObjectName::from(vec!["myserver".into()]), + if_not_exists: false, + server_type: None, + version: None, + foreign_data_wrapper: ObjectName::from(vec!["postgres_fdw".into()]), + options: None, + }, + ), + ( + "CREATE SERVER IF NOT EXISTS myserver TYPE 'server_type' VERSION 'server_version' FOREIGN DATA WRAPPER postgres_fdw", + CreateServerStatement { + name: ObjectName::from(vec!["myserver".into()]), + if_not_exists: true, + server_type: Some(Ident { + value: "server_type".to_string(), + quote_style: Some('\''), + span: Span::empty(), + }), + version: Some(Ident { + value: "server_version".to_string(), + quote_style: Some('\''), + span: Span::empty(), + }), + foreign_data_wrapper: ObjectName::from(vec!["postgres_fdw".into()]), + options: None, + } + ), + ( + "CREATE SERVER myserver2 FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host 'foo', dbname 'foodb', port '5432')", + CreateServerStatement { + name: ObjectName::from(vec!["myserver2".into()]), + if_not_exists: false, + server_type: None, + version: None, + foreign_data_wrapper: ObjectName::from(vec!["postgres_fdw".into()]), + options: Some(vec![ + CreateServerOption { + key: "host".into(), + value: Ident { + value: "foo".to_string(), + quote_style: Some('\''), + span: Span::empty(), + }, + }, + CreateServerOption { + key: "dbname".into(), + value: Ident { + value: "foodb".to_string(), + quote_style: Some('\''), + span: Span::empty(), + }, + }, + CreateServerOption { + key: "port".into(), + value: Ident { + value: "5432".to_string(), + quote_style: Some('\''), + span: Span::empty(), + }, + }, + ]), + } + ) + ]; + + for (sql, expected) in test_cases { + let Statement::CreateServer(stmt) = 
pg_and_generic().verified_stmt(sql) else { + unreachable!() + }; + assert_eq!(stmt, expected); + } +} diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 7736735c..d539adf6 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -391,3 +391,19 @@ fn test_parse_nested_quoted_identifier() { .parse_sql_statements(r#"SELECT 1 AS ["1]"#) .is_err()); } + +#[test] +fn parse_extract_single_quotes() { + let sql = "SELECT EXTRACT('month' FROM my_timestamp) FROM my_table"; + redshift().verified_stmt(sql); +} + +#[test] +fn parse_string_literal_backslash_escape() { + redshift().one_statement_parses_to(r#"SELECT 'l\'auto'"#, "SELECT 'l''auto'"); +} + +#[test] +fn parse_utf8_multibyte_idents() { + redshift().verified_stmt("SELECT 🚀.city AS 🎸 FROM customers AS 🚀"); +} diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index b1d31e6d..65546bee 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -20,7 +20,7 @@ //! generic dialect is also tested (on the inputs it can handle). 
use sqlparser::ast::helpers::key_value_options::{KeyValueOption, KeyValueOptionType}; -use sqlparser::ast::helpers::stmt_data_loading::StageLoadSelectItem; +use sqlparser::ast::helpers::stmt_data_loading::{StageLoadSelectItem, StageLoadSelectItemKind}; use sqlparser::ast::*; use sqlparser::dialect::{Dialect, GenericDialect, SnowflakeDialect}; use sqlparser::parser::{ParserError, ParserOptions}; @@ -270,8 +270,8 @@ fn test_snowflake_create_table_with_tag() { assert_eq!("my_table", name.to_string()); assert_eq!( Some(vec![ - Tag::new("A".into(), "TAG A".to_string()), - Tag::new("B".into(), "TAG B".to_string()) + Tag::new(ObjectName::from(vec![Ident::new("A")]), "TAG A".to_string()), + Tag::new(ObjectName::from(vec![Ident::new("B")]), "TAG B".to_string()) ]), with_tags ); @@ -291,8 +291,8 @@ fn test_snowflake_create_table_with_tag() { assert_eq!("my_table", name.to_string()); assert_eq!( Some(vec![ - Tag::new("A".into(), "TAG A".to_string()), - Tag::new("B".into(), "TAG B".to_string()) + Tag::new(ObjectName::from(vec![Ident::new("A")]), "TAG A".to_string()), + Tag::new(ObjectName::from(vec![Ident::new("B")]), "TAG B".to_string()) ]), with_tags ); @@ -446,19 +446,56 @@ fn test_snowflake_create_table_if_not_exists() { } _ => unreachable!(), } + + for (sql, parse_to) in [ + ( + r#"CREATE TABLE IF NOT EXISTS "A"."B"."C" (v VARIANT)"#, + r#"CREATE TABLE IF NOT EXISTS "A"."B"."C" (v VARIANT)"#, + ), + ( + r#"CREATE TABLE "A"."B"."C" IF NOT EXISTS (v VARIANT)"#, + r#"CREATE TABLE IF NOT EXISTS "A"."B"."C" (v VARIANT)"#, + ), + ( + r#"CREATE TRANSIENT TABLE IF NOT EXISTS "A"."B"."C" (v VARIANT)"#, + r#"CREATE TRANSIENT TABLE IF NOT EXISTS "A"."B"."C" (v VARIANT)"#, + ), + ( + r#"CREATE TRANSIENT TABLE "A"."B"."C" IF NOT EXISTS (v VARIANT)"#, + r#"CREATE TRANSIENT TABLE IF NOT EXISTS "A"."B"."C" (v VARIANT)"#, + ), + ] { + snowflake().one_statement_parses_to(sql, parse_to); + } } #[test] fn test_snowflake_create_table_cluster_by() { - match snowflake().verified_stmt("CREATE 
TABLE my_table (a INT) CLUSTER BY (a, b)") { + match snowflake().verified_stmt("CREATE TABLE my_table (a INT) CLUSTER BY (a, b, my_func(c))") { Statement::CreateTable(CreateTable { name, cluster_by, .. }) => { assert_eq!("my_table", name.to_string()); assert_eq!( Some(WrappedCollection::Parentheses(vec![ - Ident::new("a"), - Ident::new("b"), + Expr::Identifier(Ident::new("a")), + Expr::Identifier(Ident::new("b")), + Expr::Function(Function { + name: ObjectName::from(vec![Ident::new("my_func")]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("c")) + ))], + duplicate_treatment: None, + clauses: vec![], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + }), ])), cluster_by ) @@ -470,9 +507,22 @@ fn test_snowflake_create_table_cluster_by() { #[test] fn test_snowflake_create_table_comment() { match snowflake().verified_stmt("CREATE TABLE my_table (a INT) COMMENT = 'some comment'") { - Statement::CreateTable(CreateTable { name, comment, .. }) => { + Statement::CreateTable(CreateTable { + name, + table_options, + .. 
+ }) => { assert_eq!("my_table", name.to_string()); - assert_eq!("some comment", comment.unwrap().to_string()); + let plain_options = match table_options { + CreateTableOptions::Plain(options) => options, + _ => unreachable!(), + }; + let comment = match plain_options.first().unwrap() { + SqlOption::Comment(CommentDef::WithEq(c)) + | SqlOption::Comment(CommentDef::WithoutEq(c)) => c, + _ => unreachable!(), + }; + assert_eq!("some comment", comment); } _ => unreachable!(), } @@ -681,7 +731,7 @@ fn test_snowflake_create_table_with_columns_masking_policy() { option: ColumnOption::Policy(ColumnPolicy::MaskingPolicy( ColumnPolicyProperty { with, - policy_name: "p".into(), + policy_name: ObjectName::from(vec![Ident::new("p")]), using_columns, } )) @@ -715,7 +765,7 @@ fn test_snowflake_create_table_with_columns_projection_policy() { option: ColumnOption::Policy(ColumnPolicy::ProjectionPolicy( ColumnPolicyProperty { with, - policy_name: "p".into(), + policy_name: ObjectName::from(vec![Ident::new("p")]), using_columns: None, } )) @@ -752,8 +802,14 @@ fn test_snowflake_create_table_with_columns_tags() { option: ColumnOption::Tags(TagsColumnOption { with, tags: vec![ - Tag::new("A".into(), "TAG A".into()), - Tag::new("B".into(), "TAG B".into()), + Tag::new( + ObjectName::from(vec![Ident::new("A")]), + "TAG A".into() + ), + Tag::new( + ObjectName::from(vec![Ident::new("B")]), + "TAG B".into() + ), ] }), }], @@ -796,7 +852,7 @@ fn test_snowflake_create_table_with_several_column_options() { option: ColumnOption::Policy(ColumnPolicy::MaskingPolicy( ColumnPolicyProperty { with: true, - policy_name: "p1".into(), + policy_name: ObjectName::from(vec![Ident::new("p1")]), using_columns: Some(vec!["a".into(), "b".into()]), } )), @@ -806,8 +862,14 @@ fn test_snowflake_create_table_with_several_column_options() { option: ColumnOption::Tags(TagsColumnOption { with: true, tags: vec![ - Tag::new("A".into(), "TAG A".into()), - Tag::new("B".into(), "TAG B".into()), + Tag::new( + 
ObjectName::from(vec![Ident::new("A")]), + "TAG A".into() + ), + Tag::new( + ObjectName::from(vec![Ident::new("B")]), + "TAG B".into() + ), ] }), } @@ -828,7 +890,7 @@ fn test_snowflake_create_table_with_several_column_options() { option: ColumnOption::Policy(ColumnPolicy::ProjectionPolicy( ColumnPolicyProperty { with: false, - policy_name: "p2".into(), + policy_name: ObjectName::from(vec![Ident::new("p2")]), using_columns: None, } )), @@ -838,8 +900,14 @@ fn test_snowflake_create_table_with_several_column_options() { option: ColumnOption::Tags(TagsColumnOption { with: false, tags: vec![ - Tag::new("C".into(), "TAG C".into()), - Tag::new("D".into(), "TAG D".into()), + Tag::new( + ObjectName::from(vec![Ident::new("C")]), + "TAG C".into() + ), + Tag::new( + ObjectName::from(vec![Ident::new("D")]), + "TAG D".into() + ), ] }), } @@ -869,8 +937,8 @@ fn test_snowflake_create_iceberg_table_all_options() { assert_eq!("my_table", name.to_string()); assert_eq!( Some(WrappedCollection::Parentheses(vec![ - Ident::new("a"), - Ident::new("b"), + Expr::Identifier(Ident::new("a")), + Expr::Identifier(Ident::new("b")), ])), cluster_by ); @@ -892,8 +960,8 @@ fn test_snowflake_create_iceberg_table_all_options() { with_aggregation_policy.map(|name| name.to_string()) ); assert_eq!(Some(vec![ - Tag::new("A".into(), "TAG A".into()), - Tag::new("B".into(), "TAG B".into()), + Tag::new(ObjectName::from(vec![Ident::new("A")]), "TAG A".into()), + Tag::new(ObjectName::from(vec![Ident::new("B")]), "TAG B".into()), ]), with_tags); } @@ -976,6 +1044,21 @@ fn parse_sf_create_or_replace_with_comment_for_snowflake() { } } +#[test] +fn parse_sf_create_table_or_view_with_dollar_quoted_comment() { + // Snowflake transforms dollar quoted comments into a common comment in DDL representation of creation + snowflake() + .one_statement_parses_to( + r#"CREATE OR REPLACE TEMPORARY VIEW foo.bar.baz ("COL_1" COMMENT $$comment 1$$) COMMENT = $$view comment$$ AS (SELECT 1)"#, + r#"CREATE OR REPLACE TEMPORARY VIEW 
foo.bar.baz ("COL_1" COMMENT 'comment 1') COMMENT = 'view comment' AS (SELECT 1)"# + ); + + snowflake().one_statement_parses_to( + r#"CREATE TABLE my_table (a STRING COMMENT $$comment 1$$) COMMENT = $$table comment$$"#, + r#"CREATE TABLE my_table (a STRING COMMENT 'comment 1') COMMENT = 'table comment'"#, + ); +} + #[test] fn test_sf_derived_table_in_parenthesis() { // Nesting a subquery in an extra set of parentheses is non-standard, @@ -1563,6 +1646,13 @@ fn test_alter_table_clustering() { snowflake_and_generic().verified_stmt("ALTER TABLE tbl RESUME RECLUSTER"); } +#[test] +fn test_alter_iceberg_table() { + snowflake_and_generic().verified_stmt("ALTER ICEBERG TABLE tbl DROP CLUSTERING KEY"); + snowflake_and_generic().verified_stmt("ALTER ICEBERG TABLE tbl SUSPEND RECLUSTER"); + snowflake_and_generic().verified_stmt("ALTER ICEBERG TABLE tbl RESUME RECLUSTER"); +} + #[test] fn test_drop_stage() { match snowflake_and_generic().verified_stmt("DROP STAGE s1") { @@ -2241,7 +2331,7 @@ fn test_copy_into_with_files_and_pattern_and_verification() { fn test_copy_into_with_transformations() { let sql = concat!( "COPY INTO my_company.emp_basic FROM ", - "(SELECT t1.$1:st AS st, $1:index, t2.$1 FROM @schema.general_finished AS T) ", + "(SELECT t1.$1:st AS st, $1:index, t2.$1, 4, '5' AS const_str FROM @schema.general_finished AS T) ", "FILES = ('file1.json', 'file2.json') ", "PATTERN = '.*employees0[1-5].csv.gz' ", "VALIDATION_MODE = RETURN_7_ROWS" @@ -2262,35 +2352,55 @@ fn test_copy_into_with_transformations() { ); assert_eq!( from_transformations.as_ref().unwrap()[0], - StageLoadSelectItem { + StageLoadSelectItemKind::StageLoadSelectItem(StageLoadSelectItem { alias: Some(Ident::new("t1")), file_col_num: 1, element: Some(Ident::new("st")), item_as: Some(Ident::new("st")) - } + }) ); assert_eq!( from_transformations.as_ref().unwrap()[1], - StageLoadSelectItem { + StageLoadSelectItemKind::StageLoadSelectItem(StageLoadSelectItem { alias: None, file_col_num: 1, element: 
Some(Ident::new("index")), item_as: None - } + }) ); assert_eq!( from_transformations.as_ref().unwrap()[2], - StageLoadSelectItem { + StageLoadSelectItemKind::StageLoadSelectItem(StageLoadSelectItem { alias: Some(Ident::new("t2")), file_col_num: 1, element: None, item_as: None - } + }) + ); + assert_eq!( + from_transformations.as_ref().unwrap()[3], + StageLoadSelectItemKind::SelectItem(SelectItem::UnnamedExpr(Expr::Value( + Value::Number("4".parse().unwrap(), false).into() + ))) + ); + assert_eq!( + from_transformations.as_ref().unwrap()[4], + StageLoadSelectItemKind::SelectItem(SelectItem::ExprWithAlias { + expr: Expr::Value(Value::SingleQuotedString("5".parse().unwrap()).into()), + alias: Ident::new("const_str".to_string()) + }) ); } _ => unreachable!(), } assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); + + // Test optional AS keyword to denote an alias for the stage + let sql1 = concat!( + "COPY INTO my_company.emp_basic FROM ", + "(SELECT t1.$1:st AS st, $1:index, t2.$1, 4, '5' AS const_str FROM @schema.general_finished T) " + ); + snowflake().parse_sql_statements(sql1).unwrap(); } #[test] @@ -2418,10 +2528,7 @@ fn test_snowflake_stage_object_names_into_location() { .zip(allowed_object_names.iter_mut()) { let (formatted_name, object_name) = it; - let sql = format!( - "COPY INTO {} FROM 'gcs://mybucket/./../a.csv'", - formatted_name - ); + let sql = format!("COPY INTO {formatted_name} FROM 'gcs://mybucket/./../a.csv'"); match snowflake().verified_stmt(&sql) { Statement::CopyIntoSnowflake { into, .. } => { assert_eq!(into.0, object_name.0) @@ -2444,10 +2551,7 @@ fn test_snowflake_stage_object_names_into_table() { .zip(allowed_object_names.iter_mut()) { let (formatted_name, object_name) = it; - let sql = format!( - "COPY INTO {} FROM 'gcs://mybucket/./../a.csv'", - formatted_name - ); + let sql = format!("COPY INTO {formatted_name} FROM 'gcs://mybucket/./../a.csv'"); match snowflake().verified_stmt(&sql) { Statement::CopyIntoSnowflake { into, .. 
} => { assert_eq!(into.0, object_name.0) @@ -2477,6 +2581,26 @@ fn test_snowflake_copy_into() { } _ => unreachable!(), } + + // Test for non-ident characters in stage names + let sql = "COPY INTO a.b FROM @namespace.stage_name/x@x~x%x+"; + assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); + match snowflake().verified_stmt(sql) { + Statement::CopyIntoSnowflake { into, from_obj, .. } => { + assert_eq!( + into, + ObjectName::from(vec![Ident::new("a"), Ident::new("b")]) + ); + assert_eq!( + from_obj, + Some(ObjectName::from(vec![ + Ident::new("@namespace"), + Ident::new("stage_name/x@x~x%x+") + ])) + ) + } + _ => unreachable!(), + } } #[test] @@ -2928,7 +3052,7 @@ fn parse_use() { for object_name in &valid_object_names { // Test single identifier without quotes assert_eq!( - snowflake().verified_stmt(&format!("USE {}", object_name)), + snowflake().verified_stmt(&format!("USE {object_name}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::new( object_name.to_string() )]))) @@ -2936,7 +3060,7 @@ fn parse_use() { for "e in "e_styles { // Test single identifier with different type of quotes assert_eq!( - snowflake().verified_stmt(&format!("USE {}{}{}", quote, object_name, quote)), + snowflake().verified_stmt(&format!("USE {quote}{object_name}{quote}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::with_quote( quote, object_name.to_string(), @@ -2948,7 +3072,9 @@ fn parse_use() { for "e in "e_styles { // Test double identifier with different type of quotes assert_eq!( - snowflake().verified_stmt(&format!("USE {0}CATALOG{0}.{0}my_schema{0}", quote)), + snowflake().verified_stmt(&format!( + "USE {quote}CATALOG{quote}.{quote}my_schema{quote}" + )), Statement::Use(Use::Object(ObjectName::from(vec![ Ident::with_quote(quote, "CATALOG"), Ident::with_quote(quote, "my_schema") @@ -2967,35 +3093,37 @@ fn parse_use() { for "e in "e_styles { // Test single and double identifier with keyword and different type of quotes assert_eq!( - 
snowflake().verified_stmt(&format!("USE DATABASE {0}my_database{0}", quote)), + snowflake().verified_stmt(&format!("USE DATABASE {quote}my_database{quote}")), Statement::Use(Use::Database(ObjectName::from(vec![Ident::with_quote( quote, "my_database".to_string(), )]))) ); assert_eq!( - snowflake().verified_stmt(&format!("USE SCHEMA {0}my_schema{0}", quote)), + snowflake().verified_stmt(&format!("USE SCHEMA {quote}my_schema{quote}")), Statement::Use(Use::Schema(ObjectName::from(vec![Ident::with_quote( quote, "my_schema".to_string(), )]))) ); assert_eq!( - snowflake().verified_stmt(&format!("USE SCHEMA {0}CATALOG{0}.{0}my_schema{0}", quote)), + snowflake().verified_stmt(&format!( + "USE SCHEMA {quote}CATALOG{quote}.{quote}my_schema{quote}" + )), Statement::Use(Use::Schema(ObjectName::from(vec![ Ident::with_quote(quote, "CATALOG"), Ident::with_quote(quote, "my_schema") ]))) ); assert_eq!( - snowflake().verified_stmt(&format!("USE ROLE {0}my_role{0}", quote)), + snowflake().verified_stmt(&format!("USE ROLE {quote}my_role{quote}")), Statement::Use(Use::Role(ObjectName::from(vec![Ident::with_quote( quote, "my_role".to_string(), )]))) ); assert_eq!( - snowflake().verified_stmt(&format!("USE WAREHOUSE {0}my_wh{0}", quote)), + snowflake().verified_stmt(&format!("USE WAREHOUSE {quote}my_wh{quote}")), Statement::Use(Use::Warehouse(ObjectName::from(vec![Ident::with_quote( quote, "my_wh".to_string(), @@ -3032,7 +3160,7 @@ fn view_comment_option_should_be_after_column_list() { "CREATE OR REPLACE VIEW v (a COMMENT 'a comment', b, c COMMENT 'c comment') COMMENT = 'Comment' AS SELECT a FROM t", "CREATE OR REPLACE VIEW v (a COMMENT 'a comment', b, c COMMENT 'c comment') WITH (foo = bar) COMMENT = 'Comment' AS SELECT a FROM t", ] { - snowflake_and_generic() + snowflake() .verified_stmt(sql); } } @@ -3041,7 +3169,7 @@ fn view_comment_option_should_be_after_column_list() { fn parse_view_column_descriptions() { let sql = "CREATE OR REPLACE VIEW v (a COMMENT 'Comment', b) AS SELECT a, b 
FROM table1"; - match snowflake_and_generic().verified_stmt(sql) { + match snowflake().verified_stmt(sql) { Statement::CreateView { name, columns, .. } => { assert_eq!(name.to_string(), "v"); assert_eq!( @@ -3050,7 +3178,9 @@ fn parse_view_column_descriptions() { ViewColumnDef { name: Ident::new("a"), data_type: None, - options: Some(vec![ColumnOption::Comment("Comment".to_string())]), + options: Some(ColumnOptions::SpaceSeparated(vec![ColumnOption::Comment( + "Comment".to_string() + )])), }, ViewColumnDef { name: Ident::new("b"), @@ -3305,10 +3435,38 @@ fn parse_ls_and_rm() { .unwrap(); } +#[test] +fn test_sql_keywords_as_select_item_ident() { + // Some keywords that should be parsed as an alias + let unreserved_kws = vec!["CLUSTER", "FETCH", "RETURNING", "LIMIT", "EXCEPT", "SORT"]; + for kw in unreserved_kws { + snowflake().verified_stmt(&format!("SELECT 1, {kw}")); + } + + // Some keywords that should not be parsed as an alias + let reserved_kws = vec![ + "FROM", + "GROUP", + "HAVING", + "INTERSECT", + "INTO", + "ORDER", + "SELECT", + "UNION", + "WHERE", + "WITH", + ]; + for kw in reserved_kws { + assert!(snowflake() + .parse_sql_statements(&format!("SELECT 1, {kw}")) + .is_err()); + } +} + #[test] fn test_sql_keywords_as_select_item_aliases() { // Some keywords that should be parsed as an alias - let unreserved_kws = vec!["CLUSTER", "FETCH", "RETURNING", "LIMIT", "EXCEPT"]; + let unreserved_kws = vec!["CLUSTER", "FETCH", "RETURNING", "LIMIT", "EXCEPT", "SORT"]; for kw in unreserved_kws { snowflake() .one_statement_parses_to(&format!("SELECT 1 {kw}"), &format!("SELECT 1 AS {kw}")); @@ -3334,6 +3492,57 @@ fn test_sql_keywords_as_select_item_aliases() { } } +#[test] +fn test_sql_keywords_as_table_aliases() { + // Some keywords that should be parsed as an alias implicitly + let unreserved_kws = vec![ + "VIEW", + "EXPLAIN", + "ANALYZE", + "SORT", + "PIVOT", + "UNPIVOT", + "TOP", + "LIMIT", + "OFFSET", + "FETCH", + "EXCEPT", + "CLUSTER", + "DISTRIBUTE", + "GLOBAL", + 
"ANTI", + "SEMI", + "RETURNING", + "OUTER", + "WINDOW", + "END", + "PARTITION", + "PREWHERE", + "SETTINGS", + "FORMAT", + "MATCH_RECOGNIZE", + "OPEN", + ]; + + for kw in unreserved_kws { + snowflake().verified_stmt(&format!("SELECT * FROM tbl AS {kw}")); + snowflake().one_statement_parses_to( + &format!("SELECT * FROM tbl {kw}"), + &format!("SELECT * FROM tbl AS {kw}"), + ); + } + + // Some keywords that should not be parsed as an alias implicitly + let reserved_kws = vec![ + "FROM", "GROUP", "HAVING", "ORDER", "SELECT", "UNION", "WHERE", "WITH", + ]; + for kw in reserved_kws { + assert!(snowflake() + .parse_sql_statements(&format!("SELECT * FROM tbl {kw}")) + .is_err()); + } +} + #[test] fn test_timetravel_at_before() { snowflake().verified_only_select("SELECT * FROM tbl AT(TIMESTAMP => '2024-12-15 00:00:00')"); @@ -3342,7 +3551,7 @@ fn test_timetravel_at_before() { } #[test] -fn test_grant_account_privileges() { +fn test_grant_account_global_privileges() { let privileges = vec![ "ALL", "ALL PRIVILEGES", @@ -3447,6 +3656,43 @@ fn test_grant_account_privileges() { } } +#[test] +fn test_grant_account_object_privileges() { + let privileges = vec![ + "ALL", + "ALL PRIVILEGES", + "APPLYBUDGET", + "MODIFY", + "MONITOR", + "USAGE", + "OPERATE", + ]; + + let objects_types = vec![ + "USER", + "RESOURCE MONITOR", + "WAREHOUSE", + "COMPUTE POOL", + "DATABASE", + "INTEGRATION", + "CONNECTION", + "FAILOVER GROUP", + "REPLICATION GROUP", + "EXTERNAL VOLUME", + ]; + + let with_grant_options = vec!["", " WITH GRANT OPTION"]; + + for t in &objects_types { + for p in &privileges { + for wgo in &with_grant_options { + let sql = format!("GRANT {p} ON {t} obj1 TO ROLE role1{wgo}"); + snowflake_and_generic().verified_stmt(&sql); + } + } + } +} + #[test] fn test_grant_role_to() { snowflake_and_generic().verified_stmt("GRANT ROLE r1 TO ROLE r2"); @@ -3490,3 +3736,705 @@ fn test_alter_session() { ); snowflake().one_statement_parses_to("ALTER SESSION UNSET a\nB", "ALTER SESSION UNSET a, 
B"); } + +#[test] +fn test_alter_session_followed_by_statement() { + let stmts = snowflake() + .parse_sql_statements("ALTER SESSION SET QUERY_TAG='hello'; SELECT 42") + .unwrap(); + match stmts[..] { + [Statement::AlterSession { .. }, Statement::Query { .. }] => {} + _ => panic!("Unexpected statements: {stmts:?}"), + } +} + +#[test] +fn test_nested_join_without_parentheses() { + let query = "SELECT DISTINCT p.product_id FROM orders AS o INNER JOIN customers AS c INNER JOIN products AS p ON p.customer_id = c.customer_id ON c.order_id = o.order_id"; + assert_eq!( + only( + snowflake() + .verified_only_select_with_canonical(query, "SELECT DISTINCT p.product_id FROM orders AS o INNER JOIN (customers AS c INNER JOIN products AS p ON p.customer_id = c.customer_id) ON c.order_id = o.order_id") + .from + ) + .joins, + vec![Join { + relation: TableFactor::NestedJoin { + table_with_joins: Box::new(TableWithJoins { + relation: TableFactor::Table { + name: ObjectName::from(vec![Ident::new("customers".to_string())]), + alias: Some(TableAlias { + name: Ident { + value: "c".to_string(), + quote_style: None, + span: Span::empty(), + }, + columns: vec![], + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + index_hints: vec![], + }, + joins: vec![Join { + relation: TableFactor::Table { + name: ObjectName::from(vec![Ident::new("products".to_string())]), + alias: Some(TableAlias { + name: Ident { + value: "p".to_string(), + quote_style: None, + span: Span::empty(), + }, + columns: vec![], + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + index_hints: vec![], + }, + global: false, + join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("p".to_string()), + Ident::new("customer_id".to_string()) + ])), + op: BinaryOperator::Eq, + 
right: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("c".to_string()), + Ident::new("customer_id".to_string()) + ])), + })), + }] + }), + alias: None + }, + global: false, + join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("c".to_string()), + Ident::new("order_id".to_string()) + ])), + op: BinaryOperator::Eq, + right: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("o".to_string()), + Ident::new("order_id".to_string()) + ])), + })) + }], + ); + + let query = "SELECT DISTINCT p.product_id FROM orders AS o JOIN customers AS c JOIN products AS p ON p.customer_id = c.customer_id ON c.order_id = o.order_id"; + assert_eq!( + only( + snowflake() + .verified_only_select_with_canonical(query, "SELECT DISTINCT p.product_id FROM orders AS o JOIN (customers AS c JOIN products AS p ON p.customer_id = c.customer_id) ON c.order_id = o.order_id") + .from + ) + .joins, + vec![Join { + relation: TableFactor::NestedJoin { + table_with_joins: Box::new(TableWithJoins { + relation: TableFactor::Table { + name: ObjectName::from(vec![Ident::new("customers".to_string())]), + alias: Some(TableAlias { + name: Ident { + value: "c".to_string(), + quote_style: None, + span: Span::empty(), + }, + columns: vec![], + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + index_hints: vec![], + }, + joins: vec![Join { + relation: TableFactor::Table { + name: ObjectName::from(vec![Ident::new("products".to_string())]), + alias: Some(TableAlias { + name: Ident { + value: "p".to_string(), + quote_style: None, + span: Span::empty(), + }, + columns: vec![], + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + index_hints: vec![], + }, + global: false, + join_operator: JoinOperator::Join(JoinConstraint::On(Expr::BinaryOp { + left: 
Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("p".to_string()), + Ident::new("customer_id".to_string()) + ])), + op: BinaryOperator::Eq, + right: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("c".to_string()), + Ident::new("customer_id".to_string()) + ])), + })), + }] + }), + alias: None + }, + global: false, + join_operator: JoinOperator::Join(JoinConstraint::On(Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("c".to_string()), + Ident::new("order_id".to_string()) + ])), + op: BinaryOperator::Eq, + right: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("o".to_string()), + Ident::new("order_id".to_string()) + ])), + })) + }], + ); + + let query = "SELECT DISTINCT p.product_id FROM orders AS o LEFT JOIN customers AS c LEFT JOIN products AS p ON p.customer_id = c.customer_id ON c.order_id = o.order_id"; + assert_eq!( + only( + snowflake() + .verified_only_select_with_canonical(query, "SELECT DISTINCT p.product_id FROM orders AS o LEFT JOIN (customers AS c LEFT JOIN products AS p ON p.customer_id = c.customer_id) ON c.order_id = o.order_id") + .from + ) + .joins, + vec![Join { + relation: TableFactor::NestedJoin { + table_with_joins: Box::new(TableWithJoins { + relation: TableFactor::Table { + name: ObjectName::from(vec![Ident::new("customers".to_string())]), + alias: Some(TableAlias { + name: Ident { + value: "c".to_string(), + quote_style: None, + span: Span::empty(), + }, + columns: vec![], + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + index_hints: vec![], + }, + joins: vec![Join { + relation: TableFactor::Table { + name: ObjectName::from(vec![Ident::new("products".to_string())]), + alias: Some(TableAlias { + name: Ident { + value: "p".to_string(), + quote_style: None, + span: Span::empty(), + }, + columns: vec![], + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + 
json_path: None, + sample: None, + index_hints: vec![], + }, + global: false, + join_operator: JoinOperator::Left(JoinConstraint::On(Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("p".to_string()), + Ident::new("customer_id".to_string()) + ])), + op: BinaryOperator::Eq, + right: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("c".to_string()), + Ident::new("customer_id".to_string()) + ])), + })), + }] + }), + alias: None + }, + global: false, + join_operator: JoinOperator::Left(JoinConstraint::On(Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("c".to_string()), + Ident::new("order_id".to_string()) + ])), + op: BinaryOperator::Eq, + right: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("o".to_string()), + Ident::new("order_id".to_string()) + ])), + })) + }], + ); + + let query = "SELECT DISTINCT p.product_id FROM orders AS o RIGHT JOIN customers AS c RIGHT JOIN products AS p ON p.customer_id = c.customer_id ON c.order_id = o.order_id"; + assert_eq!( + only( + snowflake() + .verified_only_select_with_canonical(query, "SELECT DISTINCT p.product_id FROM orders AS o RIGHT JOIN (customers AS c RIGHT JOIN products AS p ON p.customer_id = c.customer_id) ON c.order_id = o.order_id") + .from + ) + .joins, + vec![Join { + relation: TableFactor::NestedJoin { + table_with_joins: Box::new(TableWithJoins { + relation: TableFactor::Table { + name: ObjectName::from(vec![Ident::new("customers".to_string())]), + alias: Some(TableAlias { + name: Ident { + value: "c".to_string(), + quote_style: None, + span: Span::empty(), + }, + columns: vec![], + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + index_hints: vec![], + }, + joins: vec![Join { + relation: TableFactor::Table { + name: ObjectName::from(vec![Ident::new("products".to_string())]), + alias: Some(TableAlias { + name: Ident { + value: "p".to_string(), + quote_style: 
None, + span: Span::empty(), + }, + columns: vec![], + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + index_hints: vec![], + }, + global: false, + join_operator: JoinOperator::Right(JoinConstraint::On(Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("p".to_string()), + Ident::new("customer_id".to_string()) + ])), + op: BinaryOperator::Eq, + right: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("c".to_string()), + Ident::new("customer_id".to_string()) + ])), + })), + }] + }), + alias: None + }, + global: false, + join_operator: JoinOperator::Right(JoinConstraint::On(Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("c".to_string()), + Ident::new("order_id".to_string()) + ])), + op: BinaryOperator::Eq, + right: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("o".to_string()), + Ident::new("order_id".to_string()) + ])), + })) + }], + ); + + let query = "SELECT DISTINCT p.product_id FROM orders AS o FULL JOIN customers AS c FULL JOIN products AS p ON p.customer_id = c.customer_id ON c.order_id = o.order_id"; + assert_eq!( + only( + snowflake() + .verified_only_select_with_canonical(query, "SELECT DISTINCT p.product_id FROM orders AS o FULL JOIN (customers AS c FULL JOIN products AS p ON p.customer_id = c.customer_id) ON c.order_id = o.order_id") + .from + ) + .joins, + vec![Join { + relation: TableFactor::NestedJoin { + table_with_joins: Box::new(TableWithJoins { + relation: TableFactor::Table { + name: ObjectName::from(vec![Ident::new("customers".to_string())]), + alias: Some(TableAlias { + name: Ident { + value: "c".to_string(), + quote_style: None, + span: Span::empty(), + }, + columns: vec![], + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + index_hints: vec![], + }, + joins: vec![Join { + relation: 
TableFactor::Table { + name: ObjectName::from(vec![Ident::new("products".to_string())]), + alias: Some(TableAlias { + name: Ident { + value: "p".to_string(), + quote_style: None, + span: Span::empty(), + }, + columns: vec![], + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + index_hints: vec![], + }, + global: false, + join_operator: JoinOperator::FullOuter(JoinConstraint::On( + Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("p".to_string()), + Ident::new("customer_id".to_string()) + ])), + op: BinaryOperator::Eq, + right: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("c".to_string()), + Ident::new("customer_id".to_string()) + ])), + } + )), + }] + }), + alias: None + }, + global: false, + join_operator: JoinOperator::FullOuter(JoinConstraint::On(Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("c".to_string()), + Ident::new("order_id".to_string()) + ])), + op: BinaryOperator::Eq, + right: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("o".to_string()), + Ident::new("order_id".to_string()) + ])), + })) + }], + ); +} + +#[test] +fn parse_connect_by_root_operator() { + let sql = "SELECT CONNECT_BY_ROOT name AS root_name FROM Tbl1"; + + match snowflake().verified_stmt(sql) { + Statement::Query(query) => { + assert_eq!( + query.body.as_select().unwrap().projection[0], + SelectItem::ExprWithAlias { + expr: Expr::Prefixed { + prefix: Ident::new("CONNECT_BY_ROOT"), + value: Box::new(Expr::Identifier(Ident::new("name"))) + }, + alias: Ident::new("root_name"), + } + ); + } + _ => unreachable!(), + } + + let sql = "SELECT CONNECT_BY_ROOT name FROM Tbl2"; + match snowflake().verified_stmt(sql) { + Statement::Query(query) => { + assert_eq!( + query.body.as_select().unwrap().projection[0], + SelectItem::UnnamedExpr(Expr::Prefixed { + prefix: Ident::new("CONNECT_BY_ROOT"), + value: 
Box::new(Expr::Identifier(Ident::new("name"))) + }) + ); + } + _ => unreachable!(), + } + + let sql = "SELECT CONNECT_BY_ROOT FROM Tbl2"; + let res = snowflake().parse_sql_statements(sql); + assert_eq!( + res.unwrap_err().to_string(), + "sql parser error: Expected an expression, found: FROM" + ); +} + +#[test] +fn test_begin_exception_end() { + for sql in [ + "BEGIN SELECT 1; EXCEPTION WHEN OTHER THEN SELECT 2; RAISE; END", + "BEGIN SELECT 1; EXCEPTION WHEN OTHER THEN SELECT 2; RAISE EX_1; END", + "BEGIN SELECT 1; EXCEPTION WHEN FOO THEN SELECT 2; WHEN OTHER THEN SELECT 3; RAISE; END", + "BEGIN BEGIN SELECT 1; EXCEPTION WHEN OTHER THEN SELECT 2; RAISE; END; END", + ] { + snowflake().verified_stmt(sql); + } + + let sql = r#" +DECLARE + EXCEPTION_1 EXCEPTION (-20001, 'I caught the expected exception.'); + EXCEPTION_2 EXCEPTION (-20002, 'Not the expected exception!'); + EXCEPTION_3 EXCEPTION (-20003, 'The worst exception...'); +BEGIN + BEGIN + SELECT 1; + EXCEPTION + WHEN EXCEPTION_1 THEN + SELECT 1; + WHEN EXCEPTION_2 OR EXCEPTION_3 THEN + SELECT 2; + SELECT 3; + WHEN OTHER THEN + SELECT 4; + RAISE; + END; +END +"#; + + // Outer `BEGIN` of the two nested `BEGIN` statements. + let Statement::StartTransaction { mut statements, .. } = snowflake() + .parse_sql_statements(sql) + .unwrap() + .pop() + .unwrap() + else { + unreachable!(); + }; + + // Inner `BEGIN` of the two nested `BEGIN` statements. + let Statement::StartTransaction { + statements, + exception, + has_end_keyword, + .. 
+ } = statements.pop().unwrap() + else { + unreachable!(); + }; + + assert_eq!(1, statements.len()); + assert!(has_end_keyword); + + let exception = exception.unwrap(); + assert_eq!(3, exception.len()); + assert_eq!(1, exception[0].idents.len()); + assert_eq!(1, exception[0].statements.len()); + assert_eq!(2, exception[1].idents.len()); + assert_eq!(2, exception[1].statements.len()); +} + +#[test] +fn test_snowflake_fetch_clause_syntax() { + let canonical = "SELECT c1 FROM fetch_test FETCH FIRST 2 ROWS ONLY"; + snowflake().verified_only_select_with_canonical("SELECT c1 FROM fetch_test FETCH 2", canonical); + + snowflake() + .verified_only_select_with_canonical("SELECT c1 FROM fetch_test FETCH FIRST 2", canonical); + snowflake() + .verified_only_select_with_canonical("SELECT c1 FROM fetch_test FETCH NEXT 2", canonical); + + snowflake() + .verified_only_select_with_canonical("SELECT c1 FROM fetch_test FETCH 2 ROW", canonical); + + snowflake().verified_only_select_with_canonical( + "SELECT c1 FROM fetch_test FETCH FIRST 2 ROWS", + canonical, + ); +} + +#[test] +fn test_snowflake_create_view_with_multiple_column_options() { + let create_view_with_tag = + r#"CREATE VIEW X (COL WITH TAG (pii='email') COMMENT 'foobar') AS SELECT * FROM Y"#; + snowflake().verified_stmt(create_view_with_tag); +} + +#[test] +fn test_snowflake_create_view_with_composite_tag() { + let create_view_with_tag = + r#"CREATE VIEW X (COL WITH TAG (foo.bar.baz.pii='email')) AS SELECT * FROM Y"#; + snowflake().verified_stmt(create_view_with_tag); +} + +#[test] +fn test_snowflake_create_view_with_composite_policy_name() { + let create_view_with_tag = + r#"CREATE VIEW X (COL WITH MASKING POLICY foo.bar.baz) AS SELECT * FROM Y"#; + snowflake().verified_stmt(create_view_with_tag); +} + +#[test] +fn test_snowflake_identifier_function() { + // Using IDENTIFIER to reference a column + match &snowflake() + .verified_only_select("SELECT identifier('email') FROM customers") + .projection[0] + { + 
SelectItem::UnnamedExpr(Expr::Function(Function { name, args, .. })) => { + assert_eq!(*name, ObjectName::from(vec![Ident::new("identifier")])); + assert_eq!( + *args, + FunctionArguments::List(FunctionArgumentList { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + Value::SingleQuotedString("email".to_string()).into() + )))], + clauses: vec![], + duplicate_treatment: None + }) + ); + } + _ => unreachable!(), + } + + // Using IDENTIFIER to reference a case-sensitive column + match &snowflake() + .verified_only_select(r#"SELECT identifier('"Email"') FROM customers"#) + .projection[0] + { + SelectItem::UnnamedExpr(Expr::Function(Function { name, args, .. })) => { + assert_eq!(*name, ObjectName::from(vec![Ident::new("identifier")])); + assert_eq!( + *args, + FunctionArguments::List(FunctionArgumentList { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + Value::SingleQuotedString("\"Email\"".to_string()).into() + )))], + clauses: vec![], + duplicate_treatment: None + }) + ); + } + _ => unreachable!(), + } + + // Using IDENTIFIER to reference an alias of a table + match &snowflake() + .verified_only_select("SELECT identifier('alias1').* FROM tbl AS alias1") + .projection[0] + { + SelectItem::QualifiedWildcard( + SelectItemQualifiedWildcardKind::Expr(Expr::Function(Function { name, args, .. })), + _, + ) => { + assert_eq!(*name, ObjectName::from(vec![Ident::new("identifier")])); + assert_eq!( + *args, + FunctionArguments::List(FunctionArgumentList { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + Value::SingleQuotedString("alias1".to_string()).into() + )))], + clauses: vec![], + duplicate_treatment: None + }) + ); + } + _ => unreachable!(), + } + + // Using IDENTIFIER to reference a database + match snowflake().verified_stmt("CREATE DATABASE IDENTIFIER('tbl')") { + Statement::CreateDatabase { db_name, .. 
} => { + assert_eq!( + db_name, + ObjectName(vec![ObjectNamePart::Function(ObjectNamePartFunction { + name: Ident::new("IDENTIFIER"), + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + Value::SingleQuotedString("tbl".to_string()).into() + )))] + })]) + ); + } + _ => unreachable!(), + } + + // Using IDENTIFIER to reference a schema + match snowflake().verified_stmt("CREATE SCHEMA IDENTIFIER('db1.sc1')") { + Statement::CreateSchema { schema_name, .. } => { + assert_eq!( + schema_name, + SchemaName::Simple(ObjectName(vec![ObjectNamePart::Function( + ObjectNamePartFunction { + name: Ident::new("IDENTIFIER"), + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + Value::SingleQuotedString("db1.sc1".to_string()).into() + )))] + } + )])) + ); + } + _ => unreachable!(), + } + + // Using IDENTIFIER to reference a table + match snowflake().verified_stmt("CREATE TABLE IDENTIFIER('tbl') (id INT)") { + Statement::CreateTable(CreateTable { name, .. }) => { + assert_eq!( + name, + ObjectName(vec![ObjectNamePart::Function(ObjectNamePartFunction { + name: Ident::new("IDENTIFIER"), + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + Value::SingleQuotedString("tbl".to_string()).into() + )))] + })]) + ); + } + _ => unreachable!(), + } + + // Cannot have more than one IDENTIFIER part in an object name + assert_eq!( + snowflake() + .parse_sql_statements( + "CREATE TABLE IDENTIFIER('db1').IDENTIFIER('sc1').IDENTIFIER('tbl') (id INT)" + ) + .is_err(), + true + ); + assert_eq!( + snowflake() + .parse_sql_statements("CREATE TABLE IDENTIFIER('db1')..IDENTIFIER('tbl') (id INT)") + .is_err(), + true + ); +} diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 361c9b05..06496f0c 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -324,7 +324,7 @@ fn parse_create_table_on_conflict_col() { Keyword::IGNORE, Keyword::REPLACE, ] { - let sql = format!("CREATE TABLE t1 (a INT, b INT ON CONFLICT {:?})", 
keyword); + let sql = format!("CREATE TABLE t1 (a INT, b INT ON CONFLICT {keyword:?})"); match sqlite_and_generic().verified_stmt(&sql) { Statement::CreateTable(CreateTable { columns, .. }) => { assert_eq!( @@ -410,7 +410,7 @@ fn parse_window_function_with_filter() { "count", "user_defined_function", ] { - let sql = format!("SELECT {}(x) FILTER (WHERE y) OVER () FROM t", func_name); + let sql = format!("SELECT {func_name}(x) FILTER (WHERE y) OVER () FROM t"); let select = sqlite().verified_only_select(&sql); assert_eq!(select.to_string(), sql); assert_eq!( @@ -444,7 +444,7 @@ fn parse_window_function_with_filter() { fn parse_attach_database() { let sql = "ATTACH DATABASE 'test.db' AS test"; let verified_stmt = sqlite().verified_stmt(sql); - assert_eq!(sql, format!("{}", verified_stmt)); + assert_eq!(sql, format!("{verified_stmt}")); match verified_stmt { Statement::AttachDatabase { schema_name, @@ -562,6 +562,36 @@ fn test_dollar_identifier_as_placeholder() { } } +#[test] +fn test_match_operator() { + assert_eq!( + sqlite().verified_expr("col MATCH 'pattern'"), + Expr::BinaryOp { + op: BinaryOperator::Match, + left: Box::new(Expr::Identifier(Ident::new("col"))), + right: Box::new(Expr::Value( + (Value::SingleQuotedString("pattern".to_string())).with_empty_span() + )) + } + ); + sqlite().verified_only_select("SELECT * FROM email WHERE email MATCH 'fts5'"); +} + +#[test] +fn test_regexp_operator() { + assert_eq!( + sqlite().verified_expr("col REGEXP 'pattern'"), + Expr::BinaryOp { + op: BinaryOperator::Regexp, + left: Box::new(Expr::Identifier(Ident::new("col"))), + right: Box::new(Expr::Value( + (Value::SingleQuotedString("pattern".to_string())).with_empty_span() + )) + } + ); + sqlite().verified_only_select(r#"SELECT count(*) FROM messages WHERE msg_text REGEXP '\d+'"#); +} + fn sqlite() -> TestedDialects { TestedDialects::new(vec![Box::new(SQLiteDialect {})]) }