feat: add DuckDB dialect (#878)

* feat: add DuckDB dialect

* formatting

* fix conflict

* support // in GenericDialect

* add DuckDbDialect to all_dialects

* add comment from suggestion

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

* fix: support // in GenericDialect

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
This commit is contained in:
eitsupi 2023-05-19 03:57:29 +09:00 committed by GitHub
parent 3be19c7666
commit 33b12acce7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 140 additions and 10 deletions

View file

@ -46,6 +46,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname]
"--hive" => Box::new(HiveDialect {}),
"--redshift" => Box::new(RedshiftSqlDialect {}),
"--clickhouse" => Box::new(ClickHouseDialect {}),
"--duckdb" => Box::new(DuckDbDialect {}),
"--generic" | "" => Box::new(GenericDialect {}),
s => panic!("Unexpected parameter: {s}"),
};

View file

@ -85,6 +85,8 @@ pub enum BinaryOperator {
BitwiseOr,
BitwiseAnd,
BitwiseXor,
/// Integer division operator `//` in DuckDB
DuckIntegerDivide,
/// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division
MyIntegerDivide,
/// Support for custom operators (built by parsers outside this crate)
@ -126,6 +128,7 @@ impl fmt::Display for BinaryOperator {
BinaryOperator::BitwiseOr => f.write_str("|"),
BinaryOperator::BitwiseAnd => f.write_str("&"),
BinaryOperator::BitwiseXor => f.write_str("^"),
BinaryOperator::DuckIntegerDivide => f.write_str("//"),
BinaryOperator::MyIntegerDivide => f.write_str("DIV"),
BinaryOperator::Custom(s) => f.write_str(s),
BinaryOperator::PGBitwiseXor => f.write_str("#"),

31
src/dialect/duckdb.rs Normal file
View file

@ -0,0 +1,31 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::dialect::Dialect;
/// A [`Dialect`] for [DuckDB](https://duckdb.org/).
#[derive(Debug, Default)]
pub struct DuckDbDialect;
// Identifier rules used by this dialect: an unquoted identifier begins with an
// alphabetic character or `_`, and may continue with alphabetic characters,
// ASCII digits, `$` or `_`.
impl Dialect for DuckDbDialect {
    /// Returns `true` if `ch` may begin an unquoted identifier, i.e. it is an
    /// underscore or any alphabetic character.
    fn is_identifier_start(&self, ch: char) -> bool {
        ch == '_' || ch.is_alphabetic()
    }

    /// Returns `true` if `ch` may appear after the first character of an
    /// unquoted identifier: `$`, `_`, an ASCII digit, or any alphabetic
    /// character.
    fn is_identifier_part(&self, ch: char) -> bool {
        match ch {
            '$' | '_' => true,
            other => other.is_ascii_digit() || other.is_alphabetic(),
        }
    }

    /// Always `true` for this dialect.
    // NOTE(review): presumably this enables `agg(...) FILTER (WHERE ...)`
    // parsing; confirm against the `Dialect` trait documentation.
    fn supports_filter_during_aggregation(&self) -> bool {
        true
    }
}

View file

@ -13,6 +13,7 @@
mod ansi;
mod bigquery;
mod clickhouse;
mod duckdb;
mod generic;
mod hive;
mod mssql;
@ -31,6 +32,7 @@ use core::str::Chars;
pub use self::ansi::AnsiDialect;
pub use self::bigquery::BigQueryDialect;
pub use self::clickhouse::ClickHouseDialect;
pub use self::duckdb::DuckDbDialect;
pub use self::generic::GenericDialect;
pub use self::hive::HiveDialect;
pub use self::mssql::MsSqlDialect;
@ -163,6 +165,7 @@ pub fn dialect_from_str(dialect_name: impl AsRef<str>) -> Option<Box<dyn Dialect
"clickhouse" => Some(Box::new(ClickHouseDialect {})),
"bigquery" => Some(Box::new(BigQueryDialect)),
"ansi" => Some(Box::new(AnsiDialect {})),
"duckdb" => Some(Box::new(DuckDbDialect {})),
_ => None,
}
}
@ -214,6 +217,8 @@ mod tests {
assert!(parse_dialect("BigQuery").is::<BigQueryDialect>());
assert!(parse_dialect("ansi").is::<AnsiDialect>());
assert!(parse_dialect("ANSI").is::<AnsiDialect>());
assert!(parse_dialect("duckdb").is::<DuckDbDialect>());
assert!(parse_dialect("DuckDb").is::<DuckDbDialect>());
// error cases
assert!(dialect_from_str("Unknown").is_none());

View file

@ -992,7 +992,7 @@ impl<'a> Parser<'a> {
/// parse a group by expr. a group by expr can be one of group sets, roll up, cube, or simple
/// expr.
fn parse_group_by_expr(&mut self) -> Result<Expr, ParserError> {
if dialect_of!(self is PostgreSqlDialect | GenericDialect) {
if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) {
if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) {
self.expect_token(&Token::LParen)?;
let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?;
@ -1662,10 +1662,13 @@ impl<'a> Parser<'a> {
}
Token::Ampersand => Some(BinaryOperator::BitwiseAnd),
Token::Div => Some(BinaryOperator::Divide),
Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
Token::DuckIntDiv if dialect_of!(self is DuckDbDialect | GenericDialect) => {
Some(BinaryOperator::DuckIntegerDivide)
}
Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => {
Some(BinaryOperator::PGBitwiseShiftLeft)
}
Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => {
Some(BinaryOperator::PGBitwiseShiftRight)
}
Token::Sharp if dialect_of!(self is PostgreSqlDialect) => {
@ -2051,7 +2054,9 @@ impl<'a> Parser<'a> {
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
Token::Ampersand => Ok(23),
Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC),
Token::Mul | Token::Div | Token::Mod | Token::StringConcat => Ok(40),
Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
Ok(40)
}
Token::DoubleColon => Ok(50),
Token::Colon => Ok(50),
Token::ExclamationMark => Ok(50),
@ -3842,7 +3847,7 @@ impl<'a> Parser<'a> {
} else {
let column_keyword = self.parse_keyword(Keyword::COLUMN);
let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | GenericDialect)
let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect)
{
self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS])
|| if_not_exists
@ -6315,7 +6320,7 @@ impl<'a> Parser<'a> {
self.expect_keyword(Keyword::SET)?;
let assignments = self.parse_comma_separated(Parser::parse_assignment)?;
let from = if self.parse_keyword(Keyword::FROM)
&& dialect_of!(self is GenericDialect | PostgreSqlDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect)
&& dialect_of!(self is GenericDialect | PostgreSqlDialect | DuckDbDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect)
{
Some(self.parse_table_and_joins()?)
} else {
@ -6415,7 +6420,8 @@ impl<'a> Parser<'a> {
pub fn parse_wildcard_additional_options(
&mut self,
) -> Result<WildcardAdditionalOptions, ParserError> {
let opt_exclude = if dialect_of!(self is GenericDialect | SnowflakeDialect) {
let opt_exclude = if dialect_of!(self is GenericDialect | DuckDbDialect | SnowflakeDialect)
{
self.parse_optional_select_item_exclude()?
} else {
None

View file

@ -168,6 +168,7 @@ pub fn all_dialects() -> TestedDialects {
Box::new(MySqlDialect {}),
Box::new(BigQueryDialect {}),
Box::new(SQLiteDialect {}),
Box::new(DuckDbDialect {}),
],
options: None,
}

View file

@ -35,7 +35,7 @@ use serde::{Deserialize, Serialize};
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::DollarQuotedString;
use crate::dialect::{BigQueryDialect, GenericDialect, SnowflakeDialect};
use crate::dialect::{BigQueryDialect, DuckDbDialect, GenericDialect, SnowflakeDialect};
use crate::dialect::{Dialect, MySqlDialect};
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
@ -98,6 +98,8 @@ pub enum Token {
Mul,
/// Division operator `/`
Div,
/// Integer division operator `//` in DuckDB
DuckIntDiv,
/// Modulo Operator `%`
Mod,
/// String concatenation `||`
@ -212,6 +214,7 @@ impl fmt::Display for Token {
Token::Minus => f.write_str("-"),
Token::Mul => f.write_str("*"),
Token::Div => f.write_str("/"),
Token::DuckIntDiv => f.write_str("//"),
Token::StringConcat => f.write_str("||"),
Token::Mod => f.write_str("%"),
Token::LParen => f.write_str("("),
@ -768,6 +771,9 @@ impl<'a> Tokenizer<'a> {
comment,
})))
}
Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => {
self.consume_and_return(chars, Token::DuckIntDiv)
}
// a regular '/' operator
_ => Ok(Some(Token::Div)),
}

View file

@ -24,8 +24,9 @@ use sqlparser::ast::SelectItem::UnnamedExpr;
use sqlparser::ast::TableFactor::Pivot;
use sqlparser::ast::*;
use sqlparser::dialect::{
AnsiDialect, BigQueryDialect, ClickHouseDialect, GenericDialect, HiveDialect, MsSqlDialect,
MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect, SnowflakeDialect,
AnsiDialect, BigQueryDialect, ClickHouseDialect, DuckDbDialect, GenericDialect, HiveDialect,
MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect,
SnowflakeDialect,
};
use sqlparser::keywords::ALL_KEYWORDS;
use sqlparser::parser::{Parser, ParserError, ParserOptions};
@ -195,6 +196,7 @@ fn parse_update_set_from() {
let dialects = TestedDialects {
dialects: vec![
Box::new(GenericDialect {}),
Box::new(DuckDbDialect {}),
Box::new(PostgreSqlDialect {}),
Box::new(BigQueryDialect {}),
Box::new(SnowflakeDialect {}),
@ -941,6 +943,7 @@ fn parse_exponent_in_select() -> Result<(), ParserError> {
Box::new(AnsiDialect {}),
Box::new(BigQueryDialect {}),
Box::new(ClickHouseDialect {}),
Box::new(DuckDbDialect {}),
Box::new(GenericDialect {}),
// Box::new(HiveDialect {}),
Box::new(MsSqlDialect {}),
@ -2053,6 +2056,7 @@ fn parse_array_agg_func() {
let supported_dialects = TestedDialects {
dialects: vec![
Box::new(GenericDialect {}),
Box::new(DuckDbDialect {}),
Box::new(PostgreSqlDialect {}),
Box::new(MsSqlDialect {}),
Box::new(AnsiDialect {}),
@ -2848,6 +2852,7 @@ fn parse_alter_table_add_column_if_not_exists() {
Box::new(PostgreSqlDialect {}),
Box::new(BigQueryDialect {}),
Box::new(GenericDialect {}),
Box::new(DuckDbDialect {}),
],
options: None,
};
@ -6139,6 +6144,7 @@ fn test_placeholder() {
let dialects = TestedDialects {
dialects: vec![
Box::new(GenericDialect {}),
Box::new(DuckDbDialect {}),
Box::new(PostgreSqlDialect {}),
Box::new(MsSqlDialect {}),
Box::new(AnsiDialect {}),
@ -6873,6 +6879,7 @@ fn parse_non_latin_identifiers() {
let supported_dialects = TestedDialects {
dialects: vec![
Box::new(GenericDialect {}),
Box::new(DuckDbDialect {}),
Box::new(PostgreSqlDialect {}),
Box::new(MsSqlDialect {}),
Box::new(RedshiftSqlDialect {}),

70
tests/sqlparser_duckdb.rs Normal file
View file

@ -0,0 +1,70 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[macro_use]
mod test_utils;
use test_utils::*;
use sqlparser::ast::*;
use sqlparser::dialect::{DuckDbDialect, GenericDialect};
/// Builds a [`TestedDialects`] containing only [`DuckDbDialect`], with no
/// parser options set.
fn duckdb() -> TestedDialects {
    TestedDialects {
        options: None,
        dialects: vec![Box::new(DuckDbDialect {})],
    }
}
/// Builds a [`TestedDialects`] containing [`DuckDbDialect`] followed by
/// [`GenericDialect`], with no parser options set.
fn duckdb_and_generic() -> TestedDialects {
    TestedDialects {
        options: None,
        dialects: vec![Box::new(DuckDbDialect {}), Box::new(GenericDialect {})],
    }
}
/// Checks how `EXCLUDE` after a wildcard is represented in the first
/// projection item for three spellings of the clause.
#[test]
fn test_select_wildcard_with_exclude() {
    // A parenthesised list with a single column is reported as `Multiple`.
    let one_in_parens = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data");
    assert_eq!(
        SelectItem::Wildcard(WildcardAdditionalOptions {
            opt_exclude: Some(ExcludeSelectItem::Multiple(vec![Ident::new("col_a")])),
            ..Default::default()
        }),
        one_in_parens.projection[0]
    );

    // A bare column after a qualified wildcard is reported as `Single`.
    let qualified =
        duckdb().verified_only_select("SELECT name.* EXCLUDE department_id FROM employee_table");
    let single_exclusion = WildcardAdditionalOptions {
        opt_exclude: Some(ExcludeSelectItem::Single(Ident::new("department_id"))),
        ..Default::default()
    };
    assert_eq!(
        SelectItem::QualifiedWildcard(ObjectName(vec![Ident::new("name")]), single_exclusion),
        qualified.projection[0]
    );

    // A parenthesised list with several columns keeps them in source order.
    let two_in_parens = duckdb()
        .verified_only_select("SELECT * EXCLUDE (department_id, employee_id) FROM employee_table");
    let excluded_columns = vec![Ident::new("department_id"), Ident::new("employee_id")];
    assert_eq!(
        SelectItem::Wildcard(WildcardAdditionalOptions {
            opt_exclude: Some(ExcludeSelectItem::Multiple(excluded_columns)),
            ..Default::default()
        }),
        two_in_parens.projection[0]
    );
}
/// Exercises the `//` infix operator under both the DuckDB and generic
/// dialects.
#[test]
fn parse_div_infix() {
    // NOTE(review): `verified_stmt` lives in test_utils; presumably it panics
    // when the statement fails its parse checks — confirm there.
    let sql = r#"SELECT 5 // 2"#;
    duckdb_and_generic().verified_stmt(sql);
}