mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
feat: add DuckDB dialect (#878)
* feat: add DuckDB dialect * formatting * fix conflict * support // in GenericDialect * add DuckDbDialect to all_dialects * add comment from suggestion Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org> * fix: support // in GenericDialect --------- Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
This commit is contained in:
parent
3be19c7666
commit
33b12acce7
9 changed files with 140 additions and 10 deletions
|
@ -46,6 +46,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname]
|
|||
"--hive" => Box::new(HiveDialect {}),
|
||||
"--redshift" => Box::new(RedshiftSqlDialect {}),
|
||||
"--clickhouse" => Box::new(ClickHouseDialect {}),
|
||||
"--duckdb" => Box::new(DuckDbDialect {}),
|
||||
"--generic" | "" => Box::new(GenericDialect {}),
|
||||
s => panic!("Unexpected parameter: {s}"),
|
||||
};
|
||||
|
|
|
@ -85,6 +85,8 @@ pub enum BinaryOperator {
|
|||
BitwiseOr,
|
||||
BitwiseAnd,
|
||||
BitwiseXor,
|
||||
/// Integer division operator `//` in DuckDB
|
||||
DuckIntegerDivide,
|
||||
/// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division
|
||||
MyIntegerDivide,
|
||||
/// Support for custom operators (built by parsers outside this crate)
|
||||
|
@ -126,6 +128,7 @@ impl fmt::Display for BinaryOperator {
|
|||
BinaryOperator::BitwiseOr => f.write_str("|"),
|
||||
BinaryOperator::BitwiseAnd => f.write_str("&"),
|
||||
BinaryOperator::BitwiseXor => f.write_str("^"),
|
||||
BinaryOperator::DuckIntegerDivide => f.write_str("//"),
|
||||
BinaryOperator::MyIntegerDivide => f.write_str("DIV"),
|
||||
BinaryOperator::Custom(s) => f.write_str(s),
|
||||
BinaryOperator::PGBitwiseXor => f.write_str("#"),
|
||||
|
|
31
src/dialect/duckdb.rs
Normal file
31
src/dialect/duckdb.rs
Normal file
|
@ -0,0 +1,31 @@
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::dialect::Dialect;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct DuckDbDialect;
|
||||
|
||||
// Dialect for DuckDB. It is accepted alongside [`PostgreSqlDialect`] in several parser checks, but it is its own dialect (not a Redshift variant).
|
||||
impl Dialect for DuckDbDialect {
|
||||
fn is_identifier_start(&self, ch: char) -> bool {
|
||||
ch.is_alphabetic() || ch == '_'
|
||||
}
|
||||
|
||||
fn is_identifier_part(&self, ch: char) -> bool {
|
||||
ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_'
|
||||
}
|
||||
|
||||
fn supports_filter_during_aggregation(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
|
@ -13,6 +13,7 @@
|
|||
mod ansi;
|
||||
mod bigquery;
|
||||
mod clickhouse;
|
||||
mod duckdb;
|
||||
mod generic;
|
||||
mod hive;
|
||||
mod mssql;
|
||||
|
@ -31,6 +32,7 @@ use core::str::Chars;
|
|||
pub use self::ansi::AnsiDialect;
|
||||
pub use self::bigquery::BigQueryDialect;
|
||||
pub use self::clickhouse::ClickHouseDialect;
|
||||
pub use self::duckdb::DuckDbDialect;
|
||||
pub use self::generic::GenericDialect;
|
||||
pub use self::hive::HiveDialect;
|
||||
pub use self::mssql::MsSqlDialect;
|
||||
|
@ -163,6 +165,7 @@ pub fn dialect_from_str(dialect_name: impl AsRef<str>) -> Option<Box<dyn Dialect
|
|||
"clickhouse" => Some(Box::new(ClickHouseDialect {})),
|
||||
"bigquery" => Some(Box::new(BigQueryDialect)),
|
||||
"ansi" => Some(Box::new(AnsiDialect {})),
|
||||
"duckdb" => Some(Box::new(DuckDbDialect {})),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
@ -214,6 +217,8 @@ mod tests {
|
|||
assert!(parse_dialect("BigQuery").is::<BigQueryDialect>());
|
||||
assert!(parse_dialect("ansi").is::<AnsiDialect>());
|
||||
assert!(parse_dialect("ANSI").is::<AnsiDialect>());
|
||||
assert!(parse_dialect("duckdb").is::<DuckDbDialect>());
|
||||
assert!(parse_dialect("DuckDb").is::<DuckDbDialect>());
|
||||
|
||||
// error cases
|
||||
assert!(dialect_from_str("Unknown").is_none());
|
||||
|
|
|
@ -992,7 +992,7 @@ impl<'a> Parser<'a> {
|
|||
/// parse a group by expr. a group by expr can be one of group sets, roll up, cube, or simple
|
||||
/// expr.
|
||||
fn parse_group_by_expr(&mut self) -> Result<Expr, ParserError> {
|
||||
if dialect_of!(self is PostgreSqlDialect | GenericDialect) {
|
||||
if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) {
|
||||
if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?;
|
||||
|
@ -1662,10 +1662,13 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
Token::Ampersand => Some(BinaryOperator::BitwiseAnd),
|
||||
Token::Div => Some(BinaryOperator::Divide),
|
||||
Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
|
||||
Token::DuckIntDiv if dialect_of!(self is DuckDbDialect | GenericDialect) => {
|
||||
Some(BinaryOperator::DuckIntegerDivide)
|
||||
}
|
||||
Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => {
|
||||
Some(BinaryOperator::PGBitwiseShiftLeft)
|
||||
}
|
||||
Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
|
||||
Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => {
|
||||
Some(BinaryOperator::PGBitwiseShiftRight)
|
||||
}
|
||||
Token::Sharp if dialect_of!(self is PostgreSqlDialect) => {
|
||||
|
@ -2051,7 +2054,9 @@ impl<'a> Parser<'a> {
|
|||
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
|
||||
Token::Ampersand => Ok(23),
|
||||
Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC),
|
||||
Token::Mul | Token::Div | Token::Mod | Token::StringConcat => Ok(40),
|
||||
Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
|
||||
Ok(40)
|
||||
}
|
||||
Token::DoubleColon => Ok(50),
|
||||
Token::Colon => Ok(50),
|
||||
Token::ExclamationMark => Ok(50),
|
||||
|
@ -3842,7 +3847,7 @@ impl<'a> Parser<'a> {
|
|||
} else {
|
||||
let column_keyword = self.parse_keyword(Keyword::COLUMN);
|
||||
|
||||
let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | GenericDialect)
|
||||
let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect)
|
||||
{
|
||||
self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS])
|
||||
|| if_not_exists
|
||||
|
@ -6315,7 +6320,7 @@ impl<'a> Parser<'a> {
|
|||
self.expect_keyword(Keyword::SET)?;
|
||||
let assignments = self.parse_comma_separated(Parser::parse_assignment)?;
|
||||
let from = if self.parse_keyword(Keyword::FROM)
|
||||
&& dialect_of!(self is GenericDialect | PostgreSqlDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect)
|
||||
&& dialect_of!(self is GenericDialect | PostgreSqlDialect | DuckDbDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect)
|
||||
{
|
||||
Some(self.parse_table_and_joins()?)
|
||||
} else {
|
||||
|
@ -6415,7 +6420,8 @@ impl<'a> Parser<'a> {
|
|||
pub fn parse_wildcard_additional_options(
|
||||
&mut self,
|
||||
) -> Result<WildcardAdditionalOptions, ParserError> {
|
||||
let opt_exclude = if dialect_of!(self is GenericDialect | SnowflakeDialect) {
|
||||
let opt_exclude = if dialect_of!(self is GenericDialect | DuckDbDialect | SnowflakeDialect)
|
||||
{
|
||||
self.parse_optional_select_item_exclude()?
|
||||
} else {
|
||||
None
|
||||
|
|
|
@ -168,6 +168,7 @@ pub fn all_dialects() -> TestedDialects {
|
|||
Box::new(MySqlDialect {}),
|
||||
Box::new(BigQueryDialect {}),
|
||||
Box::new(SQLiteDialect {}),
|
||||
Box::new(DuckDbDialect {}),
|
||||
],
|
||||
options: None,
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ use serde::{Deserialize, Serialize};
|
|||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
use crate::ast::DollarQuotedString;
|
||||
use crate::dialect::{BigQueryDialect, GenericDialect, SnowflakeDialect};
|
||||
use crate::dialect::{BigQueryDialect, DuckDbDialect, GenericDialect, SnowflakeDialect};
|
||||
use crate::dialect::{Dialect, MySqlDialect};
|
||||
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
|
||||
|
||||
|
@ -98,6 +98,8 @@ pub enum Token {
|
|||
Mul,
|
||||
/// Division operator `/`
|
||||
Div,
|
||||
/// Integer division operator `//` in DuckDB
|
||||
DuckIntDiv,
|
||||
/// Modulo Operator `%`
|
||||
Mod,
|
||||
/// String concatenation `||`
|
||||
|
@ -212,6 +214,7 @@ impl fmt::Display for Token {
|
|||
Token::Minus => f.write_str("-"),
|
||||
Token::Mul => f.write_str("*"),
|
||||
Token::Div => f.write_str("/"),
|
||||
Token::DuckIntDiv => f.write_str("//"),
|
||||
Token::StringConcat => f.write_str("||"),
|
||||
Token::Mod => f.write_str("%"),
|
||||
Token::LParen => f.write_str("("),
|
||||
|
@ -768,6 +771,9 @@ impl<'a> Tokenizer<'a> {
|
|||
comment,
|
||||
})))
|
||||
}
|
||||
Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => {
|
||||
self.consume_and_return(chars, Token::DuckIntDiv)
|
||||
}
|
||||
// a regular '/' operator
|
||||
_ => Ok(Some(Token::Div)),
|
||||
}
|
||||
|
|
|
@ -24,8 +24,9 @@ use sqlparser::ast::SelectItem::UnnamedExpr;
|
|||
use sqlparser::ast::TableFactor::Pivot;
|
||||
use sqlparser::ast::*;
|
||||
use sqlparser::dialect::{
|
||||
AnsiDialect, BigQueryDialect, ClickHouseDialect, GenericDialect, HiveDialect, MsSqlDialect,
|
||||
MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect, SnowflakeDialect,
|
||||
AnsiDialect, BigQueryDialect, ClickHouseDialect, DuckDbDialect, GenericDialect, HiveDialect,
|
||||
MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect,
|
||||
SnowflakeDialect,
|
||||
};
|
||||
use sqlparser::keywords::ALL_KEYWORDS;
|
||||
use sqlparser::parser::{Parser, ParserError, ParserOptions};
|
||||
|
@ -195,6 +196,7 @@ fn parse_update_set_from() {
|
|||
let dialects = TestedDialects {
|
||||
dialects: vec![
|
||||
Box::new(GenericDialect {}),
|
||||
Box::new(DuckDbDialect {}),
|
||||
Box::new(PostgreSqlDialect {}),
|
||||
Box::new(BigQueryDialect {}),
|
||||
Box::new(SnowflakeDialect {}),
|
||||
|
@ -941,6 +943,7 @@ fn parse_exponent_in_select() -> Result<(), ParserError> {
|
|||
Box::new(AnsiDialect {}),
|
||||
Box::new(BigQueryDialect {}),
|
||||
Box::new(ClickHouseDialect {}),
|
||||
Box::new(DuckDbDialect {}),
|
||||
Box::new(GenericDialect {}),
|
||||
// Box::new(HiveDialect {}),
|
||||
Box::new(MsSqlDialect {}),
|
||||
|
@ -2053,6 +2056,7 @@ fn parse_array_agg_func() {
|
|||
let supported_dialects = TestedDialects {
|
||||
dialects: vec![
|
||||
Box::new(GenericDialect {}),
|
||||
Box::new(DuckDbDialect {}),
|
||||
Box::new(PostgreSqlDialect {}),
|
||||
Box::new(MsSqlDialect {}),
|
||||
Box::new(AnsiDialect {}),
|
||||
|
@ -2848,6 +2852,7 @@ fn parse_alter_table_add_column_if_not_exists() {
|
|||
Box::new(PostgreSqlDialect {}),
|
||||
Box::new(BigQueryDialect {}),
|
||||
Box::new(GenericDialect {}),
|
||||
Box::new(DuckDbDialect {}),
|
||||
],
|
||||
options: None,
|
||||
};
|
||||
|
@ -6139,6 +6144,7 @@ fn test_placeholder() {
|
|||
let dialects = TestedDialects {
|
||||
dialects: vec![
|
||||
Box::new(GenericDialect {}),
|
||||
Box::new(DuckDbDialect {}),
|
||||
Box::new(PostgreSqlDialect {}),
|
||||
Box::new(MsSqlDialect {}),
|
||||
Box::new(AnsiDialect {}),
|
||||
|
@ -6873,6 +6879,7 @@ fn parse_non_latin_identifiers() {
|
|||
let supported_dialects = TestedDialects {
|
||||
dialects: vec![
|
||||
Box::new(GenericDialect {}),
|
||||
Box::new(DuckDbDialect {}),
|
||||
Box::new(PostgreSqlDialect {}),
|
||||
Box::new(MsSqlDialect {}),
|
||||
Box::new(RedshiftSqlDialect {}),
|
||||
|
|
70
tests/sqlparser_duckdb.rs
Normal file
70
tests/sqlparser_duckdb.rs
Normal file
|
@ -0,0 +1,70 @@
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#[macro_use]
|
||||
mod test_utils;
|
||||
|
||||
use test_utils::*;
|
||||
|
||||
use sqlparser::ast::*;
|
||||
use sqlparser::dialect::{DuckDbDialect, GenericDialect};
|
||||
|
||||
fn duckdb() -> TestedDialects {
|
||||
TestedDialects {
|
||||
dialects: vec![Box::new(DuckDbDialect {})],
|
||||
options: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn duckdb_and_generic() -> TestedDialects {
|
||||
TestedDialects {
|
||||
dialects: vec![Box::new(DuckDbDialect {}), Box::new(GenericDialect {})],
|
||||
options: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_select_wildcard_with_exclude() {
|
||||
let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data");
|
||||
let expected = SelectItem::Wildcard(WildcardAdditionalOptions {
|
||||
opt_exclude: Some(ExcludeSelectItem::Multiple(vec![Ident::new("col_a")])),
|
||||
..Default::default()
|
||||
});
|
||||
assert_eq!(expected, select.projection[0]);
|
||||
|
||||
let select =
|
||||
duckdb().verified_only_select("SELECT name.* EXCLUDE department_id FROM employee_table");
|
||||
let expected = SelectItem::QualifiedWildcard(
|
||||
ObjectName(vec![Ident::new("name")]),
|
||||
WildcardAdditionalOptions {
|
||||
opt_exclude: Some(ExcludeSelectItem::Single(Ident::new("department_id"))),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
assert_eq!(expected, select.projection[0]);
|
||||
|
||||
let select = duckdb()
|
||||
.verified_only_select("SELECT * EXCLUDE (department_id, employee_id) FROM employee_table");
|
||||
let expected = SelectItem::Wildcard(WildcardAdditionalOptions {
|
||||
opt_exclude: Some(ExcludeSelectItem::Multiple(vec![
|
||||
Ident::new("department_id"),
|
||||
Ident::new("employee_id"),
|
||||
])),
|
||||
..Default::default()
|
||||
});
|
||||
assert_eq!(expected, select.projection[0]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_div_infix() {
|
||||
duckdb_and_generic().verified_stmt(r#"SELECT 5 // 2"#);
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue