Support for DuckDB Union datatype (#1322)

This commit is contained in:
gstvg 2024-06-27 08:58:11 -03:00 committed by GitHub
parent f5ccef6ea9
commit f9ab8dcc27
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 151 additions and 1 deletions

View file

@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize};
#[cfg(feature = "visitor")]
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::{display_comma_separated, ObjectName, StructField};
use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField};
use super::{value::escape_single_quote_string, ColumnDef};
@ -303,6 +303,10 @@ pub enum DataType {
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Struct(Vec<StructField>),
/// Union
///
/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html
Union(Vec<UnionField>),
/// Nullable - special marker NULL represents in ClickHouse as a data type.
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable
@ -516,6 +520,9 @@ impl fmt::Display for DataType {
write!(f, "STRUCT")
}
}
DataType::Union(fields) => {
write!(f, "UNION({})", display_comma_separated(fields))
}
// ClickHouse
DataType::Nullable(data_type) => {
write!(f, "Nullable({})", data_type)

View file

@ -294,6 +294,23 @@ impl fmt::Display for StructField {
}
}
/// A field definition within a union, as supported by [duckdb].
///
/// Each field pairs a member name with the data type of that member and is
/// displayed as `field_name field_type`.
///
/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct UnionField {
/// Name of the union member.
pub field_name: Ident,
/// Data type of the union member.
pub field_type: DataType,
}
impl fmt::Display for UnionField {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} {}", self.field_name, self.field_type)
}
}
/// A dictionary field within a dictionary.
///
/// [duckdb]: https://duckdb.org/docs/sql/data_types/struct#creating-structs

View file

@ -2246,6 +2246,32 @@ impl<'a> Parser<'a> {
))
}
/// DuckDB specific: Parse a [union][1] type definition, i.e. the `UNION`
/// keyword followed by a parenthesized, comma-separated list of
/// `field_name field_type` members.
///
/// Syntax:
///
/// ```sql
/// UNION(field_name field_type[,...])
/// ```
///
/// Returns the parsed members in source order, or the first parse error
/// encountered.
///
/// [1]: https://duckdb.org/docs/sql/data_types/union.html
fn parse_union_type_def(&mut self) -> Result<Vec<UnionField>, ParserError> {
    self.expect_keyword(Keyword::UNION)?;
    self.expect_token(&Token::LParen)?;

    // Each member is an identifier immediately followed by its data type.
    let members = self.parse_comma_separated(|parser| {
        let field_name = parser.parse_identifier(false)?;
        let field_type = parser.parse_data_type()?;
        Ok(UnionField {
            field_name,
            field_type,
        })
    })?;

    self.expect_token(&Token::RParen)?;
    Ok(members)
}
/// DuckDB specific: Parse a duckdb dictionary [1]
///
/// Syntax:
@ -7136,6 +7162,11 @@ impl<'a> Parser<'a> {
trailing_bracket = _trailing_bracket;
Ok(DataType::Struct(field_defs))
}
Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => {
self.prev_token();
let fields = self.parse_union_type_def()?;
Ok(DataType::Union(fields))
}
Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
Ok(self.parse_sub_type(DataType::Nullable)?)
}

View file

@ -545,3 +545,98 @@ fn test_array_index() {
expr
);
}
/// Round-trips a `CREATE TABLE` with single-member, multi-member and nested
/// `UNION(...)` column types, and checks the resulting AST.
#[test]
fn test_duckdb_union_datatype() {
    /// Builds a union member with the given name and the given type.
    fn member(name: &'static str, field_type: DataType) -> UnionField {
        UnionField {
            field_name: name.into(),
            field_type,
        }
    }

    /// Builds a column whose data type is a union of `members`.
    fn union_column(name: &'static str, members: Vec<UnionField>) -> ColumnDef {
        ColumnDef {
            name: name.into(),
            data_type: DataType::Union(members),
            collation: Default::default(),
            options: Default::default(),
        }
    }

    let sql = "CREATE TABLE tbl1 (one UNION(a INT), two UNION(a INT, b INT), nested UNION(a UNION(b INT)))";
    let stmt = duckdb_and_generic().verified_stmt(sql);

    let columns = vec![
        union_column("one", vec![member("a", DataType::Int(None))]),
        union_column(
            "two",
            vec![
                member("a", DataType::Int(None)),
                member("b", DataType::Int(None)),
            ],
        ),
        union_column(
            "nested",
            vec![member(
                "a",
                DataType::Union(vec![member("b", DataType::Int(None))]),
            )],
        ),
    ];

    let expected = Statement::CreateTable(CreateTable {
        or_replace: Default::default(),
        temporary: Default::default(),
        external: Default::default(),
        global: Default::default(),
        if_not_exists: Default::default(),
        transient: Default::default(),
        volatile: Default::default(),
        name: ObjectName(vec!["tbl1".into()]),
        columns,
        constraints: Default::default(),
        hive_distribution: HiveDistributionStyle::NONE,
        hive_formats: Some(HiveFormat {
            row_format: Default::default(),
            serde_properties: Default::default(),
            storage: Default::default(),
            location: Default::default(),
        }),
        table_properties: Default::default(),
        with_options: Default::default(),
        file_format: Default::default(),
        location: Default::default(),
        query: Default::default(),
        without_rowid: Default::default(),
        like: Default::default(),
        clone: Default::default(),
        engine: Default::default(),
        comment: Default::default(),
        auto_increment_offset: Default::default(),
        default_charset: Default::default(),
        collation: Default::default(),
        on_commit: Default::default(),
        on_cluster: Default::default(),
        primary_key: Default::default(),
        order_by: Default::default(),
        partition_by: Default::default(),
        cluster_by: Default::default(),
        options: Default::default(),
        strict: Default::default(),
        copy_grants: Default::default(),
        enable_schema_evolution: Default::default(),
        change_tracking: Default::default(),
        data_retention_time_in_days: Default::default(),
        max_data_extension_time_in_days: Default::default(),
        default_ddl_collation: Default::default(),
        with_aggregation_policy: Default::default(),
        with_row_access_policy: Default::default(),
        with_tags: Default::default(),
    });

    assert_eq!(expected, stmt);
}