Support DuckDB struct syntax and support list of struct syntax (#1372)

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
This commit is contained in:
Jay Zhan 2024-08-15 21:02:42 +08:00 committed by GitHub
parent fab834dca3
commit 8c4d30bb6d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 209 additions and 33 deletions

2
.gitignore vendored
View file

@ -16,3 +16,5 @@ Cargo.lock
.vscode .vscode
*.swp *.swp
.DS_store

View file

@ -302,7 +302,7 @@ pub enum DataType {
/// ///
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Struct(Vec<StructField>), Struct(Vec<StructField>, StructBracketKind),
/// Union /// Union
/// ///
/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html /// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html
@ -517,9 +517,16 @@ impl fmt::Display for DataType {
} }
write!(f, ")") write!(f, ")")
} }
DataType::Struct(fields) => { DataType::Struct(fields, bracket) => {
if !fields.is_empty() { if !fields.is_empty() {
match bracket {
StructBracketKind::Parentheses => {
write!(f, "STRUCT({})", display_comma_separated(fields))
}
StructBracketKind::AngleBrackets => {
write!(f, "STRUCT<{}>", display_comma_separated(fields)) write!(f, "STRUCT<{}>", display_comma_separated(fields))
}
}
} else { } else {
write!(f, "STRUCT") write!(f, "STRUCT")
} }
@ -618,6 +625,17 @@ fn format_clickhouse_datetime_precision_and_timezone(
Ok(()) Ok(())
} }
/// Kind of bracket that delimits the field list of a `STRUCT` data type.
///
/// Stored alongside the fields in `DataType::Struct` so that displaying the
/// type writes the field list back between the same delimiters.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum StructBracketKind {
/// Field list enclosed in `(` and `)`.
///
/// Example: `STRUCT(a INT, b STRING)`
Parentheses,
/// Field list enclosed in `<` and `>`.
///
/// Example: `STRUCT<a INT, b STRING>`
AngleBrackets,
}
/// Timestamp and Time data types information about TimeZone formatting. /// Timestamp and Time data types information about TimeZone formatting.
/// ///
/// This is more related to a display information than real differences between each variant. To /// This is more related to a display information than real differences between each variant. To

View file

@ -28,7 +28,8 @@ use serde::{Deserialize, Serialize};
use sqlparser_derive::{Visit, VisitMut}; use sqlparser_derive::{Visit, VisitMut};
pub use self::data_type::{ pub use self::data_type::{
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
StructBracketKind, TimezoneInfo,
}; };
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue}; pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
pub use self::ddl::{ pub use self::ddl::{

View file

@ -2266,6 +2266,23 @@ impl<'a> Parser<'a> {
)) ))
} }
/// Parses a DuckDB struct data type, e.g. `STRUCT(v VARCHAR, i INTEGER)`.
///
/// See <https://duckdb.org/docs/sql/data_types/struct.html#retrieving-from-structs>
///
/// Expects the parser to be positioned on the `STRUCT` keyword. Consumes the
/// keyword, the opening parenthesis, a comma-separated list of
/// `<field name> <data type>` pairs and the closing parenthesis.
///
/// # Errors
///
/// Returns a [`ParserError`] if the `STRUCT` keyword or either parenthesis is
/// missing, or if any field is not an identifier followed by a data type.
/// A failure inside the field list is reported as-is rather than being
/// replaced by a less specific "expected `)`" error.
fn parse_duckdb_struct_type_def(&mut self) -> Result<Vec<StructField>, ParserError> {
    self.expect_keyword(Keyword::STRUCT)?;
    self.expect_token(&Token::LParen)?;
    // Propagate a field-level failure right away: after a failed field the
    // next token is usually not `)`, so checking for the closing parenthesis
    // first would hide the real cause behind a generic bracket error.
    let fields = self.parse_comma_separated(|parser| {
        let field_name = parser.parse_identifier(false)?;
        let field_type = parser.parse_data_type()?;

        Ok(StructField {
            field_name: Some(field_name),
            field_type,
        })
    })?;
    self.expect_token(&Token::RParen)?;
    Ok(fields)
}
/// Parse a field definition in a [struct] or [tuple]. /// Parse a field definition in a [struct] or [tuple].
/// Syntax: /// Syntax:
/// ///
@ -7495,12 +7512,20 @@ impl<'a> Parser<'a> {
)))) ))))
} }
} }
Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => {
self.prev_token();
let field_defs = self.parse_duckdb_struct_type_def()?;
Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses))
}
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
self.prev_token(); self.prev_token();
let (field_defs, _trailing_bracket) = let (field_defs, _trailing_bracket) =
self.parse_struct_type_def(Self::parse_struct_field_def)?; self.parse_struct_type_def(Self::parse_struct_field_def)?;
trailing_bracket = _trailing_bracket; trailing_bracket = _trailing_bracket;
Ok(DataType::Struct(field_defs)) Ok(DataType::Struct(
field_defs,
StructBracketKind::AngleBrackets,
))
} }
Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => { Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => {
self.prev_token(); self.prev_token();

View file

@ -489,7 +489,8 @@ fn parse_nested_data_types() {
vec![ vec![
ColumnDef { ColumnDef {
name: Ident::new("x"), name: Ident::new("x"),
data_type: DataType::Struct(vec![ data_type: DataType::Struct(
vec![
StructField { StructField {
field_name: Some("a".into()), field_name: Some("a".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket( field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
@ -500,17 +501,22 @@ fn parse_nested_data_types() {
field_name: Some("b".into()), field_name: Some("b".into()),
field_type: DataType::Bytes(Some(42)) field_type: DataType::Bytes(Some(42))
}, },
]), ],
StructBracketKind::AngleBrackets
),
collation: None, collation: None,
options: vec![], options: vec![],
}, },
ColumnDef { ColumnDef {
name: Ident::new("y"), name: Ident::new("y"),
data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(vec![StructField { DataType::Struct(
vec![StructField {
field_name: None, field_name: None,
field_type: DataType::Int64, field_type: DataType::Int64,
}]), }],
StructBracketKind::AngleBrackets
),
))), ))),
collation: None, collation: None,
options: vec![], options: vec![],
@ -708,10 +714,13 @@ fn parse_typed_struct_syntax_bigquery() {
}, },
StructField { StructField {
field_name: Some("str".into()), field_name: Some("str".into()),
field_type: DataType::Struct(vec![StructField { field_type: DataType::Struct(
vec![StructField {
field_name: None, field_name: None,
field_type: DataType::Bool field_type: DataType::Bool
}]) }],
StructBracketKind::AngleBrackets
)
}, },
] ]
}, },
@ -730,12 +739,15 @@ fn parse_typed_struct_syntax_bigquery() {
fields: vec![ fields: vec![
StructField { StructField {
field_name: Some("x".into()), field_name: Some("x".into()),
field_type: DataType::Struct(Default::default()) field_type: DataType::Struct(
Default::default(),
StructBracketKind::AngleBrackets
)
}, },
StructField { StructField {
field_name: Some("y".into()), field_name: Some("y".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(Default::default()) DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
))) )))
}, },
] ]
@ -1013,10 +1025,13 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
}, },
StructField { StructField {
field_name: Some("str".into()), field_name: Some("str".into()),
field_type: DataType::Struct(vec![StructField { field_type: DataType::Struct(
vec![StructField {
field_name: None, field_name: None,
field_type: DataType::Bool field_type: DataType::Bool
}]) }],
StructBracketKind::AngleBrackets
)
}, },
] ]
}, },
@ -1035,12 +1050,15 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
fields: vec![ fields: vec![
StructField { StructField {
field_name: Some("x".into()), field_name: Some("x".into()),
field_type: DataType::Struct(Default::default()) field_type: DataType::Struct(
Default::default(),
StructBracketKind::AngleBrackets
)
}, },
StructField { StructField {
field_name: Some("y".into()), field_name: Some("y".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(Default::default()) DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
))) )))
}, },
] ]

View file

@ -32,6 +32,118 @@ fn duckdb_and_generic() -> TestedDialects {
} }
} }
/// DuckDB parenthesised struct types: a flat struct, a list of structs, a
/// struct nested in a struct, and a set of malformed declarations that must
/// be rejected.
#[test]
fn test_struct() {
    // Builds a named struct field.
    let field = |name: &str, field_type: DataType| StructField {
        field_name: Some(Ident::new(name)),
        field_type,
    };
    // Every statement below declares exactly one column `s`; only its type varies.
    let column_s = |data_type: DataType| {
        vec![ColumnDef {
            name: "s".into(),
            data_type,
            collation: None,
            options: vec![],
        }]
    };
    // Wraps an element type as `<type>[]`.
    let list_of =
        |elem: DataType| DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(elem), None));

    // s STRUCT(v VARCHAR, i INTEGER)
    let flat_struct = DataType::Struct(
        vec![
            field("v", DataType::Varchar(None)),
            field("i", DataType::Integer(None)),
        ],
        StructBracketKind::Parentheses,
    );

    // basic struct
    let parsed = duckdb().verified_stmt(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER))"#);
    assert_eq!(column_defs(parsed), column_s(flat_struct.clone()));

    // struct array
    let parsed = duckdb().verified_stmt(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)[])"#);
    assert_eq!(column_defs(parsed), column_s(list_of(flat_struct)));

    // s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))
    let inner_struct = DataType::Struct(
        vec![
            field("a1", DataType::Integer(None)),
            field("a2", DataType::Varchar(None)),
        ],
        StructBracketKind::Parentheses,
    );
    let nested_struct = DataType::Struct(
        vec![
            field("v", DataType::Varchar(None)),
            field("s", inner_struct),
        ],
        StructBracketKind::Parentheses,
    );

    // nested struct
    let parsed = duckdb().verified_stmt(
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))[])"#,
    );
    assert_eq!(column_defs(parsed), column_s(list_of(nested_struct)));

    // failing test (duckdb does not support bracket syntax)
    let rejected = [
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)))"#,
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER>)"#,
        r#"CREATE TABLE t1 (s STRUCT<v VARCHAR, i INTEGER>)"#,
        r#"CREATE TABLE t1 (s STRUCT v VARCHAR, i INTEGER )"#,
        r#"CREATE TABLE t1 (s STRUCT VARCHAR, i INTEGER )"#,
        r#"CREATE TABLE t1 (s STRUCT (VARCHAR, INTEGER))"#,
    ];
    for sql in rejected {
        duckdb().parse_sql_statements(sql).unwrap_err();
    }
}
/// Extracts the column definitions of a `CREATE TABLE` statement.
///
/// # Panics
///
/// Panics with "Expected CreateTable" if `statement` is any other kind of
/// statement.
fn column_defs(statement: Statement) -> Vec<ColumnDef> {
    if let Statement::CreateTable(CreateTable { columns, .. }) = statement {
        columns
    } else {
        panic!("Expected CreateTable")
    }
}
#[test] #[test]
fn test_select_wildcard_with_exclude() { fn test_select_wildcard_with_exclude() {
let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data"); let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data");