mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-24 07:54:06 +00:00
Support DuckDB struct syntax and support list of struct syntax (#1372)
Signed-off-by: jayzhan211 <jayzhan211@gmail.com> Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org> Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
This commit is contained in:
parent
fab834dca3
commit
8c4d30bb6d
6 changed files with 209 additions and 33 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -16,3 +16,5 @@ Cargo.lock
|
||||||
.vscode
|
.vscode
|
||||||
|
|
||||||
*.swp
|
*.swp
|
||||||
|
|
||||||
|
.DS_store
|
|
@ -302,7 +302,7 @@ pub enum DataType {
|
||||||
///
|
///
|
||||||
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
|
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
|
||||||
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
||||||
Struct(Vec<StructField>),
|
Struct(Vec<StructField>, StructBracketKind),
|
||||||
/// Union
|
/// Union
|
||||||
///
|
///
|
||||||
/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html
|
/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html
|
||||||
|
@ -517,9 +517,16 @@ impl fmt::Display for DataType {
|
||||||
}
|
}
|
||||||
write!(f, ")")
|
write!(f, ")")
|
||||||
}
|
}
|
||||||
DataType::Struct(fields) => {
|
DataType::Struct(fields, bracket) => {
|
||||||
if !fields.is_empty() {
|
if !fields.is_empty() {
|
||||||
write!(f, "STRUCT<{}>", display_comma_separated(fields))
|
match bracket {
|
||||||
|
StructBracketKind::Parentheses => {
|
||||||
|
write!(f, "STRUCT({})", display_comma_separated(fields))
|
||||||
|
}
|
||||||
|
StructBracketKind::AngleBrackets => {
|
||||||
|
write!(f, "STRUCT<{}>", display_comma_separated(fields))
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
write!(f, "STRUCT")
|
write!(f, "STRUCT")
|
||||||
}
|
}
|
||||||
|
@ -618,6 +625,17 @@ fn format_clickhouse_datetime_precision_and_timezone(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Kind of delimiter that encloses the field list of a `STRUCT` data type.
///
/// Stored alongside the fields of a struct data type so that formatting the
/// type can emit `STRUCT(...)` or `STRUCT<...>` as appropriate.
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum StructBracketKind {
    /// Example: `STRUCT(a INT, b STRING)`
    Parentheses,
    /// Example: `STRUCT<a INT, b STRING>`
    AngleBrackets,
}
|
||||||
|
|
||||||
/// Timestamp and Time data types information about TimeZone formatting.
|
/// Timestamp and Time data types information about TimeZone formatting.
|
||||||
///
|
///
|
||||||
/// This is more related to a display information than real differences between each variant. To
|
/// This is more related to a display information than real differences between each variant. To
|
||||||
|
|
|
@ -28,7 +28,8 @@ use serde::{Deserialize, Serialize};
|
||||||
use sqlparser_derive::{Visit, VisitMut};
|
use sqlparser_derive::{Visit, VisitMut};
|
||||||
|
|
||||||
pub use self::data_type::{
|
pub use self::data_type::{
|
||||||
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
|
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
|
||||||
|
StructBracketKind, TimezoneInfo,
|
||||||
};
|
};
|
||||||
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
|
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
|
||||||
pub use self::ddl::{
|
pub use self::ddl::{
|
||||||
|
|
|
@ -2266,6 +2266,23 @@ impl<'a> Parser<'a> {
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Duckdb Struct Data Type <https://duckdb.org/docs/sql/data_types/struct.html#retrieving-from-structs>
|
||||||
|
fn parse_duckdb_struct_type_def(&mut self) -> Result<Vec<StructField>, ParserError> {
|
||||||
|
self.expect_keyword(Keyword::STRUCT)?;
|
||||||
|
self.expect_token(&Token::LParen)?;
|
||||||
|
let struct_body = self.parse_comma_separated(|parser| {
|
||||||
|
let field_name = parser.parse_identifier(false)?;
|
||||||
|
let field_type = parser.parse_data_type()?;
|
||||||
|
|
||||||
|
Ok(StructField {
|
||||||
|
field_name: Some(field_name),
|
||||||
|
field_type,
|
||||||
|
})
|
||||||
|
});
|
||||||
|
self.expect_token(&Token::RParen)?;
|
||||||
|
struct_body
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse a field definition in a [struct] or [tuple].
|
/// Parse a field definition in a [struct] or [tuple].
|
||||||
/// Syntax:
|
/// Syntax:
|
||||||
///
|
///
|
||||||
|
@ -7495,12 +7512,20 @@ impl<'a> Parser<'a> {
|
||||||
))))
|
))))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => {
|
||||||
|
self.prev_token();
|
||||||
|
let field_defs = self.parse_duckdb_struct_type_def()?;
|
||||||
|
Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses))
|
||||||
|
}
|
||||||
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
|
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
|
||||||
self.prev_token();
|
self.prev_token();
|
||||||
let (field_defs, _trailing_bracket) =
|
let (field_defs, _trailing_bracket) =
|
||||||
self.parse_struct_type_def(Self::parse_struct_field_def)?;
|
self.parse_struct_type_def(Self::parse_struct_field_def)?;
|
||||||
trailing_bracket = _trailing_bracket;
|
trailing_bracket = _trailing_bracket;
|
||||||
Ok(DataType::Struct(field_defs))
|
Ok(DataType::Struct(
|
||||||
|
field_defs,
|
||||||
|
StructBracketKind::AngleBrackets,
|
||||||
|
))
|
||||||
}
|
}
|
||||||
Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => {
|
Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => {
|
||||||
self.prev_token();
|
self.prev_token();
|
||||||
|
|
|
@ -489,28 +489,34 @@ fn parse_nested_data_types() {
|
||||||
vec![
|
vec![
|
||||||
ColumnDef {
|
ColumnDef {
|
||||||
name: Ident::new("x"),
|
name: Ident::new("x"),
|
||||||
data_type: DataType::Struct(vec![
|
data_type: DataType::Struct(
|
||||||
StructField {
|
vec![
|
||||||
field_name: Some("a".into()),
|
StructField {
|
||||||
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
|
field_name: Some("a".into()),
|
||||||
Box::new(DataType::Int64,)
|
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
|
||||||
))
|
Box::new(DataType::Int64,)
|
||||||
},
|
))
|
||||||
StructField {
|
},
|
||||||
field_name: Some("b".into()),
|
StructField {
|
||||||
field_type: DataType::Bytes(Some(42))
|
field_name: Some("b".into()),
|
||||||
},
|
field_type: DataType::Bytes(Some(42))
|
||||||
]),
|
},
|
||||||
|
],
|
||||||
|
StructBracketKind::AngleBrackets
|
||||||
|
),
|
||||||
collation: None,
|
collation: None,
|
||||||
options: vec![],
|
options: vec![],
|
||||||
},
|
},
|
||||||
ColumnDef {
|
ColumnDef {
|
||||||
name: Ident::new("y"),
|
name: Ident::new("y"),
|
||||||
data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
||||||
DataType::Struct(vec![StructField {
|
DataType::Struct(
|
||||||
field_name: None,
|
vec![StructField {
|
||||||
field_type: DataType::Int64,
|
field_name: None,
|
||||||
}]),
|
field_type: DataType::Int64,
|
||||||
|
}],
|
||||||
|
StructBracketKind::AngleBrackets
|
||||||
|
),
|
||||||
))),
|
))),
|
||||||
collation: None,
|
collation: None,
|
||||||
options: vec![],
|
options: vec![],
|
||||||
|
@ -708,10 +714,13 @@ fn parse_typed_struct_syntax_bigquery() {
|
||||||
},
|
},
|
||||||
StructField {
|
StructField {
|
||||||
field_name: Some("str".into()),
|
field_name: Some("str".into()),
|
||||||
field_type: DataType::Struct(vec![StructField {
|
field_type: DataType::Struct(
|
||||||
field_name: None,
|
vec![StructField {
|
||||||
field_type: DataType::Bool
|
field_name: None,
|
||||||
}])
|
field_type: DataType::Bool
|
||||||
|
}],
|
||||||
|
StructBracketKind::AngleBrackets
|
||||||
|
)
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -730,12 +739,15 @@ fn parse_typed_struct_syntax_bigquery() {
|
||||||
fields: vec![
|
fields: vec![
|
||||||
StructField {
|
StructField {
|
||||||
field_name: Some("x".into()),
|
field_name: Some("x".into()),
|
||||||
field_type: DataType::Struct(Default::default())
|
field_type: DataType::Struct(
|
||||||
|
Default::default(),
|
||||||
|
StructBracketKind::AngleBrackets
|
||||||
|
)
|
||||||
},
|
},
|
||||||
StructField {
|
StructField {
|
||||||
field_name: Some("y".into()),
|
field_name: Some("y".into()),
|
||||||
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
||||||
DataType::Struct(Default::default())
|
DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
|
||||||
)))
|
)))
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
@ -1013,10 +1025,13 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
|
||||||
},
|
},
|
||||||
StructField {
|
StructField {
|
||||||
field_name: Some("str".into()),
|
field_name: Some("str".into()),
|
||||||
field_type: DataType::Struct(vec![StructField {
|
field_type: DataType::Struct(
|
||||||
field_name: None,
|
vec![StructField {
|
||||||
field_type: DataType::Bool
|
field_name: None,
|
||||||
}])
|
field_type: DataType::Bool
|
||||||
|
}],
|
||||||
|
StructBracketKind::AngleBrackets
|
||||||
|
)
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -1035,12 +1050,15 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
|
||||||
fields: vec![
|
fields: vec![
|
||||||
StructField {
|
StructField {
|
||||||
field_name: Some("x".into()),
|
field_name: Some("x".into()),
|
||||||
field_type: DataType::Struct(Default::default())
|
field_type: DataType::Struct(
|
||||||
|
Default::default(),
|
||||||
|
StructBracketKind::AngleBrackets
|
||||||
|
)
|
||||||
},
|
},
|
||||||
StructField {
|
StructField {
|
||||||
field_name: Some("y".into()),
|
field_name: Some("y".into()),
|
||||||
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
||||||
DataType::Struct(Default::default())
|
DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
|
||||||
)))
|
)))
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
|
@ -32,6 +32,118 @@ fn duckdb_and_generic() -> TestedDialects {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_struct() {
    // Small builders so the expected ASTs below read like the SQL they mirror.
    let named_field = |name: &str, field_type: DataType| StructField {
        field_name: Some(Ident::new(name)),
        field_type,
    };
    let paren_struct =
        |fields: Vec<StructField>| DataType::Struct(fields, StructBracketKind::Parentheses);
    let square_array =
        |elem: DataType| DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(elem), None));
    // Every statement in this test declares exactly one column, named `s`.
    let only_column_s = |data_type: DataType| {
        vec![ColumnDef {
            name: "s".into(),
            data_type,
            collation: None,
            options: vec![],
        }]
    };

    // STRUCT(v VARCHAR, i INTEGER)
    let flat = paren_struct(vec![
        named_field("v", DataType::Varchar(None)),
        named_field("i", DataType::Integer(None)),
    ]);

    // A plain struct column.
    let parsed = duckdb().verified_stmt(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER))"#);
    assert_eq!(column_defs(parsed), only_column_s(flat.clone()));

    // An array of structs.
    let parsed = duckdb().verified_stmt(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)[])"#);
    assert_eq!(column_defs(parsed), only_column_s(square_array(flat)));

    // STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))
    let nested = paren_struct(vec![
        named_field("v", DataType::Varchar(None)),
        named_field(
            "s",
            paren_struct(vec![
                named_field("a1", DataType::Integer(None)),
                named_field("a2", DataType::Varchar(None)),
            ]),
        ),
    ]);

    // An array whose element struct itself contains a struct.
    let parsed = duckdb().verified_stmt(
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))[])"#,
    );
    assert_eq!(column_defs(parsed), only_column_s(square_array(nested)));

    // Inputs the DuckDB dialect must reject: unbalanced or mismatched
    // delimiters, angle brackets, missing parentheses and unnamed fields.
    let rejected = [
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)))"#,
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER>)"#,
        r#"CREATE TABLE t1 (s STRUCT<v VARCHAR, i INTEGER>)"#,
        r#"CREATE TABLE t1 (s STRUCT v VARCHAR, i INTEGER )"#,
        r#"CREATE TABLE t1 (s STRUCT VARCHAR, i INTEGER )"#,
        r#"CREATE TABLE t1 (s STRUCT (VARCHAR, INTEGER))"#,
    ];
    for sql in rejected.iter() {
        duckdb().parse_sql_statements(sql).unwrap_err();
    }
}
|
||||||
|
|
||||||
|
/// Returns the ColumnDefinitions from a CreateTable statement
|
||||||
|
fn column_defs(statement: Statement) -> Vec<ColumnDef> {
|
||||||
|
match statement {
|
||||||
|
Statement::CreateTable(CreateTable { columns, .. }) => columns,
|
||||||
|
_ => panic!("Expected CreateTable"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_select_wildcard_with_exclude() {
|
fn test_select_wildcard_with_exclude() {
|
||||||
let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data");
|
let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue