mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
Support DuckDB struct syntax and support list of struct syntax (#1372)
Signed-off-by: jayzhan211 <jayzhan211@gmail.com> Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org> Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
This commit is contained in:
parent
fab834dca3
commit
8c4d30bb6d
6 changed files with 209 additions and 33 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -16,3 +16,5 @@ Cargo.lock
|
|||
.vscode
|
||||
|
||||
*.swp
|
||||
|
||||
.DS_store
|
|
@ -302,7 +302,7 @@ pub enum DataType {
|
|||
///
|
||||
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
|
||||
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
||||
Struct(Vec<StructField>),
|
||||
Struct(Vec<StructField>, StructBracketKind),
|
||||
/// Union
|
||||
///
|
||||
/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html
|
||||
|
@ -517,9 +517,16 @@ impl fmt::Display for DataType {
|
|||
}
|
||||
write!(f, ")")
|
||||
}
|
||||
DataType::Struct(fields) => {
|
||||
DataType::Struct(fields, bracket) => {
|
||||
if !fields.is_empty() {
|
||||
write!(f, "STRUCT<{}>", display_comma_separated(fields))
|
||||
match bracket {
|
||||
StructBracketKind::Parentheses => {
|
||||
write!(f, "STRUCT({})", display_comma_separated(fields))
|
||||
}
|
||||
StructBracketKind::AngleBrackets => {
|
||||
write!(f, "STRUCT<{}>", display_comma_separated(fields))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
write!(f, "STRUCT")
|
||||
}
|
||||
|
@ -618,6 +625,17 @@ fn format_clickhouse_datetime_precision_and_timezone(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Bracket style that delimits the field list of a `STRUCT` data type.
///
/// Stored alongside the fields in `DataType::Struct` so that formatting the
/// type writes the field list with the matching pair of brackets. A struct
/// with no fields is displayed as a bare `STRUCT`, whichever variant is set.
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub enum StructBracketKind {
|
||||
/// Fields enclosed in parentheses, as produced when parsing with the
/// DuckDB dialect.
///
/// Example: `STRUCT(a INT, b STRING)`
|
||||
Parentheses,
|
||||
/// Fields enclosed in angle brackets, as produced when parsing with the
/// BigQuery or generic dialect.
///
/// Example: `STRUCT<a INT, b STRING>`
|
||||
AngleBrackets,
|
||||
}
|
||||
|
||||
/// Timestamp and Time data types information about TimeZone formatting.
|
||||
///
|
||||
/// This is more related to a display information than real differences between each variant. To
|
||||
|
|
|
@ -28,7 +28,8 @@ use serde::{Deserialize, Serialize};
|
|||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
pub use self::data_type::{
|
||||
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
|
||||
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
|
||||
StructBracketKind, TimezoneInfo,
|
||||
};
|
||||
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
|
||||
pub use self::ddl::{
|
||||
|
|
|
@ -2266,6 +2266,23 @@ impl<'a> Parser<'a> {
|
|||
))
|
||||
}
|
||||
|
||||
/// Duckdb Struct Data Type <https://duckdb.org/docs/sql/data_types/struct.html#retrieving-from-structs>
|
||||
fn parse_duckdb_struct_type_def(&mut self) -> Result<Vec<StructField>, ParserError> {
|
||||
self.expect_keyword(Keyword::STRUCT)?;
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let struct_body = self.parse_comma_separated(|parser| {
|
||||
let field_name = parser.parse_identifier(false)?;
|
||||
let field_type = parser.parse_data_type()?;
|
||||
|
||||
Ok(StructField {
|
||||
field_name: Some(field_name),
|
||||
field_type,
|
||||
})
|
||||
});
|
||||
self.expect_token(&Token::RParen)?;
|
||||
struct_body
|
||||
}
|
||||
|
||||
/// Parse a field definition in a [struct] or [tuple].
|
||||
/// Syntax:
|
||||
///
|
||||
|
@ -7495,12 +7512,20 @@ impl<'a> Parser<'a> {
|
|||
))))
|
||||
}
|
||||
}
|
||||
Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => {
|
||||
self.prev_token();
|
||||
let field_defs = self.parse_duckdb_struct_type_def()?;
|
||||
Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses))
|
||||
}
|
||||
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
|
||||
self.prev_token();
|
||||
let (field_defs, _trailing_bracket) =
|
||||
self.parse_struct_type_def(Self::parse_struct_field_def)?;
|
||||
trailing_bracket = _trailing_bracket;
|
||||
Ok(DataType::Struct(field_defs))
|
||||
Ok(DataType::Struct(
|
||||
field_defs,
|
||||
StructBracketKind::AngleBrackets,
|
||||
))
|
||||
}
|
||||
Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => {
|
||||
self.prev_token();
|
||||
|
|
|
@ -489,28 +489,34 @@ fn parse_nested_data_types() {
|
|||
vec![
|
||||
ColumnDef {
|
||||
name: Ident::new("x"),
|
||||
data_type: DataType::Struct(vec![
|
||||
StructField {
|
||||
field_name: Some("a".into()),
|
||||
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
|
||||
Box::new(DataType::Int64,)
|
||||
))
|
||||
},
|
||||
StructField {
|
||||
field_name: Some("b".into()),
|
||||
field_type: DataType::Bytes(Some(42))
|
||||
},
|
||||
]),
|
||||
data_type: DataType::Struct(
|
||||
vec![
|
||||
StructField {
|
||||
field_name: Some("a".into()),
|
||||
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
|
||||
Box::new(DataType::Int64,)
|
||||
))
|
||||
},
|
||||
StructField {
|
||||
field_name: Some("b".into()),
|
||||
field_type: DataType::Bytes(Some(42))
|
||||
},
|
||||
],
|
||||
StructBracketKind::AngleBrackets
|
||||
),
|
||||
collation: None,
|
||||
options: vec![],
|
||||
},
|
||||
ColumnDef {
|
||||
name: Ident::new("y"),
|
||||
data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
||||
DataType::Struct(vec![StructField {
|
||||
field_name: None,
|
||||
field_type: DataType::Int64,
|
||||
}]),
|
||||
DataType::Struct(
|
||||
vec![StructField {
|
||||
field_name: None,
|
||||
field_type: DataType::Int64,
|
||||
}],
|
||||
StructBracketKind::AngleBrackets
|
||||
),
|
||||
))),
|
||||
collation: None,
|
||||
options: vec![],
|
||||
|
@ -708,10 +714,13 @@ fn parse_typed_struct_syntax_bigquery() {
|
|||
},
|
||||
StructField {
|
||||
field_name: Some("str".into()),
|
||||
field_type: DataType::Struct(vec![StructField {
|
||||
field_name: None,
|
||||
field_type: DataType::Bool
|
||||
}])
|
||||
field_type: DataType::Struct(
|
||||
vec![StructField {
|
||||
field_name: None,
|
||||
field_type: DataType::Bool
|
||||
}],
|
||||
StructBracketKind::AngleBrackets
|
||||
)
|
||||
},
|
||||
]
|
||||
},
|
||||
|
@ -730,12 +739,15 @@ fn parse_typed_struct_syntax_bigquery() {
|
|||
fields: vec![
|
||||
StructField {
|
||||
field_name: Some("x".into()),
|
||||
field_type: DataType::Struct(Default::default())
|
||||
field_type: DataType::Struct(
|
||||
Default::default(),
|
||||
StructBracketKind::AngleBrackets
|
||||
)
|
||||
},
|
||||
StructField {
|
||||
field_name: Some("y".into()),
|
||||
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
||||
DataType::Struct(Default::default())
|
||||
DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
|
||||
)))
|
||||
},
|
||||
]
|
||||
|
@ -1013,10 +1025,13 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
|
|||
},
|
||||
StructField {
|
||||
field_name: Some("str".into()),
|
||||
field_type: DataType::Struct(vec![StructField {
|
||||
field_name: None,
|
||||
field_type: DataType::Bool
|
||||
}])
|
||||
field_type: DataType::Struct(
|
||||
vec![StructField {
|
||||
field_name: None,
|
||||
field_type: DataType::Bool
|
||||
}],
|
||||
StructBracketKind::AngleBrackets
|
||||
)
|
||||
},
|
||||
]
|
||||
},
|
||||
|
@ -1035,12 +1050,15 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
|
|||
fields: vec![
|
||||
StructField {
|
||||
field_name: Some("x".into()),
|
||||
field_type: DataType::Struct(Default::default())
|
||||
field_type: DataType::Struct(
|
||||
Default::default(),
|
||||
StructBracketKind::AngleBrackets
|
||||
)
|
||||
},
|
||||
StructField {
|
||||
field_name: Some("y".into()),
|
||||
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
||||
DataType::Struct(Default::default())
|
||||
DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
|
||||
)))
|
||||
},
|
||||
]
|
||||
|
|
|
@ -32,6 +32,118 @@ fn duckdb_and_generic() -> TestedDialects {
|
|||
}
|
||||
}
|
||||
|
||||
/// Checks DuckDB's parenthesised `STRUCT(...)` column types: a plain struct,
/// an array of structs, an array of nested structs, and a set of malformed
/// definitions that must be rejected.
#[test]
fn test_struct() {
    // A named struct field of the given type.
    let named = |name: &str, ty: DataType| StructField {
        field_name: Some(Ident::new(name)),
        field_type: ty,
    };
    // The single column `s` every statement below is expected to define.
    let column_s = |data_type: DataType| {
        vec![ColumnDef {
            name: Ident::new("s"),
            data_type,
            collation: None,
            options: vec![],
        }]
    };
    // `T[]` with no explicit size.
    let array_of =
        |elem: DataType| DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(elem), None));

    // s STRUCT(v VARCHAR, i INTEGER)
    let flat_struct = DataType::Struct(
        vec![
            named("v", DataType::Varchar(None)),
            named("i", DataType::Integer(None)),
        ],
        StructBracketKind::Parentheses,
    );

    // basic struct
    let parsed = duckdb().verified_stmt(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER))"#);
    assert_eq!(column_defs(parsed), column_s(flat_struct.clone()));

    // struct array
    let parsed = duckdb().verified_stmt(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)[])"#);
    assert_eq!(column_defs(parsed), column_s(array_of(flat_struct)));

    // s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))
    let inner_struct = DataType::Struct(
        vec![
            named("a1", DataType::Integer(None)),
            named("a2", DataType::Varchar(None)),
        ],
        StructBracketKind::Parentheses,
    );
    let nested_struct = DataType::Struct(
        vec![
            named("v", DataType::Varchar(None)),
            named("s", inner_struct),
        ],
        StructBracketKind::Parentheses,
    );

    // nested struct (as the element type of an array)
    let parsed = duckdb().verified_stmt(
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))[])"#,
    );
    assert_eq!(column_defs(parsed), column_s(array_of(nested_struct)));

    // Malformed definitions: unbalanced or angle brackets, missing
    // parentheses, or unnamed fields. Each one must fail to parse.
    let rejected = [
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)))"#,
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER>)"#,
        r#"CREATE TABLE t1 (s STRUCT<v VARCHAR, i INTEGER>)"#,
        r#"CREATE TABLE t1 (s STRUCT v VARCHAR, i INTEGER )"#,
        r#"CREATE TABLE t1 (s STRUCT VARCHAR, i INTEGER )"#,
        r#"CREATE TABLE t1 (s STRUCT (VARCHAR, INTEGER))"#,
    ];
    for sql in rejected {
        duckdb().parse_sql_statements(sql).unwrap_err();
    }
}
|
||||
|
||||
/// Returns the ColumnDefinitions from a CreateTable statement
|
||||
fn column_defs(statement: Statement) -> Vec<ColumnDef> {
|
||||
match statement {
|
||||
Statement::CreateTable(CreateTable { columns, .. }) => columns,
|
||||
_ => panic!("Expected CreateTable"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_select_wildcard_with_exclude() {
|
||||
let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue