Support Struct datatype parsing for GenericDialect (#1241)

This commit is contained in:
Duong Cong Toai 2024-04-30 23:16:23 +02:00 committed by GitHub
parent fb20f8ccbe
commit bafaa914b0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 345 additions and 8 deletions

View file

@ -6638,7 +6638,7 @@ impl<'a> Parser<'a> {
))))
}
}
Keyword::STRUCT if dialect_of!(self is BigQueryDialect) => {
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
self.prev_token();
let (field_defs, _trailing_bracket) =
self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?;

View file

@ -350,7 +350,7 @@ fn parse_create_table_with_options() {
#[test]
fn parse_nested_data_types() {
let sql = "CREATE TABLE table (x STRUCT<a ARRAY<INT64>, b BYTES(42)>, y ARRAY<STRUCT<INT64>>)";
match bigquery().one_statement_parses_to(sql, sql) {
match bigquery_and_generic().one_statement_parses_to(sql, sql) {
Statement::CreateTable { name, columns, .. } => {
assert_eq!(name, ObjectName(vec!["table".into()]));
assert_eq!(
@ -395,19 +395,25 @@ fn parse_nested_data_types() {
fn parse_invalid_brackets() {
let sql = "SELECT STRUCT<INT64>>(NULL)";
assert_eq!(
bigquery().parse_sql_statements(sql).unwrap_err(),
bigquery_and_generic()
.parse_sql_statements(sql)
.unwrap_err(),
ParserError::ParserError("unmatched > in STRUCT literal".to_string())
);
let sql = "SELECT STRUCT<STRUCT<INT64>>>(NULL)";
assert_eq!(
bigquery().parse_sql_statements(sql).unwrap_err(),
bigquery_and_generic()
.parse_sql_statements(sql)
.unwrap_err(),
ParserError::ParserError("Expected (, found: >".to_string())
);
let sql = "CREATE TABLE table (x STRUCT<STRUCT<INT64>>>)";
assert_eq!(
bigquery().parse_sql_statements(sql).unwrap_err(),
bigquery_and_generic()
.parse_sql_statements(sql)
.unwrap_err(),
ParserError::ParserError(
"Expected ',' or ')' after column definition, found: >".to_string()
)
@ -445,7 +451,7 @@ fn parse_typeless_struct_syntax() {
// typeless struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax
// syntax: STRUCT( expr1 [AS field_name] [, ... ])
let sql = "SELECT STRUCT(1, 2, 3), STRUCT('abc'), STRUCT(1, t.str_col), STRUCT(1 AS a, 'abc' AS b), STRUCT(str_col AS abc)";
let select = bigquery().verified_only_select(sql);
let select = bigquery_and_generic().verified_only_select(sql);
assert_eq!(5, select.projection.len());
assert_eq!(
&Expr::Struct {
@ -505,7 +511,7 @@ fn parse_typeless_struct_syntax() {
}
#[test]
fn parse_typed_struct_syntax() {
fn parse_typed_struct_syntax_bigquery() {
// typed struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax
// syntax: STRUCT<[field_name] field_type, ...>( expr1 [, ... ])
@ -789,7 +795,291 @@ fn parse_typed_struct_syntax() {
}
#[test]
fn parse_typed_struct_with_field_name() {
fn parse_typed_struct_syntax_bigquery_and_generic() {
// typed struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax
// syntax: STRUCT<[field_name] field_type, ...>( expr1 [, ... ])
let sql = r#"SELECT STRUCT<INT64>(5), STRUCT<x INT64, y STRING>(1, t.str_col), STRUCT<arr ARRAY<FLOAT64>, str STRUCT<BOOL>>(nested_col)"#;
let select = bigquery_and_generic().verified_only_select(sql);
assert_eq!(3, select.projection.len());
assert_eq!(
&Expr::Struct {
values: vec![Expr::Value(number("5")),],
fields: vec![StructField {
field_name: None,
field_type: DataType::Int64,
}]
},
expr_from_projection(&select.projection[0])
);
assert_eq!(
&Expr::Struct {
values: vec![
Expr::Value(number("1")),
Expr::CompoundIdentifier(vec![
Ident {
value: "t".into(),
quote_style: None,
},
Ident {
value: "str_col".into(),
quote_style: None,
},
]),
],
fields: vec![
StructField {
field_name: Some(Ident {
value: "x".into(),
quote_style: None,
}),
field_type: DataType::Int64
},
StructField {
field_name: Some(Ident {
value: "y".into(),
quote_style: None,
}),
field_type: DataType::String(None)
},
]
},
expr_from_projection(&select.projection[1])
);
assert_eq!(
&Expr::Struct {
values: vec![Expr::Identifier(Ident {
value: "nested_col".into(),
quote_style: None,
}),],
fields: vec![
StructField {
field_name: Some("arr".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Float64
)))
},
StructField {
field_name: Some("str".into()),
field_type: DataType::Struct(vec![StructField {
field_name: None,
field_type: DataType::Bool
}])
},
]
},
expr_from_projection(&select.projection[2])
);
let sql = r#"SELECT STRUCT<x STRUCT, y ARRAY<STRUCT>>(nested_col)"#;
let select = bigquery_and_generic().verified_only_select(sql);
assert_eq!(1, select.projection.len());
assert_eq!(
&Expr::Struct {
values: vec![Expr::Identifier(Ident {
value: "nested_col".into(),
quote_style: None,
}),],
fields: vec![
StructField {
field_name: Some("x".into()),
field_type: DataType::Struct(Default::default())
},
StructField {
field_name: Some("y".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(Default::default())
)))
},
]
},
expr_from_projection(&select.projection[0])
);
let sql = r#"SELECT STRUCT<BOOL>(true), STRUCT<BYTES(42)>(B'abc')"#;
let select = bigquery_and_generic().verified_only_select(sql);
assert_eq!(2, select.projection.len());
assert_eq!(
&Expr::Struct {
values: vec![Expr::Value(Value::Boolean(true)),],
fields: vec![StructField {
field_name: None,
field_type: DataType::Bool
}]
},
expr_from_projection(&select.projection[0])
);
assert_eq!(
&Expr::Struct {
values: vec![Expr::Value(Value::SingleQuotedByteStringLiteral(
"abc".into()
)),],
fields: vec![StructField {
field_name: None,
field_type: DataType::Bytes(Some(42))
}]
},
expr_from_projection(&select.projection[1])
);
let sql = r#"SELECT STRUCT<DATE>('2011-05-05'), STRUCT<DATETIME>(DATETIME '1999-01-01 01:23:34.45'), STRUCT<FLOAT64>(5.0), STRUCT<INT64>(1)"#;
let select = bigquery_and_generic().verified_only_select(sql);
assert_eq!(4, select.projection.len());
assert_eq!(
&Expr::Struct {
values: vec![Expr::Value(Value::SingleQuotedString(
"2011-05-05".to_string()
)),],
fields: vec![StructField {
field_name: None,
field_type: DataType::Date
}]
},
expr_from_projection(&select.projection[0])
);
assert_eq!(
&Expr::Struct {
values: vec![Expr::TypedString {
data_type: DataType::Datetime(None),
value: "1999-01-01 01:23:34.45".to_string()
},],
fields: vec![StructField {
field_name: None,
field_type: DataType::Datetime(None)
}]
},
expr_from_projection(&select.projection[1])
);
assert_eq!(
&Expr::Struct {
values: vec![Expr::Value(number("5.0")),],
fields: vec![StructField {
field_name: None,
field_type: DataType::Float64
}]
},
expr_from_projection(&select.projection[2])
);
assert_eq!(
&Expr::Struct {
values: vec![Expr::Value(number("1")),],
fields: vec![StructField {
field_name: None,
field_type: DataType::Int64
}]
},
expr_from_projection(&select.projection[3])
);
let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '1-2 3 4:5:6.789999'), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
let select = bigquery_and_generic().verified_only_select(sql);
assert_eq!(2, select.projection.len());
assert_eq!(
&Expr::Struct {
values: vec![Expr::Interval(ast::Interval {
value: Box::new(Expr::Value(Value::SingleQuotedString(
"1-2 3 4:5:6.789999".to_string()
))),
leading_field: None,
leading_precision: None,
last_field: None,
fractional_seconds_precision: None
}),],
fields: vec![StructField {
field_name: None,
field_type: DataType::Interval
}]
},
expr_from_projection(&select.projection[0])
);
assert_eq!(
&Expr::Struct {
values: vec![Expr::TypedString {
data_type: DataType::JSON,
value: r#"{"class" : {"students" : [{"name" : "Jane"}]}}"#.to_string()
},],
fields: vec![StructField {
field_name: None,
field_type: DataType::JSON
}]
},
expr_from_projection(&select.projection[1])
);
let sql = r#"SELECT STRUCT<STRING(42)>('foo'), STRUCT<TIMESTAMP>(TIMESTAMP '2008-12-25 15:30:00 America/Los_Angeles'), STRUCT<TIME>(TIME '15:30:00')"#;
let select = bigquery_and_generic().verified_only_select(sql);
assert_eq!(3, select.projection.len());
assert_eq!(
&Expr::Struct {
values: vec![Expr::Value(Value::SingleQuotedString("foo".to_string())),],
fields: vec![StructField {
field_name: None,
field_type: DataType::String(Some(42))
}]
},
expr_from_projection(&select.projection[0])
);
assert_eq!(
&Expr::Struct {
values: vec![Expr::TypedString {
data_type: DataType::Timestamp(None, TimezoneInfo::None),
value: "2008-12-25 15:30:00 America/Los_Angeles".to_string()
},],
fields: vec![StructField {
field_name: None,
field_type: DataType::Timestamp(None, TimezoneInfo::None)
}]
},
expr_from_projection(&select.projection[1])
);
assert_eq!(
&Expr::Struct {
values: vec![Expr::TypedString {
data_type: DataType::Time(None, TimezoneInfo::None),
value: "15:30:00".to_string()
},],
fields: vec![StructField {
field_name: None,
field_type: DataType::Time(None, TimezoneInfo::None)
}]
},
expr_from_projection(&select.projection[2])
);
let sql = r#"SELECT STRUCT<NUMERIC>(NUMERIC '1'), STRUCT<BIGNUMERIC>(BIGNUMERIC '1')"#;
let select = bigquery_and_generic().verified_only_select(sql);
assert_eq!(2, select.projection.len());
assert_eq!(
&Expr::Struct {
values: vec![Expr::TypedString {
data_type: DataType::Numeric(ExactNumberInfo::None),
value: "1".to_string()
},],
fields: vec![StructField {
field_name: None,
field_type: DataType::Numeric(ExactNumberInfo::None)
}]
},
expr_from_projection(&select.projection[0])
);
assert_eq!(
&Expr::Struct {
values: vec![Expr::TypedString {
data_type: DataType::BigNumeric(ExactNumberInfo::None),
value: "1".to_string()
},],
fields: vec![StructField {
field_name: None,
field_type: DataType::BigNumeric(ExactNumberInfo::None)
}]
},
expr_from_projection(&select.projection[1])
);
}
#[test]
fn parse_typed_struct_with_field_name_bigquery() {
let sql = r#"SELECT STRUCT<x INT64>(5), STRUCT<y STRING>("foo")"#;
let select = bigquery().verified_only_select(sql);
assert_eq!(2, select.projection.len());
@ -835,6 +1125,53 @@ fn parse_typed_struct_with_field_name() {
);
}
#[test]
fn parse_typed_struct_with_field_name_bigquery_and_generic() {
    // Typed struct literals whose fields carry explicit names, checked with
    // `bigquery_and_generic()` rather than `bigquery()` alone.
    //
    // The string argument is single-quoted, the same form the other
    // `bigquery_and_generic()` tests in this file use for string literals.
    let sql = r#"SELECT STRUCT<x INT64>(5), STRUCT<y STRING>('foo')"#;
    // This statement previously went through `bigquery()` only, so the first
    // half of the test never exercised the second dialect its name promises.
    let select = bigquery_and_generic().verified_only_select(sql);
    assert_eq!(2, select.projection.len());
    // STRUCT<x INT64>(5): one named INT64 field holding the number 5.
    assert_eq!(
        &Expr::Struct {
            values: vec![Expr::Value(number("5")),],
            fields: vec![StructField {
                field_name: Some(Ident::from("x")),
                field_type: DataType::Int64
            }]
        },
        expr_from_projection(&select.projection[0])
    );
    // STRUCT<y STRING>('foo'): one named STRING field holding a quoted string.
    assert_eq!(
        &Expr::Struct {
            values: vec![Expr::Value(Value::SingleQuotedString("foo".to_string())),],
            fields: vec![StructField {
                field_name: Some(Ident::from("y")),
                field_type: DataType::String(None)
            }]
        },
        expr_from_projection(&select.projection[1])
    );

    // Several named fields inside a single typed struct.
    let sql = r#"SELECT STRUCT<x INT64, y INT64>(5, 5)"#;
    let select = bigquery_and_generic().verified_only_select(sql);
    assert_eq!(1, select.projection.len());
    assert_eq!(
        &Expr::Struct {
            values: vec![Expr::Value(number("5")), Expr::Value(number("5")),],
            fields: vec![
                StructField {
                    field_name: Some(Ident::from("x")),
                    field_type: DataType::Int64
                },
                StructField {
                    field_name: Some(Ident::from("y")),
                    field_type: DataType::Int64
                }
            ]
        },
        expr_from_projection(&select.projection[0])
    );
}
#[test]
fn parse_table_identifiers() {
/// Parses a table identifier ident and verifies that re-serializing the