Support for BigQuery struct, array and bytes, int64, float64 datatypes (#1003)

This commit is contained in:
Ifeanyi Ubah 2023-10-25 18:57:33 +02:00 committed by GitHub
parent 65317edcb9
commit 2f437db2a6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 901 additions and 65 deletions

View file

@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize};
#[cfg(feature = "visitor")]
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::ObjectName;
use crate::ast::{display_comma_separated, ObjectName, StructField};
use super::value::escape_single_quote_string;
@ -71,6 +71,10 @@ pub enum DataType {
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type
/// [Oracle]: https://docs.oracle.com/javadb/10.8.3.0/ref/rrefblob.html
Blob(Option<u64>),
/// Variable-length binary data with optional length.
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#bytes_type
Bytes(Option<u64>),
/// Numeric type with optional precision and scale e.g. NUMERIC(10,2), [standard][1]
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
@ -125,6 +129,10 @@ pub enum DataType {
///
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
Int4(Option<u64>),
/// Integer type in [bigquery]
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
Int64,
/// Integer with optional display width e.g. INTEGER or INTEGER(11)
Integer(Option<u64>),
/// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED
@ -149,6 +157,10 @@ pub enum DataType {
///
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
Float4,
/// Floating point in [bigquery]
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
Float64,
/// Floating point e.g. REAL
Real,
/// Float8 as alias for Double in [postgresql]
@ -190,18 +202,23 @@ pub enum DataType {
Regclass,
/// Text
Text,
/// String
String,
/// String with optional length.
String(Option<u64>),
/// Bytea
Bytea,
/// Custom type such as enums
Custom(ObjectName, Vec<String>),
/// Arrays
Array(Option<Box<DataType>>),
Array(ArrayElemTypeDef),
/// Enums
Enum(Vec<String>),
/// Set
Set(Vec<String>),
/// Struct
///
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Struct(Vec<StructField>),
}
impl fmt::Display for DataType {
@ -231,6 +248,7 @@ impl fmt::Display for DataType {
format_type_with_optional_length(f, "VARBINARY", size, false)
}
DataType::Blob(size) => format_type_with_optional_length(f, "BLOB", size, false),
DataType::Bytes(size) => format_type_with_optional_length(f, "BYTES", size, false),
DataType::Numeric(info) => {
write!(f, "NUMERIC{info}")
}
@ -274,6 +292,9 @@ impl fmt::Display for DataType {
DataType::Int4(zerofill) => {
format_type_with_optional_length(f, "INT4", zerofill, false)
}
DataType::Int64 => {
write!(f, "INT64")
}
DataType::UnsignedInt4(zerofill) => {
format_type_with_optional_length(f, "INT4", zerofill, true)
}
@ -297,6 +318,7 @@ impl fmt::Display for DataType {
}
DataType::Real => write!(f, "REAL"),
DataType::Float4 => write!(f, "FLOAT4"),
DataType::Float64 => write!(f, "FLOAT64"),
DataType::Double => write!(f, "DOUBLE"),
DataType::Float8 => write!(f, "FLOAT8"),
DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"),
@ -316,15 +338,13 @@ impl fmt::Display for DataType {
DataType::JSON => write!(f, "JSON"),
DataType::Regclass => write!(f, "REGCLASS"),
DataType::Text => write!(f, "TEXT"),
DataType::String => write!(f, "STRING"),
DataType::String(size) => format_type_with_optional_length(f, "STRING", size, false),
DataType::Bytea => write!(f, "BYTEA"),
DataType::Array(ty) => {
if let Some(t) = &ty {
write!(f, "{t}[]")
} else {
write!(f, "ARRAY")
}
}
DataType::Array(ty) => match ty {
ArrayElemTypeDef::None => write!(f, "ARRAY"),
ArrayElemTypeDef::SquareBracket(t) => write!(f, "{t}[]"),
ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"),
},
DataType::Custom(ty, modifiers) => {
if modifiers.is_empty() {
write!(f, "{ty}")
@ -352,6 +372,13 @@ impl fmt::Display for DataType {
}
write!(f, ")")
}
DataType::Struct(fields) => {
if !fields.is_empty() {
write!(f, "STRUCT<{}>", display_comma_separated(fields))
} else {
write!(f, "STRUCT")
}
}
}
}
}
@ -533,3 +560,19 @@ impl fmt::Display for CharLengthUnits {
}
}
}
/// Represents the data type of the elements in an array (if any) as well as
/// the syntax used to declare the array.
///
/// For example: Bigquery/Hive use `ARRAY<INT>` whereas snowflake uses ARRAY.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum ArrayElemTypeDef {
    /// `ARRAY`: no element type is given.
    None,
    /// `ARRAY<INT>`: the element type is wrapped in angle brackets.
    AngleBracket(Box<DataType>),
    /// `INT[]`: the element type is followed by square brackets.
    SquareBracket(Box<DataType>),
}

View file

@ -26,7 +26,7 @@ use serde::{Deserialize, Serialize};
use sqlparser_derive::{Visit, VisitMut};
pub use self::data_type::{
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
};
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
pub use self::ddl::{
@ -323,6 +323,27 @@ impl fmt::Display for JsonOperator {
}
}
/// One field of a struct type: an optional name together with the field's
/// data type, as used by [bigquery] structs.
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct StructField {
    /// Name of the field, or `None` when the field is anonymous.
    pub field_name: Option<Ident>,
    /// Data type of the field.
    pub field_type: DataType,
}

impl fmt::Display for StructField {
    /// Writes `<name> <type>` for a named field and `<type>` alone otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.field_name.as_ref() {
            Some(name) => write!(f, "{name} {}", self.field_type),
            None => write!(f, "{}", self.field_type),
        }
    }
}
/// Options for `CAST` / `TRY_CAST`
/// BigQuery: <https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax>
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
@ -597,6 +618,26 @@ pub enum Expr {
Rollup(Vec<Vec<Expr>>),
/// ROW / TUPLE a single value, such as `SELECT (1, 2)`
Tuple(Vec<Expr>),
/// `BigQuery` specific `Struct` literal expression [1]
/// Syntax:
/// ```sql
/// STRUCT<[field_name] field_type, ...>( expr1 [, ... ])
/// ```
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Struct {
/// Struct values.
values: Vec<Expr>,
/// Struct field definitions.
fields: Vec<StructField>,
},
/// `BigQuery` specific: A named expression in a typeless struct [1]
///
/// Syntax
/// ```sql
/// 1 AS A
/// ```
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Named { expr: Box<Expr>, name: Ident },
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
ArrayIndex { obj: Box<Expr>, indexes: Vec<Expr> },
/// An array expression e.g. `ARRAY[1, 2]`
@ -997,6 +1038,21 @@ impl fmt::Display for Expr {
Expr::Tuple(exprs) => {
write!(f, "({})", display_comma_separated(exprs))
}
Expr::Struct { values, fields } => {
if !fields.is_empty() {
write!(
f,
"STRUCT<{}>({})",
display_comma_separated(fields),
display_comma_separated(values)
)
} else {
write!(f, "STRUCT({})", display_comma_separated(values))
}
}
Expr::Named { expr, name } => {
write!(f, "{} AS {}", expr, name)
}
Expr::ArrayIndex { obj, indexes } => {
write!(f, "{obj}")?;
for i in indexes {

View file

@ -120,6 +120,7 @@ define_keywords!(
BY,
BYPASSRLS,
BYTEA,
BYTES,
CACHE,
CALL,
CALLED,
@ -270,6 +271,7 @@ define_keywords!(
FIRST_VALUE,
FLOAT,
FLOAT4,
FLOAT64,
FLOAT8,
FLOOR,
FOLLOWING,
@ -293,6 +295,7 @@ define_keywords!(
FUSION,
GENERATE,
GENERATED,
GEOGRAPHY,
GET,
GLOBAL,
GRANT,
@ -328,6 +331,7 @@ define_keywords!(
INT,
INT2,
INT4,
INT64,
INT8,
INTEGER,
INTERSECT,
@ -584,6 +588,7 @@ define_keywords!(
STORED,
STRICT,
STRING,
STRUCT,
SUBMULTISET,
SUBSTRING,
SUBSTRING_REGEX,

View file

@ -30,7 +30,7 @@ use IsOptional::*;
use crate::ast::helpers::stmt_create_table::CreateTableBuilder;
use crate::ast::*;
use crate::dialect::*;
use crate::keywords::{self, Keyword};
use crate::keywords::{self, Keyword, ALL_KEYWORDS};
use crate::tokenizer::*;
mod alter;
@ -197,6 +197,26 @@ impl std::error::Error for ParserError {}
// By default, allow expressions up to this deep before erroring
const DEFAULT_REMAINING_DEPTH: usize = 50;
/// Composite types declared with the angle bracket syntax can be nested
/// arbitrarily, so a declaration such as `ARRAY<ARRAY<INT>>` is possible.
/// The tokenizer, however, recognizes the `>>` as a single ShiftRight token.
/// To work around that limitation, parsing a data type accepts either a `>`
/// or a `>>` token in such positions and remembers which variant it matched.
/// When a `>>` was matched, the parent type does not look for its own
/// closing `>`, because that bracket was already consumed while closing the
/// child type.
///
/// The wrapped flag is `true` when such a `>>` was matched.
///
/// See [Parser::parse_data_type] for details
struct MatchedTrailingBracket(bool);

impl From<bool> for MatchedTrailingBracket {
    /// Wraps the plain flag so that `true.into()` / `false.into()` can be used.
    fn from(matched: bool) -> Self {
        MatchedTrailingBracket(matched)
    }
}
/// Options that control how the [`Parser`] parses SQL text
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserOptions {
@ -833,6 +853,10 @@ impl<'a> Parser<'a> {
Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => {
self.parse_match_against()
}
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
self.prev_token();
self.parse_bigquery_struct_literal()
}
// Here `w` is a word, check if it's a part of a multi-part
// identifier, a function call, or a simple identifier:
_ => match self.peek_token().token {
@ -1798,6 +1822,172 @@ impl<'a> Parser<'a> {
}))
}
/// Bigquery specific: Parse a struct literal
/// Syntax
/// ```sql
/// -- typed
/// STRUCT<[field_name] field_type, ...>( expr1 [, ... ])
/// -- typeless
/// STRUCT( expr1 [AS field_name] [, ... ])
/// ```
fn parse_bigquery_struct_literal(&mut self) -> Result<Expr, ParserError> {
let (fields, trailing_bracket) =
self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?;
if trailing_bracket.0 {
return parser_err!("unmatched > in STRUCT literal", self.peek_token().location);
}
self.expect_token(&Token::LParen)?;
let values = self
.parse_comma_separated(|parser| parser.parse_struct_field_expr(!fields.is_empty()))?;
self.expect_token(&Token::RParen)?;
Ok(Expr::Struct { values, fields })
}
/// Parses one value expression of a bigquery struct literal [1].
///
/// Syntax
/// ```sql
/// expr [AS name]
/// ```
///
/// `typed_syntax` is `true` when the enclosing struct was declared with the
/// typed struct syntax [2] and `false` for the typeless struct syntax [3].
/// An `AS name` suffix is rejected with an error when `typed_syntax` is
/// `true`; otherwise it yields an [`Expr::Named`].
///
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#constructing_a_struct
/// [2]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax
/// [3]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax
fn parse_struct_field_expr(&mut self, typed_syntax: bool) -> Result<Expr, ParserError> {
    let value = self.parse_expr()?;

    // No alias: the bare expression is the field value.
    if !self.parse_keyword(Keyword::AS) {
        return Ok(value);
    }

    if typed_syntax {
        // Step back onto the `AS` keyword so the error points at it.
        self.prev_token();
        let as_location = self.peek_token().location;
        return parser_err!("Typed syntax does not allow AS", as_location);
    }

    let name = self.parse_identifier()?;
    Ok(Expr::Named {
        expr: Box::new(value),
        name,
    })
}
/// Parse a Struct type definition as a sequence of field-value pairs.
/// The syntax of the Struct elem differs by dialect so it is customised
/// by the `elem_parser` argument.
///
/// Syntax
/// ```sql
/// Hive:
/// STRUCT<field_name: field_type>
///
/// BigQuery:
/// STRUCT<[field_name] field_type>
/// ```
fn parse_struct_type_def<F>(
&mut self,
mut elem_parser: F,
) -> Result<(Vec<StructField>, MatchedTrailingBracket), ParserError>
where
F: FnMut(&mut Parser<'a>) -> Result<(StructField, MatchedTrailingBracket), ParserError>,
{
let start_token = self.peek_token();
self.expect_keyword(Keyword::STRUCT)?;
// Nothing to do if we have no type information.
if Token::Lt != self.peek_token() {
return Ok((Default::default(), false.into()));
}
self.next_token();
let mut field_defs = vec![];
let trailing_bracket = loop {
let (def, trailing_bracket) = elem_parser(self)?;
field_defs.push(def);
if !self.consume_token(&Token::Comma) {
break trailing_bracket;
}
// Angle brackets are balanced so we only expect the trailing `>>` after
// we've matched all field types for the current struct.
// e.g. this is invalid syntax `STRUCT<STRUCT<INT>>>, INT>(NULL)`
if trailing_bracket.0 {
return parser_err!("unmatched > in STRUCT definition", start_token.location);
}
};
Ok((
field_defs,
self.expect_closing_angle_bracket(trailing_bracket)?,
))
}
/// Parses one field definition of a BigQuery struct.
///
/// Syntax:
///
/// ```sql
/// [field_name] field_type
/// ```
///
/// The field is treated as anonymous when the next token is a word whose
/// upper-cased text appears in `ALL_KEYWORDS`; in that case the word is parsed
/// as (the start of) the field type rather than as a field name.
fn parse_big_query_struct_field_def(
    &mut self,
) -> Result<(StructField, MatchedTrailingBracket), ParserError> {
    let starts_with_keyword = match self.peek_token().token {
        Token::Word(word) => {
            let upper = word.value.to_uppercase();
            ALL_KEYWORDS.binary_search(&upper.as_str()).is_ok()
        }
        _ => false,
    };

    let field_name = if starts_with_keyword {
        None
    } else {
        Some(self.parse_identifier()?)
    };

    let (field_type, trailing_bracket) = self.parse_data_type_helper()?;
    let field = StructField {
        field_name,
        field_type,
    };
    Ok((field, trailing_bracket))
}
/// For nested types that use the angle bracket syntax, this matches either
/// `>`, `>>` or nothing depending on which variant is expected (specified by the previously
/// matched `trailing_bracket` argument). It returns whether there is a trailing
/// left to be matched - (i.e. if '>>' was matched).
fn expect_closing_angle_bracket(
&mut self,
trailing_bracket: MatchedTrailingBracket,
) -> Result<MatchedTrailingBracket, ParserError> {
let trailing_bracket = if !trailing_bracket.0 {
match self.peek_token().token {
Token::Gt => {
self.next_token();
false.into()
}
Token::ShiftRight => {
self.next_token();
true.into()
}
_ => return self.expected(">", self.peek_token()),
}
} else {
false.into()
};
Ok(trailing_bracket)
}
/// Parse an operator following an expression
pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result<Expr, ParserError> {
// allow the dialect to override infix parsing
@ -4876,7 +5066,22 @@ impl<'a> Parser<'a> {
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
///
/// Returns an error if the parsed type ended in a `>>` token whose second
/// `>` has no enclosing angle-bracket type to close.
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
    let (ty, trailing_bracket) = self.parse_data_type_helper()?;
    if trailing_bracket.0 {
        // Pass the token's location (not the token itself) as the error
        // location, consistent with the other `parser_err!` call sites for
        // unmatched brackets.
        return parser_err!(
            format!("unmatched > after parsing data type {ty}"),
            self.peek_token().location
        );
    }
    Ok(ty)
}
fn parse_data_type_helper(
&mut self,
) -> Result<(DataType, MatchedTrailingBracket), ParserError> {
let next_token = self.next_token();
let mut trailing_bracket = false.into();
let mut data = match next_token.token {
Token::Word(w) => match w.keyword {
Keyword::BOOLEAN => Ok(DataType::Boolean),
@ -4884,6 +5089,7 @@ impl<'a> Parser<'a> {
Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)),
Keyword::REAL => Ok(DataType::Real),
Keyword::FLOAT4 => Ok(DataType::Float4),
Keyword::FLOAT64 => Ok(DataType::Float64),
Keyword::FLOAT8 => Ok(DataType::Float8),
Keyword::DOUBLE => {
if self.parse_keyword(Keyword::PRECISION) {
@ -4940,6 +5146,7 @@ impl<'a> Parser<'a> {
Ok(DataType::Int4(optional_precision?))
}
}
Keyword::INT64 => Ok(DataType::Int64),
Keyword::INTEGER => {
let optional_precision = self.parse_optional_precision();
if self.parse_keyword(Keyword::UNSIGNED) {
@ -4994,6 +5201,7 @@ impl<'a> Parser<'a> {
Keyword::BINARY => Ok(DataType::Binary(self.parse_optional_precision()?)),
Keyword::VARBINARY => Ok(DataType::Varbinary(self.parse_optional_precision()?)),
Keyword::BLOB => Ok(DataType::Blob(self.parse_optional_precision()?)),
Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)),
Keyword::UUID => Ok(DataType::Uuid),
Keyword::DATE => Ok(DataType::Date),
Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)),
@ -5037,7 +5245,7 @@ impl<'a> Parser<'a> {
Keyword::INTERVAL => Ok(DataType::Interval),
Keyword::JSON => Ok(DataType::JSON),
Keyword::REGCLASS => Ok(DataType::Regclass),
Keyword::STRING => Ok(DataType::String),
Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)),
Keyword::TEXT => Ok(DataType::Text),
Keyword::BYTEA => Ok(DataType::Bytea),
Keyword::NUMERIC => Ok(DataType::Numeric(
@ -5059,17 +5267,23 @@ impl<'a> Parser<'a> {
Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)),
Keyword::ARRAY => {
if dialect_of!(self is SnowflakeDialect) {
Ok(DataType::Array(None))
Ok(DataType::Array(ArrayElemTypeDef::None))
} else {
// Hive array syntax. Note that nesting arrays - or other Hive syntax
// that ends with > will fail due to "C++" problem - >> is parsed as
// Token::ShiftRight
self.expect_token(&Token::Lt)?;
let inside_type = self.parse_data_type()?;
self.expect_token(&Token::Gt)?;
Ok(DataType::Array(Some(Box::new(inside_type))))
let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?;
trailing_bracket = self.expect_closing_angle_bracket(_trailing_bracket)?;
Ok(DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
inside_type,
))))
}
}
Keyword::STRUCT if dialect_of!(self is BigQueryDialect) => {
self.prev_token();
let (field_defs, _trailing_bracket) =
self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?;
trailing_bracket = _trailing_bracket;
Ok(DataType::Struct(field_defs))
}
_ => {
self.prev_token();
let type_name = self.parse_object_name()?;
@ -5087,9 +5301,9 @@ impl<'a> Parser<'a> {
// Keyword::ARRAY syntax from above
while self.consume_token(&Token::LBracket) {
self.expect_token(&Token::RBracket)?;
data = DataType::Array(Some(Box::new(data)))
data = DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(data)))
}
Ok(data)
Ok((data, trailing_bracket))
}
pub fn parse_string_values(&mut self) -> Result<Vec<String>, ParserError> {

View file

@ -13,6 +13,7 @@
#[macro_use]
mod test_utils;
use sqlparser::ast;
use std::ops::Deref;
use sqlparser::ast::*;
@ -85,6 +86,494 @@ fn parse_raw_literal() {
panic!("invalid query")
}
#[test]
fn parse_nested_data_types() {
    // Nested angle-bracket types, including one that ends in `>>`, must parse
    // and serialize back to the same SQL.
    let sql = "CREATE TABLE table (x STRUCT<a ARRAY<INT64>, b BYTES(42)>, y ARRAY<STRUCT<INT64>>)";

    // Column definition with no collation and no options.
    let column = |name: &str, data_type: DataType| ColumnDef {
        name: Ident::new(name),
        data_type,
        collation: None,
        options: vec![],
    };

    let x_type = DataType::Struct(vec![
        StructField {
            field_name: Some(Ident::new("a")),
            field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
                DataType::Int64,
            ))),
        },
        StructField {
            field_name: Some(Ident::new("b")),
            field_type: DataType::Bytes(Some(42)),
        },
    ]);
    let y_type = DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(DataType::Struct(
        vec![StructField {
            field_name: None,
            field_type: DataType::Int64,
        }],
    ))));
    let expected_columns = vec![column("x", x_type), column("y", y_type)];

    match bigquery().one_statement_parses_to(sql, sql) {
        Statement::CreateTable { name, columns, .. } => {
            assert_eq!(name, ObjectName(vec![Ident::new("table")]));
            assert_eq!(columns, expected_columns);
        }
        _ => unreachable!(),
    }
}
#[test]
fn parse_invalid_brackets() {
    // Each statement has one `>` too many; the pairs are (sql, expected error message).
    let cases = [
        (
            "SELECT STRUCT<INT64>>(NULL)",
            "unmatched > in STRUCT literal",
        ),
        (
            "SELECT STRUCT<STRUCT<INT64>>>(NULL)",
            "Expected (, found: >",
        ),
        (
            "CREATE TABLE table (x STRUCT<STRUCT<INT64>>>)",
            "Expected ',' or ')' after column definition, found: >",
        ),
    ];

    for &(sql, message) in cases.iter() {
        assert_eq!(
            bigquery().parse_sql_statements(sql).unwrap_err(),
            ParserError::ParserError(message.to_string())
        );
    }
}
#[test]
fn parse_tuple_struct_literal() {
    // tuple syntax: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#tuple_syntax
    // syntax: (expr1, expr2 [, ... ])
    let sql = "SELECT (1, 2, 3), (1, 1.0, '123', true)";
    let select = bigquery().verified_only_select(sql);

    // Expected projection items, in order.
    let expected = [
        Expr::Tuple(vec![
            Expr::Value(number("1")),
            Expr::Value(number("2")),
            Expr::Value(number("3")),
        ]),
        Expr::Tuple(vec![
            Expr::Value(number("1")),
            Expr::Value(number("1.0")),
            Expr::Value(Value::SingleQuotedString("123".to_string())),
            Expr::Value(Value::Boolean(true)),
        ]),
    ];

    assert_eq!(2, select.projection.len());
    for (want, item) in expected.iter().zip(&select.projection) {
        assert_eq!(want, expr_from_projection(item));
    }
}
#[test]
fn parse_typeless_struct_syntax() {
    // typeless struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax
    // syntax: STRUCT( expr1 [AS field_name] [, ... ])
    let sql = "SELECT STRUCT(1, 2, 3), STRUCT('abc'), STRUCT(1, t.str_col), STRUCT(1 AS a, 'abc' AS b), STRUCT(str_col AS abc)";
    let select = bigquery().verified_only_select(sql);

    // A typeless struct literal never carries field definitions.
    let typeless = |values: Vec<Expr>| Expr::Struct {
        values,
        fields: Default::default(),
    };
    // `expr AS name` inside a typeless struct.
    let aliased = |expr: Expr, name: &str| Expr::Named {
        expr: Box::new(expr),
        name: Ident::from(name),
    };

    // Expected projection items, in order.
    let expected = vec![
        typeless(vec![
            Expr::Value(number("1")),
            Expr::Value(number("2")),
            Expr::Value(number("3")),
        ]),
        typeless(vec![Expr::Value(Value::SingleQuotedString(
            "abc".to_string(),
        ))]),
        typeless(vec![
            Expr::Value(number("1")),
            Expr::CompoundIdentifier(vec![Ident::from("t"), Ident::from("str_col")]),
        ]),
        typeless(vec![
            aliased(Expr::Value(number("1")), "a"),
            aliased(
                Expr::Value(Value::SingleQuotedString("abc".to_string())),
                "b",
            ),
        ]),
        typeless(vec![aliased(
            Expr::Identifier(Ident::from("str_col")),
            "abc",
        )]),
    ];

    assert_eq!(5, select.projection.len());
    for (want, item) in expected.iter().zip(&select.projection) {
        assert_eq!(want, expr_from_projection(item));
    }
}
#[test]
fn parse_typed_struct_syntax() {
    // typed struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax
    // syntax: STRUCT<[field_name] field_type, ...>( expr1 [, ... ])

    // Struct field without a name.
    fn anonymous(field_type: DataType) -> StructField {
        StructField {
            field_name: None,
            field_type,
        }
    }

    // Struct field with an unquoted name.
    fn named(name: &str, field_type: DataType) -> StructField {
        StructField {
            field_name: Some(Ident::new(name)),
            field_type,
        }
    }

    // `ARRAY<elem>` data type.
    fn angle_array(elem: DataType) -> DataType {
        DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(elem)))
    }

    // Struct literal with a single anonymous field and a single value.
    fn single(field_type: DataType, value: Expr) -> Expr {
        Expr::Struct {
            values: vec![value],
            fields: vec![anonymous(field_type)],
        }
    }

    // Struct literal with a single anonymous field whose value is a typed
    // string literal of that same type, e.g. `STRUCT<TIME>(TIME '15:30:00')`.
    fn single_typed_string(data_type: DataType, value: &str) -> Expr {
        let literal = Expr::TypedString {
            data_type: data_type.clone(),
            value: value.to_string(),
        };
        single(data_type, literal)
    }

    // Parses `sql` (which must round-trip) and compares every projection
    // item against `expected`, in order.
    fn verify(sql: &str, expected: Vec<Expr>) {
        let select = bigquery().verified_only_select(sql);
        assert_eq!(expected.len(), select.projection.len());
        for (want, item) in expected.iter().zip(&select.projection) {
            assert_eq!(want, expr_from_projection(item));
        }
    }

    verify(
        r#"SELECT STRUCT<INT64>(5), STRUCT<x INT64, y STRING>(1, t.str_col), STRUCT<arr ARRAY<FLOAT64>, str STRUCT<BOOL>>(nested_col)"#,
        vec![
            single(DataType::Int64, Expr::Value(number("5"))),
            Expr::Struct {
                values: vec![
                    Expr::Value(number("1")),
                    Expr::CompoundIdentifier(vec![Ident::new("t"), Ident::new("str_col")]),
                ],
                fields: vec![
                    named("x", DataType::Int64),
                    named("y", DataType::String(None)),
                ],
            },
            Expr::Struct {
                values: vec![Expr::Identifier(Ident::new("nested_col"))],
                fields: vec![
                    named("arr", angle_array(DataType::Float64)),
                    named("str", DataType::Struct(vec![anonymous(DataType::Bool)])),
                ],
            },
        ],
    );

    verify(
        r#"SELECT STRUCT<x STRUCT, y ARRAY<STRUCT>>(nested_col)"#,
        vec![Expr::Struct {
            values: vec![Expr::Identifier(Ident::new("nested_col"))],
            fields: vec![
                named("x", DataType::Struct(Default::default())),
                named("y", angle_array(DataType::Struct(Default::default()))),
            ],
        }],
    );

    verify(
        r#"SELECT STRUCT<BOOL>(true), STRUCT<BYTES(42)>(B'abc')"#,
        vec![
            single(DataType::Bool, Expr::Value(Value::Boolean(true))),
            single(
                DataType::Bytes(Some(42)),
                Expr::Value(Value::SingleQuotedByteStringLiteral("abc".to_string())),
            ),
        ],
    );

    verify(
        r#"SELECT STRUCT<DATE>("2011-05-05"), STRUCT<DATETIME>(DATETIME '1999-01-01 01:23:34.45'), STRUCT<FLOAT64>(5.0), STRUCT<INT64>(1)"#,
        vec![
            single(
                DataType::Date,
                Expr::Value(Value::DoubleQuotedString("2011-05-05".to_string())),
            ),
            single_typed_string(DataType::Datetime(None), "1999-01-01 01:23:34.45"),
            single(DataType::Float64, Expr::Value(number("5.0"))),
            single(DataType::Int64, Expr::Value(number("1"))),
        ],
    );

    verify(
        r#"SELECT STRUCT<INTERVAL>(INTERVAL '1-2 3 4:5:6.789999'), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#,
        vec![
            single(
                DataType::Interval,
                Expr::Interval(ast::Interval {
                    value: Box::new(Expr::Value(Value::SingleQuotedString(
                        "1-2 3 4:5:6.789999".to_string(),
                    ))),
                    leading_field: None,
                    leading_precision: None,
                    last_field: None,
                    fractional_seconds_precision: None,
                }),
            ),
            single_typed_string(
                DataType::JSON,
                r#"{"class" : {"students" : [{"name" : "Jane"}]}}"#,
            ),
        ],
    );

    verify(
        r#"SELECT STRUCT<STRING(42)>("foo"), STRUCT<TIMESTAMP>(TIMESTAMP '2008-12-25 15:30:00 America/Los_Angeles'), STRUCT<TIME>(TIME '15:30:00')"#,
        vec![
            single(
                DataType::String(Some(42)),
                Expr::Value(Value::DoubleQuotedString("foo".to_string())),
            ),
            single_typed_string(
                DataType::Timestamp(None, TimezoneInfo::None),
                "2008-12-25 15:30:00 America/Los_Angeles",
            ),
            single_typed_string(DataType::Time(None, TimezoneInfo::None), "15:30:00"),
        ],
    );

    verify(
        r#"SELECT STRUCT<NUMERIC>(NUMERIC '1'), STRUCT<BIGNUMERIC>(BIGNUMERIC '1')"#,
        vec![
            single_typed_string(DataType::Numeric(ExactNumberInfo::None), "1"),
            single_typed_string(DataType::BigNumeric(ExactNumberInfo::None), "1"),
        ],
    );
}
#[test]
fn parse_typed_struct_with_field_name() {
    // Typed struct literals whose fields all carry a name.
    let named = |name: &str, field_type: DataType| StructField {
        field_name: Some(Ident::from(name)),
        field_type,
    };

    let sql = r#"SELECT STRUCT<x INT64>(5), STRUCT<y STRING>("foo")"#;
    let select = bigquery().verified_only_select(sql);
    let expected = vec![
        Expr::Struct {
            values: vec![Expr::Value(number("5"))],
            fields: vec![named("x", DataType::Int64)],
        },
        Expr::Struct {
            values: vec![Expr::Value(Value::DoubleQuotedString("foo".to_string()))],
            fields: vec![named("y", DataType::String(None))],
        },
    ];
    assert_eq!(2, select.projection.len());
    for (want, item) in expected.iter().zip(&select.projection) {
        assert_eq!(want, expr_from_projection(item));
    }

    let sql = r#"SELECT STRUCT<x INT64, y INT64>(5, 5)"#;
    let select = bigquery().verified_only_select(sql);
    let expected = Expr::Struct {
        values: vec![Expr::Value(number("5")), Expr::Value(number("5"))],
        fields: vec![named("x", DataType::Int64), named("y", DataType::Int64)],
    };
    assert_eq!(1, select.projection.len());
    assert_eq!(&expected, expr_from_projection(&select.projection[0]));
}
#[test]
fn parse_table_identifiers() {
/// Parses a table identifier ident and verifies that re-serializing the

View file

@ -23,8 +23,8 @@ use sqlparser::ast::SelectItem::UnnamedExpr;
use sqlparser::ast::TableFactor::{Pivot, Unpivot};
use sqlparser::ast::*;
use sqlparser::dialect::{
AnsiDialect, BigQueryDialect, ClickHouseDialect, DuckDbDialect, GenericDialect, HiveDialect,
MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect,
AnsiDialect, BigQueryDialect, ClickHouseDialect, Dialect, DuckDbDialect, GenericDialect,
HiveDialect, MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect,
SnowflakeDialect,
};
use sqlparser::keywords::ALL_KEYWORDS;
@ -2516,15 +2516,41 @@ fn parse_create_table() {
#[test]
fn parse_create_table_hive_array() {
// Parsing [] type arrays does not work in MsSql since [ is used in is_delimited_identifier_start
for (dialects, angle_bracket_syntax) in [
(
vec![Box::new(PostgreSqlDialect {}) as Box<dyn Dialect>],
false,
),
(
vec![
Box::new(HiveDialect {}) as Box<dyn Dialect>,
Box::new(BigQueryDialect {}) as Box<dyn Dialect>,
],
true,
),
] {
let dialects = TestedDialects {
dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(HiveDialect {})],
dialects,
options: None,
};
let sql = "CREATE TABLE IF NOT EXISTS something (name int, val array<int>)";
match dialects.one_statement_parses_to(
sql,
"CREATE TABLE IF NOT EXISTS something (name INT, val INT[])",
) {
let sql = format!(
"CREATE TABLE IF NOT EXISTS something (name INT, val {})",
if angle_bracket_syntax {
"ARRAY<INT>"
} else {
"INT[]"
}
);
let expected = Box::new(DataType::Int(None));
let expected = if angle_bracket_syntax {
ArrayElemTypeDef::AngleBracket(expected)
} else {
ArrayElemTypeDef::SquareBracket(expected)
};
match dialects.one_statement_parses_to(sql.as_str(), sql.as_str()) {
Statement::CreateTable {
if_not_exists,
name,
@ -2544,7 +2570,7 @@ fn parse_create_table_hive_array() {
},
ColumnDef {
name: Ident::new("val"),
data_type: DataType::Array(Some(Box::new(DataType::Int(None)))),
data_type: DataType::Array(expected),
collation: None,
options: vec![],
},
@ -2553,8 +2579,9 @@ fn parse_create_table_hive_array() {
}
_ => unreachable!(),
}
}
// SnowflakeDialect using array diffrent
// SnowflakeDialect using array different
let dialects = TestedDialects {
dialects: vec![
Box::new(PostgreSqlDialect {}),

View file

@ -1780,9 +1780,11 @@ fn parse_array_index_expr() {
})],
named: true,
})),
data_type: DataType::Array(Some(Box::new(DataType::Array(Some(Box::new(
DataType::Int(None)
)))))),
data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(
DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(DataType::Int(
None
))))
))),
format: None,
}))),
indexes: vec![num[1].clone(), num[2].clone()],

View file

@ -166,7 +166,7 @@ fn parse_array() {
assert_eq!(
&Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("a"))),
data_type: DataType::Array(None),
data_type: DataType::Array(ArrayElemTypeDef::None),
format: None,
},
expr_from_projection(only(&select.projection))