mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-24 08:43:43 +00:00
Support for BigQuery struct, array and bytes, int64, float64 datatypes (#1003)
This commit is contained in:
parent
65317edcb9
commit
2f437db2a6
8 changed files with 901 additions and 65 deletions
|
@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize};
|
||||||
#[cfg(feature = "visitor")]
|
#[cfg(feature = "visitor")]
|
||||||
use sqlparser_derive::{Visit, VisitMut};
|
use sqlparser_derive::{Visit, VisitMut};
|
||||||
|
|
||||||
use crate::ast::ObjectName;
|
use crate::ast::{display_comma_separated, ObjectName, StructField};
|
||||||
|
|
||||||
use super::value::escape_single_quote_string;
|
use super::value::escape_single_quote_string;
|
||||||
|
|
||||||
|
@ -71,6 +71,10 @@ pub enum DataType {
|
||||||
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type
|
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type
|
||||||
/// [Oracle]: https://docs.oracle.com/javadb/10.8.3.0/ref/rrefblob.html
|
/// [Oracle]: https://docs.oracle.com/javadb/10.8.3.0/ref/rrefblob.html
|
||||||
Blob(Option<u64>),
|
Blob(Option<u64>),
|
||||||
|
/// Variable-length binary data with optional length.
|
||||||
|
///
|
||||||
|
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#bytes_type
|
||||||
|
Bytes(Option<u64>),
|
||||||
/// Numeric type with optional precision and scale e.g. NUMERIC(10,2), [standard][1]
|
/// Numeric type with optional precision and scale e.g. NUMERIC(10,2), [standard][1]
|
||||||
///
|
///
|
||||||
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
|
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
|
||||||
|
@ -125,6 +129,10 @@ pub enum DataType {
|
||||||
///
|
///
|
||||||
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
|
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
|
||||||
Int4(Option<u64>),
|
Int4(Option<u64>),
|
||||||
|
/// Integer type in [bigquery]
|
||||||
|
///
|
||||||
|
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
|
||||||
|
Int64,
|
||||||
/// Integer with optional display width e.g. INTEGER or INTEGER(11)
|
/// Integer with optional display width e.g. INTEGER or INTEGER(11)
|
||||||
Integer(Option<u64>),
|
Integer(Option<u64>),
|
||||||
/// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED
|
/// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED
|
||||||
|
@ -149,6 +157,10 @@ pub enum DataType {
|
||||||
///
|
///
|
||||||
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
|
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
|
||||||
Float4,
|
Float4,
|
||||||
|
/// Floating point in [bigquery]
|
||||||
|
///
|
||||||
|
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
|
||||||
|
Float64,
|
||||||
/// Floating point e.g. REAL
|
/// Floating point e.g. REAL
|
||||||
Real,
|
Real,
|
||||||
/// Float8 as alias for Double in [postgresql]
|
/// Float8 as alias for Double in [postgresql]
|
||||||
|
@ -190,18 +202,23 @@ pub enum DataType {
|
||||||
Regclass,
|
Regclass,
|
||||||
/// Text
|
/// Text
|
||||||
Text,
|
Text,
|
||||||
/// String
|
/// String with optional length.
|
||||||
String,
|
String(Option<u64>),
|
||||||
/// Bytea
|
/// Bytea
|
||||||
Bytea,
|
Bytea,
|
||||||
/// Custom type such as enums
|
/// Custom type such as enums
|
||||||
Custom(ObjectName, Vec<String>),
|
Custom(ObjectName, Vec<String>),
|
||||||
/// Arrays
|
/// Arrays
|
||||||
Array(Option<Box<DataType>>),
|
Array(ArrayElemTypeDef),
|
||||||
/// Enums
|
/// Enums
|
||||||
Enum(Vec<String>),
|
Enum(Vec<String>),
|
||||||
/// Set
|
/// Set
|
||||||
Set(Vec<String>),
|
Set(Vec<String>),
|
||||||
|
/// Struct
|
||||||
|
///
|
||||||
|
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
|
||||||
|
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
||||||
|
Struct(Vec<StructField>),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for DataType {
|
impl fmt::Display for DataType {
|
||||||
|
@ -231,6 +248,7 @@ impl fmt::Display for DataType {
|
||||||
format_type_with_optional_length(f, "VARBINARY", size, false)
|
format_type_with_optional_length(f, "VARBINARY", size, false)
|
||||||
}
|
}
|
||||||
DataType::Blob(size) => format_type_with_optional_length(f, "BLOB", size, false),
|
DataType::Blob(size) => format_type_with_optional_length(f, "BLOB", size, false),
|
||||||
|
DataType::Bytes(size) => format_type_with_optional_length(f, "BYTES", size, false),
|
||||||
DataType::Numeric(info) => {
|
DataType::Numeric(info) => {
|
||||||
write!(f, "NUMERIC{info}")
|
write!(f, "NUMERIC{info}")
|
||||||
}
|
}
|
||||||
|
@ -274,6 +292,9 @@ impl fmt::Display for DataType {
|
||||||
DataType::Int4(zerofill) => {
|
DataType::Int4(zerofill) => {
|
||||||
format_type_with_optional_length(f, "INT4", zerofill, false)
|
format_type_with_optional_length(f, "INT4", zerofill, false)
|
||||||
}
|
}
|
||||||
|
DataType::Int64 => {
|
||||||
|
write!(f, "INT64")
|
||||||
|
}
|
||||||
DataType::UnsignedInt4(zerofill) => {
|
DataType::UnsignedInt4(zerofill) => {
|
||||||
format_type_with_optional_length(f, "INT4", zerofill, true)
|
format_type_with_optional_length(f, "INT4", zerofill, true)
|
||||||
}
|
}
|
||||||
|
@ -297,6 +318,7 @@ impl fmt::Display for DataType {
|
||||||
}
|
}
|
||||||
DataType::Real => write!(f, "REAL"),
|
DataType::Real => write!(f, "REAL"),
|
||||||
DataType::Float4 => write!(f, "FLOAT4"),
|
DataType::Float4 => write!(f, "FLOAT4"),
|
||||||
|
DataType::Float64 => write!(f, "FLOAT64"),
|
||||||
DataType::Double => write!(f, "DOUBLE"),
|
DataType::Double => write!(f, "DOUBLE"),
|
||||||
DataType::Float8 => write!(f, "FLOAT8"),
|
DataType::Float8 => write!(f, "FLOAT8"),
|
||||||
DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"),
|
DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"),
|
||||||
|
@ -316,15 +338,13 @@ impl fmt::Display for DataType {
|
||||||
DataType::JSON => write!(f, "JSON"),
|
DataType::JSON => write!(f, "JSON"),
|
||||||
DataType::Regclass => write!(f, "REGCLASS"),
|
DataType::Regclass => write!(f, "REGCLASS"),
|
||||||
DataType::Text => write!(f, "TEXT"),
|
DataType::Text => write!(f, "TEXT"),
|
||||||
DataType::String => write!(f, "STRING"),
|
DataType::String(size) => format_type_with_optional_length(f, "STRING", size, false),
|
||||||
DataType::Bytea => write!(f, "BYTEA"),
|
DataType::Bytea => write!(f, "BYTEA"),
|
||||||
DataType::Array(ty) => {
|
DataType::Array(ty) => match ty {
|
||||||
if let Some(t) = &ty {
|
ArrayElemTypeDef::None => write!(f, "ARRAY"),
|
||||||
write!(f, "{t}[]")
|
ArrayElemTypeDef::SquareBracket(t) => write!(f, "{t}[]"),
|
||||||
} else {
|
ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"),
|
||||||
write!(f, "ARRAY")
|
},
|
||||||
}
|
|
||||||
}
|
|
||||||
DataType::Custom(ty, modifiers) => {
|
DataType::Custom(ty, modifiers) => {
|
||||||
if modifiers.is_empty() {
|
if modifiers.is_empty() {
|
||||||
write!(f, "{ty}")
|
write!(f, "{ty}")
|
||||||
|
@ -352,6 +372,13 @@ impl fmt::Display for DataType {
|
||||||
}
|
}
|
||||||
write!(f, ")")
|
write!(f, ")")
|
||||||
}
|
}
|
||||||
|
DataType::Struct(fields) => {
|
||||||
|
if !fields.is_empty() {
|
||||||
|
write!(f, "STRUCT<{}>", display_comma_separated(fields))
|
||||||
|
} else {
|
||||||
|
write!(f, "STRUCT")
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -533,3 +560,19 @@ impl fmt::Display for CharLengthUnits {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Represents the data type of the elements in an array (if any) as well as
|
||||||
|
/// the syntax used to declare the array.
|
||||||
|
///
|
||||||
|
/// For example: BigQuery/Hive use `ARRAY<INT>` whereas Snowflake uses a bare `ARRAY`.
|
||||||
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
|
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||||
|
pub enum ArrayElemTypeDef {
|
||||||
|
/// `ARRAY`: no element type is declared.
|
||||||
|
None,
|
||||||
|
/// `ARRAY<INT>`: the element type is written inside angle brackets.
|
||||||
|
AngleBracket(Box<DataType>),
|
||||||
|
/// `INT[]`: the element type is followed by square brackets.
|
||||||
|
SquareBracket(Box<DataType>),
|
||||||
|
}
|
||||||
|
|
|
@ -26,7 +26,7 @@ use serde::{Deserialize, Serialize};
|
||||||
use sqlparser_derive::{Visit, VisitMut};
|
use sqlparser_derive::{Visit, VisitMut};
|
||||||
|
|
||||||
pub use self::data_type::{
|
pub use self::data_type::{
|
||||||
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
|
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
|
||||||
};
|
};
|
||||||
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
|
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
|
||||||
pub use self::ddl::{
|
pub use self::ddl::{
|
||||||
|
@ -323,6 +323,27 @@ impl fmt::Display for JsonOperator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A field definition within a [bigquery] struct type, e.g. the `a INT64` in `STRUCT<a INT64>`.
|
||||||
|
///
|
||||||
|
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
||||||
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
|
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||||
|
pub struct StructField {
|
||||||
|
/// Name of the field, or `None` for an anonymous field such as the `INT64` in `STRUCT<INT64>`.
pub field_name: Option<Ident>,
|
||||||
|
/// Data type of the field.
pub field_type: DataType,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Renders the field as `name type`, or as just `type` when the field has no name.
impl fmt::Display for StructField {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
if let Some(name) = &self.field_name {
|
||||||
|
write!(f, "{name} {}", self.field_type)
|
||||||
|
} else {
|
||||||
|
// Anonymous field: only the type is written.
write!(f, "{}", self.field_type)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Options for `CAST` / `TRY_CAST`
|
/// Options for `CAST` / `TRY_CAST`
|
||||||
/// BigQuery: <https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax>
|
/// BigQuery: <https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax>
|
||||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||||
|
@ -597,6 +618,26 @@ pub enum Expr {
|
||||||
Rollup(Vec<Vec<Expr>>),
|
Rollup(Vec<Vec<Expr>>),
|
||||||
/// ROW / TUPLE a single value, such as `SELECT (1, 2)`
|
/// ROW / TUPLE a single value, such as `SELECT (1, 2)`
|
||||||
Tuple(Vec<Expr>),
|
Tuple(Vec<Expr>),
|
||||||
|
/// `BigQuery` specific `Struct` literal expression [1]
|
||||||
|
/// Syntax:
|
||||||
|
/// ```sql
|
||||||
|
/// STRUCT<[field_name] field_type, ...>( expr1 [, ... ])
|
||||||
|
/// ```
|
||||||
|
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
||||||
|
Struct {
|
||||||
|
/// Struct values.
|
||||||
|
values: Vec<Expr>,
|
||||||
|
/// Struct field definitions.
|
||||||
|
fields: Vec<StructField>,
|
||||||
|
},
|
||||||
|
/// `BigQuery` specific: A named expression in a typeless struct [1]
|
||||||
|
///
|
||||||
|
/// Syntax
|
||||||
|
/// ```sql
|
||||||
|
/// 1 AS A
|
||||||
|
/// ```
|
||||||
|
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
||||||
|
Named { expr: Box<Expr>, name: Ident },
|
||||||
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
|
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
|
||||||
ArrayIndex { obj: Box<Expr>, indexes: Vec<Expr> },
|
ArrayIndex { obj: Box<Expr>, indexes: Vec<Expr> },
|
||||||
/// An array expression e.g. `ARRAY[1, 2]`
|
/// An array expression e.g. `ARRAY[1, 2]`
|
||||||
|
@ -997,6 +1038,21 @@ impl fmt::Display for Expr {
|
||||||
Expr::Tuple(exprs) => {
|
Expr::Tuple(exprs) => {
|
||||||
write!(f, "({})", display_comma_separated(exprs))
|
write!(f, "({})", display_comma_separated(exprs))
|
||||||
}
|
}
|
||||||
|
Expr::Struct { values, fields } => {
|
||||||
|
if !fields.is_empty() {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"STRUCT<{}>({})",
|
||||||
|
display_comma_separated(fields),
|
||||||
|
display_comma_separated(values)
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
write!(f, "STRUCT({})", display_comma_separated(values))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Expr::Named { expr, name } => {
|
||||||
|
write!(f, "{} AS {}", expr, name)
|
||||||
|
}
|
||||||
Expr::ArrayIndex { obj, indexes } => {
|
Expr::ArrayIndex { obj, indexes } => {
|
||||||
write!(f, "{obj}")?;
|
write!(f, "{obj}")?;
|
||||||
for i in indexes {
|
for i in indexes {
|
||||||
|
|
|
@ -120,6 +120,7 @@ define_keywords!(
|
||||||
BY,
|
BY,
|
||||||
BYPASSRLS,
|
BYPASSRLS,
|
||||||
BYTEA,
|
BYTEA,
|
||||||
|
BYTES,
|
||||||
CACHE,
|
CACHE,
|
||||||
CALL,
|
CALL,
|
||||||
CALLED,
|
CALLED,
|
||||||
|
@ -270,6 +271,7 @@ define_keywords!(
|
||||||
FIRST_VALUE,
|
FIRST_VALUE,
|
||||||
FLOAT,
|
FLOAT,
|
||||||
FLOAT4,
|
FLOAT4,
|
||||||
|
FLOAT64,
|
||||||
FLOAT8,
|
FLOAT8,
|
||||||
FLOOR,
|
FLOOR,
|
||||||
FOLLOWING,
|
FOLLOWING,
|
||||||
|
@ -293,6 +295,7 @@ define_keywords!(
|
||||||
FUSION,
|
FUSION,
|
||||||
GENERATE,
|
GENERATE,
|
||||||
GENERATED,
|
GENERATED,
|
||||||
|
GEOGRAPHY,
|
||||||
GET,
|
GET,
|
||||||
GLOBAL,
|
GLOBAL,
|
||||||
GRANT,
|
GRANT,
|
||||||
|
@ -328,6 +331,7 @@ define_keywords!(
|
||||||
INT,
|
INT,
|
||||||
INT2,
|
INT2,
|
||||||
INT4,
|
INT4,
|
||||||
|
INT64,
|
||||||
INT8,
|
INT8,
|
||||||
INTEGER,
|
INTEGER,
|
||||||
INTERSECT,
|
INTERSECT,
|
||||||
|
@ -584,6 +588,7 @@ define_keywords!(
|
||||||
STORED,
|
STORED,
|
||||||
STRICT,
|
STRICT,
|
||||||
STRING,
|
STRING,
|
||||||
|
STRUCT,
|
||||||
SUBMULTISET,
|
SUBMULTISET,
|
||||||
SUBSTRING,
|
SUBSTRING,
|
||||||
SUBSTRING_REGEX,
|
SUBSTRING_REGEX,
|
||||||
|
|
|
@ -30,7 +30,7 @@ use IsOptional::*;
|
||||||
use crate::ast::helpers::stmt_create_table::CreateTableBuilder;
|
use crate::ast::helpers::stmt_create_table::CreateTableBuilder;
|
||||||
use crate::ast::*;
|
use crate::ast::*;
|
||||||
use crate::dialect::*;
|
use crate::dialect::*;
|
||||||
use crate::keywords::{self, Keyword};
|
use crate::keywords::{self, Keyword, ALL_KEYWORDS};
|
||||||
use crate::tokenizer::*;
|
use crate::tokenizer::*;
|
||||||
|
|
||||||
mod alter;
|
mod alter;
|
||||||
|
@ -197,6 +197,26 @@ impl std::error::Error for ParserError {}
|
||||||
// By default, allow expressions up to this deep before erroring
|
// By default, allow expressions up to this deep before erroring
|
||||||
const DEFAULT_REMAINING_DEPTH: usize = 50;
|
const DEFAULT_REMAINING_DEPTH: usize = 50;
|
||||||
|
|
||||||
|
/// Composite type declarations using the angle bracket syntax can be arbitrarily
|
||||||
|
/// nested, such that the following declaration is possible:
|
||||||
|
/// `ARRAY<ARRAY<INT>>`
|
||||||
|
/// But the tokenizer recognizes the `>>` as a single ShiftRight token.
|
||||||
|
/// We work around that limitation when parsing a data type by accepting
|
||||||
|
/// either a `>` or `>>` token in such cases, remembering which variant we
|
||||||
|
/// matched (the wrapped `bool` is `true` when a `>>` was matched).
|
||||||
|
/// In the latter case, having matched a `>>`, the parent type will not look to
|
||||||
|
/// match its own closing `>`, since that bracket was already consumed while
|
||||||
|
/// parsing the child type.
|
||||||
|
///
|
||||||
|
/// See [Parser::parse_data_type] for details
|
||||||
|
struct MatchedTrailingBracket(bool);
|
||||||
|
|
||||||
|
/// Wraps a plain `bool` so that call sites can write `true.into()` / `false.into()`.
impl From<bool> for MatchedTrailingBracket {
|
||||||
|
fn from(value: bool) -> Self {
|
||||||
|
Self(value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Options that control how the [`Parser`] parses SQL text
|
/// Options that control how the [`Parser`] parses SQL text
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub struct ParserOptions {
|
pub struct ParserOptions {
|
||||||
|
@ -833,6 +853,10 @@ impl<'a> Parser<'a> {
|
||||||
Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => {
|
Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => {
|
||||||
self.parse_match_against()
|
self.parse_match_against()
|
||||||
}
|
}
|
||||||
|
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
|
||||||
|
self.prev_token();
|
||||||
|
self.parse_bigquery_struct_literal()
|
||||||
|
}
|
||||||
// Here `w` is a word, check if it's a part of a multi-part
|
// Here `w` is a word, check if it's a part of a multi-part
|
||||||
// identifier, a function call, or a simple identifier:
|
// identifier, a function call, or a simple identifier:
|
||||||
_ => match self.peek_token().token {
|
_ => match self.peek_token().token {
|
||||||
|
@ -1798,6 +1822,172 @@ impl<'a> Parser<'a> {
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// BigQuery specific: Parse a struct literal, starting at the `STRUCT` keyword.
|
||||||
|
/// Syntax
|
||||||
|
/// ```sql
|
||||||
|
/// -- typed
|
||||||
|
/// STRUCT<[field_name] field_type, ...>( expr1 [, ... ])
|
||||||
|
/// -- typeless
|
||||||
|
/// STRUCT( expr1 [AS field_name] [, ... ])
|
||||||
|
/// ```
|
||||||
|
fn parse_bigquery_struct_literal(&mut self) -> Result<Expr, ParserError> {
|
||||||
|
// `fields` is empty when the literal has no `<...>` type list (typeless syntax).
let (fields, trailing_bracket) =
|
||||||
|
self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?;
|
||||||
|
// A `>>` closed the type list but there is no enclosing type to claim the
// second bracket, e.g. `STRUCT<INT64>>(NULL)`.
if trailing_bracket.0 {
|
||||||
|
return parser_err!("unmatched > in STRUCT literal", self.peek_token().location);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.expect_token(&Token::LParen)?;
|
||||||
|
// `AS name` on a value is only accepted when no field types were declared.
let values = self
|
||||||
|
.parse_comma_separated(|parser| parser.parse_struct_field_expr(!fields.is_empty()))?;
|
||||||
|
self.expect_token(&Token::RParen)?;
|
||||||
|
|
||||||
|
Ok(Expr::Struct { values, fields })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse an expression value for a BigQuery struct [1]
|
||||||
|
/// Syntax
|
||||||
|
/// ```sql
|
||||||
|
/// expr [AS name]
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Parameter `typed_syntax` is set to true if the expression
|
||||||
|
/// is to be parsed as a field expression declared using typed
|
||||||
|
/// struct syntax [2], and false if using typeless struct syntax [3].
|
||||||
|
/// With typed syntax, an `AS name` suffix is rejected with an error; otherwise
/// the expression is returned as `Expr::Named` when `AS name` is present.
///
|
||||||
|
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#constructing_a_struct
|
||||||
|
/// [2]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax
|
||||||
|
/// [3]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax
|
||||||
|
fn parse_struct_field_expr(&mut self, typed_syntax: bool) -> Result<Expr, ParserError> {
|
||||||
|
let expr = self.parse_expr()?;
|
||||||
|
if self.parse_keyword(Keyword::AS) {
|
||||||
|
if typed_syntax {
|
||||||
|
return parser_err!("Typed syntax does not allow AS", {
|
||||||
|
// Rewind one token (back onto `AS`) so the reported location is that of the keyword.
self.prev_token();
|
||||||
|
self.peek_token().location
|
||||||
|
});
|
||||||
|
}
|
||||||
|
let field_name = self.parse_identifier()?;
|
||||||
|
Ok(Expr::Named {
|
||||||
|
expr: expr.into(),
|
||||||
|
name: field_name,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
Ok(expr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a Struct type definition as a comma-separated sequence of field definitions.
|
||||||
|
/// The syntax of the Struct elem differs by dialect so it is customised
|
||||||
|
/// by the `elem_parser` argument.
|
||||||
|
///
|
||||||
|
/// Syntax
|
||||||
|
/// ```sql
|
||||||
|
/// Hive:
|
||||||
|
/// STRUCT<field_name: field_type>
|
||||||
|
///
|
||||||
|
/// BigQuery:
|
||||||
|
/// STRUCT<[field_name] field_type>
|
||||||
|
/// ```
|
||||||
///
/// Returns the parsed fields together with a flag telling the caller whether a
/// `>>` was consumed, i.e. whether the enclosing type's closing `>` has already
/// been matched. A bare `STRUCT` (no `<`) yields an empty field list.
|
fn parse_struct_type_def<F>(
|
||||||
|
&mut self,
|
||||||
|
mut elem_parser: F,
|
||||||
|
) -> Result<(Vec<StructField>, MatchedTrailingBracket), ParserError>
|
||||||
|
where
|
||||||
|
F: FnMut(&mut Parser<'a>) -> Result<(StructField, MatchedTrailingBracket), ParserError>,
|
||||||
|
{
|
||||||
|
// Remembered only so errors can be reported at the start of the definition.
let start_token = self.peek_token();
|
||||||
|
self.expect_keyword(Keyword::STRUCT)?;
|
||||||
|
|
||||||
|
// Nothing to do if we have no type information.
|
||||||
|
if Token::Lt != self.peek_token() {
|
||||||
|
return Ok((Default::default(), false.into()));
|
||||||
|
}
|
||||||
|
self.next_token();
|
||||||
|
|
||||||
|
let mut field_defs = vec![];
|
||||||
|
// The loop yields the trailing-bracket state reported by the last field parsed.
let trailing_bracket = loop {
|
||||||
|
let (def, trailing_bracket) = elem_parser(self)?;
|
||||||
|
field_defs.push(def);
|
||||||
|
if !self.consume_token(&Token::Comma) {
|
||||||
|
break trailing_bracket;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Angle brackets are balanced so we only expect the trailing `>>` after
|
||||||
|
// we've matched all field types for the current struct.
|
||||||
|
// e.g. this is invalid syntax `STRUCT<STRUCT<INT>>>, INT>(NULL)`
|
||||||
|
if trailing_bracket.0 {
|
||||||
|
return parser_err!("unmatched > in STRUCT definition", start_token.location);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
field_defs,
|
||||||
|
// Consumes this struct's own closing bracket unless the last field's `>>` already did.
self.expect_closing_angle_bracket(trailing_bracket)?,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a field definition in a BigQuery struct.
|
||||||
|
/// Syntax:
|
||||||
|
///
|
||||||
|
/// ```sql
|
||||||
|
/// [field_name] field_type
|
||||||
|
/// ```
|
||||||
///
/// Returns the field together with the trailing-bracket state reported by
/// parsing its type.
|
fn parse_big_query_struct_field_def(
|
||||||
|
&mut self,
|
||||||
|
) -> Result<(StructField, MatchedTrailingBracket), ParserError> {
|
||||||
|
// The field is taken to be anonymous (type only) when the next word is any SQL keyword,
// presumably because built-in type names are keywords.
// NOTE(review): a field whose *name* is itself a keyword (e.g. `STRUCT<date DATE>`) looks like
// it would be misread as anonymous - confirm, and consider looking ahead one more token.
let is_anonymous_field = if let Token::Word(w) = self.peek_token().token {
|
||||||
|
ALL_KEYWORDS
|
||||||
|
.binary_search(&w.value.to_uppercase().as_str())
|
||||||
|
.is_ok()
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
};
|
||||||
|
|
||||||
|
let field_name = if is_anonymous_field {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(self.parse_identifier()?)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Use the helper (not `parse_data_type`) so a `>>` consumed by a nested type is reported upward.
let (field_type, trailing_bracket) = self.parse_data_type_helper()?;
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
StructField {
|
||||||
|
field_name,
|
||||||
|
field_type,
|
||||||
|
},
|
||||||
|
trailing_bracket,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// For nested types that use the angle bracket syntax, this matches either
|
||||||
|
/// `>`, `>>` or nothing depending on which variant is expected (specified by the previously
|
||||||
|
/// matched `trailing_bracket` argument). It returns whether there is a trailing `>`
|
||||||
|
/// that the enclosing type must treat as already matched (i.e. whether `>>` was matched here).
|
||||||
|
fn expect_closing_angle_bracket(
|
||||||
|
&mut self,
|
||||||
|
trailing_bracket: MatchedTrailingBracket,
|
||||||
|
) -> Result<MatchedTrailingBracket, ParserError> {
|
||||||
|
let trailing_bracket = if !trailing_bracket.0 {
|
||||||
|
// The child did not consume our closing bracket, so it must come next.
match self.peek_token().token {
|
||||||
|
Token::Gt => {
|
||||||
|
self.next_token();
|
||||||
|
false.into()
|
||||||
|
}
|
||||||
|
// `>>` closes this type and the enclosing one in a single token.
Token::ShiftRight => {
|
||||||
|
self.next_token();
|
||||||
|
true.into()
|
||||||
|
}
|
||||||
|
_ => return self.expected(">", self.peek_token()),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Our closing bracket was the second half of the child's `>>`; nothing to consume.
false.into()
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(trailing_bracket)
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse an operator following an expression
|
/// Parse an operator following an expression
|
||||||
pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result<Expr, ParserError> {
|
pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result<Expr, ParserError> {
|
||||||
// allow the dialect to override infix parsing
|
// allow the dialect to override infix parsing
|
||||||
|
@ -4876,7 +5066,22 @@ impl<'a> Parser<'a> {
|
||||||
|
|
||||||
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
|
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
|
||||||
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
|
///
/// Delegates to `parse_data_type_helper` and returns an error if that helper
/// reports an unmatched trailing `>` (a `>>` was consumed with no enclosing
/// type to claim the second bracket).
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
|
||||||
|
let (ty, trailing_bracket) = self.parse_data_type_helper()?;
|
||||||
|
if trailing_bracket.0 {
|
||||||
|
// NOTE(review): the other `parser_err!` call sites in this change pass a location
// (`self.peek_token().location`); here the whole token is passed - confirm this is intended.
return parser_err!(
|
||||||
|
format!("unmatched > after parsing data type {ty}"),
|
||||||
|
self.peek_token()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(ty)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_data_type_helper(
|
||||||
|
&mut self,
|
||||||
|
) -> Result<(DataType, MatchedTrailingBracket), ParserError> {
|
||||||
let next_token = self.next_token();
|
let next_token = self.next_token();
|
||||||
|
let mut trailing_bracket = false.into();
|
||||||
let mut data = match next_token.token {
|
let mut data = match next_token.token {
|
||||||
Token::Word(w) => match w.keyword {
|
Token::Word(w) => match w.keyword {
|
||||||
Keyword::BOOLEAN => Ok(DataType::Boolean),
|
Keyword::BOOLEAN => Ok(DataType::Boolean),
|
||||||
|
@ -4884,6 +5089,7 @@ impl<'a> Parser<'a> {
|
||||||
Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)),
|
Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)),
|
||||||
Keyword::REAL => Ok(DataType::Real),
|
Keyword::REAL => Ok(DataType::Real),
|
||||||
Keyword::FLOAT4 => Ok(DataType::Float4),
|
Keyword::FLOAT4 => Ok(DataType::Float4),
|
||||||
|
Keyword::FLOAT64 => Ok(DataType::Float64),
|
||||||
Keyword::FLOAT8 => Ok(DataType::Float8),
|
Keyword::FLOAT8 => Ok(DataType::Float8),
|
||||||
Keyword::DOUBLE => {
|
Keyword::DOUBLE => {
|
||||||
if self.parse_keyword(Keyword::PRECISION) {
|
if self.parse_keyword(Keyword::PRECISION) {
|
||||||
|
@ -4940,6 +5146,7 @@ impl<'a> Parser<'a> {
|
||||||
Ok(DataType::Int4(optional_precision?))
|
Ok(DataType::Int4(optional_precision?))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Keyword::INT64 => Ok(DataType::Int64),
|
||||||
Keyword::INTEGER => {
|
Keyword::INTEGER => {
|
||||||
let optional_precision = self.parse_optional_precision();
|
let optional_precision = self.parse_optional_precision();
|
||||||
if self.parse_keyword(Keyword::UNSIGNED) {
|
if self.parse_keyword(Keyword::UNSIGNED) {
|
||||||
|
@ -4994,6 +5201,7 @@ impl<'a> Parser<'a> {
|
||||||
Keyword::BINARY => Ok(DataType::Binary(self.parse_optional_precision()?)),
|
Keyword::BINARY => Ok(DataType::Binary(self.parse_optional_precision()?)),
|
||||||
Keyword::VARBINARY => Ok(DataType::Varbinary(self.parse_optional_precision()?)),
|
Keyword::VARBINARY => Ok(DataType::Varbinary(self.parse_optional_precision()?)),
|
||||||
Keyword::BLOB => Ok(DataType::Blob(self.parse_optional_precision()?)),
|
Keyword::BLOB => Ok(DataType::Blob(self.parse_optional_precision()?)),
|
||||||
|
Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)),
|
||||||
Keyword::UUID => Ok(DataType::Uuid),
|
Keyword::UUID => Ok(DataType::Uuid),
|
||||||
Keyword::DATE => Ok(DataType::Date),
|
Keyword::DATE => Ok(DataType::Date),
|
||||||
Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)),
|
Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)),
|
||||||
|
@ -5037,7 +5245,7 @@ impl<'a> Parser<'a> {
|
||||||
Keyword::INTERVAL => Ok(DataType::Interval),
|
Keyword::INTERVAL => Ok(DataType::Interval),
|
||||||
Keyword::JSON => Ok(DataType::JSON),
|
Keyword::JSON => Ok(DataType::JSON),
|
||||||
Keyword::REGCLASS => Ok(DataType::Regclass),
|
Keyword::REGCLASS => Ok(DataType::Regclass),
|
||||||
Keyword::STRING => Ok(DataType::String),
|
Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)),
|
||||||
Keyword::TEXT => Ok(DataType::Text),
|
Keyword::TEXT => Ok(DataType::Text),
|
||||||
Keyword::BYTEA => Ok(DataType::Bytea),
|
Keyword::BYTEA => Ok(DataType::Bytea),
|
||||||
Keyword::NUMERIC => Ok(DataType::Numeric(
|
Keyword::NUMERIC => Ok(DataType::Numeric(
|
||||||
|
@ -5059,17 +5267,23 @@ impl<'a> Parser<'a> {
|
||||||
Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)),
|
Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)),
|
||||||
Keyword::ARRAY => {
|
Keyword::ARRAY => {
|
||||||
if dialect_of!(self is SnowflakeDialect) {
|
if dialect_of!(self is SnowflakeDialect) {
|
||||||
Ok(DataType::Array(None))
|
Ok(DataType::Array(ArrayElemTypeDef::None))
|
||||||
} else {
|
} else {
|
||||||
// Hive array syntax. Note that nesting arrays - or other Hive syntax
|
|
||||||
// that ends with > will fail due to "C++" problem - >> is parsed as
|
|
||||||
// Token::ShiftRight
|
|
||||||
self.expect_token(&Token::Lt)?;
|
self.expect_token(&Token::Lt)?;
|
||||||
let inside_type = self.parse_data_type()?;
|
let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?;
|
||||||
self.expect_token(&Token::Gt)?;
|
trailing_bracket = self.expect_closing_angle_bracket(_trailing_bracket)?;
|
||||||
Ok(DataType::Array(Some(Box::new(inside_type))))
|
Ok(DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
||||||
|
inside_type,
|
||||||
|
))))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Keyword::STRUCT if dialect_of!(self is BigQueryDialect) => {
|
||||||
|
self.prev_token();
|
||||||
|
let (field_defs, _trailing_bracket) =
|
||||||
|
self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?;
|
||||||
|
trailing_bracket = _trailing_bracket;
|
||||||
|
Ok(DataType::Struct(field_defs))
|
||||||
|
}
|
||||||
_ => {
|
_ => {
|
||||||
self.prev_token();
|
self.prev_token();
|
||||||
let type_name = self.parse_object_name()?;
|
let type_name = self.parse_object_name()?;
|
||||||
|
@ -5087,9 +5301,9 @@ impl<'a> Parser<'a> {
|
||||||
// Keyword::ARRAY syntax from above
|
// Keyword::ARRAY syntax from above
|
||||||
while self.consume_token(&Token::LBracket) {
|
while self.consume_token(&Token::LBracket) {
|
||||||
self.expect_token(&Token::RBracket)?;
|
self.expect_token(&Token::RBracket)?;
|
||||||
data = DataType::Array(Some(Box::new(data)))
|
data = DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(data)))
|
||||||
}
|
}
|
||||||
Ok(data)
|
Ok((data, trailing_bracket))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_string_values(&mut self) -> Result<Vec<String>, ParserError> {
|
pub fn parse_string_values(&mut self) -> Result<Vec<String>, ParserError> {
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
mod test_utils;
|
mod test_utils;
|
||||||
|
|
||||||
|
use sqlparser::ast;
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use sqlparser::ast::*;
|
use sqlparser::ast::*;
|
||||||
|
@ -85,6 +86,494 @@ fn parse_raw_literal() {
|
||||||
panic!("invalid query")
|
panic!("invalid query")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that nested `STRUCT<...>` / `ARRAY<...>` column types round-trip, including a
// type that ends in `>>` (`ARRAY<STRUCT<INT64>>`) and an anonymous struct field.
#[test]
|
||||||
|
fn parse_nested_data_types() {
|
||||||
|
let sql = "CREATE TABLE table (x STRUCT<a ARRAY<INT64>, b BYTES(42)>, y ARRAY<STRUCT<INT64>>)";
|
||||||
|
match bigquery().one_statement_parses_to(sql, sql) {
|
||||||
|
Statement::CreateTable { name, columns, .. } => {
|
||||||
|
assert_eq!(name, ObjectName(vec!["table".into()]));
|
||||||
|
assert_eq!(
|
||||||
|
columns,
|
||||||
|
vec![
|
||||||
|
ColumnDef {
|
||||||
|
name: Ident::new("x"),
|
||||||
|
data_type: DataType::Struct(vec![
|
||||||
|
StructField {
|
||||||
|
field_name: Some("a".into()),
|
||||||
|
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
|
||||||
|
Box::new(DataType::Int64,)
|
||||||
|
))
|
||||||
|
},
|
||||||
|
StructField {
|
||||||
|
field_name: Some("b".into()),
|
||||||
|
field_type: DataType::Bytes(Some(42))
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
collation: None,
|
||||||
|
options: vec![],
|
||||||
|
},
|
||||||
|
ColumnDef {
|
||||||
|
name: Ident::new("y"),
|
||||||
|
data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
||||||
|
DataType::Struct(vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Int64,
|
||||||
|
}]),
|
||||||
|
))),
|
||||||
|
collation: None,
|
||||||
|
options: vec![],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks the error reported for each way an extra closing `>` can appear after a struct type.
#[test]
|
||||||
|
fn parse_invalid_brackets() {
|
||||||
|
// `>>` closes the struct and leaves one bracket unmatched in a literal.
let sql = "SELECT STRUCT<INT64>>(NULL)";
|
||||||
|
assert_eq!(
|
||||||
|
bigquery().parse_sql_statements(sql).unwrap_err(),
|
||||||
|
ParserError::ParserError("unmatched > in STRUCT literal".to_string())
|
||||||
|
);
|
||||||
|
|
||||||
|
// `>>` closes both structs; the third `>` is found where `(` is expected.
let sql = "SELECT STRUCT<STRUCT<INT64>>>(NULL)";
|
||||||
|
assert_eq!(
|
||||||
|
bigquery().parse_sql_statements(sql).unwrap_err(),
|
||||||
|
ParserError::ParserError("Expected (, found: >".to_string())
|
||||||
|
);
|
||||||
|
|
||||||
|
// Same extra `>`, this time in a column type of a CREATE TABLE statement.
let sql = "CREATE TABLE table (x STRUCT<STRUCT<INT64>>>)";
|
||||||
|
assert_eq!(
|
||||||
|
bigquery().parse_sql_statements(sql).unwrap_err(),
|
||||||
|
ParserError::ParserError(
|
||||||
|
"Expected ',' or ')' after column definition, found: >".to_string()
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_tuple_struct_literal() {
|
||||||
|
// tuple syntax: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#tuple_syntax
|
||||||
|
// syntax: (expr1, expr2 [, ... ])
|
||||||
|
let sql = "SELECT (1, 2, 3), (1, 1.0, '123', true)";
|
||||||
|
let select = bigquery().verified_only_select(sql);
|
||||||
|
assert_eq!(2, select.projection.len());
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Tuple(vec![
|
||||||
|
Expr::Value(number("1")),
|
||||||
|
Expr::Value(number("2")),
|
||||||
|
Expr::Value(number("3")),
|
||||||
|
]),
|
||||||
|
expr_from_projection(&select.projection[0])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Tuple(vec![
|
||||||
|
Expr::Value(number("1")),
|
||||||
|
Expr::Value(number("1.0")),
|
||||||
|
Expr::Value(Value::SingleQuotedString("123".to_string())),
|
||||||
|
Expr::Value(Value::Boolean(true))
|
||||||
|
]),
|
||||||
|
expr_from_projection(&select.projection[1])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_typeless_struct_syntax() {
|
||||||
|
// typeless struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax
|
||||||
|
// syntax: STRUCT( expr1 [AS field_name] [, ... ])
|
||||||
|
let sql = "SELECT STRUCT(1, 2, 3), STRUCT('abc'), STRUCT(1, t.str_col), STRUCT(1 AS a, 'abc' AS b), STRUCT(str_col AS abc)";
|
||||||
|
let select = bigquery().verified_only_select(sql);
|
||||||
|
assert_eq!(5, select.projection.len());
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![
|
||||||
|
Expr::Value(number("1")),
|
||||||
|
Expr::Value(number("2")),
|
||||||
|
Expr::Value(number("3")),
|
||||||
|
],
|
||||||
|
fields: Default::default()
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[0])
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Value(Value::SingleQuotedString("abc".to_string())),],
|
||||||
|
fields: Default::default()
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[1])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![
|
||||||
|
Expr::Value(number("1")),
|
||||||
|
Expr::CompoundIdentifier(vec![Ident::from("t"), Ident::from("str_col")]),
|
||||||
|
],
|
||||||
|
fields: Default::default()
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[2])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![
|
||||||
|
Expr::Named {
|
||||||
|
expr: Expr::Value(number("1")).into(),
|
||||||
|
name: Ident::from("a")
|
||||||
|
},
|
||||||
|
Expr::Named {
|
||||||
|
expr: Expr::Value(Value::SingleQuotedString("abc".to_string())).into(),
|
||||||
|
name: Ident::from("b")
|
||||||
|
},
|
||||||
|
],
|
||||||
|
fields: Default::default()
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[3])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Named {
|
||||||
|
expr: Expr::Identifier(Ident::from("str_col")).into(),
|
||||||
|
name: Ident::from("abc")
|
||||||
|
}],
|
||||||
|
fields: Default::default()
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[4])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_typed_struct_syntax() {
|
||||||
|
// typed struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax
|
||||||
|
// syntax: STRUCT<[field_name] field_type, ...>( expr1 [, ... ])
|
||||||
|
|
||||||
|
let sql = r#"SELECT STRUCT<INT64>(5), STRUCT<x INT64, y STRING>(1, t.str_col), STRUCT<arr ARRAY<FLOAT64>, str STRUCT<BOOL>>(nested_col)"#;
|
||||||
|
let select = bigquery().verified_only_select(sql);
|
||||||
|
assert_eq!(3, select.projection.len());
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Value(number("5")),],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Int64,
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[0])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![
|
||||||
|
Expr::Value(number("1")),
|
||||||
|
Expr::CompoundIdentifier(vec![
|
||||||
|
Ident {
|
||||||
|
value: "t".into(),
|
||||||
|
quote_style: None,
|
||||||
|
},
|
||||||
|
Ident {
|
||||||
|
value: "str_col".into(),
|
||||||
|
quote_style: None,
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
],
|
||||||
|
fields: vec![
|
||||||
|
StructField {
|
||||||
|
field_name: Some(Ident {
|
||||||
|
value: "x".into(),
|
||||||
|
quote_style: None,
|
||||||
|
}),
|
||||||
|
field_type: DataType::Int64
|
||||||
|
},
|
||||||
|
StructField {
|
||||||
|
field_name: Some(Ident {
|
||||||
|
value: "y".into(),
|
||||||
|
quote_style: None,
|
||||||
|
}),
|
||||||
|
field_type: DataType::String(None)
|
||||||
|
},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[1])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Identifier(Ident {
|
||||||
|
value: "nested_col".into(),
|
||||||
|
quote_style: None,
|
||||||
|
}),],
|
||||||
|
fields: vec![
|
||||||
|
StructField {
|
||||||
|
field_name: Some("arr".into()),
|
||||||
|
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
||||||
|
DataType::Float64
|
||||||
|
)))
|
||||||
|
},
|
||||||
|
StructField {
|
||||||
|
field_name: Some("str".into()),
|
||||||
|
field_type: DataType::Struct(vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Bool
|
||||||
|
}])
|
||||||
|
},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[2])
|
||||||
|
);
|
||||||
|
|
||||||
|
let sql = r#"SELECT STRUCT<x STRUCT, y ARRAY<STRUCT>>(nested_col)"#;
|
||||||
|
let select = bigquery().verified_only_select(sql);
|
||||||
|
assert_eq!(1, select.projection.len());
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Identifier(Ident {
|
||||||
|
value: "nested_col".into(),
|
||||||
|
quote_style: None,
|
||||||
|
}),],
|
||||||
|
fields: vec![
|
||||||
|
StructField {
|
||||||
|
field_name: Some("x".into()),
|
||||||
|
field_type: DataType::Struct(Default::default())
|
||||||
|
},
|
||||||
|
StructField {
|
||||||
|
field_name: Some("y".into()),
|
||||||
|
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
|
||||||
|
DataType::Struct(Default::default())
|
||||||
|
)))
|
||||||
|
},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[0])
|
||||||
|
);
|
||||||
|
|
||||||
|
let sql = r#"SELECT STRUCT<BOOL>(true), STRUCT<BYTES(42)>(B'abc')"#;
|
||||||
|
let select = bigquery().verified_only_select(sql);
|
||||||
|
assert_eq!(2, select.projection.len());
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Value(Value::Boolean(true)),],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Bool
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[0])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Value(Value::SingleQuotedByteStringLiteral(
|
||||||
|
"abc".into()
|
||||||
|
)),],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Bytes(Some(42))
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[1])
|
||||||
|
);
|
||||||
|
|
||||||
|
let sql = r#"SELECT STRUCT<DATE>("2011-05-05"), STRUCT<DATETIME>(DATETIME '1999-01-01 01:23:34.45'), STRUCT<FLOAT64>(5.0), STRUCT<INT64>(1)"#;
|
||||||
|
let select = bigquery().verified_only_select(sql);
|
||||||
|
assert_eq!(4, select.projection.len());
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Value(Value::DoubleQuotedString(
|
||||||
|
"2011-05-05".to_string()
|
||||||
|
)),],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Date
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[0])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::TypedString {
|
||||||
|
data_type: DataType::Datetime(None),
|
||||||
|
value: "1999-01-01 01:23:34.45".to_string()
|
||||||
|
},],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Datetime(None)
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[1])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Value(number("5.0")),],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Float64
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[2])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Value(number("1")),],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Int64
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[3])
|
||||||
|
);
|
||||||
|
|
||||||
|
let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '1-2 3 4:5:6.789999'), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
|
||||||
|
let select = bigquery().verified_only_select(sql);
|
||||||
|
assert_eq!(2, select.projection.len());
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Interval(ast::Interval {
|
||||||
|
value: Box::new(Expr::Value(Value::SingleQuotedString(
|
||||||
|
"1-2 3 4:5:6.789999".to_string()
|
||||||
|
))),
|
||||||
|
leading_field: None,
|
||||||
|
leading_precision: None,
|
||||||
|
last_field: None,
|
||||||
|
fractional_seconds_precision: None
|
||||||
|
}),],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Interval
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[0])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::TypedString {
|
||||||
|
data_type: DataType::JSON,
|
||||||
|
value: r#"{"class" : {"students" : [{"name" : "Jane"}]}}"#.to_string()
|
||||||
|
},],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::JSON
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[1])
|
||||||
|
);
|
||||||
|
|
||||||
|
let sql = r#"SELECT STRUCT<STRING(42)>("foo"), STRUCT<TIMESTAMP>(TIMESTAMP '2008-12-25 15:30:00 America/Los_Angeles'), STRUCT<TIME>(TIME '15:30:00')"#;
|
||||||
|
let select = bigquery().verified_only_select(sql);
|
||||||
|
assert_eq!(3, select.projection.len());
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Value(Value::DoubleQuotedString("foo".to_string())),],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::String(Some(42))
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[0])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::TypedString {
|
||||||
|
data_type: DataType::Timestamp(None, TimezoneInfo::None),
|
||||||
|
value: "2008-12-25 15:30:00 America/Los_Angeles".to_string()
|
||||||
|
},],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Timestamp(None, TimezoneInfo::None)
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[1])
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::TypedString {
|
||||||
|
data_type: DataType::Time(None, TimezoneInfo::None),
|
||||||
|
value: "15:30:00".to_string()
|
||||||
|
},],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Time(None, TimezoneInfo::None)
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[2])
|
||||||
|
);
|
||||||
|
|
||||||
|
let sql = r#"SELECT STRUCT<NUMERIC>(NUMERIC '1'), STRUCT<BIGNUMERIC>(BIGNUMERIC '1')"#;
|
||||||
|
let select = bigquery().verified_only_select(sql);
|
||||||
|
assert_eq!(2, select.projection.len());
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::TypedString {
|
||||||
|
data_type: DataType::Numeric(ExactNumberInfo::None),
|
||||||
|
value: "1".to_string()
|
||||||
|
},],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::Numeric(ExactNumberInfo::None)
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[0])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::TypedString {
|
||||||
|
data_type: DataType::BigNumeric(ExactNumberInfo::None),
|
||||||
|
value: "1".to_string()
|
||||||
|
},],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: None,
|
||||||
|
field_type: DataType::BigNumeric(ExactNumberInfo::None)
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[1])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_typed_struct_with_field_name() {
|
||||||
|
let sql = r#"SELECT STRUCT<x INT64>(5), STRUCT<y STRING>("foo")"#;
|
||||||
|
let select = bigquery().verified_only_select(sql);
|
||||||
|
assert_eq!(2, select.projection.len());
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Value(number("5")),],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: Some(Ident::from("x")),
|
||||||
|
field_type: DataType::Int64
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[0])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Value(Value::DoubleQuotedString("foo".to_string())),],
|
||||||
|
fields: vec![StructField {
|
||||||
|
field_name: Some(Ident::from("y")),
|
||||||
|
field_type: DataType::String(None)
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[1])
|
||||||
|
);
|
||||||
|
|
||||||
|
let sql = r#"SELECT STRUCT<x INT64, y INT64>(5, 5)"#;
|
||||||
|
let select = bigquery().verified_only_select(sql);
|
||||||
|
assert_eq!(1, select.projection.len());
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::Struct {
|
||||||
|
values: vec![Expr::Value(number("5")), Expr::Value(number("5")),],
|
||||||
|
fields: vec![
|
||||||
|
StructField {
|
||||||
|
field_name: Some(Ident::from("x")),
|
||||||
|
field_type: DataType::Int64
|
||||||
|
},
|
||||||
|
StructField {
|
||||||
|
field_name: Some(Ident::from("y")),
|
||||||
|
field_type: DataType::Int64
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
expr_from_projection(&select.projection[0])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_table_identifiers() {
|
fn parse_table_identifiers() {
|
||||||
/// Parses a table identifier ident and verifies that re-serializing the
|
/// Parses a table identifier ident and verifies that re-serializing the
|
||||||
|
|
|
@ -23,8 +23,8 @@ use sqlparser::ast::SelectItem::UnnamedExpr;
|
||||||
use sqlparser::ast::TableFactor::{Pivot, Unpivot};
|
use sqlparser::ast::TableFactor::{Pivot, Unpivot};
|
||||||
use sqlparser::ast::*;
|
use sqlparser::ast::*;
|
||||||
use sqlparser::dialect::{
|
use sqlparser::dialect::{
|
||||||
AnsiDialect, BigQueryDialect, ClickHouseDialect, DuckDbDialect, GenericDialect, HiveDialect,
|
AnsiDialect, BigQueryDialect, ClickHouseDialect, Dialect, DuckDbDialect, GenericDialect,
|
||||||
MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect,
|
HiveDialect, MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect,
|
||||||
SnowflakeDialect,
|
SnowflakeDialect,
|
||||||
};
|
};
|
||||||
use sqlparser::keywords::ALL_KEYWORDS;
|
use sqlparser::keywords::ALL_KEYWORDS;
|
||||||
|
@ -2516,15 +2516,41 @@ fn parse_create_table() {
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_create_table_hive_array() {
|
fn parse_create_table_hive_array() {
|
||||||
// Parsing [] type arrays does not work in MsSql since [ is used in is_delimited_identifier_start
|
// Parsing [] type arrays does not work in MsSql since [ is used in is_delimited_identifier_start
|
||||||
|
for (dialects, angle_bracket_syntax) in [
|
||||||
|
(
|
||||||
|
vec![Box::new(PostgreSqlDialect {}) as Box<dyn Dialect>],
|
||||||
|
false,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
vec![
|
||||||
|
Box::new(HiveDialect {}) as Box<dyn Dialect>,
|
||||||
|
Box::new(BigQueryDialect {}) as Box<dyn Dialect>,
|
||||||
|
],
|
||||||
|
true,
|
||||||
|
),
|
||||||
|
] {
|
||||||
let dialects = TestedDialects {
|
let dialects = TestedDialects {
|
||||||
dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(HiveDialect {})],
|
dialects,
|
||||||
options: None,
|
options: None,
|
||||||
};
|
};
|
||||||
let sql = "CREATE TABLE IF NOT EXISTS something (name int, val array<int>)";
|
|
||||||
match dialects.one_statement_parses_to(
|
let sql = format!(
|
||||||
sql,
|
"CREATE TABLE IF NOT EXISTS something (name INT, val {})",
|
||||||
"CREATE TABLE IF NOT EXISTS something (name INT, val INT[])",
|
if angle_bracket_syntax {
|
||||||
) {
|
"ARRAY<INT>"
|
||||||
|
} else {
|
||||||
|
"INT[]"
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
let expected = Box::new(DataType::Int(None));
|
||||||
|
let expected = if angle_bracket_syntax {
|
||||||
|
ArrayElemTypeDef::AngleBracket(expected)
|
||||||
|
} else {
|
||||||
|
ArrayElemTypeDef::SquareBracket(expected)
|
||||||
|
};
|
||||||
|
|
||||||
|
match dialects.one_statement_parses_to(sql.as_str(), sql.as_str()) {
|
||||||
Statement::CreateTable {
|
Statement::CreateTable {
|
||||||
if_not_exists,
|
if_not_exists,
|
||||||
name,
|
name,
|
||||||
|
@ -2544,7 +2570,7 @@ fn parse_create_table_hive_array() {
|
||||||
},
|
},
|
||||||
ColumnDef {
|
ColumnDef {
|
||||||
name: Ident::new("val"),
|
name: Ident::new("val"),
|
||||||
data_type: DataType::Array(Some(Box::new(DataType::Int(None)))),
|
data_type: DataType::Array(expected),
|
||||||
collation: None,
|
collation: None,
|
||||||
options: vec![],
|
options: vec![],
|
||||||
},
|
},
|
||||||
|
@ -2553,8 +2579,9 @@ fn parse_create_table_hive_array() {
|
||||||
}
|
}
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// SnowflakeDialect using array diffrent
|
// SnowflakeDialect using array different
|
||||||
let dialects = TestedDialects {
|
let dialects = TestedDialects {
|
||||||
dialects: vec![
|
dialects: vec![
|
||||||
Box::new(PostgreSqlDialect {}),
|
Box::new(PostgreSqlDialect {}),
|
||||||
|
|
|
@ -1780,9 +1780,11 @@ fn parse_array_index_expr() {
|
||||||
})],
|
})],
|
||||||
named: true,
|
named: true,
|
||||||
})),
|
})),
|
||||||
data_type: DataType::Array(Some(Box::new(DataType::Array(Some(Box::new(
|
data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(
|
||||||
DataType::Int(None)
|
DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(DataType::Int(
|
||||||
)))))),
|
None
|
||||||
|
))))
|
||||||
|
))),
|
||||||
format: None,
|
format: None,
|
||||||
}))),
|
}))),
|
||||||
indexes: vec![num[1].clone(), num[2].clone()],
|
indexes: vec![num[1].clone(), num[2].clone()],
|
||||||
|
|
|
@ -166,7 +166,7 @@ fn parse_array() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&Expr::Cast {
|
&Expr::Cast {
|
||||||
expr: Box::new(Expr::Identifier(Ident::new("a"))),
|
expr: Box::new(Expr::Identifier(Ident::new("a"))),
|
||||||
data_type: DataType::Array(None),
|
data_type: DataType::Array(ArrayElemTypeDef::None),
|
||||||
format: None,
|
format: None,
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue