ClickHouse data types (#1285)

This commit is contained in:
Aleksei Piianin 2024-06-07 13:09:42 +02:00 committed by GitHub
parent 6d4776b482
commit 2fb919d8b2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 528 additions and 24 deletions

View file

@ -22,7 +22,7 @@ use sqlparser_derive::{Visit, VisitMut};
use crate::ast::{display_comma_separated, ObjectName, StructField};
use super::value::escape_single_quote_string;
use super::{value::escape_single_quote_string, ColumnDef};
/// SQL data types
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
@ -129,10 +129,39 @@ pub enum DataType {
///
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
Int4(Option<u64>),
/// Integer type in [bigquery]
/// Int8 as alias for Bigint in [postgresql] and integer type in [clickhouse]
/// Note: Int8 mean 8 bytes in [postgresql] (not 8 bits)
/// Int8 with optional display width e.g. INT8 or INT8(11)
/// Note: Int8 mean 8 bits in [clickhouse]
///
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
Int8(Option<u64>),
/// Integer type in [clickhouse]
/// Note: Int16 mean 16 bits in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
Int16,
/// Integer type in [clickhouse]
/// Note: Int32 mean 32 bits in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
Int32,
/// Integer type in [bigquery], [clickhouse]
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
Int64,
/// Integer type in [clickhouse]
/// Note: Int128 mean 128 bits in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
Int128,
/// Integer type in [clickhouse]
/// Note: Int256 mean 256 bits in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
Int256,
/// Integer with optional display width e.g. INTEGER or INTEGER(11)
Integer(Option<u64>),
/// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED
@ -141,25 +170,54 @@ pub enum DataType {
UnsignedInt4(Option<u64>),
/// Unsigned integer with optional display width e.g. INTEGER UNSIGNED or INTEGER(11) UNSIGNED
UnsignedInteger(Option<u64>),
/// Unsigned integer type in [clickhouse]
/// Note: UInt8 mean 8 bits in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
UInt8,
/// Unsigned integer type in [clickhouse]
/// Note: UInt16 mean 16 bits in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
UInt16,
/// Unsigned integer type in [clickhouse]
/// Note: UInt32 mean 32 bits in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
UInt32,
/// Unsigned integer type in [clickhouse]
/// Note: UInt64 mean 64 bits in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
UInt64,
/// Unsigned integer type in [clickhouse]
/// Note: UInt128 mean 128 bits in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
UInt128,
/// Unsigned integer type in [clickhouse]
/// Note: UInt256 mean 256 bits in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
UInt256,
/// Big integer with optional display width e.g. BIGINT or BIGINT(20)
BigInt(Option<u64>),
/// Unsigned big integer with optional display width e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED
UnsignedBigInt(Option<u64>),
/// Int8 as alias for Bigint in [postgresql]
/// Note: Int8 mean 8 bytes in postgres (not 8 bits)
/// Int8 with optional display width e.g. INT8 or INT8(11)
///
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
Int8(Option<u64>),
/// Unsigned Int8 with optional display width e.g. INT8 UNSIGNED or INT8(11) UNSIGNED
UnsignedInt8(Option<u64>),
/// Float4 as alias for Real in [postgresql]
///
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
Float4,
/// Floating point in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float
Float32,
/// Floating point in [bigquery], [clickhouse]
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float
Float64,
/// Floating point e.g. REAL
Real,
@ -182,6 +240,10 @@ pub enum DataType {
Boolean,
/// Date
Date,
/// Date32 with the same range as Datetime64 in [ClickHouse][1].
///
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/date32
Date32,
/// Time with optional time precision and time zone information e.g. [standard][1].
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
@ -190,6 +252,10 @@ pub enum DataType {
///
/// [1]: https://dev.mysql.com/doc/refman/8.0/en/datetime.html
Datetime(Option<u64>),
/// Datetime with time precision and optional timezone e.g. [ClickHouse][1].
///
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64
Datetime64(u64, Option<String>),
/// Timestamp with optional time precision and time zone information e.g. [standard][1].
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
@ -206,12 +272,28 @@ pub enum DataType {
Text,
/// String with optional length.
String(Option<u64>),
/// A fixed-length string e.g [ClickHouse][1].
///
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/fixedstring
FixedString(u64),
/// Bytea
Bytea,
/// Custom type such as enums
Custom(ObjectName, Vec<String>),
/// Arrays
Array(ArrayElemTypeDef),
/// Map of a key type to a value type, e.g. `Map(String, UInt16)` in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/map
Map(Box<DataType>, Box<DataType>),
/// Tuple of optionally named fields, e.g. `Tuple(a Int8, b String)` in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
Tuple(Vec<StructField>),
/// Nested structure made of column definitions, e.g. `Nested(a Int8, b String)` in [clickhouse]
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested
Nested(Vec<ColumnDef>),
/// Enums
Enum(Vec<String>),
/// Set
@ -221,6 +303,14 @@ pub enum DataType {
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Struct(Vec<StructField>),
/// Nullable - a [clickhouse] data type that allows storing the special marker NULL alongside the values of the wrapped type.
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable
Nullable(Box<DataType>),
/// LowCardinality - a [clickhouse] wrapper that changes the internal representation of other data types to be dictionary-encoded.
///
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality
LowCardinality(Box<DataType>),
/// No type specified - only used with
/// [`SQLiteDialect`](crate::dialect::SQLiteDialect), from statements such
/// as `CREATE TABLE t1 (a)`.
@ -296,9 +386,24 @@ impl fmt::Display for DataType {
DataType::Int4(zerofill) => {
format_type_with_optional_length(f, "INT4", zerofill, false)
}
DataType::Int8(zerofill) => {
format_type_with_optional_length(f, "INT8", zerofill, false)
}
DataType::Int16 => {
write!(f, "Int16")
}
DataType::Int32 => {
write!(f, "Int32")
}
DataType::Int64 => {
write!(f, "INT64")
}
DataType::Int128 => {
write!(f, "Int128")
}
DataType::Int256 => {
write!(f, "Int256")
}
DataType::UnsignedInt4(zerofill) => {
format_type_with_optional_length(f, "INT4", zerofill, true)
}
@ -314,14 +419,30 @@ impl fmt::Display for DataType {
DataType::UnsignedBigInt(zerofill) => {
format_type_with_optional_length(f, "BIGINT", zerofill, true)
}
DataType::Int8(zerofill) => {
format_type_with_optional_length(f, "INT8", zerofill, false)
}
DataType::UnsignedInt8(zerofill) => {
format_type_with_optional_length(f, "INT8", zerofill, true)
}
DataType::UInt8 => {
write!(f, "UInt8")
}
DataType::UInt16 => {
write!(f, "UInt16")
}
DataType::UInt32 => {
write!(f, "UInt32")
}
DataType::UInt64 => {
write!(f, "UInt64")
}
DataType::UInt128 => {
write!(f, "UInt128")
}
DataType::UInt256 => {
write!(f, "UInt256")
}
DataType::Real => write!(f, "REAL"),
DataType::Float4 => write!(f, "FLOAT4"),
DataType::Float32 => write!(f, "Float32"),
DataType::Float64 => write!(f, "FLOAT64"),
DataType::Double => write!(f, "DOUBLE"),
DataType::Float8 => write!(f, "FLOAT8"),
@ -329,6 +450,7 @@ impl fmt::Display for DataType {
DataType::Bool => write!(f, "BOOL"),
DataType::Boolean => write!(f, "BOOLEAN"),
DataType::Date => write!(f, "DATE"),
DataType::Date32 => write!(f, "Date32"),
DataType::Time(precision, timezone_info) => {
format_datetime_precision_and_tz(f, "TIME", precision, timezone_info)
}
@ -338,6 +460,14 @@ impl fmt::Display for DataType {
DataType::Timestamp(precision, timezone_info) => {
format_datetime_precision_and_tz(f, "TIMESTAMP", precision, timezone_info)
}
DataType::Datetime64(precision, timezone) => {
format_clickhouse_datetime_precision_and_timezone(
f,
"DateTime64",
precision,
timezone,
)
}
DataType::Interval => write!(f, "INTERVAL"),
DataType::JSON => write!(f, "JSON"),
DataType::JSONB => write!(f, "JSONB"),
@ -350,6 +480,7 @@ impl fmt::Display for DataType {
ArrayElemTypeDef::SquareBracket(t, None) => write!(f, "{t}[]"),
ArrayElemTypeDef::SquareBracket(t, Some(size)) => write!(f, "{t}[{size}]"),
ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"),
ArrayElemTypeDef::Parenthesis(t) => write!(f, "Array({t})"),
},
DataType::Custom(ty, modifiers) => {
if modifiers.is_empty() {
@ -385,6 +516,25 @@ impl fmt::Display for DataType {
write!(f, "STRUCT")
}
}
// ClickHouse
DataType::Nullable(data_type) => {
write!(f, "Nullable({})", data_type)
}
DataType::FixedString(character_length) => {
write!(f, "FixedString({})", character_length)
}
DataType::LowCardinality(data_type) => {
write!(f, "LowCardinality({})", data_type)
}
DataType::Map(key_data_type, value_data_type) => {
write!(f, "Map({}, {})", key_data_type, value_data_type)
}
DataType::Tuple(fields) => {
write!(f, "Tuple({})", display_comma_separated(fields))
}
DataType::Nested(fields) => {
write!(f, "Nested({})", display_comma_separated(fields))
}
DataType::Unspecified => Ok(()),
}
}
@ -439,6 +589,23 @@ fn format_datetime_precision_and_tz(
Ok(())
}
/// Writes a ClickHouse-style datetime type as `<sql_type>(<len>)` or, when a
/// time zone is present, `<sql_type>(<len>, '<time_zone>')`.
///
/// The time zone text is emitted verbatim between single quotes.
fn format_clickhouse_datetime_precision_and_timezone(
    f: &mut fmt::Formatter,
    sql_type: &'static str,
    len: &u64,
    time_zone: &Option<String>,
) -> fmt::Result {
    match time_zone {
        Some(tz) => write!(f, "{sql_type}({len}, '{tz}')"),
        None => write!(f, "{sql_type}({len})"),
    }
}
/// Timestamp and Time data types information about TimeZone formatting.
///
/// This is more related to a display information than real differences between each variant. To
@ -593,4 +760,6 @@ pub enum ArrayElemTypeDef {
AngleBracket(Box<DataType>),
/// `INT[]` or `INT[2]`
SquareBracket(Box<DataType>, Option<u64>),
/// `Array(Int64)`
Parenthesis(Box<DataType>),
}

View file

@ -273,7 +273,7 @@ impl fmt::Display for Interval {
}
}
/// A field definition within a struct.
/// A field definition within a struct
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]

View file

@ -202,7 +202,9 @@ define_keywords!(
DATA,
DATABASE,
DATE,
DATE32,
DATETIME,
DATETIME64,
DAY,
DAYOFWEEK,
DAYOFYEAR,
@ -292,7 +294,9 @@ define_keywords!(
FILTER,
FIRST,
FIRST_VALUE,
FIXEDSTRING,
FLOAT,
FLOAT32,
FLOAT4,
FLOAT64,
FLOAT8,
@ -362,7 +366,11 @@ define_keywords!(
INSERT,
INSTALL,
INT,
INT128,
INT16,
INT2,
INT256,
INT32,
INT4,
INT64,
INT8,
@ -411,6 +419,7 @@ define_keywords!(
LOCKED,
LOGIN,
LOGS,
LOWCARDINALITY,
LOWER,
LOW_PRIORITY,
MACRO,
@ -455,6 +464,7 @@ define_keywords!(
NATURAL,
NCHAR,
NCLOB,
NESTED,
NEW,
NEXT,
NO,
@ -475,6 +485,7 @@ define_keywords!(
NTH_VALUE,
NTILE,
NULL,
NULLABLE,
NULLIF,
NULLS,
NUMERIC,
@ -713,8 +724,15 @@ define_keywords!(
TRUE,
TRUNCATE,
TRY_CAST,
TUPLE,
TYPE,
UESCAPE,
UINT128,
UINT16,
UINT256,
UINT32,
UINT64,
UINT8,
UNBOUNDED,
UNCACHE,
UNCOMMITTED,

View file

@ -2099,7 +2099,7 @@ impl<'a> Parser<'a> {
/// ```
fn parse_bigquery_struct_literal(&mut self) -> Result<Expr, ParserError> {
let (fields, trailing_bracket) =
self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?;
self.parse_struct_type_def(Self::parse_struct_field_def)?;
if trailing_bracket.0 {
return parser_err!("unmatched > in STRUCT literal", self.peek_token().location);
}
@ -2194,13 +2194,16 @@ impl<'a> Parser<'a> {
))
}
/// Parse a field definition in a BigQuery struct.
/// Parse a field definition in a struct [1] or tuple [2].
/// Syntax:
///
/// ```sql
/// [field_name] field_type
/// ```
fn parse_big_query_struct_field_def(
///
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type
/// [2]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
fn parse_struct_field_def(
&mut self,
) -> Result<(StructField, MatchedTrailingBracket), ParserError> {
// Look beyond the next item to infer whether both field name
@ -2266,6 +2269,47 @@ impl<'a> Parser<'a> {
})
}
/// Parses a ClickHouse [map] type definition, starting at the `MAP` keyword.
///
/// Syntax:
/// ```sql
/// Map(key_data_type, value_data_type)
/// ```
///
/// Returns the `(key, value)` data types in the order they were written.
/// Any missing keyword, parenthesis or comma is reported as a parser error.
///
/// [map]: https://clickhouse.com/docs/en/sql-reference/data-types/map
fn parse_click_house_map_def(&mut self) -> Result<(DataType, DataType), ParserError> {
    self.expect_keyword(Keyword::MAP)?;
    self.expect_token(&Token::LParen)?;
    let key = self.parse_data_type()?;
    self.expect_token(&Token::Comma)?;
    let value = self.parse_data_type()?;
    self.expect_token(&Token::RParen)?;

    Ok((key, value))
}
/// Parses a ClickHouse [tuple] type definition, starting at the `TUPLE` keyword.
///
/// Syntax:
/// ```sql
/// Tuple([field_name] field_type, ...)
/// ```
///
/// At least one field is required; further fields are read for as long as a
/// comma follows the previous one. The trailing-bracket flag returned by
/// `parse_struct_field_def` is discarded for every field.
///
/// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
fn parse_click_house_tuple_def(&mut self) -> Result<Vec<StructField>, ParserError> {
    self.expect_keyword(Keyword::TUPLE)?;
    self.expect_token(&Token::LParen)?;

    let (first_field, _) = self.parse_struct_field_def()?;
    let mut fields = vec![first_field];
    while self.consume_token(&Token::Comma) {
        let (next_field, _) = self.parse_struct_field_def()?;
        fields.push(next_field);
    }

    self.expect_token(&Token::RParen)?;
    Ok(fields)
}
/// For nested types that use the angle bracket syntax, this matches either
/// `>`, `>>` or nothing depending on which variant is expected (specified by the previously
/// matched `trailing_bracket` argument). It returns whether there is a trailing
@ -6820,6 +6864,7 @@ impl<'a> Parser<'a> {
Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)),
Keyword::REAL => Ok(DataType::Real),
Keyword::FLOAT4 => Ok(DataType::Float4),
Keyword::FLOAT32 => Ok(DataType::Float32),
Keyword::FLOAT64 => Ok(DataType::Float64),
Keyword::FLOAT8 => Ok(DataType::Float8),
Keyword::DOUBLE => {
@ -6877,7 +6922,19 @@ impl<'a> Parser<'a> {
Ok(DataType::Int4(optional_precision?))
}
}
Keyword::INT8 => {
let optional_precision = self.parse_optional_precision();
if self.parse_keyword(Keyword::UNSIGNED) {
Ok(DataType::UnsignedInt8(optional_precision?))
} else {
Ok(DataType::Int8(optional_precision?))
}
}
Keyword::INT16 => Ok(DataType::Int16),
Keyword::INT32 => Ok(DataType::Int32),
Keyword::INT64 => Ok(DataType::Int64),
Keyword::INT128 => Ok(DataType::Int128),
Keyword::INT256 => Ok(DataType::Int256),
Keyword::INTEGER => {
let optional_precision = self.parse_optional_precision();
if self.parse_keyword(Keyword::UNSIGNED) {
@ -6894,14 +6951,12 @@ impl<'a> Parser<'a> {
Ok(DataType::BigInt(optional_precision?))
}
}
Keyword::INT8 => {
let optional_precision = self.parse_optional_precision();
if self.parse_keyword(Keyword::UNSIGNED) {
Ok(DataType::UnsignedInt8(optional_precision?))
} else {
Ok(DataType::Int8(optional_precision?))
}
}
Keyword::UINT8 => Ok(DataType::UInt8),
Keyword::UINT16 => Ok(DataType::UInt16),
Keyword::UINT32 => Ok(DataType::UInt32),
Keyword::UINT64 => Ok(DataType::UInt64),
Keyword::UINT128 => Ok(DataType::UInt128),
Keyword::UINT256 => Ok(DataType::UInt256),
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)),
Keyword::NVARCHAR => {
Ok(DataType::Nvarchar(self.parse_optional_character_length()?))
@ -6937,7 +6992,13 @@ impl<'a> Parser<'a> {
Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)),
Keyword::UUID => Ok(DataType::Uuid),
Keyword::DATE => Ok(DataType::Date),
Keyword::DATE32 => Ok(DataType::Date32),
Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)),
Keyword::DATETIME64 => {
self.prev_token();
let (precision, time_zone) = self.parse_datetime_64()?;
Ok(DataType::Datetime64(precision, time_zone))
}
Keyword::TIMESTAMP => {
let precision = self.parse_optional_precision()?;
let tz = if self.parse_keyword(Keyword::WITH) {
@ -6980,6 +7041,12 @@ impl<'a> Parser<'a> {
Keyword::JSONB => Ok(DataType::JSONB),
Keyword::REGCLASS => Ok(DataType::Regclass),
Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)),
Keyword::FIXEDSTRING => {
self.expect_token(&Token::LParen)?;
let character_length = self.parse_literal_uint()?;
self.expect_token(&Token::RParen)?;
Ok(DataType::FixedString(character_length))
}
Keyword::TEXT => Ok(DataType::Text),
Keyword::BYTEA => Ok(DataType::Bytea),
Keyword::NUMERIC => Ok(DataType::Numeric(
@ -7002,6 +7069,10 @@ impl<'a> Parser<'a> {
Keyword::ARRAY => {
if dialect_of!(self is SnowflakeDialect) {
Ok(DataType::Array(ArrayElemTypeDef::None))
} else if dialect_of!(self is ClickHouseDialect) {
Ok(self.parse_sub_type(|internal_type| {
DataType::Array(ArrayElemTypeDef::Parenthesis(internal_type))
})?)
} else {
self.expect_token(&Token::Lt)?;
let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?;
@ -7014,10 +7085,35 @@ impl<'a> Parser<'a> {
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
self.prev_token();
let (field_defs, _trailing_bracket) =
self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?;
self.parse_struct_type_def(Self::parse_struct_field_def)?;
trailing_bracket = _trailing_bracket;
Ok(DataType::Struct(field_defs))
}
Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
Ok(self.parse_sub_type(DataType::Nullable)?)
}
Keyword::LOWCARDINALITY if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
Ok(self.parse_sub_type(DataType::LowCardinality)?)
}
Keyword::MAP if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
self.prev_token();
let (key_data_type, value_data_type) = self.parse_click_house_map_def()?;
Ok(DataType::Map(
Box::new(key_data_type),
Box::new(value_data_type),
))
}
Keyword::NESTED if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
self.expect_token(&Token::LParen)?;
let field_defs = self.parse_comma_separated(Parser::parse_column_def)?;
self.expect_token(&Token::RParen)?;
Ok(DataType::Nested(field_defs))
}
Keyword::TUPLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
self.prev_token();
let field_defs = self.parse_click_house_tuple_def()?;
Ok(DataType::Tuple(field_defs))
}
_ => {
self.prev_token();
let type_name = self.parse_object_name(false)?;
@ -7416,6 +7512,26 @@ impl<'a> Parser<'a> {
}
}
/// Parses a ClickHouse [`DateTime64`] type, starting at the `DATETIME64` keyword.
///
/// Syntax:
/// ```sql
/// DateTime64(precision[, timezone])
/// ```
///
/// Returns the mandatory precision together with the time zone string
/// literal, which is `None` when no comma follows the precision.
///
/// [`DateTime64`]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64
pub fn parse_datetime_64(&mut self) -> Result<(u64, Option<String>), ParserError> {
    self.expect_keyword(Keyword::DATETIME64)?;
    self.expect_token(&Token::LParen)?;
    let precision = self.parse_literal_uint()?;
    // The time zone literal is only parsed when a comma introduces it.
    let time_zone = self
        .consume_token(&Token::Comma)
        .then(|| self.parse_literal_string())
        .transpose()?;
    self.expect_token(&Token::RParen)?;

    Ok((precision, time_zone))
}
pub fn parse_optional_character_length(
&mut self,
) -> Result<Option<CharacterLength>, ParserError> {
@ -7508,6 +7624,17 @@ impl<'a> Parser<'a> {
}
}
/// Parses a single data type wrapped in parentheses, i.e. `(<data_type>)`,
/// and hands the boxed inner type to `parent_type` to build the enclosing
/// [`DataType`] (for example `DataType::Nullable`).
fn parse_sub_type<F>(&mut self, parent_type: F) -> Result<DataType, ParserError>
where
    F: FnOnce(Box<DataType>) -> DataType,
{
    self.expect_token(&Token::LParen)?;
    let inner = Box::new(self.parse_data_type()?);
    self.expect_token(&Token::RParen)?;

    Ok(parent_type(inner))
}
pub fn parse_delete(&mut self) -> Result<Statement, ParserError> {
let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) {
// `FROM` keyword is optional in BigQuery SQL.

View file

@ -220,6 +220,196 @@ fn parse_create_table() {
);
}
/// Test helper: builds a [`ColumnDef`] with the given name and data type,
/// no collation and an empty option list.
fn column_def(name: Ident, data_type: DataType) -> ColumnDef {
    let collation = None;
    let options = Vec::new();
    ColumnDef {
        name,
        data_type,
        collation,
        options,
    }
}
/// Checks that the ClickHouse scalar types (sized integers, floats, `Date32`,
/// `DateTime64`, `FixedString`, `LowCardinality`) parse into the expected
/// `DataType` variants and display back as the canonical SQL.
#[test]
fn parse_clickhouse_data_types() {
    let sql = concat!(
        "CREATE TABLE table (",
        "a1 UInt8, a2 UInt16, a3 UInt32, a4 UInt64, a5 UInt128, a6 UInt256,",
        " b1 Int8, b2 Int16, b3 Int32, b4 Int64, b5 Int128, b6 Int256,",
        " c1 Float32, c2 Float64,",
        " d1 Date32, d2 DateTime64(3), d3 DateTime64(3, 'UTC'),",
        " e1 FixedString(255),",
        " f1 LowCardinality(Int32)",
        ") ORDER BY (a1)",
    );
    // ClickHouse type names are case-sensitive, but these three types are
    // displayed in upper case, so the canonical form differs from the input.
    let canonical_sql = sql
        .replace(" Int8", " INT8")
        .replace(" Int64", " INT64")
        .replace(" Float64", " FLOAT64");

    // One `(column name, expected type)` pair per column, in declaration order.
    let expected_columns: Vec<ColumnDef> = vec![
        ("a1", DataType::UInt8),
        ("a2", DataType::UInt16),
        ("a3", DataType::UInt32),
        ("a4", DataType::UInt64),
        ("a5", DataType::UInt128),
        ("a6", DataType::UInt256),
        ("b1", DataType::Int8(None)),
        ("b2", DataType::Int16),
        ("b3", DataType::Int32),
        ("b4", DataType::Int64),
        ("b5", DataType::Int128),
        ("b6", DataType::Int256),
        ("c1", DataType::Float32),
        ("c2", DataType::Float64),
        ("d1", DataType::Date32),
        ("d2", DataType::Datetime64(3, None)),
        ("d3", DataType::Datetime64(3, Some("UTC".into()))),
        ("e1", DataType::FixedString(255)),
        ("f1", DataType::LowCardinality(Box::new(DataType::Int32))),
    ]
    .into_iter()
    .map(|(column_name, data_type)| column_def(column_name.into(), data_type))
    .collect();

    let statement = clickhouse_and_generic().one_statement_parses_to(sql, &canonical_sql);
    match statement {
        Statement::CreateTable { name, columns, .. } => {
            assert_eq!(name, ObjectName(vec!["table".into()]));
            assert_eq!(columns, expected_columns);
        }
        _ => unreachable!(),
    }
}
/// Checks that `Nullable(...)` wraps the inner data type, while a trailing
/// `NULL` after the type is parsed as a column option rather than a type.
#[test]
fn parse_create_table_with_nullable() {
    let sql = r#"CREATE TABLE table (k UInt8, `a` Nullable(String), `b` Nullable(DateTime64(9, 'UTC')), c Nullable(DateTime64(9)), d Date32 NULL) ENGINE=MergeTree ORDER BY (`k`)"#;
    // ClickHouse type names are case-sensitive, but the canonical form spells
    // the string type in upper case.
    let canonical_sql = sql.replace("String", "STRING");

    // Column `d` is the only one that carries an option (`NULL`).
    let null_date_column = ColumnDef {
        name: "d".into(),
        data_type: DataType::Date32,
        collation: None,
        options: vec![ColumnOptionDef {
            name: None,
            option: ColumnOption::Null,
        }],
    };
    let expected_columns = vec![
        column_def("k".into(), DataType::UInt8),
        column_def(
            Ident::with_quote('`', "a"),
            DataType::Nullable(Box::new(DataType::String(None))),
        ),
        column_def(
            Ident::with_quote('`', "b"),
            DataType::Nullable(Box::new(DataType::Datetime64(9, Some("UTC".to_string())))),
        ),
        column_def(
            "c".into(),
            DataType::Nullable(Box::new(DataType::Datetime64(9, None))),
        ),
        null_date_column,
    ];

    let statement = clickhouse_and_generic().one_statement_parses_to(sql, &canonical_sql);
    match statement {
        Statement::CreateTable { name, columns, .. } => {
            assert_eq!(name, ObjectName(vec!["table".into()]));
            assert_eq!(columns, expected_columns);
        }
        _ => unreachable!(),
    }
}
/// Checks that the composite ClickHouse types (`Nested`, `Array(...)`,
/// `Tuple` with and without field names, `Map`) parse into the expected
/// nested `DataType` values.
#[test]
fn parse_create_table_with_nested_data_types() {
    let sql = concat!(
        "CREATE TABLE table (",
        " i Nested(a Array(Int16), b LowCardinality(String)),",
        " k Array(Tuple(FixedString(128), Int128)),",
        " l Tuple(a DateTime64(9), b Array(UUID)),",
        " m Map(String, UInt16)",
        ") ENGINE=MergeTree ORDER BY (k)"
    );

    // `Array(<elem>)` written with ClickHouse's parenthesis syntax.
    let array_of =
        |elem: DataType| DataType::Array(ArrayElemTypeDef::Parenthesis(Box::new(elem)));

    let expected_columns = vec![
        column_def(
            Ident::new("i"),
            DataType::Nested(vec![
                column_def("a".into(), array_of(DataType::Int16)),
                column_def(
                    "b".into(),
                    DataType::LowCardinality(Box::new(DataType::String(None))),
                ),
            ]),
        ),
        column_def(
            Ident::new("k"),
            array_of(DataType::Tuple(vec![
                StructField {
                    field_name: None,
                    field_type: DataType::FixedString(128),
                },
                StructField {
                    field_name: None,
                    field_type: DataType::Int128,
                },
            ])),
        ),
        column_def(
            Ident::new("l"),
            DataType::Tuple(vec![
                StructField {
                    field_name: Some("a".into()),
                    field_type: DataType::Datetime64(9, None),
                },
                StructField {
                    field_name: Some("b".into()),
                    field_type: array_of(DataType::Uuid),
                },
            ]),
        ),
        column_def(
            Ident::new("m"),
            DataType::Map(
                Box::new(DataType::String(None)),
                Box::new(DataType::UInt16),
            ),
        ),
    ];

    // NOTE(review): the empty canonical string presumably disables the
    // round-trip comparison in the test helper; confirm against its definition.
    let statement = clickhouse().one_statement_parses_to(sql, "");
    match statement {
        Statement::CreateTable { name, columns, .. } => {
            assert_eq!(name, ObjectName(vec!["table".into()]));
            assert_eq!(columns, expected_columns);
        }
        _ => unreachable!(),
    }
}
#[test]
fn parse_create_view_with_fields_data_types() {
match clickhouse().verified_stmt(r#"CREATE VIEW v (i "int", f "String") AS SELECT * FROM t"#) {