mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-10-09 21:42:05 +00:00
ClickHouse data types (#1285)
This commit is contained in:
parent
6d4776b482
commit
2fb919d8b2
5 changed files with 528 additions and 24 deletions
|
@ -22,7 +22,7 @@ use sqlparser_derive::{Visit, VisitMut};
|
|||
|
||||
use crate::ast::{display_comma_separated, ObjectName, StructField};
|
||||
|
||||
use super::value::escape_single_quote_string;
|
||||
use super::{value::escape_single_quote_string, ColumnDef};
|
||||
|
||||
/// SQL data types
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
|
@ -129,10 +129,39 @@ pub enum DataType {
|
|||
///
|
||||
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
|
||||
Int4(Option<u64>),
|
||||
/// Integer type in [bigquery]
|
||||
/// Int8 as alias for Bigint in [postgresql] and integer type in [clickhouse]
|
||||
/// Note: Int8 mean 8 bytes in [postgresql] (not 8 bits)
|
||||
/// Int8 with optional display width e.g. INT8 or INT8(11)
|
||||
/// Note: Int8 mean 8 bits in [clickhouse]
|
||||
///
|
||||
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
|
||||
Int8(Option<u64>),
|
||||
/// Integer type in [clickhouse]
|
||||
/// Note: Int16 mean 16 bits in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
|
||||
Int16,
|
||||
/// Integer type in [clickhouse]
|
||||
/// Note: Int32 mean 32 bits in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
|
||||
Int32,
|
||||
/// Integer type in [bigquery], [clickhouse]
|
||||
///
|
||||
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
|
||||
Int64,
|
||||
/// Integer type in [clickhouse]
|
||||
/// Note: Int128 mean 128 bits in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
|
||||
Int128,
|
||||
/// Integer type in [clickhouse]
|
||||
/// Note: Int256 mean 256 bits in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
|
||||
Int256,
|
||||
/// Integer with optional display width e.g. INTEGER or INTEGER(11)
|
||||
Integer(Option<u64>),
|
||||
/// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED
|
||||
|
@ -141,25 +170,54 @@ pub enum DataType {
|
|||
UnsignedInt4(Option<u64>),
|
||||
/// Unsigned integer with optional display width e.g. INTEGER UNSIGNED or INTEGER(11) UNSIGNED
|
||||
UnsignedInteger(Option<u64>),
|
||||
/// Unsigned integer type in [clickhouse]
|
||||
/// Note: UInt8 mean 8 bits in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
|
||||
UInt8,
|
||||
/// Unsigned integer type in [clickhouse]
|
||||
/// Note: UInt16 mean 16 bits in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
|
||||
UInt16,
|
||||
/// Unsigned integer type in [clickhouse]
|
||||
/// Note: UInt32 mean 32 bits in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
|
||||
UInt32,
|
||||
/// Unsigned integer type in [clickhouse]
|
||||
/// Note: UInt64 mean 64 bits in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
|
||||
UInt64,
|
||||
/// Unsigned integer type in [clickhouse]
|
||||
/// Note: UInt128 mean 128 bits in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
|
||||
UInt128,
|
||||
/// Unsigned integer type in [clickhouse]
|
||||
/// Note: UInt256 mean 256 bits in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint
|
||||
UInt256,
|
||||
/// Big integer with optional display width e.g. BIGINT or BIGINT(20)
|
||||
BigInt(Option<u64>),
|
||||
/// Unsigned big integer with optional display width e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED
|
||||
UnsignedBigInt(Option<u64>),
|
||||
/// Int8 as alias for Bigint in [postgresql]
|
||||
/// Note: Int8 mean 8 bytes in postgres (not 8 bits)
|
||||
/// Int8 with optional display width e.g. INT8 or INT8(11)
|
||||
///
|
||||
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
|
||||
Int8(Option<u64>),
|
||||
/// Unsigned Int8 with optional display width e.g. INT8 UNSIGNED or INT8(11) UNSIGNED
|
||||
UnsignedInt8(Option<u64>),
|
||||
/// Float4 as alias for Real in [postgresql]
|
||||
///
|
||||
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
|
||||
Float4,
|
||||
/// Floating point in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float
|
||||
Float32,
|
||||
/// Floating point in [bigquery], [clickhouse]
|
||||
///
|
||||
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float
|
||||
Float64,
|
||||
/// Floating point e.g. REAL
|
||||
Real,
|
||||
|
@ -182,6 +240,10 @@ pub enum DataType {
|
|||
Boolean,
|
||||
/// Date
|
||||
Date,
|
||||
/// Date32 with the same range as Datetime64 e.g. [ClickHouse][1].
|
||||
///
|
||||
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/date32
|
||||
Date32,
|
||||
/// Time with optional time precision and time zone information e.g. [standard][1].
|
||||
///
|
||||
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
|
||||
|
@ -190,6 +252,10 @@ pub enum DataType {
|
|||
///
|
||||
/// [1]: https://dev.mysql.com/doc/refman/8.0/en/datetime.html
|
||||
Datetime(Option<u64>),
|
||||
/// Datetime with time precision and optional timezone e.g. [ClickHouse][1].
|
||||
///
|
||||
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64
|
||||
Datetime64(u64, Option<String>),
|
||||
/// Timestamp with optional time precision and time zone information e.g. [standard][1].
|
||||
///
|
||||
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
|
||||
|
@ -206,12 +272,28 @@ pub enum DataType {
|
|||
Text,
|
||||
/// String with optional length.
|
||||
String(Option<u64>),
|
||||
/// A fixed-length string e.g [ClickHouse][1].
|
||||
///
|
||||
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/fixedstring
|
||||
FixedString(u64),
|
||||
/// Bytea
|
||||
Bytea,
|
||||
/// Custom type such as enums
|
||||
Custom(ObjectName, Vec<String>),
|
||||
/// Arrays
|
||||
Array(ArrayElemTypeDef),
|
||||
/// Map type in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/map
|
||||
Map(Box<DataType>, Box<DataType>),
|
||||
/// Tuple type in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
|
||||
Tuple(Vec<StructField>),
|
||||
/// Nested data structure in [clickhouse]
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested
|
||||
Nested(Vec<ColumnDef>),
|
||||
/// Enums
|
||||
Enum(Vec<String>),
|
||||
/// Set
|
||||
|
@ -221,6 +303,14 @@ pub enum DataType {
|
|||
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
|
||||
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
||||
Struct(Vec<StructField>),
|
||||
/// Nullable - wraps another data type so that it can also hold the special marker NULL, in [clickhouse].
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable
|
||||
Nullable(Box<DataType>),
|
||||
/// LowCardinality - changes the internal representation of other data types to be dictionary-encoded.
|
||||
///
|
||||
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality
|
||||
LowCardinality(Box<DataType>),
|
||||
/// No type specified - only used with
|
||||
/// [`SQLiteDialect`](crate::dialect::SQLiteDialect), from statements such
|
||||
/// as `CREATE TABLE t1 (a)`.
|
||||
|
@ -296,9 +386,24 @@ impl fmt::Display for DataType {
|
|||
DataType::Int4(zerofill) => {
|
||||
format_type_with_optional_length(f, "INT4", zerofill, false)
|
||||
}
|
||||
DataType::Int8(zerofill) => {
|
||||
format_type_with_optional_length(f, "INT8", zerofill, false)
|
||||
}
|
||||
DataType::Int16 => {
|
||||
write!(f, "Int16")
|
||||
}
|
||||
DataType::Int32 => {
|
||||
write!(f, "Int32")
|
||||
}
|
||||
DataType::Int64 => {
|
||||
write!(f, "INT64")
|
||||
}
|
||||
DataType::Int128 => {
|
||||
write!(f, "Int128")
|
||||
}
|
||||
DataType::Int256 => {
|
||||
write!(f, "Int256")
|
||||
}
|
||||
DataType::UnsignedInt4(zerofill) => {
|
||||
format_type_with_optional_length(f, "INT4", zerofill, true)
|
||||
}
|
||||
|
@ -314,14 +419,30 @@ impl fmt::Display for DataType {
|
|||
DataType::UnsignedBigInt(zerofill) => {
|
||||
format_type_with_optional_length(f, "BIGINT", zerofill, true)
|
||||
}
|
||||
DataType::Int8(zerofill) => {
|
||||
format_type_with_optional_length(f, "INT8", zerofill, false)
|
||||
}
|
||||
DataType::UnsignedInt8(zerofill) => {
|
||||
format_type_with_optional_length(f, "INT8", zerofill, true)
|
||||
}
|
||||
DataType::UInt8 => {
|
||||
write!(f, "UInt8")
|
||||
}
|
||||
DataType::UInt16 => {
|
||||
write!(f, "UInt16")
|
||||
}
|
||||
DataType::UInt32 => {
|
||||
write!(f, "UInt32")
|
||||
}
|
||||
DataType::UInt64 => {
|
||||
write!(f, "UInt64")
|
||||
}
|
||||
DataType::UInt128 => {
|
||||
write!(f, "UInt128")
|
||||
}
|
||||
DataType::UInt256 => {
|
||||
write!(f, "UInt256")
|
||||
}
|
||||
DataType::Real => write!(f, "REAL"),
|
||||
DataType::Float4 => write!(f, "FLOAT4"),
|
||||
DataType::Float32 => write!(f, "Float32"),
|
||||
DataType::Float64 => write!(f, "FLOAT64"),
|
||||
DataType::Double => write!(f, "DOUBLE"),
|
||||
DataType::Float8 => write!(f, "FLOAT8"),
|
||||
|
@ -329,6 +450,7 @@ impl fmt::Display for DataType {
|
|||
DataType::Bool => write!(f, "BOOL"),
|
||||
DataType::Boolean => write!(f, "BOOLEAN"),
|
||||
DataType::Date => write!(f, "DATE"),
|
||||
DataType::Date32 => write!(f, "Date32"),
|
||||
DataType::Time(precision, timezone_info) => {
|
||||
format_datetime_precision_and_tz(f, "TIME", precision, timezone_info)
|
||||
}
|
||||
|
@ -338,6 +460,14 @@ impl fmt::Display for DataType {
|
|||
DataType::Timestamp(precision, timezone_info) => {
|
||||
format_datetime_precision_and_tz(f, "TIMESTAMP", precision, timezone_info)
|
||||
}
|
||||
DataType::Datetime64(precision, timezone) => {
|
||||
format_clickhouse_datetime_precision_and_timezone(
|
||||
f,
|
||||
"DateTime64",
|
||||
precision,
|
||||
timezone,
|
||||
)
|
||||
}
|
||||
DataType::Interval => write!(f, "INTERVAL"),
|
||||
DataType::JSON => write!(f, "JSON"),
|
||||
DataType::JSONB => write!(f, "JSONB"),
|
||||
|
@ -350,6 +480,7 @@ impl fmt::Display for DataType {
|
|||
ArrayElemTypeDef::SquareBracket(t, None) => write!(f, "{t}[]"),
|
||||
ArrayElemTypeDef::SquareBracket(t, Some(size)) => write!(f, "{t}[{size}]"),
|
||||
ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"),
|
||||
ArrayElemTypeDef::Parenthesis(t) => write!(f, "Array({t})"),
|
||||
},
|
||||
DataType::Custom(ty, modifiers) => {
|
||||
if modifiers.is_empty() {
|
||||
|
@ -385,6 +516,25 @@ impl fmt::Display for DataType {
|
|||
write!(f, "STRUCT")
|
||||
}
|
||||
}
|
||||
// ClickHouse
|
||||
DataType::Nullable(data_type) => {
|
||||
write!(f, "Nullable({})", data_type)
|
||||
}
|
||||
DataType::FixedString(character_length) => {
|
||||
write!(f, "FixedString({})", character_length)
|
||||
}
|
||||
DataType::LowCardinality(data_type) => {
|
||||
write!(f, "LowCardinality({})", data_type)
|
||||
}
|
||||
DataType::Map(key_data_type, value_data_type) => {
|
||||
write!(f, "Map({}, {})", key_data_type, value_data_type)
|
||||
}
|
||||
DataType::Tuple(fields) => {
|
||||
write!(f, "Tuple({})", display_comma_separated(fields))
|
||||
}
|
||||
DataType::Nested(fields) => {
|
||||
write!(f, "Nested({})", display_comma_separated(fields))
|
||||
}
|
||||
DataType::Unspecified => Ok(()),
|
||||
}
|
||||
}
|
||||
|
@ -439,6 +589,23 @@ fn format_datetime_precision_and_tz(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Writes a ClickHouse date-time type as `<sql_type>(<len>)`, or as
/// `<sql_type>(<len>, '<time_zone>')` when a time zone is present.
///
/// NOTE(review): the time zone is written verbatim between single quotes; a
/// value that itself contains `'` is not escaped here.
fn format_clickhouse_datetime_precision_and_timezone(
    f: &mut fmt::Formatter,
    sql_type: &'static str,
    len: &u64,
    time_zone: &Option<String>,
) -> fmt::Result {
    match time_zone {
        Some(zone) => write!(f, "{sql_type}({len}, '{zone}')"),
        None => write!(f, "{sql_type}({len})"),
    }
}
|
||||
|
||||
/// Timestamp and Time data types information about TimeZone formatting.
|
||||
///
|
||||
/// This is more related to a display information than real differences between each variant. To
|
||||
|
@ -593,4 +760,6 @@ pub enum ArrayElemTypeDef {
|
|||
AngleBracket(Box<DataType>),
|
||||
/// `INT[]` or `INT[2]`
|
||||
SquareBracket(Box<DataType>, Option<u64>),
|
||||
/// `Array(Int64)`
|
||||
Parenthesis(Box<DataType>),
|
||||
}
|
||||
|
|
|
@ -273,7 +273,7 @@ impl fmt::Display for Interval {
|
|||
}
|
||||
}
|
||||
|
||||
/// A field definition within a struct.
|
||||
/// A field definition within a struct
|
||||
///
|
||||
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
|
|
|
@ -202,7 +202,9 @@ define_keywords!(
|
|||
DATA,
|
||||
DATABASE,
|
||||
DATE,
|
||||
DATE32,
|
||||
DATETIME,
|
||||
DATETIME64,
|
||||
DAY,
|
||||
DAYOFWEEK,
|
||||
DAYOFYEAR,
|
||||
|
@ -292,7 +294,9 @@ define_keywords!(
|
|||
FILTER,
|
||||
FIRST,
|
||||
FIRST_VALUE,
|
||||
FIXEDSTRING,
|
||||
FLOAT,
|
||||
FLOAT32,
|
||||
FLOAT4,
|
||||
FLOAT64,
|
||||
FLOAT8,
|
||||
|
@ -362,7 +366,11 @@ define_keywords!(
|
|||
INSERT,
|
||||
INSTALL,
|
||||
INT,
|
||||
INT128,
|
||||
INT16,
|
||||
INT2,
|
||||
INT256,
|
||||
INT32,
|
||||
INT4,
|
||||
INT64,
|
||||
INT8,
|
||||
|
@ -411,6 +419,7 @@ define_keywords!(
|
|||
LOCKED,
|
||||
LOGIN,
|
||||
LOGS,
|
||||
LOWCARDINALITY,
|
||||
LOWER,
|
||||
LOW_PRIORITY,
|
||||
MACRO,
|
||||
|
@ -455,6 +464,7 @@ define_keywords!(
|
|||
NATURAL,
|
||||
NCHAR,
|
||||
NCLOB,
|
||||
NESTED,
|
||||
NEW,
|
||||
NEXT,
|
||||
NO,
|
||||
|
@ -475,6 +485,7 @@ define_keywords!(
|
|||
NTH_VALUE,
|
||||
NTILE,
|
||||
NULL,
|
||||
NULLABLE,
|
||||
NULLIF,
|
||||
NULLS,
|
||||
NUMERIC,
|
||||
|
@ -713,8 +724,15 @@ define_keywords!(
|
|||
TRUE,
|
||||
TRUNCATE,
|
||||
TRY_CAST,
|
||||
TUPLE,
|
||||
TYPE,
|
||||
UESCAPE,
|
||||
UINT128,
|
||||
UINT16,
|
||||
UINT256,
|
||||
UINT32,
|
||||
UINT64,
|
||||
UINT8,
|
||||
UNBOUNDED,
|
||||
UNCACHE,
|
||||
UNCOMMITTED,
|
||||
|
|
|
@ -2099,7 +2099,7 @@ impl<'a> Parser<'a> {
|
|||
/// ```
|
||||
fn parse_bigquery_struct_literal(&mut self) -> Result<Expr, ParserError> {
|
||||
let (fields, trailing_bracket) =
|
||||
self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?;
|
||||
self.parse_struct_type_def(Self::parse_struct_field_def)?;
|
||||
if trailing_bracket.0 {
|
||||
return parser_err!("unmatched > in STRUCT literal", self.peek_token().location);
|
||||
}
|
||||
|
@ -2194,13 +2194,16 @@ impl<'a> Parser<'a> {
|
|||
))
|
||||
}
|
||||
|
||||
/// Parse a field definition in a BigQuery struct.
|
||||
/// Parse a field definition in a struct [1] or tuple [2].
|
||||
/// Syntax:
|
||||
///
|
||||
/// ```sql
|
||||
/// [field_name] field_type
|
||||
/// ```
|
||||
fn parse_big_query_struct_field_def(
|
||||
///
|
||||
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type
|
||||
/// [2]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
|
||||
fn parse_struct_field_def(
|
||||
&mut self,
|
||||
) -> Result<(StructField, MatchedTrailingBracket), ParserError> {
|
||||
// Look beyond the next item to infer whether both field name
|
||||
|
@ -2266,6 +2269,47 @@ impl<'a> Parser<'a> {
|
|||
})
|
||||
}
|
||||
|
||||
/// Parse clickhouse map [1]
|
||||
/// Syntax
|
||||
/// ```sql
|
||||
/// Map(key_data_type, value_data_type)
|
||||
/// ```
|
||||
///
|
||||
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/map
|
||||
fn parse_click_house_map_def(&mut self) -> Result<(DataType, DataType), ParserError> {
|
||||
self.expect_keyword(Keyword::MAP)?;
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let key_data_type = self.parse_data_type()?;
|
||||
self.expect_token(&Token::Comma)?;
|
||||
let value_data_type = self.parse_data_type()?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
|
||||
Ok((key_data_type, value_data_type))
|
||||
}
|
||||
|
||||
/// Parse clickhouse tuple [1]
|
||||
/// Syntax
|
||||
/// ```sql
|
||||
/// Tuple([field_name] field_type, ...)
|
||||
/// ```
|
||||
///
|
||||
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
|
||||
fn parse_click_house_tuple_def(&mut self) -> Result<Vec<StructField>, ParserError> {
|
||||
self.expect_keyword(Keyword::TUPLE)?;
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let mut field_defs = vec![];
|
||||
loop {
|
||||
let (def, _) = self.parse_struct_field_def()?;
|
||||
field_defs.push(def);
|
||||
if !self.consume_token(&Token::Comma) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
self.expect_token(&Token::RParen)?;
|
||||
|
||||
Ok(field_defs)
|
||||
}
|
||||
|
||||
/// For nested types that use the angle bracket syntax, this matches either
|
||||
/// `>`, `>>` or nothing depending on which variant is expected (specified by the previously
|
||||
/// matched `trailing_bracket` argument). It returns whether there is a trailing
|
||||
|
@ -6820,6 +6864,7 @@ impl<'a> Parser<'a> {
|
|||
Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)),
|
||||
Keyword::REAL => Ok(DataType::Real),
|
||||
Keyword::FLOAT4 => Ok(DataType::Float4),
|
||||
Keyword::FLOAT32 => Ok(DataType::Float32),
|
||||
Keyword::FLOAT64 => Ok(DataType::Float64),
|
||||
Keyword::FLOAT8 => Ok(DataType::Float8),
|
||||
Keyword::DOUBLE => {
|
||||
|
@ -6877,7 +6922,19 @@ impl<'a> Parser<'a> {
|
|||
Ok(DataType::Int4(optional_precision?))
|
||||
}
|
||||
}
|
||||
Keyword::INT8 => {
|
||||
let optional_precision = self.parse_optional_precision();
|
||||
if self.parse_keyword(Keyword::UNSIGNED) {
|
||||
Ok(DataType::UnsignedInt8(optional_precision?))
|
||||
} else {
|
||||
Ok(DataType::Int8(optional_precision?))
|
||||
}
|
||||
}
|
||||
Keyword::INT16 => Ok(DataType::Int16),
|
||||
Keyword::INT32 => Ok(DataType::Int32),
|
||||
Keyword::INT64 => Ok(DataType::Int64),
|
||||
Keyword::INT128 => Ok(DataType::Int128),
|
||||
Keyword::INT256 => Ok(DataType::Int256),
|
||||
Keyword::INTEGER => {
|
||||
let optional_precision = self.parse_optional_precision();
|
||||
if self.parse_keyword(Keyword::UNSIGNED) {
|
||||
|
@ -6894,14 +6951,12 @@ impl<'a> Parser<'a> {
|
|||
Ok(DataType::BigInt(optional_precision?))
|
||||
}
|
||||
}
|
||||
Keyword::INT8 => {
|
||||
let optional_precision = self.parse_optional_precision();
|
||||
if self.parse_keyword(Keyword::UNSIGNED) {
|
||||
Ok(DataType::UnsignedInt8(optional_precision?))
|
||||
} else {
|
||||
Ok(DataType::Int8(optional_precision?))
|
||||
}
|
||||
}
|
||||
Keyword::UINT8 => Ok(DataType::UInt8),
|
||||
Keyword::UINT16 => Ok(DataType::UInt16),
|
||||
Keyword::UINT32 => Ok(DataType::UInt32),
|
||||
Keyword::UINT64 => Ok(DataType::UInt64),
|
||||
Keyword::UINT128 => Ok(DataType::UInt128),
|
||||
Keyword::UINT256 => Ok(DataType::UInt256),
|
||||
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)),
|
||||
Keyword::NVARCHAR => {
|
||||
Ok(DataType::Nvarchar(self.parse_optional_character_length()?))
|
||||
|
@ -6937,7 +6992,13 @@ impl<'a> Parser<'a> {
|
|||
Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)),
|
||||
Keyword::UUID => Ok(DataType::Uuid),
|
||||
Keyword::DATE => Ok(DataType::Date),
|
||||
Keyword::DATE32 => Ok(DataType::Date32),
|
||||
Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)),
|
||||
Keyword::DATETIME64 => {
|
||||
self.prev_token();
|
||||
let (precision, time_zone) = self.parse_datetime_64()?;
|
||||
Ok(DataType::Datetime64(precision, time_zone))
|
||||
}
|
||||
Keyword::TIMESTAMP => {
|
||||
let precision = self.parse_optional_precision()?;
|
||||
let tz = if self.parse_keyword(Keyword::WITH) {
|
||||
|
@ -6980,6 +7041,12 @@ impl<'a> Parser<'a> {
|
|||
Keyword::JSONB => Ok(DataType::JSONB),
|
||||
Keyword::REGCLASS => Ok(DataType::Regclass),
|
||||
Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)),
|
||||
Keyword::FIXEDSTRING => {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let character_length = self.parse_literal_uint()?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
Ok(DataType::FixedString(character_length))
|
||||
}
|
||||
Keyword::TEXT => Ok(DataType::Text),
|
||||
Keyword::BYTEA => Ok(DataType::Bytea),
|
||||
Keyword::NUMERIC => Ok(DataType::Numeric(
|
||||
|
@ -7002,6 +7069,10 @@ impl<'a> Parser<'a> {
|
|||
Keyword::ARRAY => {
|
||||
if dialect_of!(self is SnowflakeDialect) {
|
||||
Ok(DataType::Array(ArrayElemTypeDef::None))
|
||||
} else if dialect_of!(self is ClickHouseDialect) {
|
||||
Ok(self.parse_sub_type(|internal_type| {
|
||||
DataType::Array(ArrayElemTypeDef::Parenthesis(internal_type))
|
||||
})?)
|
||||
} else {
|
||||
self.expect_token(&Token::Lt)?;
|
||||
let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?;
|
||||
|
@ -7014,10 +7085,35 @@ impl<'a> Parser<'a> {
|
|||
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
|
||||
self.prev_token();
|
||||
let (field_defs, _trailing_bracket) =
|
||||
self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?;
|
||||
self.parse_struct_type_def(Self::parse_struct_field_def)?;
|
||||
trailing_bracket = _trailing_bracket;
|
||||
Ok(DataType::Struct(field_defs))
|
||||
}
|
||||
Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
|
||||
Ok(self.parse_sub_type(DataType::Nullable)?)
|
||||
}
|
||||
Keyword::LOWCARDINALITY if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
|
||||
Ok(self.parse_sub_type(DataType::LowCardinality)?)
|
||||
}
|
||||
Keyword::MAP if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
|
||||
self.prev_token();
|
||||
let (key_data_type, value_data_type) = self.parse_click_house_map_def()?;
|
||||
Ok(DataType::Map(
|
||||
Box::new(key_data_type),
|
||||
Box::new(value_data_type),
|
||||
))
|
||||
}
|
||||
Keyword::NESTED if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let field_defs = self.parse_comma_separated(Parser::parse_column_def)?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
Ok(DataType::Nested(field_defs))
|
||||
}
|
||||
Keyword::TUPLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
|
||||
self.prev_token();
|
||||
let field_defs = self.parse_click_house_tuple_def()?;
|
||||
Ok(DataType::Tuple(field_defs))
|
||||
}
|
||||
_ => {
|
||||
self.prev_token();
|
||||
let type_name = self.parse_object_name(false)?;
|
||||
|
@ -7416,6 +7512,26 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse datetime64 [1]
|
||||
/// Syntax
|
||||
/// ```sql
|
||||
/// DateTime64(precision[, timezone])
|
||||
/// ```
|
||||
///
|
||||
/// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64
|
||||
pub fn parse_datetime_64(&mut self) -> Result<(u64, Option<String>), ParserError> {
|
||||
self.expect_keyword(Keyword::DATETIME64)?;
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let precision = self.parse_literal_uint()?;
|
||||
let time_zone = if self.consume_token(&Token::Comma) {
|
||||
Some(self.parse_literal_string()?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
self.expect_token(&Token::RParen)?;
|
||||
Ok((precision, time_zone))
|
||||
}
|
||||
|
||||
pub fn parse_optional_character_length(
|
||||
&mut self,
|
||||
) -> Result<Option<CharacterLength>, ParserError> {
|
||||
|
@ -7508,6 +7624,17 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse a parenthesized sub data type
|
||||
fn parse_sub_type<F>(&mut self, parent_type: F) -> Result<DataType, ParserError>
|
||||
where
|
||||
F: FnOnce(Box<DataType>) -> DataType,
|
||||
{
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let inside_type = self.parse_data_type()?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
Ok(parent_type(inside_type.into()))
|
||||
}
|
||||
|
||||
pub fn parse_delete(&mut self) -> Result<Statement, ParserError> {
|
||||
let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) {
|
||||
// `FROM` keyword is optional in BigQuery SQL.
|
||||
|
|
|
@ -220,6 +220,196 @@ fn parse_create_table() {
|
|||
);
|
||||
}
|
||||
|
||||
fn column_def(name: Ident, data_type: DataType) -> ColumnDef {
|
||||
ColumnDef {
|
||||
name,
|
||||
data_type,
|
||||
collation: None,
|
||||
options: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn parse_clickhouse_data_types() {
    let sql = concat!(
        "CREATE TABLE table (",
        "a1 UInt8, a2 UInt16, a3 UInt32, a4 UInt64, a5 UInt128, a6 UInt256,",
        " b1 Int8, b2 Int16, b3 Int32, b4 Int64, b5 Int128, b6 Int256,",
        " c1 Float32, c2 Float64,",
        " d1 Date32, d2 DateTime64(3), d3 DateTime64(3, 'UTC'),",
        " e1 FixedString(255),",
        " f1 LowCardinality(Int32)",
        ") ORDER BY (a1)",
    );
    // ClickHouse type names are case-sensitive, but these three types are
    // displayed in upper case by the canonical representation.
    let canonical_sql = sql
        .replace(" Int8", " INT8")
        .replace(" Int64", " INT64")
        .replace(" Float64", " FLOAT64");

    // Expected columns, in declaration order.
    let expected_columns: Vec<ColumnDef> = vec![
        ("a1", DataType::UInt8),
        ("a2", DataType::UInt16),
        ("a3", DataType::UInt32),
        ("a4", DataType::UInt64),
        ("a5", DataType::UInt128),
        ("a6", DataType::UInt256),
        ("b1", DataType::Int8(None)),
        ("b2", DataType::Int16),
        ("b3", DataType::Int32),
        ("b4", DataType::Int64),
        ("b5", DataType::Int128),
        ("b6", DataType::Int256),
        ("c1", DataType::Float32),
        ("c2", DataType::Float64),
        ("d1", DataType::Date32),
        ("d2", DataType::Datetime64(3, None)),
        ("d3", DataType::Datetime64(3, Some("UTC".into()))),
        ("e1", DataType::FixedString(255)),
        ("f1", DataType::LowCardinality(Box::new(DataType::Int32))),
    ]
    .into_iter()
    .map(|(column_name, data_type)| column_def(column_name.into(), data_type))
    .collect();

    match clickhouse_and_generic().one_statement_parses_to(sql, &canonical_sql) {
        Statement::CreateTable { name, columns, .. } => {
            assert_eq!(name, ObjectName(vec!["table".into()]));
            assert_eq!(columns, expected_columns);
        }
        _ => unreachable!(),
    }
}
|
||||
|
||||
#[test]
fn parse_create_table_with_nullable() {
    let sql = r#"CREATE TABLE table (k UInt8, `a` Nullable(String), `b` Nullable(DateTime64(9, 'UTC')), c Nullable(DateTime64(9)), d Date32 NULL) ENGINE=MergeTree ORDER BY (`k`)"#;
    // ClickHouse type names are case-sensitive, but `String` is displayed in
    // upper case by the canonical representation.
    let canonical_sql = sql.replace("String", "STRING");

    // Shorthand for `Nullable(<inner>)`.
    let nullable = |inner: DataType| DataType::Nullable(Box::new(inner));

    let expected_columns = vec![
        column_def("k".into(), DataType::UInt8),
        column_def(
            Ident::with_quote('`', "a"),
            nullable(DataType::String(None)),
        ),
        column_def(
            Ident::with_quote('`', "b"),
            nullable(DataType::Datetime64(9, Some("UTC".to_string()))),
        ),
        column_def("c".into(), nullable(DataType::Datetime64(9, None))),
        // `d Date32 NULL` is a plain Date32 column carrying a NULL column option.
        ColumnDef {
            name: "d".into(),
            data_type: DataType::Date32,
            collation: None,
            options: vec![ColumnOptionDef {
                name: None,
                option: ColumnOption::Null,
            }],
        },
    ];

    match clickhouse_and_generic().one_statement_parses_to(sql, &canonical_sql) {
        Statement::CreateTable { name, columns, .. } => {
            assert_eq!(name, ObjectName(vec!["table".into()]));
            assert_eq!(columns, expected_columns);
        }
        _ => unreachable!(),
    }
}
|
||||
|
||||
#[test]
fn parse_create_table_with_nested_data_types() {
    let sql = concat!(
        "CREATE TABLE table (",
        " i Nested(a Array(Int16), b LowCardinality(String)),",
        " k Array(Tuple(FixedString(128), Int128)),",
        " l Tuple(a DateTime64(9), b Array(UUID)),",
        " m Map(String, UInt16)",
        ") ENGINE=MergeTree ORDER BY (k)"
    );

    // Shorthand for the ClickHouse `Array(T)` spelling.
    let array_of =
        |elem: DataType| DataType::Array(ArrayElemTypeDef::Parenthesis(Box::new(elem)));
    // Tuple element without a field name.
    let unnamed = |field_type: DataType| StructField {
        field_name: None,
        field_type,
    };
    // Tuple element with a field name.
    let named = |field_name: &str, field_type: DataType| StructField {
        field_name: Some(field_name.into()),
        field_type,
    };

    let expected_columns = vec![
        column_def(
            Ident::new("i"),
            DataType::Nested(vec![
                column_def("a".into(), array_of(DataType::Int16)),
                column_def(
                    "b".into(),
                    DataType::LowCardinality(Box::new(DataType::String(None))),
                ),
            ]),
        ),
        column_def(
            Ident::new("k"),
            array_of(DataType::Tuple(vec![
                unnamed(DataType::FixedString(128)),
                unnamed(DataType::Int128),
            ])),
        ),
        column_def(
            Ident::new("l"),
            DataType::Tuple(vec![
                named("a", DataType::Datetime64(9, None)),
                named("b", array_of(DataType::Uuid)),
            ]),
        ),
        column_def(
            Ident::new("m"),
            DataType::Map(
                Box::new(DataType::String(None)),
                Box::new(DataType::UInt16),
            ),
        ),
    ];

    match clickhouse().one_statement_parses_to(sql, "") {
        Statement::CreateTable { name, columns, .. } => {
            assert_eq!(name, ObjectName(vec!["table".into()]));
            assert_eq!(columns, expected_columns);
        }
        _ => unreachable!(),
    }
}
|
||||
|
||||
#[test]
|
||||
fn parse_create_view_with_fields_data_types() {
|
||||
match clickhouse().verified_stmt(r#"CREATE VIEW v (i "int", f "String") AS SELECT * FROM t"#) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue