Add support of the ENUM8|ENUM16 for ClickHouse dialect (#1574)

This commit is contained in:
hulk 2024-12-05 22:59:07 +08:00 committed by GitHub
parent c761f0babb
commit dd7ba72a0b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 179 additions and 49 deletions

View file

@ -25,10 +25,21 @@ use serde::{Deserialize, Serialize};
#[cfg(feature = "visitor")]
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField};
use crate::ast::{display_comma_separated, Expr, ObjectName, StructField, UnionField};
use super::{value::escape_single_quote_string, ColumnDef};
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum EnumMember {
Name(String),
/// ClickHouse allows to specify an integer value for each enum value.
///
/// [clickhouse](https://clickhouse.com/docs/en/sql-reference/data-types/enum)
NamedValue(String, Expr),
}
/// SQL data types
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@ -334,7 +345,7 @@ pub enum DataType {
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested
Nested(Vec<ColumnDef>),
/// Enums
Enum(Vec<String>),
Enum(Vec<EnumMember>, Option<u8>),
/// Set
Set(Vec<String>),
/// Struct
@ -546,13 +557,24 @@ impl fmt::Display for DataType {
write!(f, "{}({})", ty, modifiers.join(", "))
}
}
DataType::Enum(vals) => {
write!(f, "ENUM(")?;
DataType::Enum(vals, bits) => {
match bits {
Some(bits) => write!(f, "ENUM{}", bits),
None => write!(f, "ENUM"),
}?;
write!(f, "(")?;
for (i, v) in vals.iter().enumerate() {
if i != 0 {
write!(f, ", ")?;
}
write!(f, "'{}'", escape_single_quote_string(v))?;
match v {
EnumMember::Name(name) => {
write!(f, "'{}'", escape_single_quote_string(name))?
}
EnumMember::NamedValue(name, value) => {
write!(f, "'{}' = {}", escape_single_quote_string(name), value)?
}
}
}
write!(f, ")")
}

View file

@ -40,7 +40,7 @@ use sqlparser_derive::{Visit, VisitMut};
use crate::tokenizer::Span;
pub use self::data_type::{
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, EnumMember, ExactNumberInfo,
StructBracketKind, TimezoneInfo,
};
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue, Use};

View file

@ -286,6 +286,8 @@ define_keywords!(
ENFORCED,
ENGINE,
ENUM,
ENUM16,
ENUM8,
EPHEMERAL,
EPOCH,
EQUALS,

View file

@ -1049,18 +1049,18 @@ impl<'a> Parser<'a> {
| Keyword::CURRENT_USER
| Keyword::SESSION_USER
| Keyword::USER
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
{
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::None,
null_treatment: None,
filter: None,
over: None,
within_group: vec![],
})))
}
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
{
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::None,
null_treatment: None,
filter: None,
over: None,
within_group: vec![],
})))
}
Keyword::CURRENT_TIMESTAMP
| Keyword::CURRENT_TIME
| Keyword::CURRENT_DATE
@ -1075,18 +1075,18 @@ impl<'a> Parser<'a> {
Keyword::TRY_CAST => Ok(Some(self.parse_cast_expr(CastKind::TryCast)?)),
Keyword::SAFE_CAST => Ok(Some(self.parse_cast_expr(CastKind::SafeCast)?)),
Keyword::EXISTS
// Support parsing Databricks has a function named `exists`.
if !dialect_of!(self is DatabricksDialect)
|| matches!(
// Support parsing Databricks has a function named `exists`.
if !dialect_of!(self is DatabricksDialect)
|| matches!(
self.peek_nth_token(1).token,
Token::Word(Word {
keyword: Keyword::SELECT | Keyword::WITH,
..
})
) =>
{
Ok(Some(self.parse_exists_expr(false)?))
}
{
Ok(Some(self.parse_exists_expr(false)?))
}
Keyword::EXTRACT => Ok(Some(self.parse_extract_expr()?)),
Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
@ -1103,22 +1103,22 @@ impl<'a> Parser<'a> {
Ok(Some(self.parse_array_expr(true)?))
}
Keyword::ARRAY
if self.peek_token() == Token::LParen
&& !dialect_of!(self is ClickHouseDialect | DatabricksDialect) =>
{
self.expect_token(&Token::LParen)?;
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::Subquery(query),
filter: None,
null_treatment: None,
over: None,
within_group: vec![],
})))
}
if self.peek_token() == Token::LParen
&& !dialect_of!(self is ClickHouseDialect | DatabricksDialect) =>
{
self.expect_token(&Token::LParen)?;
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::Subquery(query),
filter: None,
null_treatment: None,
over: None,
within_group: vec![],
})))
}
Keyword::NOT => Ok(Some(self.parse_not()?)),
Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => {
Ok(Some(self.parse_match_against()?))
@ -5023,7 +5023,7 @@ impl<'a> Parser<'a> {
return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. {e}")))
}
}
},
}
};
Ok(owner)
}
@ -7997,6 +7997,23 @@ impl<'a> Parser<'a> {
}
}
pub fn parse_enum_values(&mut self) -> Result<Vec<EnumMember>, ParserError> {
self.expect_token(&Token::LParen)?;
let values = self.parse_comma_separated(|parser| {
let name = parser.parse_literal_string()?;
let e = if parser.consume_token(&Token::Eq) {
let value = parser.parse_number()?;
EnumMember::NamedValue(name, value)
} else {
EnumMember::Name(name)
};
Ok(e)
})?;
self.expect_token(&Token::RParen)?;
Ok(values)
}
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
let (ty, trailing_bracket) = self.parse_data_type_helper()?;
@ -8235,7 +8252,9 @@ impl<'a> Parser<'a> {
Keyword::BIGDECIMAL => Ok(DataType::BigDecimal(
self.parse_exact_number_optional_precision_scale()?,
)),
Keyword::ENUM => Ok(DataType::Enum(self.parse_string_values()?)),
Keyword::ENUM => Ok(DataType::Enum(self.parse_enum_values()?, None)),
Keyword::ENUM8 => Ok(DataType::Enum(self.parse_enum_values()?, Some(8))),
Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))),
Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)),
Keyword::ARRAY => {
if dialect_of!(self is SnowflakeDialect) {