Add support for ENUM8|ENUM16 in the ClickHouse dialect (#1574)

This commit is contained in:
hulk 2024-12-05 22:59:07 +08:00 committed by GitHub
parent c761f0babb
commit dd7ba72a0b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 179 additions and 49 deletions

View file

@ -25,10 +25,21 @@ use serde::{Deserialize, Serialize};
#[cfg(feature = "visitor")]
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField};
use crate::ast::{display_comma_separated, Expr, ObjectName, StructField, UnionField};
use super::{value::escape_single_quote_string, ColumnDef};
/// A single member in the value list of an enum data type.
///
/// A member is either a bare quoted name (`'a'`) or a quoted name paired
/// with an explicit value expression (`'a' = 1`).
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum EnumMember {
/// A member given only by its name, e.g. `'a'`.
Name(String),
/// A member name with an explicitly assigned value, e.g. `'a' = 1`.
///
/// ClickHouse allows specifying an integer value for each enum member.
///
/// [clickhouse](https://clickhouse.com/docs/en/sql-reference/data-types/enum)
NamedValue(String, Expr),
}
/// SQL data types
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@ -334,7 +345,7 @@ pub enum DataType {
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested
Nested(Vec<ColumnDef>),
/// Enums
Enum(Vec<String>),
Enum(Vec<EnumMember>, Option<u8>),
/// Set
Set(Vec<String>),
/// Struct
@ -546,13 +557,24 @@ impl fmt::Display for DataType {
write!(f, "{}({})", ty, modifiers.join(", "))
}
}
DataType::Enum(vals) => {
write!(f, "ENUM(")?;
DataType::Enum(vals, bits) => {
match bits {
Some(bits) => write!(f, "ENUM{}", bits),
None => write!(f, "ENUM"),
}?;
write!(f, "(")?;
for (i, v) in vals.iter().enumerate() {
if i != 0 {
write!(f, ", ")?;
}
write!(f, "'{}'", escape_single_quote_string(v))?;
match v {
EnumMember::Name(name) => {
write!(f, "'{}'", escape_single_quote_string(name))?
}
EnumMember::NamedValue(name, value) => {
write!(f, "'{}' = {}", escape_single_quote_string(name), value)?
}
}
}
write!(f, ")")
}

View file

@ -40,7 +40,7 @@ use sqlparser_derive::{Visit, VisitMut};
use crate::tokenizer::Span;
pub use self::data_type::{
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, EnumMember, ExactNumberInfo,
StructBracketKind, TimezoneInfo,
};
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue, Use};

View file

@ -286,6 +286,8 @@ define_keywords!(
ENFORCED,
ENGINE,
ENUM,
ENUM16,
ENUM8,
EPHEMERAL,
EPOCH,
EQUALS,

View file

@ -1049,18 +1049,18 @@ impl<'a> Parser<'a> {
| Keyword::CURRENT_USER
| Keyword::SESSION_USER
| Keyword::USER
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
{
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::None,
null_treatment: None,
filter: None,
over: None,
within_group: vec![],
})))
}
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
{
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::None,
null_treatment: None,
filter: None,
over: None,
within_group: vec![],
})))
}
Keyword::CURRENT_TIMESTAMP
| Keyword::CURRENT_TIME
| Keyword::CURRENT_DATE
@ -1075,18 +1075,18 @@ impl<'a> Parser<'a> {
Keyword::TRY_CAST => Ok(Some(self.parse_cast_expr(CastKind::TryCast)?)),
Keyword::SAFE_CAST => Ok(Some(self.parse_cast_expr(CastKind::SafeCast)?)),
Keyword::EXISTS
// Databricks also has a function named `exists`, so for that dialect only
// parse an EXISTS expression when a SELECT or WITH keyword is found ahead.
if !dialect_of!(self is DatabricksDialect)
|| matches!(
// Databricks also has a function named `exists`, so for that dialect only
// parse an EXISTS expression when a SELECT or WITH keyword is found ahead.
if !dialect_of!(self is DatabricksDialect)
|| matches!(
self.peek_nth_token(1).token,
Token::Word(Word {
keyword: Keyword::SELECT | Keyword::WITH,
..
})
) =>
{
Ok(Some(self.parse_exists_expr(false)?))
}
{
Ok(Some(self.parse_exists_expr(false)?))
}
Keyword::EXTRACT => Ok(Some(self.parse_extract_expr()?)),
Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
@ -1103,22 +1103,22 @@ impl<'a> Parser<'a> {
Ok(Some(self.parse_array_expr(true)?))
}
Keyword::ARRAY
if self.peek_token() == Token::LParen
&& !dialect_of!(self is ClickHouseDialect | DatabricksDialect) =>
{
self.expect_token(&Token::LParen)?;
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::Subquery(query),
filter: None,
null_treatment: None,
over: None,
within_group: vec![],
})))
}
if self.peek_token() == Token::LParen
&& !dialect_of!(self is ClickHouseDialect | DatabricksDialect) =>
{
self.expect_token(&Token::LParen)?;
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::Subquery(query),
filter: None,
null_treatment: None,
over: None,
within_group: vec![],
})))
}
Keyword::NOT => Ok(Some(self.parse_not()?)),
Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => {
Ok(Some(self.parse_match_against()?))
@ -5023,7 +5023,7 @@ impl<'a> Parser<'a> {
return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. {e}")))
}
}
},
}
};
Ok(owner)
}
@ -7997,6 +7997,23 @@ impl<'a> Parser<'a> {
}
}
/// Parse the parenthesized member list of an enum data type.
///
/// Expects `(` followed by a comma-separated list of members and a closing
/// `)`. Each member is a string literal, optionally followed by `=` and a
/// number, e.g. `('a', 'b')` or `('a' = 1, 'b' = 2)`.
///
/// Returns one [`EnumMember`] per list entry, in source order.
///
/// # Errors
///
/// Returns a [`ParserError`] if the parentheses are missing, a member name
/// is not a string literal, or the token after `=` is not a number.
pub fn parse_enum_values(&mut self) -> Result<Vec<EnumMember>, ParserError> {
    self.expect_token(&Token::LParen)?;
    let members = self.parse_comma_separated(|p| {
        let label = p.parse_literal_string()?;
        // Without an `=` the member is just a bare name.
        if !p.consume_token(&Token::Eq) {
            return Ok(EnumMember::Name(label));
        }
        Ok(EnumMember::NamedValue(label, p.parse_number()?))
    })?;
    self.expect_token(&Token::RParen)?;
    Ok(members)
}
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
let (ty, trailing_bracket) = self.parse_data_type_helper()?;
@ -8235,7 +8252,9 @@ impl<'a> Parser<'a> {
Keyword::BIGDECIMAL => Ok(DataType::BigDecimal(
self.parse_exact_number_optional_precision_scale()?,
)),
Keyword::ENUM => Ok(DataType::Enum(self.parse_string_values()?)),
Keyword::ENUM => Ok(DataType::Enum(self.parse_enum_values()?, None)),
Keyword::ENUM8 => Ok(DataType::Enum(self.parse_enum_values()?, Some(8))),
Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))),
Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)),
Keyword::ARRAY => {
if dialect_of!(self is SnowflakeDialect) {

View file

@ -51,6 +51,7 @@ mod test_utils;
use pretty_assertions::assert_eq;
use sqlparser::ast::ColumnOption::Comment;
use sqlparser::ast::Expr::{Identifier, UnaryOp};
use sqlparser::ast::Value::Number;
use sqlparser::test_utils::all_dialects_except;
#[test]
@ -9250,7 +9251,7 @@ fn parse_cache_table() {
format!(
"CACHE {table_flag} TABLE '{cache_table_name}' OPTIONS('K1' = 'V1', 'K2' = 0.88) {sql}",
)
.as_str()
.as_str()
),
Statement::Cache {
table_flag: Some(ObjectName(vec![Ident::new(table_flag)])),
@ -9275,7 +9276,7 @@ fn parse_cache_table() {
format!(
"CACHE {table_flag} TABLE '{cache_table_name}' OPTIONS('K1' = 'V1', 'K2' = 0.88) AS {sql}",
)
.as_str()
.as_str()
),
Statement::Cache {
table_flag: Some(ObjectName(vec![Ident::new(table_flag)])),
@ -11459,7 +11460,7 @@ fn parse_explain_with_option_list() {
}),
},
];
run_explain_analyze (
run_explain_analyze(
all_dialects_where(|d| d.supports_explain_with_utility_options()),
"EXPLAIN (ANALYZE, VERBOSE true, WAL OFF, FORMAT YAML, USER_DEF_NUM -100.1) SELECT sqrt(id) FROM foo",
false,
@ -12459,3 +12460,83 @@ fn parse_create_table_with_bit_types() {
_ => unreachable!(),
}
}
#[test]
fn parse_create_table_with_enum_types() {
    // Expected AST for a `'name' = <number>` enum member.
    let named = |name: &str, value: &str| {
        EnumMember::NamedValue(
            name.to_string(),
            Expr::Value(Number(value.parse().unwrap(), false)),
        )
    };
    // Expected AST for a column with no collation and no options.
    let column = |name: &str, data_type: DataType| ColumnDef {
        name: Ident::new(name),
        data_type,
        collation: None,
        options: vec![],
    };

    let sql = "CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = 2), bar ENUM16('a' = 1, 'b' = 2), baz ENUM('a', 'b'))";
    match all_dialects().verified_stmt(sql) {
        Statement::CreateTable(CreateTable { name, columns, .. }) => {
            assert_eq!(name.to_string(), "t0");

            let expected_columns = vec![
                column(
                    "foo",
                    DataType::Enum(vec![named("a", "1"), named("b", "2")], Some(8)),
                ),
                column(
                    "bar",
                    DataType::Enum(vec![named("a", "1"), named("b", "2")], Some(16)),
                ),
                column(
                    "baz",
                    DataType::Enum(
                        vec![
                            EnumMember::Name("a".to_string()),
                            EnumMember::Name("b".to_string()),
                        ],
                        None,
                    ),
                ),
            ];
            assert_eq!(expected_columns, columns);
        }
        _ => unreachable!(),
    }

    // Malformed member lists and the parser error each must produce:
    // first a pair with `=` but no value, then a member whose name is not
    // a string literal.
    let invalid_cases = [
        (
            "CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = ))",
            "Expected: a value, found: )",
        ),
        (
            "CREATE TABLE t0 (foo ENUM8('a' = 1, 2))",
            "Expected: literal string, found: 2",
        ),
    ];
    for (invalid_sql, message) in invalid_cases {
        assert_eq!(
            all_dialects()
                .parse_sql_statements(invalid_sql)
                .unwrap_err(),
            ParserError::ParserError(message.to_string())
        );
    }
}

View file

@ -685,7 +685,7 @@ fn table_constraint_unique_primary_ctor(
#[test]
fn parse_create_table_primary_and_unique_key() {
let sqls = ["UNIQUE KEY", "PRIMARY KEY"]
.map(|key_ty|format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))"));
.map(|key_ty| format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))"));
let index_type_display = [Some(KeyOrIndexDisplay::Key), None];
@ -753,7 +753,7 @@ fn parse_create_table_primary_and_unique_key() {
#[test]
fn parse_create_table_primary_and_unique_key_with_index_options() {
let sqls = ["UNIQUE INDEX", "PRIMARY KEY"]
.map(|key_ty|format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')"));
.map(|key_ty| format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')"));
let index_type_display = [Some(KeyOrIndexDisplay::Index), None];
@ -827,7 +827,7 @@ fn parse_create_table_primary_and_unique_key_with_index_type() {
#[test]
fn parse_create_table_primary_and_unique_key_characteristic_test() {
let sqls = ["UNIQUE INDEX", "PRIMARY KEY"]
.map(|key_ty|format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)"));
.map(|key_ty| format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)"));
for sql in &sqls {
mysql_and_generic().verified_stmt(sql);
}
@ -890,7 +890,13 @@ fn parse_create_table_set_enum() {
},
ColumnDef {
name: Ident::new("baz"),
data_type: DataType::Enum(vec!["a".to_string(), "b".to_string()]),
data_type: DataType::Enum(
vec![
EnumMember::Name("a".to_string()),
EnumMember::Name("b".to_string())
],
None
),
collation: None,
options: vec![],
}