feat: Add support for MSSQL table options (#1414)

This commit is contained in:
Simon Sawert 2024-09-11 19:09:41 +02:00 committed by GitHub
parent cb0c511b05
commit b9e7754886
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 523 additions and 65 deletions

View file

@ -2082,6 +2082,15 @@ pub enum CreateTableOptions {
/// e.g. `WITH (description = "123")`
///
/// <https://www.postgresql.org/docs/current/sql-createtable.html>
///
/// MSSQL supports more specific options that's not only key-value pairs.
///
/// WITH (
/// DISTRIBUTION = ROUND_ROBIN,
/// CLUSTERED INDEX (column_a DESC, column_b)
/// )
///
/// <https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#syntax>
With(Vec<SqlOption>),
/// Options specified using the `OPTIONS` keyword.
/// e.g. `OPTIONS(description = "123")`
@ -5728,14 +5737,119 @@ pub struct HiveFormat {
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct SqlOption {
pub struct ClusteredIndex {
pub name: Ident,
pub value: Expr,
pub asc: Option<bool>,
}
impl fmt::Display for ClusteredIndex {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.name)?;
match self.asc {
Some(true) => write!(f, " ASC"),
Some(false) => write!(f, " DESC"),
_ => Ok(()),
}
}
}
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum TableOptionsClustered {
ColumnstoreIndex,
ColumnstoreIndexOrder(Vec<Ident>),
Index(Vec<ClusteredIndex>),
}
impl fmt::Display for TableOptionsClustered {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
TableOptionsClustered::ColumnstoreIndex => {
write!(f, "CLUSTERED COLUMNSTORE INDEX")
}
TableOptionsClustered::ColumnstoreIndexOrder(values) => {
write!(
f,
"CLUSTERED COLUMNSTORE INDEX ORDER ({})",
display_comma_separated(values)
)
}
TableOptionsClustered::Index(values) => {
write!(f, "CLUSTERED INDEX ({})", display_comma_separated(values))
}
}
}
}
/// Specifies which partition the boundary values on table partitioning belongs to.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum PartitionRangeDirection {
Left,
Right,
}
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum SqlOption {
/// Clustered represents the clustered version of table storage for MSSQL.
///
/// <https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#TableOptions>
Clustered(TableOptionsClustered),
/// Single identifier options, e.g. `HEAP` for MSSQL.
///
/// <https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#TableOptions>
Ident(Ident),
/// Any option that consists of a key value pair where the value is an expression. e.g.
///
/// WITH(DISTRIBUTION = ROUND_ROBIN)
KeyValue { key: Ident, value: Expr },
/// One or more table partitions and represents which partition the boundary values belong to,
/// e.g.
///
/// PARTITION (id RANGE LEFT FOR VALUES (10, 20, 30, 40))
///
/// <https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#TablePartitionOptions>
Partition {
column_name: Ident,
range_direction: Option<PartitionRangeDirection>,
for_values: Vec<Expr>,
},
}
impl fmt::Display for SqlOption {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} = {}", self.name, self.value)
match self {
SqlOption::Clustered(c) => write!(f, "{}", c),
SqlOption::Ident(ident) => {
write!(f, "{}", ident)
}
SqlOption::KeyValue { key: name, value } => {
write!(f, "{} = {}", name, value)
}
SqlOption::Partition {
column_name,
range_direction,
for_values,
} => {
let direction = match range_direction {
Some(PartitionRangeDirection::Left) => " LEFT",
Some(PartitionRangeDirection::Right) => " RIGHT",
None => "",
};
write!(
f,
"PARTITION ({} RANGE{} FOR VALUES ({}))",
column_name,
direction,
display_comma_separated(for_values)
)
}
}
}
}

View file

@ -166,6 +166,7 @@ define_keywords!(
COLLECTION,
COLUMN,
COLUMNS,
COLUMNSTORE,
COMMENT,
COMMIT,
COMMITTED,
@ -355,6 +356,7 @@ define_keywords!(
HASH,
HAVING,
HEADER,
HEAP,
HIGH_PRIORITY,
HISTORY,
HIVEVAR,

View file

@ -6480,10 +6480,91 @@ impl<'a> Parser<'a> {
}
pub fn parse_sql_option(&mut self) -> Result<SqlOption, ParserError> {
let is_mssql = dialect_of!(self is MsSqlDialect|GenericDialect);
match self.peek_token().token {
Token::Word(w) if w.keyword == Keyword::HEAP && is_mssql => {
Ok(SqlOption::Ident(self.parse_identifier(false)?))
}
Token::Word(w) if w.keyword == Keyword::PARTITION && is_mssql => {
self.parse_option_partition()
}
Token::Word(w) if w.keyword == Keyword::CLUSTERED && is_mssql => {
self.parse_option_clustered()
}
_ => {
let name = self.parse_identifier(false)?;
self.expect_token(&Token::Eq)?;
let value = self.parse_expr()?;
Ok(SqlOption { name, value })
Ok(SqlOption::KeyValue { key: name, value })
}
}
}
pub fn parse_option_clustered(&mut self) -> Result<SqlOption, ParserError> {
if self.parse_keywords(&[
Keyword::CLUSTERED,
Keyword::COLUMNSTORE,
Keyword::INDEX,
Keyword::ORDER,
]) {
Ok(SqlOption::Clustered(
TableOptionsClustered::ColumnstoreIndexOrder(
self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?,
),
))
} else if self.parse_keywords(&[Keyword::CLUSTERED, Keyword::COLUMNSTORE, Keyword::INDEX]) {
Ok(SqlOption::Clustered(
TableOptionsClustered::ColumnstoreIndex,
))
} else if self.parse_keywords(&[Keyword::CLUSTERED, Keyword::INDEX]) {
self.expect_token(&Token::LParen)?;
let columns = self.parse_comma_separated(|p| {
let name = p.parse_identifier(false)?;
let asc = p.parse_asc_desc();
Ok(ClusteredIndex { name, asc })
})?;
self.expect_token(&Token::RParen)?;
Ok(SqlOption::Clustered(TableOptionsClustered::Index(columns)))
} else {
Err(ParserError::ParserError(
"invalid CLUSTERED sequence".to_string(),
))
}
}
pub fn parse_option_partition(&mut self) -> Result<SqlOption, ParserError> {
self.expect_keyword(Keyword::PARTITION)?;
self.expect_token(&Token::LParen)?;
let column_name = self.parse_identifier(false)?;
self.expect_keyword(Keyword::RANGE)?;
let range_direction = if self.parse_keyword(Keyword::LEFT) {
Some(PartitionRangeDirection::Left)
} else if self.parse_keyword(Keyword::RIGHT) {
Some(PartitionRangeDirection::Right)
} else {
None
};
self.expect_keywords(&[Keyword::FOR, Keyword::VALUES])?;
self.expect_token(&Token::LParen)?;
let for_values = self.parse_comma_separated(Parser::parse_expr)?;
self.expect_token(&Token::RParen)?;
self.expect_token(&Token::RParen)?;
Ok(SqlOption::Partition {
column_name,
range_direction,
for_values,
})
}
pub fn parse_partition(&mut self) -> Result<Partition, ParserError> {
@ -11014,17 +11095,23 @@ impl<'a> Parser<'a> {
})
}
/// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY)
pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> {
let expr = self.parse_expr()?;
let asc = if self.parse_keyword(Keyword::ASC) {
/// Parse ASC or DESC, returns an Option with true if ASC, false of DESC or `None` if none of
/// them.
pub fn parse_asc_desc(&mut self) -> Option<bool> {
if self.parse_keyword(Keyword::ASC) {
Some(true)
} else if self.parse_keyword(Keyword::DESC) {
Some(false)
} else {
None
};
}
}
/// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY)
pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> {
let expr = self.parse_expr()?;
let asc = self.parse_asc_desc();
let nulls_first = if self.parse_keywords(&[Keyword::NULLS, Keyword::FIRST]) {
Some(true)

View file

@ -267,8 +267,8 @@ fn parse_create_view_with_options() {
ViewColumnDef {
name: Ident::new("age"),
data_type: None,
options: Some(vec![SqlOption {
name: Ident::new("description"),
options: Some(vec![SqlOption::KeyValue {
key: Ident::new("description"),
value: Expr::Value(Value::DoubleQuotedString("field age".to_string())),
}])
},
@ -287,8 +287,8 @@ fn parse_create_view_with_options() {
unreachable!()
};
assert_eq!(
&SqlOption {
name: Ident::new("description"),
&SqlOption::KeyValue {
key: Ident::new("description"),
value: Expr::Value(Value::DoubleQuotedString(
"a view that expires in 2 days".to_string()
)),
@ -414,8 +414,8 @@ fn parse_create_table_with_options() {
},
ColumnOptionDef {
name: None,
option: ColumnOption::Options(vec![SqlOption {
name: Ident::new("description"),
option: ColumnOption::Options(vec![SqlOption::KeyValue {
key: Ident::new("description"),
value: Expr::Value(Value::DoubleQuotedString(
"field x".to_string()
)),
@ -429,8 +429,8 @@ fn parse_create_table_with_options() {
collation: None,
options: vec![ColumnOptionDef {
name: None,
option: ColumnOption::Options(vec![SqlOption {
name: Ident::new("description"),
option: ColumnOption::Options(vec![SqlOption::KeyValue {
key: Ident::new("description"),
value: Expr::Value(Value::DoubleQuotedString(
"field y".to_string()
)),
@ -448,12 +448,12 @@ fn parse_create_table_with_options() {
Ident::new("age"),
])),
Some(vec![
SqlOption {
name: Ident::new("partition_expiration_days"),
SqlOption::KeyValue {
key: Ident::new("partition_expiration_days"),
value: Expr::Value(number("1")),
},
SqlOption {
name: Ident::new("description"),
SqlOption::KeyValue {
key: Ident::new("description"),
value: Expr::Value(Value::DoubleQuotedString(
"table option description".to_string()
)),
@ -2005,8 +2005,8 @@ fn test_bigquery_create_function() {
function_body: Some(CreateFunctionBody::AsAfterOptions(Expr::Value(number(
"42"
)))),
options: Some(vec![SqlOption {
name: Ident::new("x"),
options: Some(vec![SqlOption::KeyValue {
key: Ident::new("x"),
value: Expr::Value(Value::SingleQuotedString("y".into())),
}]),
behavior: None,

View file

@ -3637,12 +3637,12 @@ fn parse_create_table_with_options() {
Statement::CreateTable(CreateTable { with_options, .. }) => {
assert_eq!(
vec![
SqlOption {
name: "foo".into(),
SqlOption::KeyValue {
key: "foo".into(),
value: Expr::Value(Value::SingleQuotedString("bar".into())),
},
SqlOption {
name: "a".into(),
SqlOption::KeyValue {
key: "a".into(),
value: Expr::Value(number("123")),
},
],
@ -3870,8 +3870,8 @@ fn parse_alter_table() {
AlterTableOperation::SetTblProperties { table_properties } => {
assert_eq!(
table_properties,
[SqlOption {
name: Ident {
[SqlOption::KeyValue {
key: Ident {
value: "classification".to_string(),
quote_style: Some('\'')
},
@ -3958,12 +3958,12 @@ fn parse_alter_view_with_options() {
Statement::AlterView { with_options, .. } => {
assert_eq!(
vec![
SqlOption {
name: "foo".into(),
SqlOption::KeyValue {
key: "foo".into(),
value: Expr::Value(Value::SingleQuotedString("bar".into())),
},
SqlOption {
name: "a".into(),
SqlOption::KeyValue {
key: "a".into(),
value: Expr::Value(number("123")),
},
],
@ -6729,12 +6729,12 @@ fn parse_create_view_with_options() {
Statement::CreateView { options, .. } => {
assert_eq!(
CreateTableOptions::With(vec![
SqlOption {
name: "foo".into(),
SqlOption::KeyValue {
key: "foo".into(),
value: Expr::Value(Value::SingleQuotedString("bar".into())),
},
SqlOption {
name: "a".into(),
SqlOption::KeyValue {
key: "a".into(),
value: Expr::Value(number("123")),
},
]),
@ -8827,12 +8827,12 @@ fn parse_cache_table() {
table_name: ObjectName(vec![Ident::with_quote('\'', cache_table_name)]),
has_as: false,
options: vec![
SqlOption {
name: Ident::with_quote('\'', "K1"),
SqlOption::KeyValue {
key: Ident::with_quote('\'', "K1"),
value: Expr::Value(Value::SingleQuotedString("V1".into())),
},
SqlOption {
name: Ident::with_quote('\'', "K2"),
SqlOption::KeyValue {
key: Ident::with_quote('\'', "K2"),
value: Expr::Value(number("0.88")),
},
],
@ -8852,12 +8852,12 @@ fn parse_cache_table() {
table_name: ObjectName(vec![Ident::with_quote('\'', cache_table_name)]),
has_as: false,
options: vec![
SqlOption {
name: Ident::with_quote('\'', "K1"),
SqlOption::KeyValue {
key: Ident::with_quote('\'', "K1"),
value: Expr::Value(Value::SingleQuotedString("V1".into())),
},
SqlOption {
name: Ident::with_quote('\'', "K2"),
SqlOption::KeyValue {
key: Ident::with_quote('\'', "K2"),
value: Expr::Value(number("0.88")),
},
],
@ -8877,12 +8877,12 @@ fn parse_cache_table() {
table_name: ObjectName(vec![Ident::with_quote('\'', cache_table_name)]),
has_as: true,
options: vec![
SqlOption {
name: Ident::with_quote('\'', "K1"),
SqlOption::KeyValue {
key: Ident::with_quote('\'', "K1"),
value: Expr::Value(Value::SingleQuotedString("V1".into())),
},
SqlOption {
name: Ident::with_quote('\'', "K2"),
SqlOption::KeyValue {
key: Ident::with_quote('\'', "K2"),
value: Expr::Value(number("0.88")),
},
],
@ -9695,8 +9695,8 @@ fn parse_unload() {
value: "s3://...".to_string(),
quote_style: Some('\'')
},
with: vec![SqlOption {
name: Ident {
with: vec![SqlOption::KeyValue {
key: Ident {
value: "format".to_string(),
quote_style: None
},

View file

@ -653,6 +653,261 @@ fn parse_use() {
}
}
#[test]
fn parse_create_table_with_valid_options() {
let options = [
(
"CREATE TABLE mytable (column_a INT, column_b INT, column_c INT) WITH (DISTRIBUTION = ROUND_ROBIN, PARTITION (column_a RANGE FOR VALUES (10, 11)))",
vec![
SqlOption::KeyValue {
key: Ident {
value: "DISTRIBUTION".to_string(),
quote_style: None,
},
value: Expr::Identifier(Ident {
value: "ROUND_ROBIN".to_string(),
quote_style: None,
})
},
SqlOption::Partition {
column_name: "column_a".into(),
range_direction: None,
for_values: vec![Expr::Value(test_utils::number("10")), Expr::Value(test_utils::number("11"))] ,
},
],
),
(
"CREATE TABLE mytable (column_a INT, column_b INT, column_c INT) WITH (PARTITION (column_a RANGE LEFT FOR VALUES (10, 11)))",
vec![
SqlOption::Partition {
column_name: "column_a".into(),
range_direction: Some(PartitionRangeDirection::Left),
for_values: vec![
Expr::Value(test_utils::number("10")),
Expr::Value(test_utils::number("11")),
],
}
],
),
(
"CREATE TABLE mytable (column_a INT, column_b INT, column_c INT) WITH (CLUSTERED COLUMNSTORE INDEX)",
vec![SqlOption::Clustered(TableOptionsClustered::ColumnstoreIndex)],
),
(
"CREATE TABLE mytable (column_a INT, column_b INT, column_c INT) WITH (CLUSTERED COLUMNSTORE INDEX ORDER (column_a, column_b))",
vec![
SqlOption::Clustered(TableOptionsClustered::ColumnstoreIndexOrder(vec![
"column_a".into(),
"column_b".into(),
]))
],
),
(
"CREATE TABLE mytable (column_a INT, column_b INT, column_c INT) WITH (CLUSTERED INDEX (column_a ASC, column_b DESC, column_c))",
vec![
SqlOption::Clustered(TableOptionsClustered::Index(vec![
ClusteredIndex {
name: Ident {
value: "column_a".to_string(),
quote_style: None,
},
asc: Some(true),
},
ClusteredIndex {
name: Ident {
value: "column_b".to_string(),
quote_style: None,
},
asc: Some(false),
},
ClusteredIndex {
name: Ident {
value: "column_c".to_string(),
quote_style: None,
},
asc: None,
},
]))
],
),
(
"CREATE TABLE mytable (column_a INT, column_b INT, column_c INT) WITH (DISTRIBUTION = HASH(column_a, column_b), HEAP)",
vec![
SqlOption::KeyValue {
key: Ident {
value: "DISTRIBUTION".to_string(),
quote_style: None,
},
value: Expr::Function(
Function {
name: ObjectName(
vec![
Ident {
value: "HASH".to_string(),
quote_style: None,
},
],
),
parameters: FunctionArguments::None,
args: FunctionArguments::List(
FunctionArgumentList {
duplicate_treatment: None,
args: vec![
FunctionArg::Unnamed(
FunctionArgExpr::Expr(
Expr::Identifier(
Ident {
value: "column_a".to_string(),
quote_style: None,
},
),
),
),
FunctionArg::Unnamed(
FunctionArgExpr::Expr(
Expr::Identifier(
Ident {
value: "column_b".to_string(),
quote_style: None,
},
),
),
),
],
clauses: vec![],
},
),
filter: None,
null_treatment: None,
over: None,
within_group: vec![],
},
),
},
SqlOption::Ident("HEAP".into()),
],
),
];
for (sql, with_options) in options {
assert_eq!(
ms_and_generic().verified_stmt(sql),
Statement::CreateTable(CreateTable {
or_replace: false,
temporary: false,
external: false,
global: None,
if_not_exists: false,
transient: false,
volatile: false,
name: ObjectName(vec![Ident {
value: "mytable".to_string(),
quote_style: None,
},],),
columns: vec![
ColumnDef {
name: Ident {
value: "column_a".to_string(),
quote_style: None,
},
data_type: Int(None,),
collation: None,
options: vec![],
},
ColumnDef {
name: Ident {
value: "column_b".to_string(),
quote_style: None,
},
data_type: Int(None,),
collation: None,
options: vec![],
},
ColumnDef {
name: Ident {
value: "column_c".to_string(),
quote_style: None,
},
data_type: Int(None,),
collation: None,
options: vec![],
},
],
constraints: vec![],
hive_distribution: HiveDistributionStyle::NONE,
hive_formats: Some(HiveFormat {
row_format: None,
serde_properties: None,
storage: None,
location: None,
},),
table_properties: vec![],
with_options,
file_format: None,
location: None,
query: None,
without_rowid: false,
like: None,
clone: None,
engine: None,
comment: None,
auto_increment_offset: None,
default_charset: None,
collation: None,
on_commit: None,
on_cluster: None,
primary_key: None,
order_by: None,
partition_by: None,
cluster_by: None,
clustered_by: None,
options: None,
strict: false,
copy_grants: false,
enable_schema_evolution: None,
change_tracking: None,
data_retention_time_in_days: None,
max_data_extension_time_in_days: None,
default_ddl_collation: None,
with_aggregation_policy: None,
with_row_access_policy: None,
with_tags: None,
})
);
}
}
#[test]
fn parse_create_table_with_invalid_options() {
let invalid_cases = vec![
(
"CREATE TABLE mytable (column_a INT, column_b INT, column_c INT) WITH (CLUSTERED COLUMNSTORE INDEX ORDER ())",
"Expected: identifier, found: )",
),
(
"CREATE TABLE mytable (column_a INT, column_b INT, column_c INT) WITH (CLUSTERED COLUMNSTORE)",
"invalid CLUSTERED sequence",
),
(
"CREATE TABLE mytable (column_a INT, column_b INT, column_c INT) WITH (HEAP INDEX)",
"Expected: ), found: INDEX",
),
(
"CREATE TABLE mytable (column_a INT, column_b INT, column_c INT) WITH (PARTITION (RANGE LEFT FOR VALUES (10, 11)))",
"Expected: RANGE, found: LEFT",
),
];
for (sql, expected_error) in invalid_cases {
let res = ms_and_generic().parse_sql_statements(sql);
assert_eq!(
format!("sql parser error: {expected_error}"),
res.unwrap_err().to_string()
);
}
}
fn ms() -> TestedDialects {
TestedDialects {
dialects: vec![Box::new(MsSqlDialect {})],

View file

@ -461,16 +461,16 @@ fn parse_create_table_with_defaults() {
assert_eq!(
with_options,
vec![
SqlOption {
name: "fillfactor".into(),
SqlOption::KeyValue {
key: "fillfactor".into(),
value: Expr::Value(number("20"))
},
SqlOption {
name: "user_catalog_table".into(),
SqlOption::KeyValue {
key: "user_catalog_table".into(),
value: Expr::Value(Value::Boolean(true))
},
SqlOption {
name: "autovacuum_vacuum_threshold".into(),
SqlOption::KeyValue {
key: "autovacuum_vacuum_threshold".into(),
value: Expr::Value(number("100"))
},
]
@ -4482,12 +4482,12 @@ fn parse_create_table_with_options() {
Statement::CreateTable(CreateTable { with_options, .. }) => {
assert_eq!(
vec![
SqlOption {
name: "foo".into(),
SqlOption::KeyValue {
key: "foo".into(),
value: Expr::Value(Value::SingleQuotedString("bar".into())),
},
SqlOption {
name: "a".into(),
SqlOption::KeyValue {
key: "a".into(),
value: Expr::Value(number("123")),
},
],