Add support for the FORMAT clause to the ClickHouse parser (#1335)

This commit is contained in:
hulk 2024-07-09 19:49:04 +08:00 committed by GitHub
parent 9f60eb1571
commit 07278952f9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 114 additions and 8 deletions

View file

@ -43,14 +43,14 @@ pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml,
GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint,
JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType,
MatchRecognizePattern, MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr,
NonBlock, Offset, OffsetRows, OrderByExpr, PivotValueSource, Query, RenameSelectItem,
RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select,
SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table,
TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode,
Values, WildcardAdditionalOptions, With,
FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Join,
JoinConstraint, JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView,
LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure,
NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OrderByExpr,
PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement,
ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator,
SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion,
TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With,
};
pub use self::value::{
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,

View file

@ -54,6 +54,11 @@ pub struct Query {
///
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select#settings-in-select-query)
pub settings: Option<Vec<Setting>>,
/// `SELECT * FROM t FORMAT JSONCompact`
///
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/format)
/// (ClickHouse-specific)
pub format_clause: Option<FormatClause>,
}
impl fmt::Display for Query {
@ -86,6 +91,9 @@ impl fmt::Display for Query {
if let Some(ref for_clause) = self.for_clause {
write!(f, " {}", for_clause)?;
}
if let Some(ref format) = self.format_clause {
write!(f, " {}", format)?;
}
Ok(())
}
}
@ -1959,6 +1967,26 @@ impl fmt::Display for GroupByExpr {
}
}
/// The `FORMAT` clause of a query, specific to [ClickHouse]: either
/// `FORMAT <identifier>` or `FORMAT NULL`.
///
/// [ClickHouse]: <https://clickhouse.com/docs/en/sql-reference/statements/select/format>
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum FormatClause {
/// `FORMAT <identifier>`, e.g. `FORMAT JSONCompact`.
Identifier(Ident),
/// `FORMAT NULL`.
Null,
}
impl fmt::Display for FormatClause {
    /// Renders the clause as SQL text: `FORMAT <identifier>` or `FORMAT NULL`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Both variants share the keyword; only the tail differs.
        f.write_str("FORMAT ")?;
        match self {
            FormatClause::Null => f.write_str("NULL"),
            FormatClause::Identifier(name) => write!(f, "{}", name),
        }
    }
}
/// FOR XML or FOR JSON clause, specific to MSSQL
/// (formats the output of a query as XML or JSON)
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]

View file

@ -856,6 +856,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
Keyword::PREWHERE,
// for ClickHouse SELECT * FROM t SETTINGS ...
Keyword::SETTINGS,
// for ClickHouse SELECT * FROM t FORMAT...
Keyword::FORMAT,
// for Snowflake START WITH .. CONNECT BY
Keyword::START,
Keyword::CONNECT,

View file

@ -7918,6 +7918,7 @@ impl<'a> Parser<'a> {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})
} else if self.parse_keyword(Keyword::UPDATE) {
Ok(Query {
@ -7931,6 +7932,7 @@ impl<'a> Parser<'a> {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})
} else {
let body = self.parse_boxed_query_body(0)?;
@ -8006,6 +8008,18 @@ impl<'a> Parser<'a> {
locks.push(self.parse_lock()?);
}
}
let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect)
&& self.parse_keyword(Keyword::FORMAT)
{
if self.parse_keyword(Keyword::NULL) {
Some(FormatClause::Null)
} else {
let ident = self.parse_identifier(false)?;
Some(FormatClause::Identifier(ident))
}
} else {
None
};
Ok(Query {
with,
@ -8018,6 +8032,7 @@ impl<'a> Parser<'a> {
locks,
for_clause,
settings,
format_clause,
})
}
}
@ -9164,6 +9179,7 @@ impl<'a> Parser<'a> {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}),
alias,
})

View file

@ -768,6 +768,38 @@ fn test_prewhere() {
}
}
/// Verifies that a trailing `FORMAT` clause round-trips through the ClickHouse
/// and generic dialects, and that malformed `FORMAT` clauses are rejected.
#[test]
fn test_query_with_format_clause() {
    // Each format spelling is paired with the AST node it must parse into.
    let valid_cases = [
        (
            "TabSeparated",
            FormatClause::Identifier(Ident::new("TabSeparated")),
        ),
        (
            "JSONCompact",
            FormatClause::Identifier(Ident::new("JSONCompact")),
        ),
        ("NULL", FormatClause::Null),
    ];
    for (format, expected) in &valid_cases {
        let sql = format!("SELECT * FROM t FORMAT {}", format);
        match clickhouse_and_generic().verified_stmt(&sql) {
            Statement::Query(query) => {
                assert_eq!(query.format_clause.as_ref(), Some(expected));
            }
            _ => unreachable!(),
        }
    }

    let invalid_cases = [
        "SELECT * FROM t FORMAT",
        "SELECT * FROM t FORMAT TabSeparated JSONCompact",
        "SELECT * FROM t FORMAT TabSeparated TabSeparated",
    ];
    for sql in &invalid_cases {
        let result = clickhouse_and_generic().parse_sql_statements(sql);
        // The message is only shown when parsing unexpectedly succeeds, so it
        // names the offending SQL rather than a parser error text.
        assert!(
            result.is_err(),
            "expected a parse error for {:?}, but got {:?}",
            sql,
            result
        );
    }
}
fn clickhouse() -> TestedDialects {
TestedDialects {
dialects: vec![Box::new(ClickHouseDialect {})],

View file

@ -415,6 +415,7 @@ fn parse_update_set_from() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}),
alias: Some(TableAlias {
name: Ident::new("t2"),
@ -3430,6 +3431,7 @@ fn parse_create_table_as_table() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
});
match verified_stmt(sql1) {
@ -3456,6 +3458,7 @@ fn parse_create_table_as_table() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
});
match verified_stmt(sql2) {
@ -5003,6 +5006,7 @@ fn parse_interval_and_or_xor() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}))];
assert_eq!(actual_ast, expected_ast);
@ -7659,6 +7663,7 @@ fn parse_merge() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}),
alias: Some(TableAlias {
name: Ident {
@ -9180,6 +9185,7 @@ fn parse_unload() {
for_clause: None,
order_by: vec![],
settings: None,
format_clause: None,
}),
to: Ident {
value: "s3://...".to_string(),

View file

@ -104,6 +104,7 @@ fn parse_create_procedure() {
for_clause: None,
order_by: vec![],
settings: None,
format_clause: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: None,
top: None,
@ -550,6 +551,7 @@ fn parse_substring_in_select() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}),
query
);

View file

@ -927,6 +927,7 @@ fn parse_escaped_quote_identifiers_with_escape() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}))
);
}
@ -976,6 +977,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}))
);
}
@ -1022,6 +1024,7 @@ fn parse_escaped_backticks_with_escape() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}))
);
}
@ -1068,6 +1071,7 @@ fn parse_escaped_backticks_with_no_escape() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}))
);
}
@ -1273,6 +1277,7 @@ fn parse_simple_insert() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
source
);
@ -1316,6 +1321,7 @@ fn parse_ignore_insert() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
source
);
@ -1359,6 +1365,7 @@ fn parse_priority_insert() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
source
);
@ -1399,6 +1406,7 @@ fn parse_priority_insert() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
source
);
@ -1447,6 +1455,7 @@ fn parse_insert_as() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
source
);
@ -1507,6 +1516,7 @@ fn parse_insert_as() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
source
);
@ -1551,6 +1561,7 @@ fn parse_replace_insert() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
source
);
@ -1589,6 +1600,7 @@ fn parse_empty_row_insert() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
source
);
@ -1650,6 +1662,7 @@ fn parse_insert_with_on_duplicate_update() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
source
);
@ -2294,6 +2307,7 @@ fn parse_substring_in_select() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}),
query
);
@ -2601,6 +2615,7 @@ fn parse_hex_string_introducer() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
}))
)
}

View file

@ -1095,6 +1095,7 @@ fn parse_copy_to() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
to: true,
target: CopyTarget::File {
@ -2426,6 +2427,7 @@ fn parse_array_subquery_expr() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
filter: None,
null_treatment: None,
@ -4050,6 +4052,7 @@ fn test_simple_postgres_insert_with_alias() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
partitioned: None,
after_columns: vec![],
@ -4118,6 +4121,7 @@ fn test_simple_postgres_insert_with_alias() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
partitioned: None,
after_columns: vec![],
@ -4182,6 +4186,7 @@ fn test_simple_insert_with_quoted_alias() {
locks: vec![],
for_clause: None,
settings: None,
format_clause: None,
})),
partitioned: None,
after_columns: vec![],