mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-04 06:18:17 +00:00
Add support for ClickHouse FORMAT
on INSERT
(#1628)
This commit is contained in:
parent
b09514e492
commit
0c3b6c0974
10 changed files with 157 additions and 38 deletions
|
@ -32,11 +32,11 @@ use sqlparser_derive::{Visit, VisitMut};
|
||||||
pub use super::ddl::{ColumnDef, TableConstraint};
|
pub use super::ddl::{ColumnDef, TableConstraint};
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
display_comma_separated, display_separated, Assignment, ClusteredBy, CommentDef, Expr,
|
display_comma_separated, display_separated, query::InputFormatClause, Assignment, ClusteredBy,
|
||||||
FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident,
|
CommentDef, Expr, FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat,
|
||||||
InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert, OneOrManyWithParens,
|
HiveRowFormat, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert,
|
||||||
OrderByExpr, Query, RowAccessPolicy, SelectItem, SqlOption, SqliteOnConflict, TableEngine,
|
OneOrManyWithParens, OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting, SqlOption,
|
||||||
TableObject, TableWithJoins, Tag, WrappedCollection,
|
SqliteOnConflict, TableEngine, TableObject, TableWithJoins, Tag, WrappedCollection,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// CREATE INDEX statement.
|
/// CREATE INDEX statement.
|
||||||
|
@ -497,6 +497,19 @@ pub struct Insert {
|
||||||
pub priority: Option<MysqlInsertPriority>,
|
pub priority: Option<MysqlInsertPriority>,
|
||||||
/// Only for mysql
|
/// Only for mysql
|
||||||
pub insert_alias: Option<InsertAliases>,
|
pub insert_alias: Option<InsertAliases>,
|
||||||
|
/// Settings used for ClickHouse.
|
||||||
|
///
|
||||||
|
/// ClickHouse syntax: `INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format'`
|
||||||
|
///
|
||||||
|
/// [ClickHouse `INSERT INTO`](https://clickhouse.com/docs/en/sql-reference/statements/insert-into)
|
||||||
|
pub settings: Option<Vec<Setting>>,
|
||||||
|
/// Format for `INSERT` statement when not using standard SQL format. Can be e.g. `CSV`,
|
||||||
|
/// `JSON`, `JSONAsString`, `LineAsString` and more.
|
||||||
|
///
|
||||||
|
/// ClickHouse syntax: `INSERT INTO tbl FORMAT JSONEachRow {"foo": 1, "bar": 2}, {"foo": 3}`
|
||||||
|
///
|
||||||
|
/// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data)
|
||||||
|
pub format_clause: Option<InputFormatClause>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for Insert {
|
impl Display for Insert {
|
||||||
|
@ -545,12 +558,18 @@ impl Display for Insert {
|
||||||
write!(f, "({}) ", display_comma_separated(&self.after_columns))?;
|
write!(f, "({}) ", display_comma_separated(&self.after_columns))?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(settings) = &self.settings {
|
||||||
|
write!(f, "SETTINGS {} ", display_comma_separated(settings))?;
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(source) = &self.source {
|
if let Some(source) = &self.source {
|
||||||
write!(f, "{source}")?;
|
write!(f, "{source}")?;
|
||||||
} else if !self.assignments.is_empty() {
|
} else if !self.assignments.is_empty() {
|
||||||
write!(f, "SET ")?;
|
write!(f, "SET ")?;
|
||||||
write!(f, "{}", display_comma_separated(&self.assignments))?;
|
write!(f, "{}", display_comma_separated(&self.assignments))?;
|
||||||
} else if self.source.is_none() && self.columns.is_empty() {
|
} else if let Some(format_clause) = &self.format_clause {
|
||||||
|
write!(f, "{format_clause}")?;
|
||||||
|
} else if self.columns.is_empty() {
|
||||||
write!(f, "DEFAULT VALUES")?;
|
write!(f, "DEFAULT VALUES")?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -61,10 +61,10 @@ pub use self::operator::{BinaryOperator, UnaryOperator};
|
||||||
pub use self::query::{
|
pub use self::query::{
|
||||||
AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
|
AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
|
||||||
ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml,
|
ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml,
|
||||||
FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Interpolate,
|
FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem,
|
||||||
InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonTableColumn,
|
InputFormatClause, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator,
|
||||||
JsonTableColumnErrorHandling, JsonTableNamedColumn, JsonTableNestedColumn, LateralView,
|
JsonTableColumn, JsonTableColumnErrorHandling, JsonTableNamedColumn, JsonTableNestedColumn,
|
||||||
LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure,
|
LateralView, LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure,
|
||||||
NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OpenJsonTableColumn,
|
NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OpenJsonTableColumn,
|
||||||
OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem,
|
OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem,
|
||||||
RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select,
|
RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select,
|
||||||
|
|
|
@ -2480,6 +2480,29 @@ impl fmt::Display for FormatClause {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// FORMAT identifier in input context, specific to ClickHouse.
|
||||||
|
///
|
||||||
|
/// [ClickHouse]: <https://clickhouse.com/docs/en/interfaces/formats>
|
||||||
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
|
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||||
|
pub struct InputFormatClause {
|
||||||
|
pub ident: Ident,
|
||||||
|
pub values: Vec<Expr>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for InputFormatClause {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f, "FORMAT {}", self.ident)?;
|
||||||
|
|
||||||
|
if !self.values.is_empty() {
|
||||||
|
write!(f, " {}", display_comma_separated(self.values.as_slice()))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// FOR XML or FOR JSON clause, specific to MSSQL
|
/// FOR XML or FOR JSON clause, specific to MSSQL
|
||||||
/// (formats the output of a query as XML or JSON)
|
/// (formats the output of a query as XML or JSON)
|
||||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||||
|
|
|
@ -1156,6 +1156,8 @@ impl Spanned for Insert {
|
||||||
priority: _, // todo, mysql specific
|
priority: _, // todo, mysql specific
|
||||||
insert_alias: _, // todo, mysql specific
|
insert_alias: _, // todo, mysql specific
|
||||||
assignments,
|
assignments,
|
||||||
|
settings: _, // todo, clickhouse specific
|
||||||
|
format_clause: _, // todo, clickhouse specific
|
||||||
} = self;
|
} = self;
|
||||||
|
|
||||||
union_spans(
|
union_spans(
|
||||||
|
|
|
@ -54,4 +54,16 @@ impl Dialect for ClickHouseDialect {
|
||||||
fn supports_insert_table_function(&self) -> bool {
|
fn supports_insert_table_function(&self) -> bool {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn supports_insert_format(&self) -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting
|
||||||
|
// with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected.
|
||||||
|
//
|
||||||
|
// [ClickHouse formats](https://clickhouse.com/docs/en/interfaces/formats)
|
||||||
|
fn supports_dictionary_syntax(&self) -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -797,6 +797,11 @@ pub trait Dialect: Debug + Any {
|
||||||
fn supports_insert_table_function(&self) -> bool {
|
fn supports_insert_table_function(&self) -> bool {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Does the dialect support insert formats, e.g. `INSERT INTO ... FORMAT <format>`
|
||||||
|
fn supports_insert_format(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This represents the operators for which precedence must be defined
|
/// This represents the operators for which precedence must be defined
|
||||||
|
|
|
@ -949,9 +949,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
|
||||||
Keyword::PARTITION,
|
Keyword::PARTITION,
|
||||||
// for Clickhouse PREWHERE
|
// for Clickhouse PREWHERE
|
||||||
Keyword::PREWHERE,
|
Keyword::PREWHERE,
|
||||||
// for ClickHouse SELECT * FROM t SETTINGS ...
|
|
||||||
Keyword::SETTINGS,
|
Keyword::SETTINGS,
|
||||||
// for ClickHouse SELECT * FROM t FORMAT...
|
|
||||||
Keyword::FORMAT,
|
Keyword::FORMAT,
|
||||||
// for Snowflake START WITH .. CONNECT BY
|
// for Snowflake START WITH .. CONNECT BY
|
||||||
Keyword::START,
|
Keyword::START,
|
||||||
|
|
|
@ -12033,35 +12033,55 @@ impl<'a> Parser<'a> {
|
||||||
|
|
||||||
let is_mysql = dialect_of!(self is MySqlDialect);
|
let is_mysql = dialect_of!(self is MySqlDialect);
|
||||||
|
|
||||||
let (columns, partitioned, after_columns, source, assignments) =
|
let (columns, partitioned, after_columns, source, assignments) = if self
|
||||||
if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) {
|
.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES])
|
||||||
(vec![], None, vec![], None, vec![])
|
{
|
||||||
} else {
|
(vec![], None, vec![], None, vec![])
|
||||||
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() {
|
} else {
|
||||||
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;
|
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() {
|
||||||
|
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;
|
||||||
|
|
||||||
let partitioned = self.parse_insert_partition()?;
|
let partitioned = self.parse_insert_partition()?;
|
||||||
// Hive allows you to specify columns after partitions as well if you want.
|
// Hive allows you to specify columns after partitions as well if you want.
|
||||||
let after_columns = if dialect_of!(self is HiveDialect) {
|
let after_columns = if dialect_of!(self is HiveDialect) {
|
||||||
self.parse_parenthesized_column_list(Optional, false)?
|
self.parse_parenthesized_column_list(Optional, false)?
|
||||||
} else {
|
|
||||||
vec![]
|
|
||||||
};
|
|
||||||
(columns, partitioned, after_columns)
|
|
||||||
} else {
|
} else {
|
||||||
Default::default()
|
vec![]
|
||||||
};
|
};
|
||||||
|
(columns, partitioned, after_columns)
|
||||||
let (source, assignments) =
|
} else {
|
||||||
if self.dialect.supports_insert_set() && self.parse_keyword(Keyword::SET) {
|
Default::default()
|
||||||
(None, self.parse_comma_separated(Parser::parse_assignment)?)
|
|
||||||
} else {
|
|
||||||
(Some(self.parse_query()?), vec![])
|
|
||||||
};
|
|
||||||
|
|
||||||
(columns, partitioned, after_columns, source, assignments)
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let (source, assignments) = if self.peek_keyword(Keyword::FORMAT)
|
||||||
|
|| self.peek_keyword(Keyword::SETTINGS)
|
||||||
|
{
|
||||||
|
(None, vec![])
|
||||||
|
} else if self.dialect.supports_insert_set() && self.parse_keyword(Keyword::SET) {
|
||||||
|
(None, self.parse_comma_separated(Parser::parse_assignment)?)
|
||||||
|
} else {
|
||||||
|
(Some(self.parse_query()?), vec![])
|
||||||
|
};
|
||||||
|
|
||||||
|
(columns, partitioned, after_columns, source, assignments)
|
||||||
|
};
|
||||||
|
|
||||||
|
let (format_clause, settings) = if self.dialect.supports_insert_format() {
|
||||||
|
// Settings always comes before `FORMAT` for ClickHouse:
|
||||||
|
// <https://clickhouse.com/docs/en/sql-reference/statements/insert-into>
|
||||||
|
let settings = self.parse_settings()?;
|
||||||
|
|
||||||
|
let format = if self.parse_keyword(Keyword::FORMAT) {
|
||||||
|
Some(self.parse_input_format_clause()?)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
(format, settings)
|
||||||
|
} else {
|
||||||
|
Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect)
|
let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect)
|
||||||
&& self.parse_keyword(Keyword::AS)
|
&& self.parse_keyword(Keyword::AS)
|
||||||
{
|
{
|
||||||
|
@ -12146,10 +12166,24 @@ impl<'a> Parser<'a> {
|
||||||
replace_into,
|
replace_into,
|
||||||
priority,
|
priority,
|
||||||
insert_alias,
|
insert_alias,
|
||||||
|
settings,
|
||||||
|
format_clause,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses input format clause used for [ClickHouse].
|
||||||
|
//
|
||||||
|
// <https://clickhouse.com/docs/en/interfaces/formats>
|
||||||
|
pub fn parse_input_format_clause(&mut self) -> Result<InputFormatClause, ParserError> {
|
||||||
|
let ident = self.parse_identifier()?;
|
||||||
|
let values = self
|
||||||
|
.maybe_parse(|p| p.parse_comma_separated(|p| p.parse_expr()))?
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
Ok(InputFormatClause { ident, values })
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if the immediate tokens look like the
|
/// Returns true if the immediate tokens look like the
|
||||||
/// beginning of a subquery. `(SELECT ...`
|
/// beginning of a subquery. `(SELECT ...`
|
||||||
fn peek_subquery_start(&mut self) -> bool {
|
fn peek_subquery_start(&mut self) -> bool {
|
||||||
|
|
|
@ -1404,6 +1404,26 @@ fn test_query_with_format_clause() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_insert_query_with_format_clause() {
|
||||||
|
let cases = [
|
||||||
|
r#"INSERT INTO tbl FORMAT JSONEachRow {"id": 1, "value": "foo"}, {"id": 2, "value": "bar"}"#,
|
||||||
|
r#"INSERT INTO tbl FORMAT JSONEachRow ["first", "second", "third"]"#,
|
||||||
|
r#"INSERT INTO tbl FORMAT JSONEachRow [{"first": 1}]"#,
|
||||||
|
r#"INSERT INTO tbl (foo) FORMAT JSONAsObject {"foo": {"bar": {"x": "y"}, "baz": 1}}"#,
|
||||||
|
r#"INSERT INTO tbl (foo, bar) FORMAT JSON {"foo": 1, "bar": 2}"#,
|
||||||
|
r#"INSERT INTO tbl FORMAT CSV col1, col2, col3"#,
|
||||||
|
r#"INSERT INTO tbl FORMAT LineAsString "I love apple", "I love banana", "I love orange""#,
|
||||||
|
r#"INSERT INTO tbl (foo) SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
|
||||||
|
r#"INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format', format_template_row = '/some/path/row.format' FORMAT Template"#,
|
||||||
|
r#"INSERT INTO tbl SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
|
||||||
|
];
|
||||||
|
|
||||||
|
for sql in &cases {
|
||||||
|
clickhouse().verified_stmt(sql);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_create_table_on_commit_and_as_query() {
|
fn parse_create_table_on_commit_and_as_query() {
|
||||||
let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#;
|
let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#;
|
||||||
|
|
|
@ -4431,7 +4431,9 @@ fn test_simple_postgres_insert_with_alias() {
|
||||||
returning: None,
|
returning: None,
|
||||||
replace_into: false,
|
replace_into: false,
|
||||||
priority: None,
|
priority: None,
|
||||||
insert_alias: None
|
insert_alias: None,
|
||||||
|
settings: None,
|
||||||
|
format_clause: None,
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -4502,7 +4504,9 @@ fn test_simple_postgres_insert_with_alias() {
|
||||||
returning: None,
|
returning: None,
|
||||||
replace_into: false,
|
replace_into: false,
|
||||||
priority: None,
|
priority: None,
|
||||||
insert_alias: None
|
insert_alias: None,
|
||||||
|
settings: None,
|
||||||
|
format_clause: None,
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -4570,6 +4574,8 @@ fn test_simple_insert_with_quoted_alias() {
|
||||||
replace_into: false,
|
replace_into: false,
|
||||||
priority: None,
|
priority: None,
|
||||||
insert_alias: None,
|
insert_alias: None,
|
||||||
|
settings: None,
|
||||||
|
format_clause: None,
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue