mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
Add support for XMLTABLE
(#1817)
This commit is contained in:
parent
3ec80e187d
commit
945f8e0534
6 changed files with 327 additions and 1 deletions
|
@ -81,7 +81,8 @@ pub use self::query::{
|
|||
TableSampleBucket, TableSampleKind, TableSampleMethod, TableSampleModifier,
|
||||
TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, TableSampleUnit, TableVersion,
|
||||
TableWithJoins, Top, TopQuantity, UpdateTableFromKind, ValueTableMode, Values,
|
||||
WildcardAdditionalOptions, With, WithFill,
|
||||
WildcardAdditionalOptions, With, WithFill, XmlNamespaceDefinition, XmlPassingArgument,
|
||||
XmlPassingClause, XmlTableColumn, XmlTableColumnOption,
|
||||
};
|
||||
|
||||
pub use self::trigger::{
|
||||
|
|
186
src/ast/query.rs
186
src/ast/query.rs
|
@ -1271,6 +1271,37 @@ pub enum TableFactor {
|
|||
symbols: Vec<SymbolDefinition>,
|
||||
alias: Option<TableAlias>,
|
||||
},
|
||||
/// The `XMLTABLE` table-valued function.
|
||||
/// Part of the SQL standard, supported by PostgreSQL, Oracle, and DB2.
|
||||
///
|
||||
/// <https://www.postgresql.org/docs/15/functions-xml.html#FUNCTIONS-XML-PROCESSING>
|
||||
///
|
||||
/// ```sql
|
||||
/// SELECT xmltable.*
|
||||
/// FROM xmldata,
|
||||
/// XMLTABLE('//ROWS/ROW'
|
||||
/// PASSING data
|
||||
/// COLUMNS id int PATH '@id',
|
||||
/// ordinality FOR ORDINALITY,
|
||||
/// "COUNTRY_NAME" text,
|
||||
/// country_id text PATH 'COUNTRY_ID',
|
||||
/// size_sq_km float PATH 'SIZE[@unit = "sq_km"]',
|
||||
/// size_other text PATH 'concat(SIZE[@unit!="sq_km"], " ", SIZE[@unit!="sq_km"]/@unit)',
|
||||
/// premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified'
|
||||
/// );
|
||||
/// ```
|
||||
XmlTable {
|
||||
/// Optional XMLNAMESPACES clause (empty if not present)
|
||||
namespaces: Vec<XmlNamespaceDefinition>,
|
||||
/// The row-generating XPath expression.
|
||||
row_expression: Expr,
|
||||
/// The PASSING clause specifying the document expression.
|
||||
passing: XmlPassingClause,
|
||||
/// The columns to be extracted from each generated row.
|
||||
columns: Vec<XmlTableColumn>,
|
||||
/// The alias for the table.
|
||||
alias: Option<TableAlias>,
|
||||
},
|
||||
}
|
||||
|
||||
/// The table sample modifier options
|
||||
|
@ -1936,6 +1967,31 @@ impl fmt::Display for TableFactor {
|
|||
}
|
||||
Ok(())
|
||||
}
|
||||
TableFactor::XmlTable {
|
||||
row_expression,
|
||||
passing,
|
||||
columns,
|
||||
alias,
|
||||
namespaces,
|
||||
} => {
|
||||
write!(f, "XMLTABLE(")?;
|
||||
if !namespaces.is_empty() {
|
||||
write!(
|
||||
f,
|
||||
"XMLNAMESPACES({}), ",
|
||||
display_comma_separated(namespaces)
|
||||
)?;
|
||||
}
|
||||
write!(
|
||||
f,
|
||||
"{row_expression}{passing} COLUMNS {columns})",
|
||||
columns = display_comma_separated(columns)
|
||||
)?;
|
||||
if let Some(alias) = alias {
|
||||
write!(f, " AS {alias}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3082,3 +3138,133 @@ pub enum UpdateTableFromKind {
|
|||
/// For Example: `UPDATE SET t1.name='aaa' FROM t1`
|
||||
AfterSet(Vec<TableWithJoins>),
|
||||
}
|
||||
|
||||
/// Defines the options for an XmlTable column: Named or ForOrdinality
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub enum XmlTableColumnOption {
|
||||
/// A named column with a type, optional path, and default value.
|
||||
NamedInfo {
|
||||
/// The type of the column to be extracted.
|
||||
r#type: DataType,
|
||||
/// The path to the column to be extracted. If None, defaults to the column name.
|
||||
path: Option<Expr>,
|
||||
/// Default value if path does not match
|
||||
default: Option<Expr>,
|
||||
/// Whether the column is nullable (NULL=true, NOT NULL=false)
|
||||
nullable: bool,
|
||||
},
|
||||
/// The FOR ORDINALITY marker
|
||||
ForOrdinality,
|
||||
}
|
||||
|
||||
/// A single column definition in XMLTABLE
|
||||
///
|
||||
/// ```sql
|
||||
/// COLUMNS
|
||||
/// id int PATH '@id',
|
||||
/// ordinality FOR ORDINALITY,
|
||||
/// "COUNTRY_NAME" text,
|
||||
/// country_id text PATH 'COUNTRY_ID',
|
||||
/// size_sq_km float PATH 'SIZE[@unit = "sq_km"]',
|
||||
/// size_other text PATH 'concat(SIZE[@unit!="sq_km"], " ", SIZE[@unit!="sq_km"]/@unit)',
|
||||
/// premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified'
|
||||
/// ```
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct XmlTableColumn {
|
||||
/// The name of the column.
|
||||
pub name: Ident,
|
||||
/// Column options: type/path/default or FOR ORDINALITY
|
||||
pub option: XmlTableColumnOption,
|
||||
}
|
||||
|
||||
impl fmt::Display for XmlTableColumn {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", self.name)?;
|
||||
match &self.option {
|
||||
XmlTableColumnOption::NamedInfo {
|
||||
r#type,
|
||||
path,
|
||||
default,
|
||||
nullable,
|
||||
} => {
|
||||
write!(f, " {}", r#type)?;
|
||||
if let Some(p) = path {
|
||||
write!(f, " PATH {}", p)?;
|
||||
}
|
||||
if let Some(d) = default {
|
||||
write!(f, " DEFAULT {}", d)?;
|
||||
}
|
||||
if !*nullable {
|
||||
write!(f, " NOT NULL")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
XmlTableColumnOption::ForOrdinality => {
|
||||
write!(f, " FOR ORDINALITY")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Argument passed in the XMLTABLE PASSING clause
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct XmlPassingArgument {
|
||||
pub expr: Expr,
|
||||
pub alias: Option<Ident>,
|
||||
pub by_value: bool, // True if BY VALUE is specified
|
||||
}
|
||||
|
||||
impl fmt::Display for XmlPassingArgument {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
if self.by_value {
|
||||
write!(f, "BY VALUE ")?;
|
||||
}
|
||||
write!(f, "{}", self.expr)?;
|
||||
if let Some(alias) = &self.alias {
|
||||
write!(f, " AS {}", alias)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// The PASSING clause for XMLTABLE
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct XmlPassingClause {
|
||||
pub arguments: Vec<XmlPassingArgument>,
|
||||
}
|
||||
|
||||
impl fmt::Display for XmlPassingClause {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
if !self.arguments.is_empty() {
|
||||
write!(f, " PASSING {}", display_comma_separated(&self.arguments))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a single XML namespace definition in the XMLNAMESPACES clause.
|
||||
///
|
||||
/// `namespace_uri AS namespace_name`
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct XmlNamespaceDefinition {
|
||||
/// The namespace URI (a text expression).
|
||||
pub uri: Expr,
|
||||
/// The alias for the namespace (a simple identifier).
|
||||
pub name: Ident,
|
||||
}
|
||||
|
||||
impl fmt::Display for XmlNamespaceDefinition {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{} AS {}", self.uri, self.name)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1909,6 +1909,7 @@ impl Spanned for TableFactor {
|
|||
.chain(alias.as_ref().map(|alias| alias.span())),
|
||||
),
|
||||
TableFactor::JsonTable { .. } => Span::empty(),
|
||||
TableFactor::XmlTable { .. } => Span::empty(),
|
||||
TableFactor::Pivot {
|
||||
table,
|
||||
aggregate_functions,
|
||||
|
|
|
@ -654,6 +654,7 @@ define_keywords!(
|
|||
PARTITION,
|
||||
PARTITIONED,
|
||||
PARTITIONS,
|
||||
PASSING,
|
||||
PASSWORD,
|
||||
PAST,
|
||||
PATH,
|
||||
|
@ -989,6 +990,8 @@ define_keywords!(
|
|||
WORK,
|
||||
WRITE,
|
||||
XML,
|
||||
XMLNAMESPACES,
|
||||
XMLTABLE,
|
||||
XOR,
|
||||
YEAR,
|
||||
YEARS,
|
||||
|
|
|
@ -11992,6 +11992,7 @@ impl<'a> Parser<'a> {
|
|||
| TableFactor::Function { alias, .. }
|
||||
| TableFactor::UNNEST { alias, .. }
|
||||
| TableFactor::JsonTable { alias, .. }
|
||||
| TableFactor::XmlTable { alias, .. }
|
||||
| TableFactor::OpenJsonTable { alias, .. }
|
||||
| TableFactor::TableFunction { alias, .. }
|
||||
| TableFactor::Pivot { alias, .. }
|
||||
|
@ -12107,6 +12108,9 @@ impl<'a> Parser<'a> {
|
|||
} else if self.parse_keyword_with_tokens(Keyword::OPENJSON, &[Token::LParen]) {
|
||||
self.prev_token();
|
||||
self.parse_open_json_table_factor()
|
||||
} else if self.parse_keyword_with_tokens(Keyword::XMLTABLE, &[Token::LParen]) {
|
||||
self.prev_token();
|
||||
self.parse_xml_table_factor()
|
||||
} else {
|
||||
let name = self.parse_object_name(true)?;
|
||||
|
||||
|
@ -12339,6 +12343,99 @@ impl<'a> Parser<'a> {
|
|||
})
|
||||
}
|
||||
|
||||
fn parse_xml_table_factor(&mut self) -> Result<TableFactor, ParserError> {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let namespaces = if self.parse_keyword(Keyword::XMLNAMESPACES) {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let namespaces = self.parse_comma_separated(Parser::parse_xml_namespace_definition)?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
self.expect_token(&Token::Comma)?;
|
||||
namespaces
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
let row_expression = self.parse_expr()?;
|
||||
let passing = self.parse_xml_passing_clause()?;
|
||||
self.expect_keyword_is(Keyword::COLUMNS)?;
|
||||
let columns = self.parse_comma_separated(Parser::parse_xml_table_column)?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
let alias = self.maybe_parse_table_alias()?;
|
||||
Ok(TableFactor::XmlTable {
|
||||
namespaces,
|
||||
row_expression,
|
||||
passing,
|
||||
columns,
|
||||
alias,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_xml_namespace_definition(&mut self) -> Result<XmlNamespaceDefinition, ParserError> {
|
||||
let uri = self.parse_expr()?;
|
||||
self.expect_keyword_is(Keyword::AS)?;
|
||||
let name = self.parse_identifier()?;
|
||||
Ok(XmlNamespaceDefinition { uri, name })
|
||||
}
|
||||
|
||||
fn parse_xml_table_column(&mut self) -> Result<XmlTableColumn, ParserError> {
|
||||
let name = self.parse_identifier()?;
|
||||
|
||||
let option = if self.parse_keyword(Keyword::FOR) {
|
||||
self.expect_keyword(Keyword::ORDINALITY)?;
|
||||
XmlTableColumnOption::ForOrdinality
|
||||
} else {
|
||||
let r#type = self.parse_data_type()?;
|
||||
let mut path = None;
|
||||
let mut default = None;
|
||||
|
||||
if self.parse_keyword(Keyword::PATH) {
|
||||
path = Some(self.parse_expr()?);
|
||||
}
|
||||
|
||||
if self.parse_keyword(Keyword::DEFAULT) {
|
||||
default = Some(self.parse_expr()?);
|
||||
}
|
||||
|
||||
let not_null = self.parse_keywords(&[Keyword::NOT, Keyword::NULL]);
|
||||
if !not_null {
|
||||
// NULL is the default but can be specified explicitly
|
||||
let _ = self.parse_keyword(Keyword::NULL);
|
||||
}
|
||||
|
||||
XmlTableColumnOption::NamedInfo {
|
||||
r#type,
|
||||
path,
|
||||
default,
|
||||
nullable: !not_null,
|
||||
}
|
||||
};
|
||||
Ok(XmlTableColumn { name, option })
|
||||
}
|
||||
|
||||
fn parse_xml_passing_clause(&mut self) -> Result<XmlPassingClause, ParserError> {
|
||||
let mut arguments = vec![];
|
||||
if self.parse_keyword(Keyword::PASSING) {
|
||||
loop {
|
||||
let by_value =
|
||||
self.parse_keyword(Keyword::BY) && self.expect_keyword(Keyword::VALUE).is_ok();
|
||||
let expr = self.parse_expr()?;
|
||||
let alias = if self.parse_keyword(Keyword::AS) {
|
||||
Some(self.parse_identifier()?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
arguments.push(XmlPassingArgument {
|
||||
expr,
|
||||
alias,
|
||||
by_value,
|
||||
});
|
||||
if !self.consume_token(&Token::Comma) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(XmlPassingClause { arguments })
|
||||
}
|
||||
|
||||
fn parse_match_recognize(&mut self, table: TableFactor) -> Result<TableFactor, ParserError> {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
|
||||
|
|
|
@ -11741,6 +11741,44 @@ fn test_group_by_grouping_sets() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Round-trips a set of `XMLTABLE` queries through every dialect.
#[test]
fn test_xmltable() {
    let queries = [
        "SELECT * FROM XMLTABLE('/root' PASSING data COLUMNS element TEXT)",
        // Minimal meaningful working example: returns a single row with a single column named y containing the value z
        "SELECT y FROM XMLTABLE('/X' PASSING '<X><y>z</y></X>' COLUMNS y TEXT)",
        // Test using subqueries
        "SELECT y FROM XMLTABLE((SELECT '/X') PASSING (SELECT CAST('<X><y>z</y></X>' AS xml)) COLUMNS y TEXT PATH (SELECT 'y'))",
        // NOT NULL
        "SELECT y FROM XMLTABLE('/X' PASSING '<X></X>' COLUMNS y TEXT NOT NULL)",
        "SELECT * FROM XMLTABLE('/root/row' PASSING xmldata COLUMNS id INT PATH '@id', name TEXT PATH 'name/text()', value FLOAT PATH 'value')",
        "SELECT * FROM XMLTABLE('//ROWS/ROW' PASSING data COLUMNS row_num FOR ORDINALITY, id INT PATH '@id', name TEXT PATH 'NAME' DEFAULT 'unnamed')",
        // Example from https://www.postgresql.org/docs/15/functions-xml.html#FUNCTIONS-XML-PROCESSING
        "SELECT xmltable.* FROM xmldata, XMLTABLE('//ROWS/ROW' PASSING data COLUMNS id INT PATH '@id', ordinality FOR ORDINALITY, \"COUNTRY_NAME\" TEXT, country_id TEXT PATH 'COUNTRY_ID', size_sq_km FLOAT PATH 'SIZE[@unit = \"sq_km\"]', size_other TEXT PATH 'concat(SIZE[@unit!=\"sq_km\"], \" \", SIZE[@unit!=\"sq_km\"]/@unit)', premier_name TEXT PATH 'PREMIER_NAME' DEFAULT 'not specified')",
        // Example from DB2 docs without explicit PASSING clause: https://www.ibm.com/docs/en/db2/12.1.0?topic=xquery-simple-column-name-passing-xmlexists-xmlquery-xmltable
        "SELECT X.* FROM T1, XMLTABLE('$CUSTLIST/customers/customerinfo' COLUMNS \"Cid\" BIGINT PATH '@Cid', \"Info\" XML PATH 'document{.}', \"History\" XML PATH 'NULL') AS X",
        // Example from PostgreSQL with XMLNAMESPACES
        "SELECT xmltable.* FROM XMLTABLE(XMLNAMESPACES('http://example.com/myns' AS x, 'http://example.com/b' AS \"B\"), '/x:example/x:item' PASSING (SELECT data FROM xmldata) COLUMNS foo INT PATH '@foo', bar INT PATH '@B:bar')",
    ];

    let dialects = all_dialects();
    for sql in queries {
        dialects.verified_only_select(sql);
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_match_recognize() {
|
||||
use MatchRecognizePattern::*;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue