Add support for XMLTABLE (#1817)

This commit is contained in:
Ophir LOJKINE 2025-04-23 18:03:06 +02:00 committed by GitHub
parent 3ec80e187d
commit 945f8e0534
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 327 additions and 1 deletions

View file

@ -81,7 +81,8 @@ pub use self::query::{
TableSampleBucket, TableSampleKind, TableSampleMethod, TableSampleModifier,
TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, TableSampleUnit, TableVersion,
TableWithJoins, Top, TopQuantity, UpdateTableFromKind, ValueTableMode, Values,
WildcardAdditionalOptions, With, WithFill,
WildcardAdditionalOptions, With, WithFill, XmlNamespaceDefinition, XmlPassingArgument,
XmlPassingClause, XmlTableColumn, XmlTableColumnOption,
};
pub use self::trigger::{

View file

@ -1271,6 +1271,37 @@ pub enum TableFactor {
symbols: Vec<SymbolDefinition>,
alias: Option<TableAlias>,
},
/// The `XMLTABLE` table-valued function.
/// Part of the SQL standard, supported by PostgreSQL, Oracle, and DB2.
///
/// <https://www.postgresql.org/docs/15/functions-xml.html#FUNCTIONS-XML-PROCESSING>
///
/// ```sql
/// SELECT xmltable.*
/// FROM xmldata,
/// XMLTABLE('//ROWS/ROW'
/// PASSING data
/// COLUMNS id int PATH '@id',
/// ordinality FOR ORDINALITY,
/// "COUNTRY_NAME" text,
/// country_id text PATH 'COUNTRY_ID',
/// size_sq_km float PATH 'SIZE[@unit = "sq_km"]',
/// size_other text PATH 'concat(SIZE[@unit!="sq_km"], " ", SIZE[@unit!="sq_km"]/@unit)',
/// premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified'
/// );
/// ```
XmlTable {
/// Optional XMLNAMESPACES clause (empty if not present)
namespaces: Vec<XmlNamespaceDefinition>,
/// The row-generating XPath expression.
row_expression: Expr,
/// The PASSING clause specifying the document expression.
passing: XmlPassingClause,
/// The columns to be extracted from each generated row.
columns: Vec<XmlTableColumn>,
/// The alias for the table.
alias: Option<TableAlias>,
},
}
/// The table sample modifier options
@ -1936,6 +1967,31 @@ impl fmt::Display for TableFactor {
}
Ok(())
}
TableFactor::XmlTable {
row_expression,
passing,
columns,
alias,
namespaces,
} => {
write!(f, "XMLTABLE(")?;
if !namespaces.is_empty() {
write!(
f,
"XMLNAMESPACES({}), ",
display_comma_separated(namespaces)
)?;
}
write!(
f,
"{row_expression}{passing} COLUMNS {columns})",
columns = display_comma_separated(columns)
)?;
if let Some(alias) = alias {
write!(f, " AS {alias}")?;
}
Ok(())
}
}
}
}
@ -3082,3 +3138,133 @@ pub enum UpdateTableFromKind {
/// For Example: `UPDATE SET t1.name='aaa' FROM t1`
AfterSet(Vec<TableWithJoins>),
}
/// The specification that follows a column name in the `COLUMNS` list of
/// `XMLTABLE`.
///
/// A column is either a typed column ([`XmlTableColumnOption::NamedInfo`]) or
/// the `FOR ORDINALITY` marker ([`XmlTableColumnOption::ForOrdinality`]).
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum XmlTableColumnOption {
/// A named column with a type, optional path, and default value.
///
/// Rendered as `<type> [PATH <path>] [DEFAULT <default>] [NOT NULL]`.
NamedInfo {
/// The type of the column to be extracted.
r#type: DataType,
/// The path to the column to be extracted. If `None`, defaults to the column name.
path: Option<Expr>,
/// Default value if the path does not match.
default: Option<Expr>,
/// Whether the column is nullable (`NULL` = `true`, `NOT NULL` = `false`).
///
/// Only the `false` case is written back out (as ` NOT NULL`); a nullable
/// column is rendered without any explicit `NULL` keyword.
nullable: bool,
},
/// The `FOR ORDINALITY` marker.
ForOrdinality,
}
/// A single column definition in the `COLUMNS` list of `XMLTABLE`.
///
/// Each entry is a column name followed by either a type with optional
/// `PATH` / `DEFAULT` / `NOT NULL` parts, or by `FOR ORDINALITY`:
///
/// ```sql
/// COLUMNS
/// id int PATH '@id',
/// ordinality FOR ORDINALITY,
/// "COUNTRY_NAME" text,
/// country_id text PATH 'COUNTRY_ID',
/// size_sq_km float PATH 'SIZE[@unit = "sq_km"]',
/// size_other text PATH 'concat(SIZE[@unit!="sq_km"], " ", SIZE[@unit!="sq_km"]/@unit)',
/// premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified'
/// ```
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct XmlTableColumn {
/// The name of the column.
pub name: Ident,
/// Column options: type/path/default/nullability, or `FOR ORDINALITY`.
pub option: XmlTableColumnOption,
}
/// Renders the column as `<name> FOR ORDINALITY` or as
/// `<name> <type> [PATH <path>] [DEFAULT <default>] [NOT NULL]`.
impl fmt::Display for XmlTableColumn {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { name, option } = self;
        write!(f, "{name}")?;

        // The ordinality marker carries no further details, so finish early.
        let (data_type, path, default, nullable) = match option {
            XmlTableColumnOption::ForOrdinality => return write!(f, " FOR ORDINALITY"),
            XmlTableColumnOption::NamedInfo {
                r#type,
                path,
                default,
                nullable,
            } => (r#type, path, default, *nullable),
        };

        write!(f, " {data_type}")?;
        if let Some(path_expr) = path {
            write!(f, " PATH {path_expr}")?;
        }
        if let Some(default_expr) = default {
            write!(f, " DEFAULT {default_expr}")?;
        }
        // Nullable is the implicit default and is therefore never spelled out.
        if nullable {
            return Ok(());
        }
        write!(f, " NOT NULL")
    }
}
/// A single argument of the `XMLTABLE` `PASSING` clause.
///
/// Rendered as `[BY VALUE ]<expr>[ AS <alias>]`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct XmlPassingArgument {
/// The expression being passed.
pub expr: Expr,
/// Optional name given to the argument with `AS <alias>`.
pub alias: Option<Ident>,
/// `true` if `BY VALUE` is specified before the expression.
pub by_value: bool,
}
/// Renders the argument as `[BY VALUE ]<expr>[ AS <alias>]`.
impl fmt::Display for XmlPassingArgument {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            expr,
            alias,
            by_value,
        } = self;

        let by_value_prefix = if *by_value { "BY VALUE " } else { "" };
        write!(f, "{by_value_prefix}{expr}")?;

        match alias {
            Some(alias) => write!(f, " AS {alias}"),
            None => Ok(()),
        }
    }
}
/// The `PASSING` clause for `XMLTABLE`.
///
/// The clause is optional: when `arguments` is empty nothing is rendered,
/// otherwise it is rendered as ` PASSING <arg>[, <arg>...]`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct XmlPassingClause {
/// The passed arguments, in source order (empty if there is no `PASSING` clause).
pub arguments: Vec<XmlPassingArgument>,
}
/// Renders ` PASSING <arg>[, <arg>...]` (with a leading space), or nothing
/// at all when there are no arguments.
impl fmt::Display for XmlPassingClause {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // An absent clause is represented by an empty argument list.
        if self.arguments.is_empty() {
            return Ok(());
        }
        write!(f, " PASSING {}", display_comma_separated(&self.arguments))
    }
}
/// Represents a single XML namespace definition in the `XMLNAMESPACES` clause.
///
/// `namespace_uri AS namespace_name`, for example:
///
/// ```sql
/// XMLNAMESPACES('http://example.com/myns' AS x, 'http://example.com/b' AS "B")
/// ```
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct XmlNamespaceDefinition {
/// The namespace URI (a text expression).
pub uri: Expr,
/// The alias for the namespace (a simple identifier).
pub name: Ident,
}
/// Renders the definition as `<uri> AS <name>`.
impl fmt::Display for XmlNamespaceDefinition {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { uri, name } = self;
        write!(f, "{uri} AS {name}")
    }
}

View file

@ -1909,6 +1909,7 @@ impl Spanned for TableFactor {
.chain(alias.as_ref().map(|alias| alias.span())),
),
TableFactor::JsonTable { .. } => Span::empty(),
TableFactor::XmlTable { .. } => Span::empty(),
TableFactor::Pivot {
table,
aggregate_functions,

View file

@ -654,6 +654,7 @@ define_keywords!(
PARTITION,
PARTITIONED,
PARTITIONS,
PASSING,
PASSWORD,
PAST,
PATH,
@ -989,6 +990,8 @@ define_keywords!(
WORK,
WRITE,
XML,
XMLNAMESPACES,
XMLTABLE,
XOR,
YEAR,
YEARS,

View file

@ -11992,6 +11992,7 @@ impl<'a> Parser<'a> {
| TableFactor::Function { alias, .. }
| TableFactor::UNNEST { alias, .. }
| TableFactor::JsonTable { alias, .. }
| TableFactor::XmlTable { alias, .. }
| TableFactor::OpenJsonTable { alias, .. }
| TableFactor::TableFunction { alias, .. }
| TableFactor::Pivot { alias, .. }
@ -12107,6 +12108,9 @@ impl<'a> Parser<'a> {
} else if self.parse_keyword_with_tokens(Keyword::OPENJSON, &[Token::LParen]) {
self.prev_token();
self.parse_open_json_table_factor()
} else if self.parse_keyword_with_tokens(Keyword::XMLTABLE, &[Token::LParen]) {
self.prev_token();
self.parse_xml_table_factor()
} else {
let name = self.parse_object_name(true)?;
@ -12339,6 +12343,99 @@ impl<'a> Parser<'a> {
})
}
/// Parses the parenthesised body of an `XMLTABLE(...)` table factor plus an
/// optional trailing table alias. The `XMLTABLE` keyword itself must already
/// have been consumed; the next token is expected to be `(`.
///
/// Grammar handled here:
/// `( [XMLNAMESPACES(<ns> [, ...]),] <row_expr> [PASSING ...] COLUMNS <col> [, ...] ) [alias]`
fn parse_xml_table_factor(&mut self) -> Result<TableFactor, ParserError> {
    self.expect_token(&Token::LParen)?;

    // The namespace list is optional, but when present it must be followed
    // by a comma separating it from the row expression.
    let mut namespaces = vec![];
    if self.parse_keyword(Keyword::XMLNAMESPACES) {
        self.expect_token(&Token::LParen)?;
        namespaces = self.parse_comma_separated(Parser::parse_xml_namespace_definition)?;
        self.expect_token(&Token::RParen)?;
        self.expect_token(&Token::Comma)?;
    }

    let row_expression = self.parse_expr()?;
    let passing = self.parse_xml_passing_clause()?;

    self.expect_keyword_is(Keyword::COLUMNS)?;
    let columns = self.parse_comma_separated(Parser::parse_xml_table_column)?;
    self.expect_token(&Token::RParen)?;

    let alias = self.maybe_parse_table_alias()?;

    Ok(TableFactor::XmlTable {
        namespaces,
        row_expression,
        passing,
        columns,
        alias,
    })
}
/// Parses one `<uri_expr> AS <name>` entry of an `XMLNAMESPACES(...)` list.
fn parse_xml_namespace_definition(&mut self) -> Result<XmlNamespaceDefinition, ParserError> {
    let uri = self.parse_expr()?;
    self.expect_keyword_is(Keyword::AS)?;
    self.parse_identifier()
        .map(|name| XmlNamespaceDefinition { uri, name })
}
/// Parses one entry of the `XMLTABLE` `COLUMNS` list:
/// either `<name> FOR ORDINALITY` or
/// `<name> <type> [PATH <expr>] [DEFAULT <expr>] [NOT NULL | NULL]`.
fn parse_xml_table_column(&mut self) -> Result<XmlTableColumn, ParserError> {
    let name = self.parse_identifier()?;

    // `FOR ORDINALITY` columns have no type or further options.
    if self.parse_keyword(Keyword::FOR) {
        self.expect_keyword(Keyword::ORDINALITY)?;
        return Ok(XmlTableColumn {
            name,
            option: XmlTableColumnOption::ForOrdinality,
        });
    }

    let data_type = self.parse_data_type()?;
    let path = if self.parse_keyword(Keyword::PATH) {
        Some(self.parse_expr()?)
    } else {
        None
    };
    let default = if self.parse_keyword(Keyword::DEFAULT) {
        Some(self.parse_expr()?)
    } else {
        None
    };

    let nullable = !self.parse_keywords(&[Keyword::NOT, Keyword::NULL]);
    if nullable {
        // NULL is the default but can be specified explicitly
        let _ = self.parse_keyword(Keyword::NULL);
    }

    Ok(XmlTableColumn {
        name,
        option: XmlTableColumnOption::NamedInfo {
            r#type: data_type,
            path,
            default,
            nullable,
        },
    })
}
/// Parses the optional `PASSING` clause of `XMLTABLE`:
/// `PASSING [BY VALUE] <expr> [AS <alias>] [, ...]`.
///
/// Returns a clause with an empty argument list when the `PASSING` keyword
/// is not present.
///
/// # Errors
///
/// Returns a [`ParserError`] if an argument expression or alias cannot be
/// parsed, or if `BY` is not immediately followed by `VALUE`.
fn parse_xml_passing_clause(&mut self) -> Result<XmlPassingClause, ParserError> {
    let mut arguments = vec![];
    if self.parse_keyword(Keyword::PASSING) {
        loop {
            let by_value = self.parse_keyword(Keyword::BY);
            if by_value {
                // `BY` has already been consumed, so anything other than
                // `VALUE` must be reported here. Swallowing the error would
                // silently drop `BY` and continue parsing from the wrong token.
                self.expect_keyword(Keyword::VALUE)?;
            }
            let expr = self.parse_expr()?;
            let alias = if self.parse_keyword(Keyword::AS) {
                Some(self.parse_identifier()?)
            } else {
                None
            };
            arguments.push(XmlPassingArgument {
                expr,
                alias,
                by_value,
            });
            if !self.consume_token(&Token::Comma) {
                break;
            }
        }
    }
    Ok(XmlPassingClause { arguments })
}
fn parse_match_recognize(&mut self, table: TableFactor) -> Result<TableFactor, ParserError> {
self.expect_token(&Token::LParen)?;

View file

@ -11741,6 +11741,44 @@ fn test_group_by_grouping_sets() {
);
}
/// Round-trips a set of `XMLTABLE` queries through every dialect, checking
/// that each one parses and is rendered back to exactly the same SQL text.
#[test]
fn test_xmltable() {
    let queries: [&str; 9] = [
        "SELECT * FROM XMLTABLE('/root' PASSING data COLUMNS element TEXT)",
        // Minimal meaningful working example: returns a single row with a single column named y containing the value z
        "SELECT y FROM XMLTABLE('/X' PASSING '<X><y>z</y></X>' COLUMNS y TEXT)",
        // Test using subqueries
        "SELECT y FROM XMLTABLE((SELECT '/X') PASSING (SELECT CAST('<X><y>z</y></X>' AS xml)) COLUMNS y TEXT PATH (SELECT 'y'))",
        // NOT NULL
        "SELECT y FROM XMLTABLE('/X' PASSING '<X></X>' COLUMNS y TEXT NOT NULL)",
        "SELECT * FROM XMLTABLE('/root/row' PASSING xmldata COLUMNS id INT PATH '@id', name TEXT PATH 'name/text()', value FLOAT PATH 'value')",
        "SELECT * FROM XMLTABLE('//ROWS/ROW' PASSING data COLUMNS row_num FOR ORDINALITY, id INT PATH '@id', name TEXT PATH 'NAME' DEFAULT 'unnamed')",
        // Example from https://www.postgresql.org/docs/15/functions-xml.html#FUNCTIONS-XML-PROCESSING
        "SELECT xmltable.* FROM xmldata, XMLTABLE('//ROWS/ROW' PASSING data COLUMNS id INT PATH '@id', ordinality FOR ORDINALITY, \"COUNTRY_NAME\" TEXT, country_id TEXT PATH 'COUNTRY_ID', size_sq_km FLOAT PATH 'SIZE[@unit = \"sq_km\"]', size_other TEXT PATH 'concat(SIZE[@unit!=\"sq_km\"], \" \", SIZE[@unit!=\"sq_km\"]/@unit)', premier_name TEXT PATH 'PREMIER_NAME' DEFAULT 'not specified')",
        // Example from DB2 docs without explicit PASSING clause: https://www.ibm.com/docs/en/db2/12.1.0?topic=xquery-simple-column-name-passing-xmlexists-xmlquery-xmltable
        "SELECT X.* FROM T1, XMLTABLE('$CUSTLIST/customers/customerinfo' COLUMNS \"Cid\" BIGINT PATH '@Cid', \"Info\" XML PATH 'document{.}', \"History\" XML PATH 'NULL') AS X",
        // Example from PostgreSQL with XMLNAMESPACES
        "SELECT xmltable.* FROM XMLTABLE(XMLNAMESPACES('http://example.com/myns' AS x, 'http://example.com/b' AS \"B\"), '/x:example/x:item' PASSING (SELECT data FROM xmldata) COLUMNS foo INT PATH '@foo', bar INT PATH '@B:bar')",
    ];

    // Checked in declaration order; the first query that fails to round-trip panics.
    for &sql in queries.iter() {
        all_dialects().verified_only_select(sql);
    }
}
#[test]
fn test_match_recognize() {
use MatchRecognizePattern::*;