mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
Add support for TABLESAMPLE
pipe operator (#1860)
This commit is contained in:
parent
eacf00d269
commit
a8bde39efb
3 changed files with 29 additions and 5 deletions
|
@ -1559,7 +1559,7 @@ impl fmt::Display for TableSampleBucket {
|
||||||
}
|
}
|
||||||
impl fmt::Display for TableSample {
|
impl fmt::Display for TableSample {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
write!(f, " {}", self.modifier)?;
|
write!(f, "{}", self.modifier)?;
|
||||||
if let Some(name) = &self.name {
|
if let Some(name) = &self.name {
|
||||||
write!(f, " {}", name)?;
|
write!(f, " {}", name)?;
|
||||||
}
|
}
|
||||||
|
@ -1862,7 +1862,7 @@ impl fmt::Display for TableFactor {
|
||||||
write!(f, " WITH ORDINALITY")?;
|
write!(f, " WITH ORDINALITY")?;
|
||||||
}
|
}
|
||||||
if let Some(TableSampleKind::BeforeTableAlias(sample)) = sample {
|
if let Some(TableSampleKind::BeforeTableAlias(sample)) = sample {
|
||||||
write!(f, "{sample}")?;
|
write!(f, " {sample}")?;
|
||||||
}
|
}
|
||||||
if let Some(alias) = alias {
|
if let Some(alias) = alias {
|
||||||
write!(f, " AS {alias}")?;
|
write!(f, " AS {alias}")?;
|
||||||
|
@ -1877,7 +1877,7 @@ impl fmt::Display for TableFactor {
|
||||||
write!(f, "{version}")?;
|
write!(f, "{version}")?;
|
||||||
}
|
}
|
||||||
if let Some(TableSampleKind::AfterTableAlias(sample)) = sample {
|
if let Some(TableSampleKind::AfterTableAlias(sample)) = sample {
|
||||||
write!(f, "{sample}")?;
|
write!(f, " {sample}")?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -2680,6 +2680,10 @@ pub enum PipeOperator {
|
||||||
full_table_exprs: Vec<ExprWithAliasAndOrderBy>,
|
full_table_exprs: Vec<ExprWithAliasAndOrderBy>,
|
||||||
group_by_expr: Vec<ExprWithAliasAndOrderBy>,
|
group_by_expr: Vec<ExprWithAliasAndOrderBy>,
|
||||||
},
|
},
|
||||||
|
/// Selects a random sample of rows from the input table.
|
||||||
|
/// Syntax: `|> TABLESAMPLE SYSTEM (10 PERCENT)`
|
||||||
|
/// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#tablesample_pipe_operator>
|
||||||
|
TableSample { sample: Box<TableSample> },
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for PipeOperator {
|
impl fmt::Display for PipeOperator {
|
||||||
|
@ -2731,6 +2735,10 @@ impl fmt::Display for PipeOperator {
|
||||||
PipeOperator::OrderBy { exprs } => {
|
PipeOperator::OrderBy { exprs } => {
|
||||||
write!(f, "ORDER BY {}", display_comma_separated(exprs.as_slice()))
|
write!(f, "ORDER BY {}", display_comma_separated(exprs.as_slice()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PipeOperator::TableSample { sample } => {
|
||||||
|
write!(f, "{}", sample)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -11054,6 +11054,7 @@ impl<'a> Parser<'a> {
|
||||||
Keyword::LIMIT,
|
Keyword::LIMIT,
|
||||||
Keyword::AGGREGATE,
|
Keyword::AGGREGATE,
|
||||||
Keyword::ORDER,
|
Keyword::ORDER,
|
||||||
|
Keyword::TABLESAMPLE,
|
||||||
])?;
|
])?;
|
||||||
match kw {
|
match kw {
|
||||||
Keyword::SELECT => {
|
Keyword::SELECT => {
|
||||||
|
@ -11116,6 +11117,10 @@ impl<'a> Parser<'a> {
|
||||||
let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?;
|
let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?;
|
||||||
pipe_operators.push(PipeOperator::OrderBy { exprs })
|
pipe_operators.push(PipeOperator::OrderBy { exprs })
|
||||||
}
|
}
|
||||||
|
Keyword::TABLESAMPLE => {
|
||||||
|
let sample = self.parse_table_sample(TableSampleModifier::TableSample)?;
|
||||||
|
pipe_operators.push(PipeOperator::TableSample { sample });
|
||||||
|
}
|
||||||
unhandled => {
|
unhandled => {
|
||||||
return Err(ParserError::ParserError(format!(
|
return Err(ParserError::ParserError(format!(
|
||||||
"`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}"
|
"`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}"
|
||||||
|
@ -12760,7 +12765,13 @@ impl<'a> Parser<'a> {
|
||||||
} else {
|
} else {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
};
|
||||||
|
self.parse_table_sample(modifier).map(Some)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_table_sample(
|
||||||
|
&mut self,
|
||||||
|
modifier: TableSampleModifier,
|
||||||
|
) -> Result<Box<TableSample>, ParserError> {
|
||||||
let name = match self.parse_one_of_keywords(&[
|
let name = match self.parse_one_of_keywords(&[
|
||||||
Keyword::BERNOULLI,
|
Keyword::BERNOULLI,
|
||||||
Keyword::ROW,
|
Keyword::ROW,
|
||||||
|
@ -12842,14 +12853,14 @@ impl<'a> Parser<'a> {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(Some(Box::new(TableSample {
|
Ok(Box::new(TableSample {
|
||||||
modifier,
|
modifier,
|
||||||
name,
|
name,
|
||||||
quantity,
|
quantity,
|
||||||
seed,
|
seed,
|
||||||
bucket,
|
bucket,
|
||||||
offset,
|
offset,
|
||||||
})))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_table_sample_seed(
|
fn parse_table_sample_seed(
|
||||||
|
|
|
@ -15156,6 +15156,11 @@ fn parse_pipeline_operator() {
|
||||||
dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC");
|
dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC");
|
||||||
dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC");
|
dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC");
|
||||||
|
|
||||||
|
// tablesample pipe operator
|
||||||
|
dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE BERNOULLI (50)");
|
||||||
|
dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50 PERCENT)");
|
||||||
|
dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)");
|
||||||
|
|
||||||
// many pipes
|
// many pipes
|
||||||
dialects.verified_stmt(
|
dialects.verified_stmt(
|
||||||
"SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC",
|
"SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue