Add support for TABLESAMPLE pipe operator (#1860)

This commit is contained in:
Hendrik Makait 2025-05-30 09:14:36 +02:00 committed by GitHub
parent eacf00d269
commit a8bde39efb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 29 additions and 5 deletions

View file

@ -1559,7 +1559,7 @@ impl fmt::Display for TableSampleBucket {
}
impl fmt::Display for TableSample {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, " {}", self.modifier)?;
write!(f, "{}", self.modifier)?;
if let Some(name) = &self.name {
write!(f, " {}", name)?;
}
@ -1862,7 +1862,7 @@ impl fmt::Display for TableFactor {
write!(f, " WITH ORDINALITY")?;
}
if let Some(TableSampleKind::BeforeTableAlias(sample)) = sample {
write!(f, "{sample}")?;
write!(f, " {sample}")?;
}
if let Some(alias) = alias {
write!(f, " AS {alias}")?;
@ -1877,7 +1877,7 @@ impl fmt::Display for TableFactor {
write!(f, "{version}")?;
}
if let Some(TableSampleKind::AfterTableAlias(sample)) = sample {
write!(f, "{sample}")?;
write!(f, " {sample}")?;
}
Ok(())
}
@ -2680,6 +2680,10 @@ pub enum PipeOperator {
full_table_exprs: Vec<ExprWithAliasAndOrderBy>,
group_by_expr: Vec<ExprWithAliasAndOrderBy>,
},
/// Selects a random sample of rows from the input table.
/// Syntax: `|> TABLESAMPLE SYSTEM (10 PERCENT)`
/// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#tablesample_pipe_operator>
TableSample { sample: Box<TableSample> },
}
impl fmt::Display for PipeOperator {
@ -2731,6 +2735,10 @@ impl fmt::Display for PipeOperator {
PipeOperator::OrderBy { exprs } => {
write!(f, "ORDER BY {}", display_comma_separated(exprs.as_slice()))
}
PipeOperator::TableSample { sample } => {
write!(f, "{}", sample)
}
}
}
}

View file

@ -11054,6 +11054,7 @@ impl<'a> Parser<'a> {
Keyword::LIMIT,
Keyword::AGGREGATE,
Keyword::ORDER,
Keyword::TABLESAMPLE,
])?;
match kw {
Keyword::SELECT => {
@ -11116,6 +11117,10 @@ impl<'a> Parser<'a> {
let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?;
pipe_operators.push(PipeOperator::OrderBy { exprs })
}
Keyword::TABLESAMPLE => {
let sample = self.parse_table_sample(TableSampleModifier::TableSample)?;
pipe_operators.push(PipeOperator::TableSample { sample });
}
unhandled => {
return Err(ParserError::ParserError(format!(
"`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}"
@ -12760,7 +12765,13 @@ impl<'a> Parser<'a> {
} else {
return Ok(None);
};
self.parse_table_sample(modifier).map(Some)
}
fn parse_table_sample(
&mut self,
modifier: TableSampleModifier,
) -> Result<Box<TableSample>, ParserError> {
let name = match self.parse_one_of_keywords(&[
Keyword::BERNOULLI,
Keyword::ROW,
@ -12842,14 +12853,14 @@ impl<'a> Parser<'a> {
None
};
Ok(Some(Box::new(TableSample {
Ok(Box::new(TableSample {
modifier,
name,
quantity,
seed,
bucket,
offset,
})))
}))
}
fn parse_table_sample_seed(

View file

@ -15156,6 +15156,11 @@ fn parse_pipeline_operator() {
dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC");
dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC");
// tablesample pipe operator
dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE BERNOULLI (50)");
dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50 PERCENT)");
dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)");
// many pipes
dialects.verified_stmt(
"SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC",