datafusion-sqlparse/src/test_utils.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
/// This module contains internal utilities used for testing the library.
/// While technically public, the library's users are not supposed to rely
/// on this module, as it will change without notice.
//
// Integration tests (i.e. everything under `tests/`) import this
// via `tests/test_utils/helpers`.
#[cfg(not(feature = "std"))]
use alloc::{
boxed::Box,
string::{String, ToString},
vec,
vec::Vec,
};
use core::fmt::Debug;
use crate::dialect::*;
use crate::parser::{Parser, ParserError};
use crate::tokenizer::{Token, Tokenizer};
use crate::{ast::*, parser::ParserOptions};
#[cfg(test)]
use pretty_assertions::assert_eq;
/// Tests use the methods on this struct to invoke the parser on one or
/// multiple dialects.
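///
/// A minimal usage sketch (marked `ignore`; it assumes this module's items
/// and the dialect types are already in scope):
///
/// ```ignore
/// let dialects = TestedDialects::new(vec![
///     Box::new(GenericDialect {}),
///     Box::new(PostgreSqlDialect {}),
/// ]);
/// // Parses the SQL on every dialect and asserts the results agree.
/// let stmt = dialects.verified_stmt("SELECT a FROM t");
/// ```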
pub struct TestedDialects {
pub dialects: Vec<Box<dyn Dialect>>,
pub options: Option<ParserOptions>,
pub recursion_limit: Option<usize>,
}
impl TestedDialects {
/// Create a TestedDialects with default options and the given dialects.
pub fn new(dialects: Vec<Box<dyn Dialect>>) -> Self {
Self {
dialects,
options: None,
recursion_limit: None,
}
}
pub fn new_with_options(dialects: Vec<Box<dyn Dialect>>, options: ParserOptions) -> Self {
Self {
dialects,
options: Some(options),
recursion_limit: None,
}
}
pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
self.recursion_limit = Some(recursion_limit);
self
}
fn new_parser<'a>(&self, dialect: &'a dyn Dialect) -> Parser<'a> {
let parser = Parser::new(dialect);
let parser = if let Some(options) = &self.options {
parser.with_options(options.clone())
} else {
parser
};
let parser = if let Some(recursion_limit) = &self.recursion_limit {
parser.with_recursion_limit(*recursion_limit)
} else {
parser
};
parser
}
/// Run the given function for all of `self.dialects`, assert that they
/// return the same result, and return that result.
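///
/// A sketch of a typical call (illustrative only; `dialects` is a
/// [`TestedDialects`] value):
///
/// ```ignore
/// // `Parser::parse_sql` runs once per dialect; all results must be equal.
/// let stmts = dialects.one_of_identical_results(|dialect| {
///     Parser::parse_sql(dialect, "SELECT 1")
/// });
/// ```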
pub fn one_of_identical_results<F, T: Debug + PartialEq>(&self, f: F) -> T
where
F: Fn(&dyn Dialect) -> T,
{
let parse_results = self.dialects.iter().map(|dialect| (dialect, f(&**dialect)));
parse_results
.fold(None, |s, (dialect, parsed)| {
if let Some((prev_dialect, prev_parsed)) = s {
assert_eq!(
prev_parsed, parsed,
"Parse results with {prev_dialect:?} are different from {dialect:?}"
);
}
Some((dialect, parsed))
})
.expect("tested dialects cannot be empty")
.1
}
pub fn run_parser_method<F, T: Debug + PartialEq>(&self, sql: &str, f: F) -> T
where
F: Fn(&mut Parser) -> T,
{
self.one_of_identical_results(|dialect| {
let mut parser = self.new_parser(dialect).try_with_sql(sql).unwrap();
f(&mut parser)
})
}
/// Parses a single SQL string into multiple statements, ensuring
/// the result is the same for all tested dialects.
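///
/// Sketch (illustrative only):
///
/// ```ignore
/// let stmts = dialects.parse_sql_statements("SELECT 1; SELECT 2").unwrap();
/// assert_eq!(stmts.len(), 2);
/// ```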
pub fn parse_sql_statements(&self, sql: &str) -> Result<Vec<Statement>, ParserError> {
self.one_of_identical_results(|dialect| {
let mut tokenizer = Tokenizer::new(dialect, sql);
if let Some(options) = &self.options {
tokenizer = tokenizer.with_unescape(options.unescape);
}
let tokens = tokenizer.tokenize()?;
self.new_parser(dialect)
.with_tokens(tokens)
.parse_statements()
})
// To fail the `ensure_multiple_dialects_are_tested` test:
// Parser::parse_sql(&**self.dialects.first().unwrap(), sql)
}
/// Ensures that `sql` parses as a single [Statement] for all tested
/// dialects.
///
/// In general, the canonical SQL should be the same (see crate
/// documentation for rationale) and you should prefer the `verified_`
/// variants in testing, such as [`verified_stmt`] or
/// [`verified_query`].
///
/// If `canonical` is non-empty, this function additionally asserts
/// that:
///
/// 1. parsing `sql` results in the same [`Statement`] as parsing
/// `canonical`.
///
/// 2. re-serializing the result of parsing `sql` produces the same
/// `canonical` SQL string.
///
/// For multiple statements, use [`statements_parse_to`].
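///
/// Sketch (illustrative only):
///
/// ```ignore
/// // The trailing semicolon is accepted on input but is not part of the
/// // canonical text.
/// let stmt = dialects.one_statement_parses_to("SELECT a FROM t;", "SELECT a FROM t");
/// assert_eq!(stmt.to_string(), "SELECT a FROM t");
/// ```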
pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement {
let mut statements = self.parse_sql_statements(sql).expect(sql);
assert_eq!(statements.len(), 1);
if !canonical.is_empty() && sql != canonical {
assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements);
}
let only_statement = statements.pop().unwrap();
if !canonical.is_empty() {
assert_eq!(canonical, only_statement.to_string())
}
only_statement
}
/// The same as [`one_statement_parses_to`], but for multiple statements.
pub fn statements_parse_to(&self, sql: &str, canonical: &str) -> Vec<Statement> {
let statements = self.parse_sql_statements(sql).expect(sql);
if !canonical.is_empty() && sql != canonical {
assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements);
} else {
assert_eq!(
sql,
statements
.iter()
.map(|s| s.to_string())
.collect::<Vec<_>>()
.join("; ")
);
}
statements
}
/// Ensures that `sql` parses as an [`Expr`], and that
/// re-serializing the parse result produces the `canonical` string.
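///
/// Sketch (illustrative only):
///
/// ```ignore
/// // Serialization normalizes the spacing around the operator.
/// let expr = dialects.expr_parses_to("1+2", "1 + 2");
/// ```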
pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr {
let ast = self
.run_parser_method(sql, |parser| parser.parse_expr())
.unwrap();
assert_eq!(canonical, &ast.to_string());
ast
}
/// Ensures that `sql` parses as a single [Statement], and that
/// re-serializing the parse result produces the same `sql`
/// string (is not modified after a serialization round-trip).
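///
/// Sketch (illustrative only):
///
/// ```ignore
/// // Must round-trip unchanged on every tested dialect.
/// let stmt = dialects.verified_stmt("SELECT a, b FROM t WHERE a = 1");
/// ```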
pub fn verified_stmt(&self, sql: &str) -> Statement {
self.one_statement_parses_to(sql, sql)
}
/// Ensures that `sql` parses as a single [Query], and that
/// re-serializing the parse result produces the same `sql`
/// string (is not modified after a serialization round-trip).
pub fn verified_query(&self, sql: &str) -> Query {
match self.verified_stmt(sql) {
Statement::Query(query) => *query,
_ => panic!("Expected Query"),
}
}
/// Ensures that `sql` parses as a single [Query], and that
/// re-serializing the parse result matches the given canonical
/// sql string.
pub fn verified_query_with_canonical(&self, query: &str, canonical: &str) -> Query {
match self.one_statement_parses_to(query, canonical) {
Statement::Query(query) => *query,
_ => panic!("Expected Query"),
}
}
/// Ensures that `sql` parses as a single [Select], and that
/// re-serializing the parse result produces the same `sql`
/// string (is not modified after a serialization round-trip).
pub fn verified_only_select(&self, query: &str) -> Select {
match *self.verified_query(query).body {
SetExpr::Select(s) => *s,
_ => panic!("Expected SetExpr::Select"),
}
}
/// Ensures that `sql` parses as a single [`Select`], and that additionally:
///
/// 1. parsing `sql` results in the same [`Statement`] as parsing
/// `canonical`.
///
/// 2. re-serializing the result of parsing `sql` produces the same
/// `canonical` SQL string.
pub fn verified_only_select_with_canonical(&self, query: &str, canonical: &str) -> Select {
let q = match self.one_statement_parses_to(query, canonical) {
Statement::Query(query) => *query,
_ => panic!("Expected Query"),
};
match *q.body {
SetExpr::Select(s) => *s,
_ => panic!("Expected SetExpr::Select"),
}
}
/// Ensures that `sql` parses as an [`Expr`], and that
/// re-serializing the parse result produces the same `sql`
/// string (is not modified after a serialization round-trip).
pub fn verified_expr(&self, sql: &str) -> Expr {
self.expr_parses_to(sql, sql)
}
/// Check that the tokenizer returns the expected tokens for the given SQL.
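///
/// Sketch (illustrative only; the exact [`Token`] variants shown are an
/// assumption and may differ from the real tokenizer output):
///
/// ```ignore
/// dialects.tokenizes_to(
///     "SELECT 1",
///     vec![
///         Token::make_keyword("SELECT"),
///         Token::Whitespace(Whitespace::Space),
///         Token::Number("1".to_string(), false),
///     ],
/// );
/// ```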
pub fn tokenizes_to(&self, sql: &str, expected: Vec<Token>) {
if self.dialects.is_empty() {
panic!("No dialects to test");
}
self.dialects.iter().for_each(|dialect| {
let mut tokenizer = Tokenizer::new(&**dialect, sql);
if let Some(options) = &self.options {
tokenizer = tokenizer.with_unescape(options.unescape);
}
let tokens = tokenizer.tokenize().unwrap();
assert_eq!(expected, tokens, "Tokenized differently for {dialect:?}");
});
}
}
/// Returns all available dialects.
pub fn all_dialects() -> TestedDialects {
TestedDialects::new(vec![
Box::new(GenericDialect {}),
Box::new(PostgreSqlDialect {}),
Box::new(MsSqlDialect {}),
Box::new(AnsiDialect {}),
Box::new(SnowflakeDialect {}),
Box::new(HiveDialect {}),
Box::new(RedshiftSqlDialect {}),
Box::new(MySqlDialect {}),
Box::new(BigQueryDialect {}),
Box::new(SQLiteDialect {}),
Box::new(DuckDbDialect {}),
Box::new(DatabricksDialect {}),
Box::new(ClickHouseDialect {}),
])
}
/// Returns all available dialects with the specified parser options.
pub fn all_dialects_with_options(options: ParserOptions) -> TestedDialects {
TestedDialects::new_with_options(all_dialects().dialects, options)
}
/// Returns all dialects matching the given predicate.
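///
/// Sketch (illustrative only; the capability method used in the predicate is
/// just an example of the kind of check typically passed in):
///
/// ```ignore
/// let dialects = all_dialects_where(|d| d.supports_filter_during_aggregation());
/// ```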
pub fn all_dialects_where<F>(predicate: F) -> TestedDialects
where
F: Fn(&dyn Dialect) -> bool,
{
let mut dialects = all_dialects();
dialects.dialects.retain(|d| predicate(&**d));
dialects
}
/// Returns all available dialects, excluding those for which the `except`
/// predicate returns true.
pub fn all_dialects_except<F>(except: F) -> TestedDialects
where
F: Fn(&dyn Dialect) -> bool,
{
all_dialects_where(|d| !except(d))
}
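/// Asserts that the `to_string` representations of `actual` match `expected`
/// element-wise.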
pub fn assert_eq_vec<T: ToString>(expected: &[&str], actual: &[T]) {
assert_eq!(
expected,
actual.iter().map(ToString::to_string).collect::<Vec<_>>()
);
}
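/// Returns the only element of the given collection, panicking if it does not
/// contain exactly one item.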
pub fn only<T>(v: impl IntoIterator<Item = T>) -> T {
let mut iter = v.into_iter();
if let (Some(item), None) = (iter.next(), iter.next()) {
item
} else {
panic!("only called on collection without exactly one item")
}
}
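/// Returns the expression of an unnamed [`SelectItem`], panicking for aliased
/// expressions, wildcards, and any other variant.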
pub fn expr_from_projection(item: &SelectItem) -> &Expr {
match item {
SelectItem::UnnamedExpr(expr) => expr,
_ => panic!("Expected UnnamedExpr"),
}
}
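/// Asserts that `stmt` is an `ALTER TABLE` statement on the table named
/// `expected_name`, with no `IF EXISTS`, `ONLY`, or `iceberg` modifiers set,
/// and returns its single operation.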
pub fn alter_table_op_with_name(stmt: Statement, expected_name: &str) -> AlterTableOperation {
match stmt {
Statement::AlterTable {
name,
if_exists,
only: is_only,
operations,
on_cluster: _,
location: _,
iceberg,
} => {
assert_eq!(name.to_string(), expected_name);
assert!(!if_exists);
assert!(!is_only);
assert!(!iceberg);
only(operations)
}
_ => panic!("Expected ALTER TABLE statement"),
}
}
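/// Like [`alter_table_op_with_name`], with the expected table name fixed to
/// `"tab"`.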
pub fn alter_table_op(stmt: Statement) -> AlterTableOperation {
alter_table_op_with_name(stmt, "tab")
}
/// Creates a `Value::Number`, panicking if `n` is not a number.
pub fn number(n: &str) -> Value {
Value::Number(n.parse().unwrap(), false)
}
/// Creates a [Value::SingleQuotedString]
pub fn single_quoted_string(s: impl Into<String>) -> Value {
Value::SingleQuotedString(s.into())
}
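/// Creates a [`TableAlias`] with the given name and no columns, wrapped in
/// `Some` for convenient use in AST builders.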
pub fn table_alias(name: impl Into<String>) -> Option<TableAlias> {
Some(TableAlias {
name: Ident::new(name),
columns: vec![],
})
}
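/// Creates a [`TableFactor::Table`] with the given name and every optional
/// field left unset.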
pub fn table(name: impl Into<String>) -> TableFactor {
TableFactor::Table {
name: ObjectName::from(vec![Ident::new(name.into())]),
alias: None,
args: None,
with_hints: vec![],
version: None,
partitions: vec![],
with_ordinality: false,
json_path: None,
sample: None,
index_hints: vec![],
}
}
pub fn table_from_name(name: ObjectName) -> TableFactor {
TableFactor::Table {
name,
alias: None,
args: None,
with_hints: vec![],
version: None,
partitions: vec![],
with_ordinality: false,
json_path: None,
sample: None,
index_hints: vec![],
}
}
pub fn table_with_alias(name: impl Into<String>, alias: impl Into<String>) -> TableFactor {
TableFactor::Table {
name: ObjectName::from(vec![Ident::new(name)]),
alias: Some(TableAlias {
name: Ident::new(alias),
columns: vec![],
}),
args: None,
with_hints: vec![],
version: None,
partitions: vec![],
with_ordinality: false,
json_path: None,
sample: None,
index_hints: vec![],
}
}
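/// Creates a non-global [`Join`] of the given relation, using a plain `JOIN`
/// operator with a `NATURAL` constraint.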
pub fn join(relation: TableFactor) -> Join {
Join {
relation,
global: false,
join_operator: JoinOperator::Join(JoinConstraint::Natural),
}
}
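/// Creates an [`Expr::Function`] call to `function` with the given unnamed
/// arguments and no additional clauses.
///
/// Sketch (illustrative only):
///
/// ```ignore
/// // Builds the AST for `COUNT(x)`.
/// let expr = call("COUNT", [Expr::Identifier(Ident::new("x"))]);
/// ```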
pub fn call(function: &str, args: impl IntoIterator<Item = Expr>) -> Expr {
Expr::Function(Function {
name: ObjectName::from(vec![Ident::new(function)]),
uses_odbc_syntax: false,
parameters: FunctionArguments::None,
args: FunctionArguments::List(FunctionArgumentList {
duplicate_treatment: None,
args: args
.into_iter()
.map(|arg| FunctionArg::Unnamed(FunctionArgExpr::Expr(arg)))
.collect(),
clauses: vec![],
}),
filter: None,
null_treatment: None,
over: None,
within_group: vec![],
})
}
/// Gets the first index column (MySQL calls it a key part) of the first index found in a
/// [`Statement::CreateIndex`], [`Statement::CreateTable`], or [`Statement::AlterTable`].
pub fn index_column(stmt: Statement) -> Expr {
match stmt {
Statement::CreateIndex(CreateIndex { columns, .. }) => {
columns.first().unwrap().column.expr.clone()
}
Statement::CreateTable(CreateTable { constraints, .. }) => {
match constraints.first().unwrap() {
TableConstraint::Index { columns, .. } => {
columns.first().unwrap().column.expr.clone()
}
TableConstraint::Unique { columns, .. } => {
columns.first().unwrap().column.expr.clone()
}
TableConstraint::PrimaryKey { columns, .. } => {
columns.first().unwrap().column.expr.clone()
}
TableConstraint::FulltextOrSpatial { columns, .. } => {
columns.first().unwrap().column.expr.clone()
}
_ => panic!("Expected an index, unique, primary, full text, or spatial constraint (foreign key does not support general key part expressions)"),
}
}
Statement::AlterTable { operations, .. } => match operations.first().unwrap() {
AlterTableOperation::AddConstraint { constraint, .. } => {
match constraint {
TableConstraint::Index { columns, .. } => {
columns.first().unwrap().column.expr.clone()
}
TableConstraint::Unique { columns, .. } => {
columns.first().unwrap().column.expr.clone()
}
TableConstraint::PrimaryKey { columns, .. } => {
columns.first().unwrap().column.expr.clone()
}
TableConstraint::FulltextOrSpatial {
columns,
..
} => columns.first().unwrap().column.expr.clone(),
_ => panic!("Expected an index, unique, primary, full text, or spatial constraint (foreign key does not support general key part expressions)"),
}
}
_ => panic!("Expected a constraint"),
},
_ => panic!("Expected CREATE INDEX, ALTER TABLE, or CREATE TABLE, got: {stmt:?}"),
}
}