// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. use sqlparser::ast::helpers::attached_token::AttachedToken; use sqlparser::ast::*; use sqlparser::dialect::{DatabricksDialect, GenericDialect}; use sqlparser::parser::ParserError; use sqlparser::tokenizer::Span; use test_utils::*; #[macro_use] mod test_utils; fn databricks() -> TestedDialects { TestedDialects::new(vec![Box::new(DatabricksDialect {})]) } fn databricks_and_generic() -> TestedDialects { TestedDialects::new(vec![ Box::new(DatabricksDialect {}), Box::new(GenericDialect {}), ]) } #[test] fn test_databricks_identifiers() { // databricks uses backtick for delimited identifiers assert_eq!( databricks().verified_only_select("SELECT `Ä`").projection[0], SelectItem::UnnamedExpr(Expr::Identifier(Ident::with_quote('`', "Ä"))) ); // double quotes produce string literals, not delimited identifiers assert_eq!( databricks() .verified_only_select(r#"SELECT "Ä""#) .projection[0], SelectItem::UnnamedExpr(Expr::Value( (Value::DoubleQuotedString("Ä".to_owned())).with_empty_span() )) ); } #[test] fn test_databricks_exists() { // exists is a function in databricks assert_eq!( databricks().verified_expr("exists(array(1, 2, 3), x -> x IS NULL)"), call( "exists", [ call( "array", [ Expr::value(number("1")), Expr::value(number("2")), Expr::value(number("3")) ] ), Expr::Lambda(LambdaFunction { params: OneOrManyWithParens::One(Ident::new("x")), body: Box::new(Expr::IsNull(Box::new(Expr::Identifier(Ident::new("x"))))) }) ] ), ); let res = databricks().parse_sql_statements("SELECT EXISTS ("); assert_eq!( // TODO: improve this error message... ParserError::ParserError("Expected: an expression, found: EOF".to_string()), res.unwrap_err(), ); } #[test] fn test_databricks_lambdas() { #[rustfmt::skip] let sql = concat!( "SELECT array_sort(array('Hello', 'World'), ", "(p1, p2) -> CASE WHEN p1 = p2 THEN 0 ", "WHEN reverse(p1) < reverse(p2) THEN -1 ", "ELSE 1 END)", ); pretty_assertions::assert_eq!( SelectItem::UnnamedExpr(call( "array_sort", [ call( "array", [ Expr::value(Value::SingleQuotedString("Hello".to_owned())), Expr::value(Value::SingleQuotedString("World".to_owned())) ] ), Expr::Lambda(LambdaFunction { params: OneOrManyWithParens::Many(vec![Ident::new("p1"), Ident::new("p2")]), body: Box::new(Expr::Case { case_token: AttachedToken::empty(), end_token: AttachedToken::empty(), operand: None, conditions: vec![ CaseWhen { condition: Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("p1"))), op: BinaryOperator::Eq, right: Box::new(Expr::Identifier(Ident::new("p2"))) }, result: Expr::value(number("0")) }, CaseWhen { condition: Expr::BinaryOp { left: Box::new(call( "reverse", [Expr::Identifier(Ident::new("p1"))] )), op: BinaryOperator::Lt, right: Box::new(call( "reverse", [Expr::Identifier(Ident::new("p2"))] )), }, result: Expr::UnaryOp { op: UnaryOperator::Minus, expr: Box::new(Expr::value(number("1"))) } }, ], else_result: Some(Box::new(Expr::value(number("1")))) }) }) ] )), databricks().verified_only_select(sql).projection[0] ); databricks().verified_expr( "map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2))", ); databricks().verified_expr("transform(array(1, 2, 3), x -> x + 1)"); } #[test] fn test_values_clause() { let values = Values { explicit_row: false, rows: vec![ vec![ Expr::Value((Value::DoubleQuotedString("one".to_owned())).with_empty_span()), Expr::value(number("1")), ], vec![ Expr::Value((Value::SingleQuotedString("two".to_owned())).with_empty_span()), Expr::value(number("2")), ], ], }; let query = databricks().verified_query(r#"VALUES ("one", 1), ('two', 2)"#); assert_eq!(SetExpr::Values(values.clone()), *query.body); // VALUES is permitted in a FROM clause without a subquery let query = databricks().verified_query_with_canonical( r#"SELECT * FROM VALUES ("one", 1), ('two', 2)"#, r#"SELECT * FROM (VALUES ("one", 1), ('two', 2))"#, ); let Some(TableFactor::Derived { subquery, .. }) = query .body .as_select() .map(|select| &select.from[0].relation) else { panic!("expected subquery"); }; assert_eq!(SetExpr::Values(values), *subquery.body); // values is also a valid table name let query = databricks_and_generic().verified_query(concat!( "WITH values AS (SELECT 42) ", "SELECT * FROM values", )); assert_eq!( Some(&table_from_name(ObjectName::from(vec![Ident::new( "values" )]))), query .body .as_select() .map(|select| &select.from[0].relation) ); // TODO: support this example from https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-values.html#examples // databricks().verified_query("VALUES 1, 2, 3"); } #[test] fn parse_use() { let valid_object_names = ["mydb", "WAREHOUSE", "DEFAULT"]; let quote_styles = ['"', '`']; for object_name in &valid_object_names { // Test single identifier without quotes assert_eq!( databricks().verified_stmt(&format!("USE {object_name}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::new( object_name.to_string() )]))) ); for "e in "e_styles { // Test single identifier with different type of quotes assert_eq!( databricks().verified_stmt(&format!("USE {quote}{object_name}{quote}")), Statement::Use(Use::Object(ObjectName::from(vec![Ident::with_quote( quote, object_name.to_string(), )]))) ); } } for "e in "e_styles { // Test single identifier with keyword and different type of quotes assert_eq!( databricks().verified_stmt(&format!("USE CATALOG {quote}my_catalog{quote}")), Statement::Use(Use::Catalog(ObjectName::from(vec![Ident::with_quote( quote, "my_catalog".to_string(), )]))) ); assert_eq!( databricks().verified_stmt(&format!("USE DATABASE {quote}my_database{quote}")), Statement::Use(Use::Database(ObjectName::from(vec![Ident::with_quote( quote, "my_database".to_string(), )]))) ); assert_eq!( databricks().verified_stmt(&format!("USE SCHEMA {quote}my_schema{quote}")), Statement::Use(Use::Schema(ObjectName::from(vec![Ident::with_quote( quote, "my_schema".to_string(), )]))) ); } // Test single identifier with keyword and no quotes assert_eq!( databricks().verified_stmt("USE CATALOG my_catalog"), Statement::Use(Use::Catalog(ObjectName::from(vec![Ident::new( "my_catalog" )]))) ); assert_eq!( databricks().verified_stmt("USE DATABASE my_schema"), Statement::Use(Use::Database(ObjectName::from(vec![Ident::new( "my_schema" )]))) ); assert_eq!( databricks().verified_stmt("USE SCHEMA my_schema"), Statement::Use(Use::Schema(ObjectName::from(vec![Ident::new("my_schema")]))) ); // Test invalid syntax - missing identifier let invalid_cases = ["USE SCHEMA", "USE DATABASE", "USE CATALOG"]; for sql in &invalid_cases { assert_eq!( databricks().parse_sql_statements(sql).unwrap_err(), ParserError::ParserError("Expected: identifier, found: EOF".to_string()), ); } } #[test] fn parse_databricks_struct_function() { assert_eq!( databricks_and_generic() .verified_only_select("SELECT STRUCT(1, 'foo')") .projection[0], SelectItem::UnnamedExpr(Expr::Struct { values: vec![ Expr::value(number("1")), Expr::Value((Value::SingleQuotedString("foo".to_string())).with_empty_span()) ], fields: vec![] }) ); assert_eq!( databricks_and_generic() .verified_only_select("SELECT STRUCT(1 AS one, 'foo' AS foo, false)") .projection[0], SelectItem::UnnamedExpr(Expr::Struct { values: vec![ Expr::Named { expr: Expr::value(number("1")).into(), name: Ident::new("one") }, Expr::Named { expr: Expr::Value( (Value::SingleQuotedString("foo".to_string())).with_empty_span() ) .into(), name: Ident::new("foo") }, Expr::Value((Value::Boolean(false)).with_empty_span()) ], fields: vec![] }) ); } #[test] fn data_type_timestamp_ntz() { // Literal assert_eq!( databricks().verified_expr("TIMESTAMP_NTZ '2025-03-29T18:52:00'"), Expr::TypedString { data_type: DataType::TimestampNtz, value: ValueWithSpan { value: Value::SingleQuotedString("2025-03-29T18:52:00".to_owned()), span: Span::empty(), } } ); // Cast assert_eq!( databricks().verified_expr("(created_at)::TIMESTAMP_NTZ"), Expr::Cast { kind: CastKind::DoubleColon, expr: Box::new(Expr::Nested(Box::new(Expr::Identifier( "created_at".into() )))), data_type: DataType::TimestampNtz, format: None } ); // Column definition match databricks().verified_stmt("CREATE TABLE foo (x TIMESTAMP_NTZ)") { Statement::CreateTable(CreateTable { columns, .. }) => { assert_eq!( columns, vec![ColumnDef { name: "x".into(), data_type: DataType::TimestampNtz, options: vec![], }] ); } s => panic!("Unexpected statement: {s:?}"), } }