diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 089a6ffc..4e97c6e2 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -124,7 +124,7 @@ test_source_distribution() { cargo build cargo test --all-features - if ( find -iname 'Cargo.toml' | xargs grep SNAPSHOT ); then + if ( find . -iname 'Cargo.toml' | xargs grep SNAPSHOT ); then echo "Cargo.toml version should not contain SNAPSHOT for releases" exit 1 fi diff --git a/src/ast/comments.rs b/src/ast/comments.rs new file mode 100644 index 00000000..1f5b3102 --- /dev/null +++ b/src/ast/comments.rs @@ -0,0 +1,329 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Provides a representation of source code comments in parsed SQL code. +//! +//! See [Comments::find] for an example. + +#[cfg(not(feature = "std"))] +use alloc::{string::String, vec::Vec}; + +use core::{ + ops::{Bound, Deref, RangeBounds}, + slice, +}; + +use crate::tokenizer::{Location, Span}; + +/// An opaque container for comments from a parse SQL source code. +#[derive(Default, Debug)] +pub struct Comments(Vec); + +impl Comments { + /// Accepts `comment` if its the first or is located strictly after the + /// last accepted comment. In other words, this method will skip the + /// comment if its comming out of order (as encountered in the parsed + /// source code.) 
+ pub(crate) fn offer(&mut self, comment: CommentWithSpan) { + if self + .0 + .last() + .map(|last| last.span < comment.span) + .unwrap_or(true) + { + self.0.push(comment); + } + } + + /// Finds comments starting within the given location range. The order of + /// iterator reflects the order of the comments as encountered in the parsed + /// source code. + /// + /// # Example + /// ```rust + /// use sqlparser::{dialect::GenericDialect, parser::Parser, tokenizer::Location}; + /// + /// let sql = r#"/* + /// header comment ... + /// ... spanning multiple lines + /// */ + /// + /// -- first statement + /// SELECT 'hello' /* world */ FROM DUAL; + /// + /// -- second statement + /// SELECT 123 FROM DUAL; + /// + /// -- trailing comment + /// "#; + /// + /// let (ast, comments) = Parser::parse_sql_with_comments(&GenericDialect, sql).unwrap(); + /// + /// // all comments appearing before line seven, i.e. before the first statement itself + /// assert_eq!( + /// &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::>(), + /// &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]); + /// + /// // all comments appearing within the first statement + /// assert_eq!( + /// &comments.find(Location::new(7, 1)..Location::new(8,1)).map(|c| c.as_str()).collect::>(), + /// &[" world "]); + /// + /// // all comments appearing within or after the first statement + /// assert_eq!( + /// &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::>(), + /// &[" world ", " second statement\n", " trailing comment\n"]); + /// ``` + /// + /// The [Spanned](crate::ast::Spanned) trait allows you to access location + /// information for certain AST nodes. 
+ pub fn find>(&self, range: R) -> Iter<'_> { + let (start, end) = ( + self.start_index(range.start_bound()), + self.end_index(range.end_bound()), + ); + debug_assert!((0..=self.0.len()).contains(&start)); + debug_assert!((0..=self.0.len()).contains(&end)); + // in case the user specified a reverse range + Iter(if start <= end { + self.0[start..end].iter() + } else { + self.0[0..0].iter() + }) + } + + /// Find the index of the first comment starting "before" the given location. + /// + /// The returned index is _inclusive_ and within the range of `0..=self.0.len()`. + fn start_index(&self, location: Bound<&Location>) -> usize { + match location { + Bound::Included(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i, + Err(i) => i, + } + } + Bound::Excluded(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i + 1, + Err(i) => i, + } + } + Bound::Unbounded => 0, + } + } + + /// Find the index of the first comment starting "after" the given location. + /// + /// The returned index is _exclusive_ and within the range of `0..=self.0.len()`. + fn end_index(&self, location: Bound<&Location>) -> usize { + match location { + Bound::Included(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i + 1, + Err(i) => i, + } + } + Bound::Excluded(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i, + Err(i) => i, + } + } + Bound::Unbounded => self.0.len(), + } + } +} + +impl From for Vec { + fn from(comments: Comments) -> Self { + comments.0 + } +} + +/// A source code comment with information of its entire span. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CommentWithSpan { + /// The source code comment iself + pub comment: Comment, + /// The span of the comment including its markers + pub span: Span, +} + +impl Deref for CommentWithSpan { + type Target = Comment; + + fn deref(&self) -> &Self::Target { + &self.comment + } +} + +/// A unified type of the different source code comment formats. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Comment { + /// A single line comment, typically introduced with a prefix and spanning + /// until end-of-line or end-of-file in the source code. + /// + /// Note: `content` will include the terminating new-line character, if any. + SingleLine { content: String, prefix: String }, + + /// A multi-line comment, typically enclosed in `/* .. */` markers. The + /// string represents the content excluding the markers. + MultiLine(String), +} + +impl Comment { + /// Retrieves the content of the comment as string slice. + pub fn as_str(&self) -> &str { + match self { + Comment::SingleLine { content, prefix: _ } => content.as_str(), + Comment::MultiLine(content) => content.as_str(), + } + } +} + +impl Deref for Comment { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +/// An opaque iterator implementation over comments served by [Comments::find]. 
+pub struct Iter<'a>(slice::Iter<'a, CommentWithSpan>); + +impl<'a> Iterator for Iter<'a> { + type Item = &'a CommentWithSpan; + + fn next(&mut self) -> Option { + self.0.next() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_find() { + let comments = { + // ``` + // -- abc + // /* hello */--, world + // /* def + // ghi + // jkl + // */ + // ``` + let mut c = Comments(Vec::new()); + c.offer(CommentWithSpan { + comment: Comment::SingleLine { + content: " abc".into(), + prefix: "--".into(), + }, + span: Span::new((1, 1).into(), (1, 7).into()), + }); + c.offer(CommentWithSpan { + comment: Comment::MultiLine(" hello ".into()), + span: Span::new((2, 3).into(), (2, 14).into()), + }); + c.offer(CommentWithSpan { + comment: Comment::SingleLine { + content: ", world".into(), + prefix: "--".into(), + }, + span: Span::new((2, 14).into(), (2, 21).into()), + }); + c.offer(CommentWithSpan { + comment: Comment::MultiLine(" def\n ghi\n jkl\n".into()), + span: Span::new((3, 3).into(), (7, 1).into()), + }); + c + }; + + fn find>(comments: &Comments, range: R) -> Vec<&str> { + comments.find(range).map(|c| c.as_str()).collect::>() + } + + // ~ end-points only -------------------------------------------------- + assert_eq!(find(&comments, ..Location::new(0, 0)), Vec::<&str>::new()); + assert_eq!(find(&comments, ..Location::new(2, 1)), vec![" abc"]); + assert_eq!(find(&comments, ..Location::new(2, 3)), vec![" abc"]); + assert_eq!( + find(&comments, ..=Location::new(2, 3)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, ..=Location::new(2, 3)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, ..Location::new(2, 15)), + vec![" abc", " hello ", ", world"] + ); + + // ~ start-points only ------------------------------------------------ + assert_eq!( + find(&comments, Location::new(1000, 1000)..), + Vec::<&str>::new() + ); + assert_eq!( + find(&comments, Location::new(2, 14)..), + vec![", world", " def\n ghi\n jkl\n"] + ); + assert_eq!( 
+ find(&comments, Location::new(2, 15)..), + vec![" def\n ghi\n jkl\n"] + ); + assert_eq!( + find(&comments, Location::new(0, 0)..), + vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..), + vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"] + ); + + // ~ ranges ----------------------------------------------------------- + assert_eq!( + find(&comments, Location::new(2, 1)..Location::new(1, 1)), + Vec::<&str>::new() + ); + assert_eq!( + find(&comments, Location::new(1, 1)..Location::new(2, 3)), + vec![" abc"] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..=Location::new(2, 3)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..=Location::new(2, 10)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..=Location::new(2, 14)), + vec![" abc", " hello ", ", world"] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..Location::new(2, 15)), + vec![" abc", " hello ", ", world"] + ); + + // ~ find everything -------------------------------------------------- + assert_eq!( + find(&comments, ..), + vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"] + ); + } +} diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 0df53c14..4e042a36 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -61,7 +61,7 @@ use crate::tokenizer::{Span, Token}; #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IndexColumn { pub column: OrderByExpr, - pub operator_class: Option, + pub operator_class: Option, } impl From for IndexColumn { @@ -371,10 +371,15 @@ pub enum AlterTableOperation { DropClusteringKey, SuspendRecluster, ResumeRecluster, - /// `REFRESH` + /// `REFRESH [ '' ]` /// - /// Note: this is Snowflake specific for dynamic tables - Refresh, + /// Note: this is Snowflake specific for dynamic/external tables + /// + /// + Refresh { + /// Optional subpath for external table refresh + subpath: Option, + }, /// `SUSPEND` /// 
/// Note: this is Snowflake specific for dynamic tables @@ -863,8 +868,12 @@ impl fmt::Display for AlterTableOperation { write!(f, "RESUME RECLUSTER")?; Ok(()) } - AlterTableOperation::Refresh => { - write!(f, "REFRESH") + AlterTableOperation::Refresh { subpath } => { + write!(f, "REFRESH")?; + if let Some(path) = subpath { + write!(f, " '{path}'")?; + } + Ok(()) } AlterTableOperation::Suspend => { write!(f, "SUSPEND") @@ -995,6 +1004,103 @@ impl fmt::Display for AlterTypeOperation { } } +/// `ALTER OPERATOR` statement +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct AlterOperator { + /// Operator name (can be schema-qualified) + pub name: ObjectName, + /// Left operand type (`None` if no left operand) + pub left_type: Option, + /// Right operand type + pub right_type: DataType, + /// The operation to perform + pub operation: AlterOperatorOperation, +} + +/// An [AlterOperator] operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AlterOperatorOperation { + /// `OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` + OwnerTo(Owner), + /// `SET SCHEMA new_schema` + SetSchema { schema_name: ObjectName }, + /// `SET ( options )` + Set { + /// List of operator options to set + options: Vec, + }, +} + +/// Option for `ALTER OPERATOR SET` operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorOption { + /// `RESTRICT = { res_proc | NONE }` + Restrict(Option), + /// `JOIN = { join_proc | NONE }` + Join(Option), + /// `COMMUTATOR = com_op` + Commutator(ObjectName), + /// `NEGATOR = neg_op` + 
Negator(ObjectName), + /// `HASHES` + Hashes, + /// `MERGES` + Merges, +} + +impl fmt::Display for AlterOperator { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "ALTER OPERATOR {} (", self.name)?; + if let Some(left_type) = &self.left_type { + write!(f, "{}", left_type)?; + } else { + write!(f, "NONE")?; + } + write!(f, ", {}) {}", self.right_type, self.operation) + } +} + +impl fmt::Display for AlterOperatorOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::OwnerTo(owner) => write!(f, "OWNER TO {}", owner), + Self::SetSchema { schema_name } => write!(f, "SET SCHEMA {}", schema_name), + Self::Set { options } => { + write!(f, "SET (")?; + for (i, option) in options.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", option)?; + } + write!(f, ")") + } + } + } +} + +impl fmt::Display for OperatorOption { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Restrict(Some(proc_name)) => write!(f, "RESTRICT = {}", proc_name), + Self::Restrict(None) => write!(f, "RESTRICT = NONE"), + Self::Join(Some(proc_name)) => write!(f, "JOIN = {}", proc_name), + Self::Join(None) => write!(f, "JOIN = NONE"), + Self::Commutator(op_name) => write!(f, "COMMUTATOR = {}", op_name), + Self::Negator(op_name) => write!(f, "NEGATOR = {}", op_name), + Self::Hashes => write!(f, "HASHES"), + Self::Merges => write!(f, "MERGES"), + } + } +} + /// An `ALTER COLUMN` (`Statement::AlterTable`) operation #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1790,7 +1896,7 @@ impl fmt::Display for ColumnOption { GeneratedAs::Always => "ALWAYS", GeneratedAs::ByDefault => "BY DEFAULT", // ExpStored goes with an expression, handled above - GeneratedAs::ExpStored => unreachable!(), + GeneratedAs::ExpStored => "", }; write!(f, "GENERATED {when} AS IDENTITY")?; if sequence_options.is_some() { @@ -2803,7 +2909,9 @@ impl 
fmt::Display for CreateTable { if let Some(file_format) = self.file_format { write!(f, " STORED AS {file_format}")?; } - write!(f, " LOCATION '{}'", self.location.as_ref().unwrap())?; + if let Some(location) = &self.location { + write!(f, " LOCATION '{location}'")?; + } } match &self.table_options { @@ -3878,8 +3986,11 @@ pub enum AlterTableType { /// Iceberg, /// Dynamic table type - /// + /// Dynamic, + /// External table type + /// + External, } /// ALTER TABLE statement @@ -3909,6 +4020,7 @@ impl fmt::Display for AlterTable { match &self.table_type { Some(AlterTableType::Iceberg) => write!(f, "ALTER ICEBERG TABLE ")?, Some(AlterTableType::Dynamic) => write!(f, "ALTER DYNAMIC TABLE ")?, + Some(AlterTableType::External) => write!(f, "ALTER EXTERNAL TABLE ")?, None => write!(f, "ALTER TABLE ")?, } @@ -3979,18 +4091,8 @@ pub struct CreateOperator { pub left_arg: Option, /// RIGHTARG parameter (right operand type) pub right_arg: Option, - /// COMMUTATOR parameter (commutator operator) - pub commutator: Option, - /// NEGATOR parameter (negator operator) - pub negator: Option, - /// RESTRICT parameter (restriction selectivity function) - pub restrict: Option, - /// JOIN parameter (join selectivity function) - pub join: Option, - /// HASHES flag - pub hashes: bool, - /// MERGES flag - pub merges: bool, + /// Operator options (COMMUTATOR, NEGATOR, RESTRICT, JOIN, HASHES, MERGES) + pub options: Vec, } /// CREATE OPERATOR FAMILY statement @@ -4042,23 +4144,9 @@ impl fmt::Display for CreateOperator { if let Some(right_arg) = &self.right_arg { params.push(format!("RIGHTARG = {}", right_arg)); } - if let Some(commutator) = &self.commutator { - params.push(format!("COMMUTATOR = {}", commutator)); - } - if let Some(negator) = &self.negator { - params.push(format!("NEGATOR = {}", negator)); - } - if let Some(restrict) = &self.restrict { - params.push(format!("RESTRICT = {}", restrict)); - } - if let Some(join) = &self.join { - params.push(format!("JOIN = {}", join)); - } - if 
self.hashes { - params.push("HASHES".to_string()); - } - if self.merges { - params.push("MERGES".to_string()); + + for option in &self.options { + params.push(option.to_string()); } write!(f, "{}", params.join(", "))?; @@ -4110,25 +4198,25 @@ impl fmt::Display for OperatorArgTypes { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum OperatorClassItem { - /// OPERATOR clause + /// `OPERATOR` clause Operator { - strategy_number: u32, + strategy_number: u64, operator_name: ObjectName, /// Optional operator argument types op_types: Option, - /// FOR SEARCH or FOR ORDER BY + /// `FOR SEARCH` or `FOR ORDER BY` purpose: Option, }, - /// FUNCTION clause + /// `FUNCTION` clause Function { - support_number: u32, + support_number: u64, /// Optional function argument types for the operator class op_types: Option>, function_name: ObjectName, /// Function argument types argument_types: Vec, }, - /// STORAGE clause + /// `STORAGE` clause Storage { storage_type: DataType }, } @@ -4325,3 +4413,189 @@ impl Spanned for DropOperatorClass { Span::empty() } } + +/// An item in an ALTER OPERATOR FAMILY ADD statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorFamilyItem { + /// `OPERATOR` clause + Operator { + strategy_number: u64, + operator_name: ObjectName, + /// Operator argument types + op_types: Vec, + /// `FOR SEARCH` or `FOR ORDER BY` + purpose: Option, + }, + /// `FUNCTION` clause + Function { + support_number: u64, + /// Optional operator argument types for the function + op_types: Option>, + function_name: ObjectName, + /// Function argument types + argument_types: Vec, + }, +} + +/// An item in an ALTER OPERATOR FAMILY DROP statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", 
derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorFamilyDropItem { + /// `OPERATOR` clause + Operator { + strategy_number: u64, + /// Operator argument types + op_types: Vec, + }, + /// `FUNCTION` clause + Function { + support_number: u64, + /// Operator argument types for the function + op_types: Vec, + }, +} + +impl fmt::Display for OperatorFamilyItem { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + OperatorFamilyItem::Operator { + strategy_number, + operator_name, + op_types, + purpose, + } => { + write!( + f, + "OPERATOR {strategy_number} {operator_name} ({})", + display_comma_separated(op_types) + )?; + if let Some(purpose) = purpose { + write!(f, " {purpose}")?; + } + Ok(()) + } + OperatorFamilyItem::Function { + support_number, + op_types, + function_name, + argument_types, + } => { + write!(f, "FUNCTION {support_number}")?; + if let Some(types) = op_types { + write!(f, " ({})", display_comma_separated(types))?; + } + write!(f, " {function_name}")?; + if !argument_types.is_empty() { + write!(f, "({})", display_comma_separated(argument_types))?; + } + Ok(()) + } + } + } +} + +impl fmt::Display for OperatorFamilyDropItem { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + OperatorFamilyDropItem::Operator { + strategy_number, + op_types, + } => { + write!( + f, + "OPERATOR {strategy_number} ({})", + display_comma_separated(op_types) + ) + } + OperatorFamilyDropItem::Function { + support_number, + op_types, + } => { + write!( + f, + "FUNCTION {support_number} ({})", + display_comma_separated(op_types) + ) + } + } + } +} + +/// `ALTER OPERATOR FAMILY` statement +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct AlterOperatorFamily { + /// Operator family name (can be schema-qualified) + pub name: 
ObjectName, + /// Index method (btree, hash, gist, gin, etc.) + pub using: Ident, + /// The operation to perform + pub operation: AlterOperatorFamilyOperation, +} + +/// An [AlterOperatorFamily] operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AlterOperatorFamilyOperation { + /// `ADD { OPERATOR ... | FUNCTION ... } [, ...]` + Add { + /// List of operator family items to add + items: Vec, + }, + /// `DROP { OPERATOR ... | FUNCTION ... } [, ...]` + Drop { + /// List of operator family items to drop + items: Vec, + }, + /// `RENAME TO new_name` + RenameTo { new_name: ObjectName }, + /// `OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` + OwnerTo(Owner), + /// `SET SCHEMA new_schema` + SetSchema { schema_name: ObjectName }, +} + +impl fmt::Display for AlterOperatorFamily { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "ALTER OPERATOR FAMILY {} USING {}", + self.name, self.using + )?; + write!(f, " {}", self.operation) + } +} + +impl fmt::Display for AlterOperatorFamilyOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AlterOperatorFamilyOperation::Add { items } => { + write!(f, "ADD {}", display_comma_separated(items)) + } + AlterOperatorFamilyOperation::Drop { items } => { + write!(f, "DROP {}", display_comma_separated(items)) + } + AlterOperatorFamilyOperation::RenameTo { new_name } => { + write!(f, "RENAME TO {new_name}") + } + AlterOperatorFamilyOperation::OwnerTo(owner) => { + write!(f, "OWNER TO {owner}") + } + AlterOperatorFamilyOperation::SetSchema { schema_name } => { + write!(f, "SET SCHEMA {schema_name}") + } + } + } +} + +impl Spanned for AlterOperatorFamily { + fn span(&self) -> Span { + Span::empty() + } +} diff --git a/src/ast/dml.rs b/src/ast/dml.rs index d6009ce8..d740b140 100644 --- a/src/ast/dml.rs +++ 
b/src/ast/dml.rs @@ -24,13 +24,16 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::display_utils::{indented_list, Indent, SpaceOrNewline}; +use crate::{ + ast::display_separated, + display_utils::{indented_list, Indent, SpaceOrNewline}, +}; use super::{ display_comma_separated, helpers::attached_token::AttachedToken, query::InputFormatClause, Assignment, Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert, - OrderByExpr, Query, SelectItem, Setting, SqliteOnConflict, TableObject, TableWithJoins, - UpdateTableFromKind, + OrderByExpr, Query, SelectInto, SelectItem, Setting, SqliteOnConflict, TableFactor, + TableObject, TableWithJoins, UpdateTableFromKind, Values, }; /// INSERT statement. @@ -310,3 +313,334 @@ impl Display for Update { Ok(()) } } + +/// A `MERGE` statement. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Merge { + /// The `MERGE` token that starts the statement. + pub merge_token: AttachedToken, + /// optional INTO keyword + pub into: bool, + /// Specifies the table to merge + pub table: TableFactor, + /// Specifies the table or subquery to join with the target table + pub source: TableFactor, + /// Specifies the expression on which to join the target table and source + pub on: Box, + /// Specifies the actions to perform when values match or do not match. 
+ pub clauses: Vec, + // Specifies the output to save changes in MSSQL + pub output: Option, +} + +impl Display for Merge { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "MERGE{int} {table} USING {source} ", + int = if self.into { " INTO" } else { "" }, + table = self.table, + source = self.source, + )?; + write!(f, "ON {on} ", on = self.on)?; + write!(f, "{}", display_separated(&self.clauses, " "))?; + if let Some(ref output) = self.output { + write!(f, " {output}")?; + } + Ok(()) + } +} + +/// A `WHEN` clause within a `MERGE` Statement +/// +/// Example: +/// ```sql +/// WHEN NOT MATCHED BY SOURCE AND product LIKE '%washer%' THEN DELETE +/// ``` +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MergeClause { + /// The `WHEN` token that starts the sub-expression. + pub when_token: AttachedToken, + pub clause_kind: MergeClauseKind, + pub predicate: Option, + pub action: MergeAction, +} + +impl Display for MergeClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let MergeClause { + when_token: _, + clause_kind, + predicate, + action, + } = self; + + write!(f, "WHEN {clause_kind}")?; + if let Some(pred) = predicate { + write!(f, " AND {pred}")?; + } + write!(f, " THEN {action}") + } +} + +/// Variant of `WHEN` clause used within a `MERGE` Statement. 
+/// +/// Example: +/// ```sql +/// MERGE INTO T USING U ON FALSE WHEN MATCHED THEN DELETE +/// ``` +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MergeClauseKind { + /// `WHEN MATCHED` + Matched, + /// `WHEN NOT MATCHED` + NotMatched, + /// `WHEN MATCHED BY TARGET` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + NotMatchedByTarget, + /// `WHEN MATCHED BY SOURCE` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + NotMatchedBySource, +} + +impl Display for MergeClauseKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MergeClauseKind::Matched => write!(f, "MATCHED"), + MergeClauseKind::NotMatched => write!(f, "NOT MATCHED"), + MergeClauseKind::NotMatchedByTarget => write!(f, "NOT MATCHED BY TARGET"), + MergeClauseKind::NotMatchedBySource => write!(f, "NOT MATCHED BY SOURCE"), + } + } +} + +/// Underlying statement of a `WHEN` clause within a `MERGE` Statement +/// +/// Example +/// ```sql +/// INSERT (product, quantity) VALUES(product, quantity) +/// ``` +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MergeAction { + /// An `INSERT` clause + /// + /// Example: + /// 
```sql + /// INSERT (product, quantity) VALUES(product, quantity) + /// ``` + Insert(MergeInsertExpr), + /// An `UPDATE` clause + /// + /// Example: + /// ```sql + /// UPDATE SET quantity = T.quantity + S.quantity + /// ``` + Update(MergeUpdateExpr), + /// A plain `DELETE` clause + Delete { + /// The `DELETE` token that starts the sub-expression. + delete_token: AttachedToken, + }, +} + +impl Display for MergeAction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MergeAction::Insert(insert) => { + write!(f, "INSERT {insert}") + } + MergeAction::Update(update) => { + write!(f, "UPDATE {update}") + } + MergeAction::Delete { .. } => { + write!(f, "DELETE") + } + } + } +} + +/// The type of expression used to insert rows within a `MERGE` statement. +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MergeInsertKind { + /// The insert expression is defined from an explicit `VALUES` clause + /// + /// Example: + /// ```sql + /// INSERT VALUES(product, quantity) + /// ``` + Values(Values), + /// The insert expression is defined using only the `ROW` keyword. + /// + /// Example: + /// ```sql + /// INSERT ROW + /// ``` + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + Row, +} + +impl Display for MergeInsertKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MergeInsertKind::Values(values) => { + write!(f, "{values}") + } + MergeInsertKind::Row => { + write!(f, "ROW") + } + } + } +} + +/// The expression used to insert rows within a `MERGE` statement. 
+/// +/// Examples +/// ```sql +/// INSERT (product, quantity) VALUES(product, quantity) +/// INSERT ROW +/// ``` +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MergeInsertExpr { + /// The `INSERT` token that starts the sub-expression. + pub insert_token: AttachedToken, + /// Columns (if any) specified by the insert. + /// + /// Example: + /// ```sql + /// INSERT (product, quantity) VALUES(product, quantity) + /// INSERT (product, quantity) ROW + /// ``` + pub columns: Vec, + /// The token, `[VALUES | ROW]` starting `kind`. + pub kind_token: AttachedToken, + /// The insert type used by the statement. + pub kind: MergeInsertKind, + /// An optional condition to restrict the insertion (Oracle specific) + pub insert_predicate: Option, +} + +impl Display for MergeInsertExpr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if !self.columns.is_empty() { + write!(f, "({}) ", display_comma_separated(self.columns.as_slice()))?; + } + write!(f, "{}", self.kind)?; + if let Some(predicate) = self.insert_predicate.as_ref() { + write!(f, " WHERE {}", predicate)?; + } + Ok(()) + } +} + +/// The expression used to update rows within a `MERGE` statement. 
+/// +/// Examples +/// ```sql +/// UPDATE SET quantity = T.quantity + S.quantity +/// ``` +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MergeUpdateExpr { + /// The `UPDATE` token that starts the sub-expression. + pub update_token: AttachedToken, + /// The update assiment expressions + pub assignments: Vec, + /// `where_clause` for the update (Oralce specific) + pub update_predicate: Option, + /// `delete_clause` for the update "delete where" (Oracle specific) + pub delete_predicate: Option, +} + +impl Display for MergeUpdateExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "SET {}", display_comma_separated(&self.assignments))?; + if let Some(predicate) = self.update_predicate.as_ref() { + write!(f, " WHERE {predicate}")?; + } + if let Some(predicate) = self.delete_predicate.as_ref() { + write!(f, " DELETE WHERE {predicate}")?; + } + Ok(()) + } +} + +/// A `OUTPUT` Clause in the end of a `MERGE` Statement +/// +/// Example: +/// OUTPUT $action, deleted.* INTO dbo.temp_products; +/// [mssql](https://learn.microsoft.com/en-us/sql/t-sql/queries/output-clause-transact-sql) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OutputClause { + Output { + output_token: AttachedToken, + select_items: Vec, + into_table: Option, + }, + Returning { + returning_token: AttachedToken, + select_items: Vec, + }, +} + +impl fmt::Display for OutputClause { + fn fmt(&self, f: &mut 
fmt::Formatter) -> fmt::Result { + match self { + OutputClause::Output { + output_token: _, + select_items, + into_table, + } => { + f.write_str("OUTPUT ")?; + display_comma_separated(select_items).fmt(f)?; + if let Some(into_table) = into_table { + f.write_str(" ")?; + into_table.fmt(f)?; + } + Ok(()) + } + OutputClause::Returning { + returning_token: _, + select_items, + } => { + f.write_str("RETURNING ")?; + display_comma_separated(select_items).fmt(f) + } + } + } +} diff --git a/src/ast/helpers/stmt_data_loading.rs b/src/ast/helpers/stmt_data_loading.rs index 92a72727..62ee77ce 100644 --- a/src/ast/helpers/stmt_data_loading.rs +++ b/src/ast/helpers/stmt_data_loading.rs @@ -99,15 +99,15 @@ impl fmt::Display for StageParamsObject { impl fmt::Display for StageLoadSelectItem { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.alias.is_some() { - write!(f, "{}.", self.alias.as_ref().unwrap())?; + if let Some(alias) = &self.alias { + write!(f, "{alias}.")?; } write!(f, "${}", self.file_col_num)?; - if self.element.is_some() { - write!(f, ":{}", self.element.as_ref().unwrap())?; + if let Some(element) = &self.element { + write!(f, ":{element}")?; } - if self.item_as.is_some() { - write!(f, " AS {}", self.item_as.as_ref().unwrap())?; + if let Some(item_as) = &self.item_as { + write!(f, " AS {item_as}")?; } Ok(()) } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 44d50c13..46767860 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -59,7 +59,8 @@ pub use self::dcl::{ AlterRoleOperation, CreateRole, ResetConfig, RoleOption, SecondaryRoles, SetConfigValue, Use, }; pub use self::ddl::{ - Alignment, AlterColumnOperation, AlterConnectorOwner, AlterIndexOperation, + Alignment, AlterColumnOperation, AlterConnectorOwner, AlterIndexOperation, AlterOperator, + AlterOperatorFamily, AlterOperatorFamilyOperation, AlterOperatorOperation, AlterPolicyOperation, AlterSchema, AlterSchemaOperation, AlterTable, AlterTableAlgorithm, AlterTableLock, AlterTableOperation, 
AlterTableType, AlterType, AlterTypeAddValue, AlterTypeAddValuePosition, AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue, @@ -71,13 +72,17 @@ pub use self::ddl::{ DropOperatorSignature, DropTrigger, GeneratedAs, GeneratedExpressionMode, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption, - OperatorArgTypes, OperatorClassItem, OperatorPurpose, Owner, Partition, ProcedureParam, - ReferentialAction, RenameTableNameKind, ReplicaIdentity, TagsColumnOption, TriggerObjectKind, - Truncate, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength, + OperatorArgTypes, OperatorClassItem, OperatorFamilyDropItem, OperatorFamilyItem, + OperatorOption, OperatorPurpose, Owner, Partition, ProcedureParam, ReferentialAction, + RenameTableNameKind, ReplicaIdentity, TagsColumnOption, TriggerObjectKind, Truncate, + UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef, }; -pub use self::dml::{Delete, Insert, Update}; +pub use self::dml::{ + Delete, Insert, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, + MergeInsertKind, MergeUpdateExpr, OutputClause, Update, +}; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, @@ -107,7 +112,7 @@ pub use self::trigger::{ pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, - NormalizationForm, TrimWhereField, Value, ValueWithSpan, + NormalizationForm, QuoteDelimitedString, TrimWhereField, Value, ValueWithSpan, }; use crate::ast::helpers::key_value_options::KeyValueOptions; @@ -133,6 +138,7 @@ mod query; mod spans; pub use spans::Spanned; +pub mod comments; mod 
trigger; mod value; @@ -344,6 +350,12 @@ impl From> for ObjectName { } } +impl From for ObjectName { + fn from(ident: Ident) -> Self { + ObjectName(vec![ObjectNamePart::Identifier(ident)]) + } +} + impl fmt::Display for ObjectName { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", display_separated(&self.0, ".")) @@ -3396,6 +3408,16 @@ pub enum Statement { /// ``` AlterType(AlterType), /// ```sql + /// ALTER OPERATOR + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteroperator.html) + AlterOperator(AlterOperator), + /// ```sql + /// ALTER OPERATOR FAMILY + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteropfamily.html) + AlterOperatorFamily(AlterOperatorFamily), + /// ```sql /// ALTER ROLE /// ``` AlterRole { @@ -4082,22 +4104,7 @@ pub enum Statement { /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) /// [MSSQL](https://learn.microsoft.com/en-us/sql/t-sql/statements/merge-transact-sql?view=sql-server-ver16) - Merge { - /// The `MERGE` token that starts the statement. - merge_token: AttachedToken, - /// optional INTO keyword - into: bool, - /// Specifies the table to merge - table: TableFactor, - /// Specifies the table or subquery to join with the target table - source: TableFactor, - /// Specifies the expression on which to join the target table and source - on: Box, - /// Specifies the actions to perform when values match or do not match. 
- clauses: Vec, - // Specifies the output to save changes in MSSQL - output: Option, - }, + Merge(Merge), /// ```sql /// CACHE [ FLAG ] TABLE [ OPTIONS('K1' = 'V1', 'K2' = V2) ] [ AS ] [ ] /// ``` @@ -4971,6 +4978,10 @@ impl fmt::Display for Statement { Statement::AlterType(AlterType { name, operation }) => { write!(f, "ALTER TYPE {name} {operation}") } + Statement::AlterOperator(alter_operator) => write!(f, "{alter_operator}"), + Statement::AlterOperatorFamily(alter_operator_family) => { + write!(f, "{alter_operator_family}") + } Statement::AlterRole { name, operation } => { write!(f, "ALTER ROLE {name} {operation}") } @@ -5514,27 +5525,7 @@ impl fmt::Display for Statement { Statement::ReleaseSavepoint { name } => { write!(f, "RELEASE SAVEPOINT {name}") } - Statement::Merge { - merge_token: _, - into, - table, - source, - on, - clauses, - output, - } => { - write!( - f, - "MERGE{int} {table} USING {source} ", - int = if *into { " INTO" } else { "" } - )?; - write!(f, "ON {on} ")?; - write!(f, "{}", display_separated(clauses, " "))?; - if let Some(output) = output { - write!(f, " {output}")?; - } - Ok(()) - } + Statement::Merge(merge) => merge.fmt(f), Statement::Cache { table_name, table_flag, @@ -8559,257 +8550,6 @@ impl fmt::Display for CopyLegacyCsvOption { } } -/// Variant of `WHEN` clause used within a `MERGE` Statement. 
-/// -/// Example: -/// ```sql -/// MERGE INTO T USING U ON FALSE WHEN MATCHED THEN DELETE -/// ``` -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MergeClauseKind { - /// `WHEN MATCHED` - Matched, - /// `WHEN NOT MATCHED` - NotMatched, - /// `WHEN MATCHED BY TARGET` - /// - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) - NotMatchedByTarget, - /// `WHEN MATCHED BY SOURCE` - /// - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) - NotMatchedBySource, -} - -impl Display for MergeClauseKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - MergeClauseKind::Matched => write!(f, "MATCHED"), - MergeClauseKind::NotMatched => write!(f, "NOT MATCHED"), - MergeClauseKind::NotMatchedByTarget => write!(f, "NOT MATCHED BY TARGET"), - MergeClauseKind::NotMatchedBySource => write!(f, "NOT MATCHED BY SOURCE"), - } - } -} - -/// The type of expression used to insert rows within a `MERGE` statement. 
-/// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MergeInsertKind { - /// The insert expression is defined from an explicit `VALUES` clause - /// - /// Example: - /// ```sql - /// INSERT VALUES(product, quantity) - /// ``` - Values(Values), - /// The insert expression is defined using only the `ROW` keyword. - /// - /// Example: - /// ```sql - /// INSERT ROW - /// ``` - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) - Row, -} - -impl Display for MergeInsertKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - MergeInsertKind::Values(values) => { - write!(f, "{values}") - } - MergeInsertKind::Row => { - write!(f, "ROW") - } - } - } -} - -/// The expression used to insert rows within a `MERGE` statement. -/// -/// Examples -/// ```sql -/// INSERT (product, quantity) VALUES(product, quantity) -/// INSERT ROW -/// ``` -/// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct MergeInsertExpr { - /// The `INSERT` token that starts the sub-expression. - pub insert_token: AttachedToken, - /// Columns (if any) specified by the insert. - /// - /// Example: - /// ```sql - /// INSERT (product, quantity) VALUES(product, quantity) - /// INSERT (product, quantity) ROW - /// ``` - pub columns: Vec, - /// The token, `[VALUES | ROW]` starting `kind`. 
- pub kind_token: AttachedToken, - /// The insert type used by the statement. - pub kind: MergeInsertKind, -} - -impl Display for MergeInsertExpr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if !self.columns.is_empty() { - write!(f, "({}) ", display_comma_separated(self.columns.as_slice()))?; - } - write!(f, "{}", self.kind) - } -} - -/// Underlying statement of a when clause within a `MERGE` Statement -/// -/// Example -/// ```sql -/// INSERT (product, quantity) VALUES(product, quantity) -/// ``` -/// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MergeAction { - /// An `INSERT` clause - /// - /// Example: - /// ```sql - /// INSERT (product, quantity) VALUES(product, quantity) - /// ``` - Insert(MergeInsertExpr), - /// An `UPDATE` clause - /// - /// Example: - /// ```sql - /// UPDATE SET quantity = T.quantity + S.quantity - /// ``` - Update { - /// The `UPDATE` token that starts the sub-expression. - update_token: AttachedToken, - assignments: Vec, - }, - /// A plain `DELETE` clause - Delete { - /// The `DELETE` token that starts the sub-expression. - delete_token: AttachedToken, - }, -} - -impl Display for MergeAction { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - MergeAction::Insert(insert) => { - write!(f, "INSERT {insert}") - } - MergeAction::Update { assignments, .. } => { - write!(f, "UPDATE SET {}", display_comma_separated(assignments)) - } - MergeAction::Delete { .. 
} => { - write!(f, "DELETE") - } - } - } -} - -/// A when clause within a `MERGE` Statement -/// -/// Example: -/// ```sql -/// WHEN NOT MATCHED BY SOURCE AND product LIKE '%washer%' THEN DELETE -/// ``` -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct MergeClause { - /// The `WHEN` token that starts the sub-expression. - pub when_token: AttachedToken, - pub clause_kind: MergeClauseKind, - pub predicate: Option, - pub action: MergeAction, -} - -impl Display for MergeClause { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let MergeClause { - when_token: _, - clause_kind, - predicate, - action, - } = self; - - write!(f, "WHEN {clause_kind}")?; - if let Some(pred) = predicate { - write!(f, " AND {pred}")?; - } - write!(f, " THEN {action}") - } -} - -/// A Output Clause in the end of a 'MERGE' Statement -/// -/// Example: -/// OUTPUT $action, deleted.* INTO dbo.temp_products; -/// [mssql](https://learn.microsoft.com/en-us/sql/t-sql/queries/output-clause-transact-sql) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum OutputClause { - Output { - output_token: AttachedToken, - select_items: Vec, - into_table: Option, - }, - Returning { - returning_token: AttachedToken, - select_items: Vec, - }, -} - -impl fmt::Display for OutputClause { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - OutputClause::Output { - output_token: _, - select_items, - into_table, - } => { - f.write_str("OUTPUT ")?; - display_comma_separated(select_items).fmt(f)?; - if let Some(into_table) = 
into_table { - f.write_str(" ")?; - into_table.fmt(f)?; - } - Ok(()) - } - OutputClause::Returning { - returning_token: _, - select_items, - } => { - f.write_str("RETURNING ")?; - display_comma_separated(select_items).fmt(f) - } - } - } -} - #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -9814,8 +9554,8 @@ impl fmt::Display for ShowCharset { } else { write!(f, " CHARACTER SET")?; } - if self.filter.is_some() { - write!(f, " {}", self.filter.as_ref().unwrap())?; + if let Some(filter) = &self.filter { + write!(f, " {filter}")?; } Ok(()) } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 684cc5b0..d4e84315 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -28,7 +28,7 @@ use core::iter; use crate::tokenizer::Span; use super::{ - dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation, + comments, dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation, AlterIndexOperation, AlterTableOperation, Analyze, Array, Assignment, AssignmentTarget, AttachedToken, BeginEndStatements, CaseStatement, CloseCursor, ClusteredIndex, ColumnDef, ColumnOption, ColumnOptionDef, ConditionalStatementBlock, ConditionalStatements, @@ -38,15 +38,15 @@ use super::{ FunctionArgumentClause, FunctionArgumentList, FunctionArguments, GroupByExpr, HavingBound, IfStatement, IlikeSelectItem, IndexColumn, Insert, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonPath, JsonPathElem, LateralView, LimitClause, - MatchRecognizePattern, Measure, MergeAction, MergeClause, MergeInsertExpr, MergeInsertKind, - NamedParenthesizedList, NamedWindowDefinition, ObjectName, ObjectNamePart, Offset, OnConflict, - OnConflictAction, OnInsert, OpenStatement, OrderBy, OrderByExpr, OrderByKind, OutputClause, - Partition, PivotValueSource, ProjectionSelect, Query, RaiseStatement, RaiseStatementValue, - 
ReferentialAction, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, - SelectInto, SelectItem, SetExpr, SqlOption, Statement, Subscript, SymbolDefinition, TableAlias, - TableAliasColumnDef, TableConstraint, TableFactor, TableObject, TableOptionsClustered, - TableWithJoins, Update, UpdateTableFromKind, Use, Value, Values, ViewColumnDef, WhileStatement, - WildcardAdditionalOptions, With, WithFill, + MatchRecognizePattern, Measure, Merge, MergeAction, MergeClause, MergeInsertExpr, + MergeInsertKind, MergeUpdateExpr, NamedParenthesizedList, NamedWindowDefinition, ObjectName, + ObjectNamePart, Offset, OnConflict, OnConflictAction, OnInsert, OpenStatement, OrderBy, + OrderByExpr, OrderByKind, OutputClause, Partition, PivotValueSource, ProjectionSelect, Query, + RaiseStatement, RaiseStatementValue, ReferentialAction, RenameSelectItem, ReplaceSelectElement, + ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SqlOption, Statement, Subscript, + SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, TableFactor, TableObject, + TableOptionsClustered, TableWithJoins, Update, UpdateTableFromKind, Use, Value, Values, + ViewColumnDef, WhileStatement, WildcardAdditionalOptions, With, WithFill, }; /// Given an iterator of spans, return the [Span::union] of all spans. @@ -252,6 +252,7 @@ impl Spanned for Values { /// - [Statement::CreateSecret] /// - [Statement::CreateRole] /// - [Statement::AlterType] +/// - [Statement::AlterOperator] /// - [Statement::AlterRole] /// - [Statement::AttachDatabase] /// - [Statement::AttachDuckDBDatabase] @@ -401,6 +402,8 @@ impl Spanned for Statement { ), // These statements need to be implemented Statement::AlterType { .. } => Span::empty(), + Statement::AlterOperator { .. } => Span::empty(), + Statement::AlterOperatorFamily { .. } => Span::empty(), Statement::AlterRole { .. } => Span::empty(), Statement::AlterSession { .. } => Span::empty(), Statement::AttachDatabase { .. 
} => Span::empty(), @@ -451,20 +454,7 @@ impl Spanned for Statement { Statement::Explain { .. } => Span::empty(), Statement::Savepoint { .. } => Span::empty(), Statement::ReleaseSavepoint { .. } => Span::empty(), - Statement::Merge { - merge_token, - into: _, - table: _, - source: _, - on, - clauses, - output, - } => union_spans( - [merge_token.0.span, on.span()] - .into_iter() - .chain(clauses.iter().map(Spanned::span)) - .chain(output.iter().map(Spanned::span)), - ), + Statement::Merge(merge) => merge.span(), Statement::Cache { .. } => Span::empty(), Statement::UNCache { .. } => Span::empty(), Statement::CreateSequence { .. } => Span::empty(), @@ -925,6 +915,17 @@ impl Spanned for Update { } } +impl Spanned for Merge { + fn span(&self) -> Span { + union_spans( + [self.merge_token.0.span, self.on.span()] + .into_iter() + .chain(self.clauses.iter().map(Spanned::span)) + .chain(self.output.iter().map(Spanned::span)), + ) + } +} + impl Spanned for FromTable { fn span(&self) -> Span { match self { @@ -1145,7 +1146,7 @@ impl Spanned for AlterTableOperation { AlterTableOperation::DropClusteringKey => Span::empty(), AlterTableOperation::SuspendRecluster => Span::empty(), AlterTableOperation::ResumeRecluster => Span::empty(), - AlterTableOperation::Refresh => Span::empty(), + AlterTableOperation::Refresh { .. } => Span::empty(), AlterTableOperation::Suspend => Span::empty(), AlterTableOperation::Resume => Span::empty(), AlterTableOperation::Algorithm { .. 
} => Span::empty(), @@ -2419,12 +2420,7 @@ impl Spanned for MergeAction { fn span(&self) -> Span { match self { MergeAction::Insert(expr) => expr.span(), - MergeAction::Update { - update_token, - assignments, - } => union_spans( - core::iter::once(update_token.0.span).chain(assignments.iter().map(Spanned::span)), - ), + MergeAction::Update(expr) => expr.span(), MergeAction::Delete { delete_token } => delete_token.0.span, } } @@ -2442,7 +2438,19 @@ impl Spanned for MergeInsertExpr { }, ] .into_iter() - .chain(self.columns.iter().map(|i| i.span)), + .chain(self.insert_predicate.iter().map(Spanned::span)) + .chain(self.columns.iter().map(|i| i.span())), + ) + } +} + +impl Spanned for MergeUpdateExpr { + fn span(&self) -> Span { + union_spans( + core::iter::once(self.update_token.0.span) + .chain(self.assignments.iter().map(Spanned::span)) + .chain(self.update_predicate.iter().map(Spanned::span)) + .chain(self.delete_predicate.iter().map(Spanned::span)), ) } } @@ -2470,6 +2478,12 @@ impl Spanned for OutputClause { } } +impl Spanned for comments::CommentWithSpan { + fn span(&self) -> Span { + self.span + } +} + #[cfg(test)] pub mod tests { use crate::dialect::{Dialect, GenericDialect, SnowflakeDialect}; @@ -2766,7 +2780,7 @@ WHERE id = 1 assert_eq!(stmt_span.end, (16, 67).into()); // ~ individual tokens within the statement - let Statement::Merge { + let Statement::Merge(Merge { merge_token, into: _, table: _, @@ -2774,7 +2788,7 @@ WHERE id = 1 on: _, clauses, output, - } = &r[0] + }) = &r[0] else { panic!("not a MERGE statement"); }; @@ -2812,10 +2826,12 @@ WHERE id = 1 clauses[1].when_token.0.span, Span::new(Location::new(12, 17), Location::new(12, 21)) ); - if let MergeAction::Update { + if let MergeAction::Update(MergeUpdateExpr { update_token, assignments: _, - } = &clauses[1].action + update_predicate: _, + delete_predicate: _, + }) = &clauses[1].action { assert_eq!( update_token.0.span, @@ -2888,7 +2904,7 @@ WHERE id = 1 ); // ~ individual tokens within the 
statement - if let Statement::Merge { output, .. } = &r[0] { + if let Statement::Merge(Merge { output, .. }) = &r[0] { if let Some(OutputClause::Returning { returning_token, .. }) = output @@ -2922,7 +2938,7 @@ WHERE id = 1 ); // ~ individual tokens within the statement - if let Statement::Merge { output, .. } = &r[0] { + if let Statement::Merge(Merge { output, .. }) = &r[0] { if let Some(OutputClause::Output { output_token, .. }) = output { assert_eq!( output_token.0.span, @@ -2935,4 +2951,44 @@ WHERE id = 1 panic!("not a MERGE statement"); }; } + + #[test] + fn test_merge_statement_spans_with_update_predicates() { + let sql = r#" + MERGE INTO a USING b ON a.id = b.id + WHEN MATCHED THEN + UPDATE set a.x = a.x + b.x + WHERE b.x != 2 + DELETE WHERE a.x <> 3"#; + + let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap(); + assert_eq!(1, r.len()); + + // ~ assert the span of the whole statement + let stmt_span = r[0].span(); + assert_eq!( + stmt_span, + Span::new(Location::new(2, 8), Location::new(6, 36)) + ); + } + + #[test] + fn test_merge_statement_spans_with_insert_predicate() { + let sql = r#" + MERGE INTO a USING b ON a.id = b.id + WHEN NOT MATCHED THEN + INSERT VALUES (b.x, b.y) WHERE b.x != 2 +-- qed +"#; + + let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap(); + assert_eq!(1, r.len()); + + // ~ assert the span of the whole statement + let stmt_span = r[0].span(); + assert_eq!( + stmt_span, + Span::new(Location::new(2, 8), Location::new(4, 52)) + ); + } } diff --git a/src/ast/value.rs b/src/ast/value.rs index fdfa6a67..ccbb12a3 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -167,6 +167,12 @@ pub enum Value { TripleDoubleQuotedRawStringLiteral(String), /// N'string value' NationalStringLiteral(String), + /// Quote delimited literal. 
Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + QuoteDelimitedStringLiteral(QuoteDelimitedString), + /// "National" quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + NationalQuoteDelimitedStringLiteral(QuoteDelimitedString), /// X'hex value' HexStringLiteral(String), @@ -207,6 +213,8 @@ impl Value { | Value::NationalStringLiteral(s) | Value::HexStringLiteral(s) => Some(s), Value::DollarQuotedString(s) => Some(s.value), + Value::QuoteDelimitedStringLiteral(s) => Some(s.value), + Value::NationalQuoteDelimitedStringLiteral(s) => Some(s.value), _ => None, } } @@ -242,6 +250,8 @@ impl fmt::Display for Value { Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{v}'"), + Value::QuoteDelimitedStringLiteral(v) => v.fmt(f), + Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"), Value::HexStringLiteral(v) => write!(f, "X'{v}'"), Value::Boolean(v) => write!(f, "{v}"), Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"), @@ -279,6 +289,28 @@ impl fmt::Display for DollarQuotedString { } } +/// A quote delimited string literal, e.g. `Q'_abc_'`. +/// +/// See [Value::QuoteDelimitedStringLiteral] and/or +/// [Value::NationalQuoteDelimitedStringLiteral]. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct QuoteDelimitedString { + /// the quote start character; i.e. 
the character _after_ the opening `Q'` + pub start_quote: char, + /// the string literal value itself + pub value: String, + /// the quote end character; i.e. the character _before_ the closing `'` + pub end_quote: char, +} + +impl fmt::Display for QuoteDelimitedString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Q'{}{}{}'", self.start_quote, self.value, self.end_quote) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index dffc5b52..bbedbc05 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -195,4 +195,8 @@ impl Dialect for GenericDialect { fn supports_interval_options(&self) -> bool { true } + + fn supports_quote_delimited_string(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 83c6da48..1a416e4d 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -603,7 +603,7 @@ pub trait Dialect: Debug + Any { false } - /// Return true if the dialect supports specifying multiple options + /// Returns true if the dialect supports specifying multiple options /// in a `CREATE TABLE` statement for the structure of the new table. For example: /// `CREATE TABLE t (a INT, b INT) AS SELECT 1 AS b, 2 AS a` fn supports_create_table_multi_schema_info_sources(&self) -> bool { @@ -1209,6 +1209,13 @@ pub trait Dialect: Debug + Any { fn supports_semantic_view_table_factor(&self) -> bool { false } + + /// Support quote delimited string literals, e.g. 
`Q'{...}'` + /// + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + fn supports_quote_delimited_string(&self) -> bool { + false + } } /// This represents the operators for which precedence must be defined diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index e1902b38..faf3402c 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -21,14 +21,12 @@ use crate::ast::{ GranteesType, IfStatement, Statement, }; use crate::dialect::Dialect; -use crate::keywords::{self, Keyword}; +use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; use crate::tokenizer::Token; #[cfg(not(feature = "std"))] use alloc::{vec, vec::Vec}; -const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[Keyword::IF, Keyword::ELSE]; - /// A [`Dialect`] for [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/) #[derive(Debug)] pub struct MsSqlDialect {} @@ -128,8 +126,22 @@ impl Dialect for MsSqlDialect { &[GranteesType::Public] } - fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool { - !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw) && !RESERVED_FOR_COLUMN_ALIAS.contains(kw) + fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + match kw { + // List of keywords that cannot be used as select item aliases in MSSQL + // regardless of whether the alias is explicit or implicit + Keyword::IF | Keyword::ELSE => false, + _ => explicit || self.is_column_alias(kw, parser), + } + } + + fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + match kw { + // List of keywords that cannot be used as table aliases in MSSQL + // regardless of whether the alias is explicit or implicit + Keyword::IF | Keyword::ELSE => false, + _ => explicit || self.is_table_alias(kw, parser), + } } fn parse_statement(&self, parser: &mut Parser) -> Option> { diff --git a/src/dialect/oracle.rs 
b/src/dialect/oracle.rs index 0d6aee5e..54c2ace5 100644 --- a/src/dialect/oracle.rs +++ b/src/dialect/oracle.rs @@ -15,7 +15,14 @@ // specific language governing permissions and limitations // under the License. -use super::Dialect; +use log::debug; + +use crate::{ + parser::{Parser, ParserError}, + tokenizer::Token, +}; + +use super::{Dialect, Precedence}; /// A [`Dialect`] for [Oracle Databases](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/index.html) #[derive(Debug)] @@ -75,7 +82,21 @@ impl Dialect for OracleDialect { true } + fn get_next_precedence(&self, parser: &Parser) -> Option> { + let t = parser.peek_token(); + debug!("get_next_precedence() {t:?}"); + + match t.token { + Token::StringConcat => Some(Ok(self.prec_value(Precedence::PlusMinus))), + _ => None, + } + } + fn supports_group_by_expr(&self) -> bool { true } + + fn supports_quote_delimited_string(&self) -> bool { + true + } } diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 4cfaddce..ed01c128 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -221,6 +221,11 @@ impl Dialect for SnowflakeDialect { return Some(parse_alter_dynamic_table(parser)); } + if parser.parse_keywords(&[Keyword::ALTER, Keyword::EXTERNAL, Keyword::TABLE]) { + // ALTER EXTERNAL TABLE + return Some(parse_alter_external_table(parser)); + } + if parser.parse_keywords(&[Keyword::ALTER, Keyword::SESSION]) { // ALTER SESSION let set = match parser.parse_one_of_keywords(&[Keyword::SET, Keyword::UNSET]) { @@ -619,7 +624,7 @@ fn parse_alter_dynamic_table(parser: &mut Parser) -> Result Result +fn parse_alter_external_table(parser: &mut Parser) -> Result { + let if_exists = parser.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let table_name = parser.parse_object_name(true)?; + + // Parse the operation (REFRESH for now) + let operation = if parser.parse_keyword(Keyword::REFRESH) { + // Optional subpath for refreshing specific partitions + let subpath = match 
parser.peek_token().token { + Token::SingleQuotedString(s) => { + parser.next_token(); + Some(s) + } + _ => None, + }; + AlterTableOperation::Refresh { subpath } + } else { + return parser.expected("REFRESH after ALTER EXTERNAL TABLE", parser.peek_token()); + }; + + let end_token = if parser.peek_token_ref().token == Token::SemiColon { + parser.peek_token_ref().clone() + } else { + parser.get_current_token().clone() + }; + + Ok(Statement::AlterTable(AlterTable { + name: table_name, + if_exists, + only: false, + operations: vec![operation], + location: None, + on_cluster: None, + table_type: Some(AlterTableType::External), + end_token: AttachedToken(end_token), + })) +} + /// Parse snowflake alter session. /// fn parse_alter_session(parser: &mut Parser, set: bool) -> Result { diff --git a/src/keywords.rs b/src/keywords.rs index 827df1ce..f06842ec 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -67,6 +67,15 @@ macro_rules! define_keywords { pub const ALL_KEYWORDS: &[&str] = &[ $($ident),* ]; + + impl core::fmt::Display for Keyword { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match self { + Keyword::NoKeyword => write!(f, "NoKeyword"), + $(Keyword::$ident => write!(f, "{}", $ident),)* + } + } + } }; } diff --git a/src/lib.rs b/src/lib.rs index dbfd1791..4050173c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -153,6 +153,7 @@ // Splitting complex nodes (expressions, statements, types) into separate types // would bloat the API and hide intent. Extra memory is a worthwhile tradeoff. #![allow(clippy::large_enum_variant)] +#![forbid(clippy::unreachable)] // Allow proc-macros to find this crate extern crate self as sqlparser; diff --git a/src/parser/merge.rs b/src/parser/merge.rs new file mode 100644 index 00000000..2bc1544f --- /dev/null +++ b/src/parser/merge.rs @@ -0,0 +1,242 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! SQL Parser for a `MERGE` statement + +#[cfg(not(feature = "std"))] +use alloc::{boxed::Box, format, vec, vec::Vec}; + +use crate::{ + ast::{ + Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, + MergeUpdateExpr, ObjectName, OutputClause, SetExpr, Statement, + }, + dialect::{BigQueryDialect, GenericDialect, MySqlDialect}, + keywords::Keyword, + parser::IsOptional, + tokenizer::TokenWithSpan, +}; + +use super::{Parser, ParserError}; + +impl Parser<'_> { + /// Parse a `MERGE` statement, returning a `Box`ed SetExpr + /// + /// This is used to reduce the size of the stack frames in debug builds + pub(super) fn parse_merge_setexpr_boxed( + &mut self, + merge_token: TokenWithSpan, + ) -> Result, ParserError> { + Ok(Box::new(SetExpr::Merge(self.parse_merge(merge_token)?))) + } + + pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { + let into = self.parse_keyword(Keyword::INTO); + + let table = self.parse_table_factor()?; + + self.expect_keyword_is(Keyword::USING)?; + let source = self.parse_table_factor()?; + self.expect_keyword_is(Keyword::ON)?; + let on = self.parse_expr()?; + let clauses = self.parse_merge_clauses()?; + let output = match self.parse_one_of_keywords(&[Keyword::OUTPUT, Keyword::RETURNING]) { + Some(keyword) => Some(self.parse_output(keyword, self.get_current_token().clone())?), + None => None, + }; + + Ok(Statement::Merge(Merge { + merge_token: merge_token.into(), + into, + table, + source, + on: Box::new(on), + clauses, + output, + })) + } + + fn parse_merge_clauses(&mut self) 
-> Result, ParserError> { + let mut clauses = vec![]; + loop { + if !(self.parse_keyword(Keyword::WHEN)) { + break; + } + let when_token = self.get_current_token().clone(); + + let mut clause_kind = MergeClauseKind::Matched; + if self.parse_keyword(Keyword::NOT) { + clause_kind = MergeClauseKind::NotMatched; + } + self.expect_keyword_is(Keyword::MATCHED)?; + + if matches!(clause_kind, MergeClauseKind::NotMatched) + && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE]) + { + clause_kind = MergeClauseKind::NotMatchedBySource; + } else if matches!(clause_kind, MergeClauseKind::NotMatched) + && self.parse_keywords(&[Keyword::BY, Keyword::TARGET]) + { + clause_kind = MergeClauseKind::NotMatchedByTarget; + } + + let predicate = if self.parse_keyword(Keyword::AND) { + Some(self.parse_expr()?) + } else { + None + }; + + self.expect_keyword_is(Keyword::THEN)?; + + let merge_clause = match self.parse_one_of_keywords(&[ + Keyword::UPDATE, + Keyword::INSERT, + Keyword::DELETE, + ]) { + Some(Keyword::UPDATE) => { + if matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return parser_err!( + format_args!("UPDATE is not allowed in a {clause_kind} merge clause"), + self.get_current_token().span.start + ); + } + + let update_token = self.get_current_token().clone(); + self.expect_keyword_is(Keyword::SET)?; + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + let update_predicate = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + let delete_predicate = if self.parse_keyword(Keyword::DELETE) { + let _ = self.expect_keyword(Keyword::WHERE)?; + Some(self.parse_expr()?) 
+ } else { + None + }; + MergeAction::Update(MergeUpdateExpr { + update_token: update_token.into(), + assignments, + update_predicate, + delete_predicate, + }) + } + Some(Keyword::DELETE) => { + if matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return parser_err!( + format_args!("DELETE is not allowed in a {clause_kind} merge clause"), + self.get_current_token().span.start + ); + }; + + let delete_token = self.get_current_token().clone(); + MergeAction::Delete { + delete_token: delete_token.into(), + } + } + Some(Keyword::INSERT) => { + if !matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return parser_err!( + format_args!("INSERT is not allowed in a {clause_kind} merge clause"), + self.get_current_token().span.start + ); + }; + + let insert_token = self.get_current_token().clone(); + let is_mysql = dialect_of!(self is MySqlDialect); + + let columns = self.parse_merge_clause_insert_columns(is_mysql)?; + let (kind, kind_token) = if dialect_of!(self is BigQueryDialect | GenericDialect) + && self.parse_keyword(Keyword::ROW) + { + (MergeInsertKind::Row, self.get_current_token().clone()) + } else { + self.expect_keyword_is(Keyword::VALUES)?; + let values_token = self.get_current_token().clone(); + let values = self.parse_values(is_mysql, false)?; + (MergeInsertKind::Values(values), values_token) + }; + let insert_predicate = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) 
+ } else { + None + }; + + MergeAction::Insert(MergeInsertExpr { + insert_token: insert_token.into(), + columns, + kind_token: kind_token.into(), + kind, + insert_predicate, + }) + } + _ => { + return parser_err!( + "expected UPDATE, DELETE or INSERT in merge clause", + self.peek_token_ref().span.start + ); + } + }; + clauses.push(MergeClause { + when_token: when_token.into(), + clause_kind, + predicate, + action: merge_clause, + }); + } + Ok(clauses) + } + + fn parse_merge_clause_insert_columns( + &mut self, + allow_empty: bool, + ) -> Result, ParserError> { + self.parse_parenthesized_qualified_column_list(IsOptional::Optional, allow_empty) + } + + fn parse_output( + &mut self, + start_keyword: Keyword, + start_token: TokenWithSpan, + ) -> Result { + let select_items = self.parse_projection()?; + let into_table = if start_keyword == Keyword::OUTPUT && self.peek_keyword(Keyword::INTO) { + self.expect_keyword_is(Keyword::INTO)?; + Some(self.parse_select_into()?) + } else { + None + }; + + Ok(if start_keyword == Keyword::OUTPUT { + OutputClause::Output { + output_token: start_token.into(), + select_items, + into_table, + } + } else { + OutputClause::Returning { + returning_token: start_token.into(), + select_items, + } + }) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b2fa3b16..d1c4fe05 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -32,21 +32,22 @@ use recursion::RecursionCounter; use IsLateral::*; use IsOptional::*; -use crate::ast::helpers::{ - key_value_options::{ - KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter, - }, - stmt_create_table::{CreateTableBuilder, CreateTableConfiguration}, -}; use crate::ast::Statement::CreatePolicy; use crate::ast::*; +use crate::ast::{ + comments, + helpers::{ + key_value_options::{ + KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter, + }, + stmt_create_table::{CreateTableBuilder, CreateTableConfiguration}, + }, +}; use crate::dialect::*; use 
crate::keywords::{Keyword, ALL_KEYWORDS}; use crate::tokenizer::*; use sqlparser::parser::ParserState::ColumnDefinition; -mod alter; - #[derive(Debug, Clone, PartialEq, Eq)] pub enum ParserError { TokenizerError(String), @@ -61,6 +62,9 @@ macro_rules! parser_err { }; } +mod alter; +mod merge; + #[cfg(feature = "std")] /// Implementation [`RecursionCounter`] if std is available mod recursion { @@ -529,6 +533,44 @@ impl<'a> Parser<'a> { Parser::new(dialect).try_with_sql(sql)?.parse_statements() } + /// Parses the given `sql` into an Abstract Syntax Tree (AST), returning + /// also encountered source code comments. + /// + /// See [Parser::parse_sql]. + pub fn parse_sql_with_comments( + dialect: &'a dyn Dialect, + sql: &str, + ) -> Result<(Vec, comments::Comments), ParserError> { + let mut p = Parser::new(dialect).try_with_sql(sql)?; + p.parse_statements().map(|stmts| (stmts, p.into_comments())) + } + + /// Consumes this parser returning comments from the parsed token stream. + fn into_comments(self) -> comments::Comments { + let mut comments = comments::Comments::default(); + for t in self.tokens.into_iter() { + match t.token { + Token::Whitespace(Whitespace::SingleLineComment { comment, prefix }) => { + comments.offer(comments::CommentWithSpan { + comment: comments::Comment::SingleLine { + content: comment, + prefix, + }, + span: t.span, + }); + } + Token::Whitespace(Whitespace::MultiLineComment(comment)) => { + comments.offer(comments::CommentWithSpan { + comment: comments::Comment::MultiLine(comment), + span: t.span, + }); + } + _ => {} + } + } + comments + } + /// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.), /// stopping before the statement separator, if any. 
pub fn parse_statement(&mut self) -> Result { @@ -1194,7 +1236,11 @@ impl<'a> Parser<'a> { let mut id_parts: Vec = vec![match t { Token::Word(w) => w.into_ident(next_token.span), Token::SingleQuotedString(s) => Ident::with_quote('\'', s), - _ => unreachable!(), // We matched above + _ => { + return Err(ParserError::ParserError( + "Internal parser error: unexpected token type".to_string(), + )) + } }]; while self.consume_token(&Token::Period) { @@ -1222,6 +1268,15 @@ impl<'a> Parser<'a> { Token::Mul => { return Ok(Expr::Wildcard(AttachedToken(next_token))); } + // Handle parenthesized wildcard: (*) + Token::LParen => { + let [maybe_mul, maybe_rparen] = self.peek_tokens_ref(); + if maybe_mul.token == Token::Mul && maybe_rparen.token == Token::RParen { + let mul_token = self.next_token(); // consume Mul + self.next_token(); // consume RParen + return Ok(Expr::Wildcard(AttachedToken(mul_token))); + } + } _ => (), }; @@ -1641,7 +1696,11 @@ impl<'a> Parser<'a> { Token::PGSquareRoot => UnaryOperator::PGSquareRoot, Token::PGCubeRoot => UnaryOperator::PGCubeRoot, Token::AtSign => UnaryOperator::PGAbs, - _ => unreachable!(), + _ => { + return Err(ParserError::ParserError( + "Internal parser error: unexpected unary operator token".to_string(), + )) + } }; Ok(Expr::UnaryOp { op, @@ -1704,23 +1763,29 @@ impl<'a> Parser<'a> { | Token::TripleSingleQuotedRawStringLiteral(_) | Token::TripleDoubleQuotedRawStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_) + | Token::NationalQuoteDelimitedStringLiteral(_) | Token::HexStringLiteral(_) => { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) } Token::LParen => { - let expr = if let Some(expr) = self.try_parse_expr_sub_query()? { - expr - } else if let Some(lambda) = self.try_parse_lambda()? 
{ - return Ok(lambda); - } else { - let exprs = self.parse_comma_separated(Parser::parse_expr)?; - match exprs.len() { - 0 => unreachable!(), // parse_comma_separated ensures 1 or more - 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), - _ => Expr::Tuple(exprs), - } - }; + let expr = + if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Some(lambda) = self.try_parse_lambda()? { + return Ok(lambda); + } else { + let exprs = self.parse_comma_separated(Parser::parse_expr)?; + match exprs.len() { + 0 => return Err(ParserError::ParserError( + "Internal parser error: parse_comma_separated returned empty list" + .to_string(), + )), + 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), + _ => Expr::Tuple(exprs), + } + }; self.expect_token(&Token::RParen)?; Ok(expr) } @@ -2716,6 +2781,8 @@ impl<'a> Parser<'a> { | Token::EscapedStringLiteral(_) | Token::UnicodeStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_) + | Token::NationalQuoteDelimitedStringLiteral(_) | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), _ => self.expected( "either filler, WITH, or WITHOUT in LISTAGG", @@ -3591,7 +3658,9 @@ impl<'a> Parser<'a> { right: Box::new(right), is_some: keyword == Keyword::SOME, }, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{ALL, ANY, SOME}}, got {unexpected_keyword:?}"), + )), }) } else { Ok(Expr::BinaryOp { @@ -5590,13 +5659,14 @@ impl<'a> Parser<'a> { } else { None }; - let option = self - .parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) - .map(|keyword| match keyword { - Keyword::CASCADE => ReferentialAction::Cascade, - Keyword::RESTRICT => ReferentialAction::Restrict, - _ => unreachable!(), - }); + let option = match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { + Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), + 
Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), + Some(unexpected_keyword) => return Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{CASCADE, RESTRICT}}, got {unexpected_keyword:?}"), + )), + None => None, + }; Ok(Statement::DropTrigger(DropTrigger { if_exists, trigger_name, @@ -5646,7 +5716,9 @@ impl<'a> Parser<'a> { match self.expect_one_of_keywords(&[Keyword::ROW, Keyword::STATEMENT])? { Keyword::ROW => TriggerObject::Row, Keyword::STATEMENT => TriggerObject::Statement, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in ROW/STATEMENT"), + )), }; Some(if include_each { @@ -5709,7 +5781,9 @@ impl<'a> Parser<'a> { Keyword::INSTEAD => self .expect_keyword_is(Keyword::OF) .map(|_| TriggerPeriod::InsteadOf)?, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in trigger period"), + )), }, ) } @@ -5733,7 +5807,9 @@ impl<'a> Parser<'a> { } Keyword::DELETE => TriggerEvent::Delete, Keyword::TRUNCATE => TriggerEvent::Truncate, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in trigger event"), + )), }, ) } @@ -5767,7 +5843,9 @@ impl<'a> Parser<'a> { { Keyword::FUNCTION => TriggerExecBodyType::Function, Keyword::PROCEDURE => TriggerExecBodyType::Procedure, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in trigger exec body"), + )), }, func_desc: self.parse_function_desc()?, }) @@ -6284,7 +6362,9 @@ impl<'a> Parser<'a> { Some(Keyword::CURRENT_USER) => Owner::CurrentUser, Some(Keyword::CURRENT_ROLE) => Owner::CurrentRole, Some(Keyword::SESSION_USER) => Owner::SessionUser, - Some(_) => 
unreachable!(), + Some(unexpected_keyword) => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in owner"), + )), None => { match self.parse_identifier() { Ok(ident) => Owner::Ident(ident), @@ -6346,7 +6426,9 @@ impl<'a> Parser<'a> { Some(match keyword { Keyword::PERMISSIVE => CreatePolicyType::Permissive, Keyword::RESTRICTIVE => CreatePolicyType::Restrictive, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in policy type"), + )), }) } else { None @@ -6366,7 +6448,9 @@ impl<'a> Parser<'a> { Keyword::INSERT => CreatePolicyCommand::Insert, Keyword::UPDATE => CreatePolicyCommand::Update, Keyword::DELETE => CreatePolicyCommand::Delete, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in policy command"), + )), }) } else { None @@ -6479,12 +6563,7 @@ impl<'a> Parser<'a> { let mut is_procedure = false; let mut left_arg: Option = None; let mut right_arg: Option = None; - let mut commutator: Option = None; - let mut negator: Option = None; - let mut restrict: Option = None; - let mut join: Option = None; - let mut hashes = false; - let mut merges = false; + let mut options: Vec = Vec::new(); loop { let keyword = self.expect_one_of_keywords(&[ @@ -6501,11 +6580,11 @@ impl<'a> Parser<'a> { ])?; match keyword { - Keyword::HASHES if !hashes => { - hashes = true; + Keyword::HASHES if !options.iter().any(|o| matches!(o, OperatorOption::Hashes)) => { + options.push(OperatorOption::Hashes); } - Keyword::MERGES if !merges => { - merges = true; + Keyword::MERGES if !options.iter().any(|o| matches!(o, OperatorOption::Merges)) => { + options.push(OperatorOption::Merges); } Keyword::FUNCTION | Keyword::PROCEDURE if function.is_none() => { self.expect_token(&Token::Eq)?; @@ -6520,33 +6599,49 @@ impl<'a> 
Parser<'a> { self.expect_token(&Token::Eq)?; right_arg = Some(self.parse_data_type()?); } - Keyword::COMMUTATOR if commutator.is_none() => { + Keyword::COMMUTATOR + if !options + .iter() + .any(|o| matches!(o, OperatorOption::Commutator(_))) => + { self.expect_token(&Token::Eq)?; if self.parse_keyword(Keyword::OPERATOR) { self.expect_token(&Token::LParen)?; - commutator = Some(self.parse_operator_name()?); + let op = self.parse_operator_name()?; self.expect_token(&Token::RParen)?; + options.push(OperatorOption::Commutator(op)); } else { - commutator = Some(self.parse_operator_name()?); + options.push(OperatorOption::Commutator(self.parse_operator_name()?)); } } - Keyword::NEGATOR if negator.is_none() => { + Keyword::NEGATOR + if !options + .iter() + .any(|o| matches!(o, OperatorOption::Negator(_))) => + { self.expect_token(&Token::Eq)?; if self.parse_keyword(Keyword::OPERATOR) { self.expect_token(&Token::LParen)?; - negator = Some(self.parse_operator_name()?); + let op = self.parse_operator_name()?; self.expect_token(&Token::RParen)?; + options.push(OperatorOption::Negator(op)); } else { - negator = Some(self.parse_operator_name()?); + options.push(OperatorOption::Negator(self.parse_operator_name()?)); } } - Keyword::RESTRICT if restrict.is_none() => { + Keyword::RESTRICT + if !options + .iter() + .any(|o| matches!(o, OperatorOption::Restrict(_))) => + { self.expect_token(&Token::Eq)?; - restrict = Some(self.parse_object_name(false)?); + options.push(OperatorOption::Restrict(Some( + self.parse_object_name(false)?, + ))); } - Keyword::JOIN if join.is_none() => { + Keyword::JOIN if !options.iter().any(|o| matches!(o, OperatorOption::Join(_))) => { self.expect_token(&Token::Eq)?; - join = Some(self.parse_object_name(false)?); + options.push(OperatorOption::Join(Some(self.parse_object_name(false)?))); } _ => { return Err(ParserError::ParserError(format!( @@ -6575,12 +6670,7 @@ impl<'a> Parser<'a> { is_procedure, left_arg, right_arg, - commutator, - negator, - restrict, 
- join, - hashes, - merges, + options, })) } @@ -6620,7 +6710,7 @@ impl<'a> Parser<'a> { let mut items = vec![]; loop { if self.parse_keyword(Keyword::OPERATOR) { - let strategy_number = self.parse_literal_uint()? as u32; + let strategy_number = self.parse_literal_uint()?; let operator_name = self.parse_operator_name()?; // Optional operator argument types @@ -6655,7 +6745,7 @@ impl<'a> Parser<'a> { purpose, }); } else if self.parse_keyword(Keyword::FUNCTION) { - let support_number = self.parse_literal_uint()? as u32; + let support_number = self.parse_literal_uint()?; // Optional operator types let op_types = @@ -6997,7 +7087,9 @@ impl<'a> Parser<'a> { match keyword { Keyword::WITH => Some(true), Keyword::WITHOUT => Some(false), - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in cursor hold"), + )), } } None => None, @@ -9764,7 +9856,9 @@ impl<'a> Parser<'a> { Keyword::PART => Ok(Partition::Part(self.parse_expr()?)), Keyword::PARTITION => Ok(Partition::Expr(self.parse_expr()?)), // unreachable because expect_one_of_keywords used above - _ => unreachable!(), + unexpected_keyword => Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{PART, PARTITION}}, got {unexpected_keyword:?}"), + )), } } @@ -9780,6 +9874,7 @@ impl<'a> Parser<'a> { Keyword::ICEBERG, Keyword::SCHEMA, Keyword::USER, + Keyword::OPERATOR, ])?; match object_type { Keyword::SCHEMA => { @@ -9812,12 +9907,21 @@ impl<'a> Parser<'a> { operation, }) } + Keyword::OPERATOR => { + if self.parse_keyword(Keyword::FAMILY) { + self.parse_alter_operator_family() + } else { + self.parse_alter_operator() + } + } Keyword::ROLE => self.parse_alter_role(), Keyword::POLICY => self.parse_alter_policy(), Keyword::CONNECTOR => self.parse_alter_connector(), Keyword::USER => self.parse_alter_user(), // unreachable because expect_one_of_keywords used above - _ => unreachable!(), + 
unexpected_keyword => Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{VIEW, TYPE, TABLE, INDEX, ROLE, POLICY, CONNECTOR, ICEBERG, SCHEMA, USER, OPERATOR}}, got {unexpected_keyword:?}"), + )), } } @@ -9931,6 +10035,280 @@ impl<'a> Parser<'a> { } } + /// Parse a [Statement::AlterOperator] + /// + /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-alteroperator.html) + pub fn parse_alter_operator(&mut self) -> Result { + let name = self.parse_operator_name()?; + + // Parse (left_type, right_type) + self.expect_token(&Token::LParen)?; + + let left_type = if self.parse_keyword(Keyword::NONE) { + None + } else { + Some(self.parse_data_type()?) + }; + + self.expect_token(&Token::Comma)?; + let right_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + + // Parse the operation + let operation = if self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) { + let owner = if self.parse_keyword(Keyword::CURRENT_ROLE) { + Owner::CurrentRole + } else if self.parse_keyword(Keyword::CURRENT_USER) { + Owner::CurrentUser + } else if self.parse_keyword(Keyword::SESSION_USER) { + Owner::SessionUser + } else { + Owner::Ident(self.parse_identifier()?) + }; + AlterOperatorOperation::OwnerTo(owner) + } else if self.parse_keywords(&[Keyword::SET, Keyword::SCHEMA]) { + let schema_name = self.parse_object_name(false)?; + AlterOperatorOperation::SetSchema { schema_name } + } else if self.parse_keyword(Keyword::SET) { + self.expect_token(&Token::LParen)?; + + let mut options = Vec::new(); + loop { + let keyword = self.expect_one_of_keywords(&[ + Keyword::RESTRICT, + Keyword::JOIN, + Keyword::COMMUTATOR, + Keyword::NEGATOR, + Keyword::HASHES, + Keyword::MERGES, + ])?; + + match keyword { + Keyword::RESTRICT => { + self.expect_token(&Token::Eq)?; + let proc_name = if self.parse_keyword(Keyword::NONE) { + None + } else { + Some(self.parse_object_name(false)?) 
+ }; + options.push(OperatorOption::Restrict(proc_name)); + } + Keyword::JOIN => { + self.expect_token(&Token::Eq)?; + let proc_name = if self.parse_keyword(Keyword::NONE) { + None + } else { + Some(self.parse_object_name(false)?) + }; + options.push(OperatorOption::Join(proc_name)); + } + Keyword::COMMUTATOR => { + self.expect_token(&Token::Eq)?; + let op_name = self.parse_operator_name()?; + options.push(OperatorOption::Commutator(op_name)); + } + Keyword::NEGATOR => { + self.expect_token(&Token::Eq)?; + let op_name = self.parse_operator_name()?; + options.push(OperatorOption::Negator(op_name)); + } + Keyword::HASHES => { + options.push(OperatorOption::Hashes); + } + Keyword::MERGES => { + options.push(OperatorOption::Merges); + } + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in operator option"), + )), + } + + if !self.consume_token(&Token::Comma) { + break; + } + } + + self.expect_token(&Token::RParen)?; + AlterOperatorOperation::Set { options } + } else { + return self.expected_ref( + "OWNER TO, SET SCHEMA, or SET after ALTER OPERATOR", + self.peek_token_ref(), + ); + }; + + Ok(Statement::AlterOperator(AlterOperator { + name, + left_type, + right_type, + operation, + })) + } + + /// Parse an operator item for ALTER OPERATOR FAMILY ADD operations + fn parse_operator_family_add_operator(&mut self) -> Result { + let strategy_number = self.parse_literal_uint()?; + let operator_name = self.parse_operator_name()?; + + // Operator argument types (required for ALTER OPERATOR FAMILY) + self.expect_token(&Token::LParen)?; + let op_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + + // Optional purpose + let purpose = if self.parse_keyword(Keyword::FOR) { + if self.parse_keyword(Keyword::SEARCH) { + Some(OperatorPurpose::ForSearch) + } else if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + let sort_family = 
self.parse_object_name(false)?; + Some(OperatorPurpose::ForOrderBy { sort_family }) + } else { + return self.expected("SEARCH or ORDER BY after FOR", self.peek_token()); + } + } else { + None + }; + + Ok(OperatorFamilyItem::Operator { + strategy_number, + operator_name, + op_types, + purpose, + }) + } + + /// Parse a function item for ALTER OPERATOR FAMILY ADD operations + fn parse_operator_family_add_function(&mut self) -> Result { + let support_number = self.parse_literal_uint()?; + + // Optional operator types + let op_types = if self.consume_token(&Token::LParen) && self.peek_token() != Token::RParen { + let types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + Some(types) + } else if self.consume_token(&Token::LParen) { + self.expect_token(&Token::RParen)?; + Some(vec![]) + } else { + None + }; + + let function_name = self.parse_object_name(false)?; + + // Function argument types + let argument_types = if self.consume_token(&Token::LParen) { + if self.peek_token() == Token::RParen { + self.expect_token(&Token::RParen)?; + vec![] + } else { + let types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + types + } + } else { + vec![] + }; + + Ok(OperatorFamilyItem::Function { + support_number, + op_types, + function_name, + argument_types, + }) + } + + /// Parse an operator item for ALTER OPERATOR FAMILY DROP operations + fn parse_operator_family_drop_operator( + &mut self, + ) -> Result { + let strategy_number = self.parse_literal_uint()?; + + // Operator argument types (required for DROP) + self.expect_token(&Token::LParen)?; + let op_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + + Ok(OperatorFamilyDropItem::Operator { + strategy_number, + op_types, + }) + } + + /// Parse a function item for ALTER OPERATOR FAMILY DROP operations + fn parse_operator_family_drop_function( + &mut self, + ) -> Result { + let 
support_number = self.parse_literal_uint()?; + + // Operator types (required for DROP) + self.expect_token(&Token::LParen)?; + let op_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + + Ok(OperatorFamilyDropItem::Function { + support_number, + op_types, + }) + } + + /// Parse an operator family item for ADD operations (dispatches to operator or function parsing) + fn parse_operator_family_add_item(&mut self) -> Result { + if self.parse_keyword(Keyword::OPERATOR) { + self.parse_operator_family_add_operator() + } else if self.parse_keyword(Keyword::FUNCTION) { + self.parse_operator_family_add_function() + } else { + self.expected("OPERATOR or FUNCTION", self.peek_token()) + } + } + + /// Parse an operator family item for DROP operations (dispatches to operator or function parsing) + fn parse_operator_family_drop_item(&mut self) -> Result { + if self.parse_keyword(Keyword::OPERATOR) { + self.parse_operator_family_drop_operator() + } else if self.parse_keyword(Keyword::FUNCTION) { + self.parse_operator_family_drop_function() + } else { + self.expected("OPERATOR or FUNCTION", self.peek_token()) + } + } + + /// Parse a [Statement::AlterOperatorFamily] + /// See + pub fn parse_alter_operator_family(&mut self) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::USING)?; + let using = self.parse_identifier()?; + + let operation = if self.parse_keyword(Keyword::ADD) { + let items = self.parse_comma_separated(Parser::parse_operator_family_add_item)?; + AlterOperatorFamilyOperation::Add { items } + } else if self.parse_keyword(Keyword::DROP) { + let items = self.parse_comma_separated(Parser::parse_operator_family_drop_item)?; + AlterOperatorFamilyOperation::Drop { items } + } else if self.parse_keywords(&[Keyword::RENAME, Keyword::TO]) { + let new_name = self.parse_object_name(false)?; + AlterOperatorFamilyOperation::RenameTo { new_name } + } else if self.parse_keywords(&[Keyword::OWNER, 
Keyword::TO]) { + let owner = self.parse_owner()?; + AlterOperatorFamilyOperation::OwnerTo(owner) + } else if self.parse_keywords(&[Keyword::SET, Keyword::SCHEMA]) { + let schema_name = self.parse_object_name(false)?; + AlterOperatorFamilyOperation::SetSchema { schema_name } + } else { + return self.expected_ref( + "ADD, DROP, RENAME TO, OWNER TO, or SET SCHEMA after ALTER OPERATOR FAMILY", + self.peek_token_ref(), + ); + }; + + Ok(Statement::AlterOperatorFamily(AlterOperatorFamily { + name, + using, + operation, + })) + } + // Parse a [Statement::AlterSchema] // ALTER SCHEMA [ IF EXISTS ] schema_name pub fn parse_alter_schema(&mut self) -> Result { @@ -10502,6 +10880,12 @@ impl<'a> Parser<'a> { Token::NationalStringLiteral(ref s) => { ok_value(Value::NationalStringLiteral(s.to_string())) } + Token::QuoteDelimitedStringLiteral(v) => { + ok_value(Value::QuoteDelimitedStringLiteral(v)) + } + Token::NationalQuoteDelimitedStringLiteral(v) => { + ok_value(Value::NationalQuoteDelimitedStringLiteral(v)) + } Token::EscapedStringLiteral(ref s) => { ok_value(Value::EscapedStringLiteral(s.to_string())) } @@ -11349,16 +11733,17 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { - // By default, if a word is located after the `AS` keyword we consider it an alias - // as long as it's not reserved. + // Accepts a keyword as an alias if the AS keyword explicitly indicate an alias or if the + // caller provided a list of reserved keywords and the keyword is not on that list. Token::Word(w) - if after_as || reserved_kwds.is_some_and(|x| !x.contains(&w.keyword)) => + if reserved_kwds.is_some() + && (after_as || reserved_kwds.is_some_and(|x| !x.contains(&w.keyword))) => { Ok(Some(w.into_ident(next_token.span))) } - // This pattern allows for customizing the acceptance of words as aliases based on the caller's - // context, such as to what SQL element this word is a potential alias of (select item alias, table name - // alias, etc.) 
or dialect-specific logic that goes beyond a simple list of reserved keywords. + // Accepts a keyword as alias based on the caller's context, such as to what SQL element + // this word is a potential alias of using the validator call-back. This allows for + // dialect-specific logic. Token::Word(w) if validator(after_as, &w.keyword, self) => { Ok(Some(w.into_ident(next_token.span))) } @@ -11729,7 +12114,7 @@ impl<'a> Parser<'a> { token => { return Err(ParserError::ParserError(format!( "Unexpected token in identifier: {token}" - )))? + )))?; } } } @@ -12233,16 +12618,6 @@ impl<'a> Parser<'a> { Ok(Box::new(SetExpr::Delete(self.parse_delete(delete_token)?))) } - /// Parse a MERGE statement, returning a `Box`ed SetExpr - /// - /// This is used to reduce the size of the stack frames in debug builds - fn parse_merge_setexpr_boxed( - &mut self, - merge_token: TokenWithSpan, - ) -> Result, ParserError> { - Ok(Box::new(SetExpr::Merge(self.parse_merge(merge_token)?))) - } - pub fn parse_delete(&mut self, delete_token: TokenWithSpan) -> Result { let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. 
@@ -14175,7 +14550,9 @@ impl<'a> Parser<'a> { table = match kw { Keyword::PIVOT => self.parse_pivot_table_factor(table)?, Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in pivot/unpivot"), + )), } } return Ok(table); @@ -14433,7 +14810,9 @@ impl<'a> Parser<'a> { table = match kw { Keyword::PIVOT => self.parse_pivot_table_factor(table)?, Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in pivot/unpivot"), + )), } } @@ -15532,7 +15911,9 @@ impl<'a> Parser<'a> { } } Some(Keyword::TABLE) | None => Some(GrantObjects::Tables(objects?)), - _ => unreachable!(), + Some(unexpected_keyword) => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in grant objects"), + )), } } } else { @@ -16402,7 +16783,9 @@ impl<'a> Parser<'a> { let kind = match self.expect_one_of_keywords(&[Keyword::MIN, Keyword::MAX])? 
{ Keyword::MIN => HavingBoundKind::Min, Keyword::MAX => HavingBoundKind::Max, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in having bound"), + )), }; clauses.push(FunctionArgumentClause::Having(HavingBound( kind, @@ -16739,10 +17122,10 @@ impl<'a> Parser<'a> { fn parse_order_by_expr_inner( &mut self, with_operator_class: bool, - ) -> Result<(OrderByExpr, Option), ParserError> { + ) -> Result<(OrderByExpr, Option), ParserError> { let expr = self.parse_expr()?; - let operator_class: Option = if with_operator_class { + let operator_class: Option = if with_operator_class { // We check that if non of the following keywords are present, then we parse an // identifier as operator class. if self @@ -16751,7 +17134,7 @@ impl<'a> Parser<'a> { { None } else { - self.maybe_parse(|parser| parser.parse_identifier())? + self.maybe_parse(|parser| parser.parse_object_name(false))? } } else { None @@ -16930,7 +17313,9 @@ impl<'a> Parser<'a> { let lock_type = match self.expect_one_of_keywords(&[Keyword::UPDATE, Keyword::SHARE])? { Keyword::UPDATE => LockType::Update, Keyword::SHARE => LockType::Share, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{UPDATE, SHARE}}, got {unexpected_keyword:?}"), + )), }; let of = if self.parse_keyword(Keyword::OF) { Some(self.parse_object_name(false)?) 
@@ -17214,7 +17599,11 @@ impl<'a> Parser<'a> { { None } else { + let has_parentheses = self.consume_token(&Token::LParen); let name = self.parse_object_name(false)?; + if has_parentheses { + self.expect_token(&Token::RParen)?; + } Some(name) }; @@ -17313,153 +17702,6 @@ impl<'a> Parser<'a> { }) } - pub fn parse_merge_clauses(&mut self) -> Result, ParserError> { - let mut clauses = vec![]; - loop { - if !(self.parse_keyword(Keyword::WHEN)) { - break; - } - let when_token = self.get_current_token().clone(); - - let mut clause_kind = MergeClauseKind::Matched; - if self.parse_keyword(Keyword::NOT) { - clause_kind = MergeClauseKind::NotMatched; - } - self.expect_keyword_is(Keyword::MATCHED)?; - - if matches!(clause_kind, MergeClauseKind::NotMatched) - && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE]) - { - clause_kind = MergeClauseKind::NotMatchedBySource; - } else if matches!(clause_kind, MergeClauseKind::NotMatched) - && self.parse_keywords(&[Keyword::BY, Keyword::TARGET]) - { - clause_kind = MergeClauseKind::NotMatchedByTarget; - } - - let predicate = if self.parse_keyword(Keyword::AND) { - Some(self.parse_expr()?) 
- } else { - None - }; - - self.expect_keyword_is(Keyword::THEN)?; - - let merge_clause = match self.parse_one_of_keywords(&[ - Keyword::UPDATE, - Keyword::INSERT, - Keyword::DELETE, - ]) { - Some(Keyword::UPDATE) => { - if matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return parser_err!( - format_args!("UPDATE is not allowed in a {clause_kind} merge clause"), - self.get_current_token().span.start - ); - } - - let update_token = self.get_current_token().clone(); - self.expect_keyword_is(Keyword::SET)?; - MergeAction::Update { - update_token: update_token.into(), - assignments: self.parse_comma_separated(Parser::parse_assignment)?, - } - } - Some(Keyword::DELETE) => { - if matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return parser_err!( - format_args!("DELETE is not allowed in a {clause_kind} merge clause"), - self.get_current_token().span.start - ); - }; - - let delete_token = self.get_current_token().clone(); - MergeAction::Delete { - delete_token: delete_token.into(), - } - } - Some(Keyword::INSERT) => { - if !matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return parser_err!( - format_args!("INSERT is not allowed in a {clause_kind} merge clause"), - self.get_current_token().span.start - ); - }; - - let insert_token = self.get_current_token().clone(); - let is_mysql = dialect_of!(self is MySqlDialect); - - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; - let (kind, kind_token) = if dialect_of!(self is BigQueryDialect | GenericDialect) - && self.parse_keyword(Keyword::ROW) - { - (MergeInsertKind::Row, self.get_current_token().clone()) - } else { - self.expect_keyword_is(Keyword::VALUES)?; - let values_token = self.get_current_token().clone(); - let values = self.parse_values(is_mysql, false)?; - (MergeInsertKind::Values(values), values_token) - }; - 
MergeAction::Insert(MergeInsertExpr { - insert_token: insert_token.into(), - columns, - kind_token: kind_token.into(), - kind, - }) - } - _ => { - return parser_err!( - "expected UPDATE, DELETE or INSERT in merge clause", - self.peek_token_ref().span.start - ); - } - }; - clauses.push(MergeClause { - when_token: when_token.into(), - clause_kind, - predicate, - action: merge_clause, - }); - } - Ok(clauses) - } - - fn parse_output( - &mut self, - start_keyword: Keyword, - start_token: TokenWithSpan, - ) -> Result { - let select_items = self.parse_projection()?; - let into_table = if start_keyword == Keyword::OUTPUT && self.peek_keyword(Keyword::INTO) { - self.expect_keyword_is(Keyword::INTO)?; - Some(self.parse_select_into()?) - } else { - None - }; - - Ok(if start_keyword == Keyword::OUTPUT { - OutputClause::Output { - output_token: start_token.into(), - select_items, - into_table, - } - } else { - OutputClause::Returning { - returning_token: start_token.into(), - select_items, - } - }) - } - fn parse_select_into(&mut self) -> Result { let temporary = self .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) @@ -17476,32 +17718,6 @@ impl<'a> Parser<'a> { }) } - pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { - let into = self.parse_keyword(Keyword::INTO); - - let table = self.parse_table_factor()?; - - self.expect_keyword_is(Keyword::USING)?; - let source = self.parse_table_factor()?; - self.expect_keyword_is(Keyword::ON)?; - let on = self.parse_expr()?; - let clauses = self.parse_merge_clauses()?; - let output = match self.parse_one_of_keywords(&[Keyword::OUTPUT, Keyword::RETURNING]) { - Some(keyword) => Some(self.parse_output(keyword, self.get_current_token().clone())?), - None => None, - }; - - Ok(Statement::Merge { - merge_token: merge_token.into(), - into, - table, - source, - on: Box::new(on), - clauses, - output, - }) - } - fn parse_pragma_value(&mut self) -> Result { match self.parse_value()?.value { v @ 
Value::SingleQuotedString(_) => Ok(v), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 54a158c1..8666563a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -29,10 +29,10 @@ use alloc::{ vec, vec::Vec, }; -use core::iter::Peekable; use core::num::NonZeroU8; use core::str::Chars; use core::{cmp, fmt}; +use core::{iter::Peekable, str}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -46,7 +46,10 @@ use crate::dialect::{ SnowflakeDialect, }; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; -use crate::{ast::DollarQuotedString, dialect::HiveDialect}; +use crate::{ + ast::{DollarQuotedString, QuoteDelimitedString}, + dialect::HiveDialect, +}; /// SQL Token enumeration #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] pub enum Token { @@ -98,6 +101,12 @@ pub enum Token { TripleDoubleQuotedRawStringLiteral(String), /// "National" string literal: i.e: N'string' NationalStringLiteral(String), + /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + QuoteDelimitedStringLiteral(QuoteDelimitedString), + /// "National" quote delimited literal.
Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + NationalQuoteDelimitedStringLiteral(QuoteDelimitedString), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' EscapedStringLiteral(String), /// Unicode string literal: i.e: U&'first \000A second' @@ -292,6 +301,8 @@ impl fmt::Display for Token { Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""), Token::DollarQuotedString(ref s) => write!(f, "{s}"), Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"), + Token::QuoteDelimitedStringLiteral(ref s) => s.fmt(f), + Token::NationalQuoteDelimitedStringLiteral(ref s) => write!(f, "N{s}"), Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"), Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"), Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"), @@ -1032,6 +1043,18 @@ impl<'a> Tokenizer<'a> { self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; Ok(Some(Token::NationalStringLiteral(s))) } + Some(&q @ 'q') | Some(&q @ 'Q') + if self.dialect.supports_quote_delimited_string() => + { + chars.next(); // consume and check the next char + if let Some('\'') = chars.peek() { + self.tokenize_quote_delimited_string(chars, &[n, q]) + .map(|s| Some(Token::NationalQuoteDelimitedStringLiteral(s))) + } else { + let s = self.tokenize_word(String::from_iter([n, q]), chars); + Ok(Some(Token::make_word(&s, None))) + } + } _ => { // regular identifier starting with an "N" let s = self.tokenize_word(n, chars); @@ -1039,6 +1062,16 @@ impl<'a> Tokenizer<'a> { } } } + q @ 'Q' | q @ 'q' if self.dialect.supports_quote_delimited_string() => { + chars.next(); // consume and check the next char + if let Some('\'') = chars.peek() { + self.tokenize_quote_delimited_string(chars, &[q]) + .map(|s| Some(Token::QuoteDelimitedStringLiteral(s))) + } 
else { + let s = self.tokenize_word(q, chars); + Ok(Some(Token::make_word(&s, None))) + } + } // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => { let starting_loc = chars.location(); @@ -1684,7 +1717,7 @@ impl<'a> Tokenizer<'a> { } } Some('#') => self.consume_and_return(chars, Token::QuestionMarkSharp), - _ => self.consume_and_return(chars, Token::Question), + _ => Ok(Some(Token::Question)), } } '?' => { @@ -1994,6 +2027,61 @@ impl<'a> Tokenizer<'a> { ) } + /// Reads a quote delimited string expecting `chars.next()` to deliver a quote. + /// + /// See + fn tokenize_quote_delimited_string( + &self, + chars: &mut State, + // the prefix that introduced the possible literal or word, + // e.g. "Q" or "nq" + literal_prefix: &[char], + ) -> Result { + let literal_start_loc = chars.location(); + chars.next(); + + let start_quote_loc = chars.location(); + let (start_quote, end_quote) = match chars.next() { + None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => { + return self.tokenizer_error( + start_quote_loc, + format!( + "Invalid space, tab, newline, or EOF after '{}''", + String::from_iter(literal_prefix) + ), + ); + } + Some(c) => ( + c, + match c { + '[' => ']', + '{' => '}', + '<' => '>', + '(' => ')', + c => c, + }, + ), + }; + + // read the string literal until the "quote character" following a by literal quote + let mut value = String::new(); + while let Some(ch) = chars.next() { + if ch == end_quote { + if let Some('\'') = chars.peek() { + chars.next(); // ~ consume the quote + return Ok(QuoteDelimitedString { + start_quote, + value, + end_quote, + }); + } + } + value.push(ch); + } + + self.tokenizer_error(literal_start_loc, "Unterminated string literal") + } + /// Read a quoted string. 
fn tokenize_quoted_string( &self, @@ -4059,4 +4147,23 @@ mod tests { panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}"); } } + + #[test] + fn tokenize_question_mark() { + let dialect = PostgreSqlDialect {}; + let sql = "SELECT x ? y"; + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); + compare( + tokens, + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::make_word("x", None), + Token::Whitespace(Whitespace::Space), + Token::Question, + Token::Whitespace(Whitespace::Space), + Token::make_word("y", None), + ], + ) + } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index f2b9f2af..24b9efca 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1806,15 +1806,16 @@ fn parse_merge() { ); let insert_action = MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("product"), Ident::new("quantity")], + columns: vec![Ident::new("product").into(), Ident::new("quantity").into()], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, explicit_row: false, rows: vec![vec![Expr::value(number("1")), Expr::value(number("2"))]], }), + insert_predicate: None, }); - let update_action = MergeAction::Update { + let update_action = MergeAction::Update(MergeUpdateExpr { update_token: AttachedToken::empty(), assignments: vec![ Assignment { @@ -1826,17 +1827,19 @@ fn parse_merge() { value: Expr::value(number("2")), }, ], - }; + update_predicate: None, + delete_predicate: None, + }); match bigquery_and_generic().verified_stmt(sql) { - Statement::Merge { + Statement::Merge(Merge { into, table, source, on, clauses, .. 
- } => { + }) => { assert!(!into); assert_eq!( TableFactor::Table { @@ -1917,9 +1920,13 @@ fn parse_merge() { predicate: Some(Expr::value(number("1"))), action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("product"), Ident::new("quantity"),], + columns: vec![ + Ident::new("product").into(), + Ident::new("quantity").into(), + ], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1928,9 +1935,13 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("product"), Ident::new("quantity"),], + columns: vec![ + Ident::new("product").into(), + Ident::new("quantity").into(), + ], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1941,7 +1952,8 @@ fn parse_merge() { insert_token: AttachedToken::empty(), columns: vec![], kind_token: AttachedToken::empty(), - kind: MergeInsertKind::Row + kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1952,7 +1964,8 @@ fn parse_merge() { insert_token: AttachedToken::empty(), columns: vec![], kind_token: AttachedToken::empty(), - kind: MergeInsertKind::Row + kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1975,7 +1988,7 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("a"), Ident::new("b"),], + columns: vec![Ident::new("a").into(), Ident::new("b").into(),], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, @@ -1984,7 +1997,8 @@ fn parse_merge() { Expr::value(number("1")), Expr::Identifier(Ident::new("DEFAULT")), ]] - }) + }), + insert_predicate: None, }) }, MergeClause { @@ -2002,7 +2016,8 @@ fn parse_merge() { Expr::value(number("1")), 
Expr::Identifier(Ident::new("DEFAULT")), ]] - }) + }), + insert_predicate: None, }) }, ], diff --git a/tests/sqlparser_comments.rs b/tests/sqlparser_comments.rs new file mode 100644 index 00000000..34442ca3 --- /dev/null +++ b/tests/sqlparser_comments.rs @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#![warn(clippy::all)] +//! Test comment extraction from SQL source code. 
+ +#[cfg(test)] +use pretty_assertions::assert_eq; + +use sqlparser::{ + ast::comments::{Comment, CommentWithSpan}, + dialect::GenericDialect, + parser::Parser, + tokenizer::Span, +}; + +#[test] +fn parse_sql_with_comments() { + let sql = r#" +-- second line comment +select * from /* inline comment after `from` */ dual; + +/*select +some +more*/ + + -- end-of-script-with-no-newline"#; + + let comments = match Parser::parse_sql_with_comments(&GenericDialect, sql) { + Ok((_, comments)) => comments, + Err(e) => panic!("Invalid sql script: {e}"), + }; + + assert_eq!( + Vec::from(comments), + vec![ + CommentWithSpan { + comment: Comment::SingleLine { + content: " second line comment\n".into(), + prefix: "--".into() + }, + span: Span::new((2, 1).into(), (3, 1).into()), + }, + CommentWithSpan { + comment: Comment::MultiLine(" inline comment after `from` ".into()), + span: Span::new((3, 15).into(), (3, 48).into()), + }, + CommentWithSpan { + comment: Comment::MultiLine("select\nsome\nmore".into()), + span: Span::new((5, 1).into(), (7, 7).into()) + }, + CommentWithSpan { + comment: Comment::SingleLine { + content: " end-of-script-with-no-newline".into(), + prefix: "--".into() + }, + span: Span::new((9, 3).into(), (9, 35).into()), + } + ] + ); +} diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ccad67e3..9f549e4d 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -9793,22 +9793,22 @@ fn parse_merge() { let sql_no_into = "MERGE s.bar AS dest USING (SELECT * FROM s.foo) AS stg ON dest.D = stg.D AND dest.E = stg.E WHEN NOT MATCHED THEN INSERT (A, B, C) VALUES (stg.A, stg.B, stg.C) WHEN MATCHED AND dest.A = 'a' THEN UPDATE SET dest.F = stg.F, dest.G = stg.G WHEN MATCHED THEN DELETE"; match (verified_stmt(sql), verified_stmt(sql_no_into)) { ( - Statement::Merge { + Statement::Merge(Merge { into, table, source, on, clauses, .. 
- }, - Statement::Merge { + }), + Statement::Merge(Merge { into: no_into, table: table_no_into, source: source_no_into, on: on_no_into, clauses: clauses_no_into, .. - }, + }), ) => { assert!(into); assert!(!no_into); @@ -9921,7 +9921,11 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("A"), Ident::new("B"), Ident::new("C")], + columns: vec![ + Ident::new("A").into(), + Ident::new("B").into(), + Ident::new("C").into() + ], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, @@ -9941,6 +9945,7 @@ fn parse_merge() { ]), ]] }), + insert_predicate: None, }), }, MergeClause { @@ -9956,7 +9961,7 @@ fn parse_merge() { (Value::SingleQuotedString("a".to_string())).with_empty_span() )), }), - action: MergeAction::Update { + action: MergeAction::Update(MergeUpdateExpr { update_token: AttachedToken::empty(), assignments: vec![ Assignment { @@ -9980,7 +9985,9 @@ fn parse_merge() { ]), }, ], - }, + update_predicate: None, + delete_predicate: None, + }), }, MergeClause { when_token: AttachedToken::empty(), @@ -9999,6 +10006,45 @@ fn parse_merge() { let sql = "MERGE INTO s.bar AS dest USING newArrivals AS S ON (1 > 1) WHEN NOT MATCHED THEN INSERT VALUES (stg.A, stg.B, stg.C)"; verified_stmt(sql); + + // MERGE with predicates + let sql = "\ +MERGE INTO FOO \ +USING FOO_IMPORT \ +ON (FOO.ID = FOO_IMPORT.ID) \ +WHEN MATCHED THEN \ +UPDATE SET FOO.NAME = FOO_IMPORT.NAME \ +WHERE 1 = 1 \ +DELETE WHERE FOO.NAME LIKE '%.DELETE' \ +WHEN NOT MATCHED THEN \ +INSERT (ID, NAME) \ +VALUES (FOO_IMPORT.ID, UPPER(FOO_IMPORT.NAME)) \ +WHERE NOT FOO_IMPORT.NAME LIKE '%.DO_NOT_INSERT'"; + all_dialects().verified_stmt(sql); + + // MERGE with simple insert columns + let sql = "\ +MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (ID, NAME) \ +VALUES (1, 'abc')"; + all_dialects().verified_stmt(sql); + + // MERGE 
with qualified insert columns + let sql = "\ +MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (FOO.ID, FOO.NAME) \ +VALUES (1, 'abc')"; + all_dialects().verified_stmt(sql); + + // MERGE with schema qualified insert columns + let sql = "\ +MERGE INTO PLAYGROUND.FOO USING FOO_IMPORT ON (PLAYGROUND.FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (PLAYGROUND.FOO.ID, PLAYGROUND.FOO.NAME) \ +VALUES (1, 'abc')"; + all_dialects().verified_stmt(sql); } #[test] @@ -12074,6 +12120,8 @@ fn parse_execute_stored_procedure() { } _ => unreachable!(), } + // Test optional parentheses around procedure name + ms_and_generic().one_statement_parses_to("EXEC ('name')", "EXECUTE 'name'"); } #[test] @@ -17905,3 +17953,22 @@ fn test_parse_set_session_authorization() { })) ); } + +#[test] +fn parse_select_parenthesized_wildcard() { + // Test SELECT DISTINCT(*) which uses a parenthesized wildcard + // The parentheses are syntactic sugar and get normalized to just * + let sql = "SELECT DISTINCT (*) FROM table1"; + let canonical = "SELECT DISTINCT * FROM table1"; + let select = all_dialects().verified_only_select_with_canonical(sql, canonical); + assert_eq!(select.distinct, Some(Distinct::Distinct)); + assert_eq!(select.projection.len(), 1); + assert!(matches!(select.projection[0], SelectItem::Wildcard(_))); + + // Also test without spaces: SELECT DISTINCT(*) + let sql_no_spaces = "SELECT DISTINCT(*) FROM table1"; + let select2 = all_dialects().verified_only_select_with_canonical(sql_no_spaces, canonical); + assert_eq!(select2.distinct, Some(Distinct::Distinct)); + assert_eq!(select2.projection.len(), 1); + assert!(matches!(select2.projection[0], SelectItem::Wildcard(_))); +} diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 386bab7f..1b094851 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -34,10 +34,12 @@ fn parse_table_create() { let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b 
STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2", "asdf" = '1234', 'asdf' = "1234", "asdf" = 2)"#; let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; let serdeproperties = r#"CREATE EXTERNAL TABLE IF NOT EXISTS db.table (a STRING, b STRING, c STRING) PARTITIONED BY (d STRING, e STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde.config' WITH SERDEPROPERTIES ('prop_a' = 'a', 'prop_b' = 'b') STORED AS TEXTFILE LOCATION 's3://...' TBLPROPERTIES ('prop_c' = 'c')"#; + let externaltable = r#"CREATE EXTERNAL TABLE t (c INT)"#; hive().verified_stmt(sql); hive().verified_stmt(iof); hive().verified_stmt(serdeproperties); + hive().verified_stmt(externaltable); } #[test] diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 37e8e962..70e0aab4 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2501,8 +2501,45 @@ fn test_tsql_no_semicolon_delimiter() { DECLARE @X AS NVARCHAR(MAX)='x' DECLARE @Y AS NVARCHAR(MAX)='y' "#; - let stmts = tsql().parse_sql_statements(sql).unwrap(); assert_eq!(stmts.len(), 2); assert!(stmts.iter().all(|s| matches!(s, Statement::Declare { .. 
}))); + + let sql = r#" +SELECT col FROM tbl +IF x=1 + SELECT 1 +ELSE + SELECT 2 + "#; + let stmts = tsql().parse_sql_statements(sql).unwrap(); + assert_eq!(stmts.len(), 2); + assert!(matches!(&stmts[0], Statement::Query(_))); + assert!(matches!(&stmts[1], Statement::If(_))); +} + +#[test] +fn test_sql_keywords_as_table_aliases() { + // Some keywords that should not be parsed as an alias implicitly or explicitly + let reserved_kws = vec!["IF", "ELSE"]; + for kw in reserved_kws { + for explicit in &["", "AS "] { + assert!(tsql() + .parse_sql_statements(&format!("SELECT * FROM tbl {explicit}{kw}")) + .is_err()); + } + } +} + +#[test] +fn test_sql_keywords_as_column_aliases() { + // Some keywords that should not be parsed as an alias implicitly or explicitly + let reserved_kws = vec!["IF", "ELSE"]; + for kw in reserved_kws { + for explicit in &["", "AS "] { + assert!(tsql() + .parse_sql_statements(&format!("SELECT col {explicit}{kw} FROM tbl")) + .is_err()); + } + } } diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs new file mode 100644 index 00000000..68366036 --- /dev/null +++ b/tests/sqlparser_oracle.rs @@ -0,0 +1,335 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Test SQL syntax, specific to [sqlparser::dialect::OracleDialect]. + +#[cfg(test)] +use pretty_assertions::assert_eq; + +use sqlparser::{ + ast::{BinaryOperator, Expr, Ident, QuoteDelimitedString, Value, ValueWithSpan}, + dialect::OracleDialect, + parser::ParserError, + tokenizer::Span, +}; +use test_utils::{all_dialects_where, expr_from_projection, number, TestedDialects}; + +mod test_utils; + +fn oracle() -> TestedDialects { + TestedDialects::new(vec![Box::new(OracleDialect)]) +} + +/// Convenience constructor for [QuoteDelimitedString]. +fn quote_delimited_string( + start_quote: char, + value: &'static str, + end_quote: char, +) -> QuoteDelimitedString { + QuoteDelimitedString { + start_quote, + value: value.into(), + end_quote, + } +} + +/// Oracle: `||` has a lower precedence than `*` and `/` +#[test] +fn muldiv_have_higher_precedence_than_strconcat() { + // ............... A .. B ...... C .. D ........... + let sql = "SELECT 3 / 5 || 'asdf' || 7 * 9 FROM dual"; + let select = oracle().verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + expr_from_projection(&select.projection[0]), + // (C || D) + &Expr::BinaryOp { + // (A || B) + left: Box::new(Expr::BinaryOp { + // A + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Value(number("3").into())), + op: BinaryOperator::Divide, + right: Box::new(Expr::Value(number("5").into())), + }), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Value(ValueWithSpan { + value: Value::SingleQuotedString("asdf".into()), + span: Span::empty(), + })), + }), + op: BinaryOperator::StringConcat, + // D + right: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Value(number("7").into())), + op: BinaryOperator::Multiply, + right: Box::new(Expr::Value(number("9").into())), + }), + } + ); +} + +/// Oracle: `+`, `-`, and `||` have the same precedence and parse from left-to-right +#[test] +fn plusminus_have_same_precedence_as_strconcat() { + // ................ A .. B .... C .. D ............
+ let sql = "SELECT 3 + 5 || '.3' || 7 - 9 FROM dual"; + let select = oracle().verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + expr_from_projection(&select.projection[0]), + // D + &Expr::BinaryOp { + left: Box::new(Expr::BinaryOp { + // B + left: Box::new(Expr::BinaryOp { + // A + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Value(number("3").into())), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("5").into())), + }), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Value(ValueWithSpan { + value: Value::SingleQuotedString(".3".into()), + span: Span::empty(), + })), + }), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Value(number("7").into())), + }), + op: BinaryOperator::Minus, + right: Box::new(Expr::Value(number("9").into())) + } + ); +} + +#[test] +fn parse_quote_delimited_string() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT Q'.abc.', \ + Q'Xab'cX', \ + Q'|abc'''|', \ + Q'{abc}d}', \ + Q'[]abc[]', \ + Q'', \ + Q'<<', \ + Q'('abc'('abc)', \ + Q'(abc'def))', \ + Q'(abc'def)))' \ + FROM dual"; + let select = dialect.verified_only_select(sql); + assert_eq!(10, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::QuoteDelimitedStringLiteral(quote_delimited_string('.', "abc", '.')) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('X', "ab'c", 'X'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('|', "abc'''", '|'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('{', "abc}d", '}'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[3]) + 
); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('[', "]abc[", ']'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[4]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('<', "a'bc", '>'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[5]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('<', "<'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[6]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "'abc'('abc", ')'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[7]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "abc'def)", ')'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[8]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "abc'def))", ')'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[9]) + ); +} + +#[test] +fn parse_invalid_quote_delimited_strings() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + // ~ invalid quote delimiter + for q in [' ', '\t', '\r', '\n'] { + assert_eq!( + dialect.parse_sql_statements(&format!("SELECT Q'{q}abc{q}' FROM dual")), + Err(ParserError::TokenizerError( + "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into() + )), + "with quote char {q:?}" + ); + } + // ~ invalid eof after quote + assert_eq!( + dialect.parse_sql_statements("SELECT Q'"), + Err(ParserError::TokenizerError( + "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into() + )), + "with EOF quote char" + ); + // ~ unterminated string + assert_eq!( + dialect.parse_sql_statements("SELECT Q'|asdfa...."), + Err(ParserError::TokenizerError( + "Unterminated string literal at 
Line: 1, Column: 9".into() + )), + "with EOF quote char" + ); +} + +#[test] +fn parse_quote_delimited_string_lowercase() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "select q'!a'b'c!d!' from dual"; + let select = dialect.verified_only_select_with_canonical(sql, "SELECT Q'!a'b'c!d!' FROM dual"); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::QuoteDelimitedStringLiteral(quote_delimited_string('!', "a'b'c!d", '!')) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_quote_delimited_string_but_is_a_word() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT q, quux, q.abc FROM dual q"; + let select = dialect.verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "q")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "quux")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "q"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} + +#[test] +fn parse_national_quote_delimited_string() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT NQ'.abc.' 
FROM dual"; + let select = dialect.verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string('.', "abc", '.')) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_national_quote_delimited_string_lowercase() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + for prefix in ["nq", "Nq", "nQ", "NQ"] { + let select = dialect.verified_only_select_with_canonical( + &format!("select {prefix}'!a'b'c!d!' from dual"), + "SELECT NQ'!a'b'c!d!' FROM dual", + ); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string( + '!', "a'b'c!d", '!' + )) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); + } +} + +#[test] +fn parse_national_quote_delimited_string_but_is_a_word() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT nq, nqoo, nq.abc FROM dual q"; + let select = dialect.verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nq")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nqoo")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "nq"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 96e04145..9f4564ef 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -23,15 +23,11 @@ mod test_utils; use helpers::attached_token::AttachedToken; -use sqlparser::ast::{ - DataType, DropBehavior, DropOperator, DropOperatorClass, DropOperatorSignature, -}; -use 
sqlparser::tokenizer::Span;
-use test_utils::*;
-
 use sqlparser::ast::*;
 use sqlparser::dialect::{GenericDialect, PostgreSqlDialect};
 use sqlparser::parser::ParserError;
+use sqlparser::tokenizer::Span;
+use test_utils::*;
 
 #[test]
 fn parse_create_table_generated_always_as_identity() {
@@ -2572,11 +2568,17 @@ fn parse_create_indices_with_operator_classes() {
         IndexType::SPGiST,
         IndexType::Custom("CustomIndexType".into()),
     ];
-    let operator_classes: [Option<Ident>; 4] = [
+    let operator_classes: [Option<ObjectName>; 4] = [
         None,
-        Some("gin_trgm_ops".into()),
-        Some("gist_trgm_ops".into()),
-        Some("totally_not_valid".into()),
+        Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new(
+            "gin_trgm_ops",
+        ))])),
+        Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new(
+            "gist_trgm_ops",
+        ))])),
+        Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new(
+            "totally_not_valid",
+        ))])),
     ];
 
     for expected_index_type in indices {
@@ -2713,6 +2715,36 @@ fn parse_create_indices_with_operator_classes() {
     }
 }
 
+#[test]
+fn parse_create_index_with_schema_qualified_operator_class() {
+    let sql = "CREATE INDEX my_index ON my_table USING HNSW (embedding public.vector_cosine_ops)";
+
+    match pg().verified_stmt(sql) {
+        Statement::CreateIndex(CreateIndex { columns, .. 
}) => { + assert_eq!(1, columns.len()); + let idx_col = &columns[0]; + + // Verify the column name + match &idx_col.column.expr { + Expr::Identifier(ident) => { + assert_eq!("embedding", ident.value); + } + _ => panic!("Expected identifier expression"), + } + + // Verify the schema-qualified operator class + assert_eq!( + Some(ObjectName(vec![ + ObjectNamePart::Identifier(Ident::new("public")), + ObjectNamePart::Identifier(Ident::new("vector_cosine_ops")), + ])), + idx_col.operator_class + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_bloom() { let sql = @@ -6715,24 +6747,26 @@ fn parse_create_operator() { length: 255, unit: None }))), - commutator: Some(ObjectName::from(vec![ - Ident::new("schema"), - Ident::new(">") - ])), - negator: Some(ObjectName::from(vec![ - Ident::new("schema"), - Ident::new("<=") - ])), - restrict: Some(ObjectName::from(vec![ - Ident::new("myschema"), - Ident::new("sel_func") - ])), - join: Some(ObjectName::from(vec![ - Ident::new("myschema"), - Ident::new("join_func") - ])), - hashes: true, - merges: true, + options: vec![ + OperatorOption::Commutator(ObjectName::from(vec![ + Ident::new("schema"), + Ident::new(">") + ])), + OperatorOption::Negator(ObjectName::from(vec![ + Ident::new("schema"), + Ident::new("<=") + ])), + OperatorOption::Restrict(Some(ObjectName::from(vec![ + Ident::new("myschema"), + Ident::new("sel_func") + ]))), + OperatorOption::Join(Some(ObjectName::from(vec![ + Ident::new("myschema"), + Ident::new("join_func") + ]))), + OperatorOption::Hashes, + OperatorOption::Merges, + ], }) ); @@ -6748,12 +6782,7 @@ fn parse_create_operator() { is_procedure: false, left_arg: None, right_arg: None, - commutator: None, - negator: None, - restrict: None, - join: None, - hashes: false, - merges: false, + options: vec![], }) ); } @@ -6778,13 +6807,9 @@ fn parse_create_operator() { ), ] { match pg().verified_stmt(&format!("CREATE OPERATOR {name} (FUNCTION = f)")) { - Statement::CreateOperator(CreateOperator { - name, - 
hashes: false, - merges: false, - .. - }) => { + Statement::CreateOperator(CreateOperator { name, options, .. }) => { assert_eq!(name, expected_name); + assert!(options.is_empty()); } _ => unreachable!(), } @@ -6920,6 +6945,592 @@ fn parse_drop_operator() { assert!(pg().parse_sql_statements(sql).is_err()); } +#[test] +fn parse_alter_operator() { + use sqlparser::ast::{AlterOperator, AlterOperatorOperation, OperatorOption, Owner}; + + // Test ALTER OPERATOR ... OWNER TO with different owner types + for (owner_sql, owner_ast) in [ + ("joe", Owner::Ident(Ident::new("joe"))), + ("CURRENT_USER", Owner::CurrentUser), + ("CURRENT_ROLE", Owner::CurrentRole), + ("SESSION_USER", Owner::SessionUser), + ] { + for (op_name, op_name_ast, left_type_sql, left_type_ast, right_type_sql, right_type_ast) in [ + ( + "+", + ObjectName::from(vec![Ident::new("+")]), + "INTEGER", + Some(DataType::Integer(None)), + "INTEGER", + DataType::Integer(None), + ), + ( + "~", + ObjectName::from(vec![Ident::new("~")]), + "NONE", + None, + "BIT", + DataType::Bit(None), + ), + ( + "@@", + ObjectName::from(vec![Ident::new("@@")]), + "TEXT", + Some(DataType::Text), + "TEXT", + DataType::Text, + ), + ] { + let sql = format!( + "ALTER OPERATOR {} ({}, {}) OWNER TO {}", + op_name, left_type_sql, right_type_sql, owner_sql + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: op_name_ast.clone(), + left_type: left_type_ast.clone(), + right_type: right_type_ast.clone(), + operation: AlterOperatorOperation::OwnerTo(owner_ast.clone()), + }) + ); + } + } + + // Test ALTER OPERATOR ... 
SET SCHEMA + for (op_name, op_name_ast, schema_name, schema_name_ast) in [ + ( + "+", + ObjectName::from(vec![Ident::new("+")]), + "new_schema", + ObjectName::from(vec![Ident::new("new_schema")]), + ), + ( + "myschema.@@", + ObjectName::from(vec![Ident::new("myschema"), Ident::new("@@")]), + "other_schema", + ObjectName::from(vec![Ident::new("other_schema")]), + ), + ] { + let sql = format!( + "ALTER OPERATOR {} (TEXT, TEXT) SET SCHEMA {}", + op_name, schema_name + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: op_name_ast, + left_type: Some(DataType::Text), + right_type: DataType::Text, + operation: AlterOperatorOperation::SetSchema { + schema_name: schema_name_ast, + }, + }) + ); + } + + // Test ALTER OPERATOR ... SET with RESTRICT and JOIN + for (restrict_val, restrict_ast, join_val, join_ast) in [ + ( + "_int_contsel", + Some(ObjectName::from(vec![Ident::new("_int_contsel")])), + "_int_contjoinsel", + Some(ObjectName::from(vec![Ident::new("_int_contjoinsel")])), + ), + ( + "NONE", + None, + "my_joinsel", + Some(ObjectName::from(vec![Ident::new("my_joinsel")])), + ), + ( + "my_sel", + Some(ObjectName::from(vec![Ident::new("my_sel")])), + "NONE", + None, + ), + ] { + let sql = format!( + "ALTER OPERATOR && (TEXT, TEXT) SET (RESTRICT = {}, JOIN = {})", + restrict_val, join_val + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: ObjectName::from(vec![Ident::new("&&")]), + left_type: Some(DataType::Text), + right_type: DataType::Text, + operation: AlterOperatorOperation::Set { + options: vec![ + OperatorOption::Restrict(restrict_ast), + OperatorOption::Join(join_ast), + ], + }, + }) + ); + } + + // Test ALTER OPERATOR ... 
SET with COMMUTATOR and NEGATOR + for (operator, commutator, negator) in [("&&", "&&", ">"), ("+", "+", "-"), ("<", "<", ">=")] { + let sql = format!( + "ALTER OPERATOR {} (INTEGER, INTEGER) SET (COMMUTATOR = {}, NEGATOR = {})", + operator, commutator, negator + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: ObjectName::from(vec![Ident::new(operator)]), + left_type: Some(DataType::Integer(None)), + right_type: DataType::Integer(None), + operation: AlterOperatorOperation::Set { + options: vec![ + OperatorOption::Commutator(ObjectName::from(vec![Ident::new(commutator)])), + OperatorOption::Negator(ObjectName::from(vec![Ident::new(negator)])), + ], + }, + }) + ); + } + + // Test ALTER OPERATOR ... SET with HASHES and MERGES (individually and combined) + for (operator, options_sql, options_ast) in [ + ("=", "HASHES", vec![OperatorOption::Hashes]), + ("<", "MERGES", vec![OperatorOption::Merges]), + ( + "<=", + "HASHES, MERGES", + vec![OperatorOption::Hashes, OperatorOption::Merges], + ), + ] { + let sql = format!( + "ALTER OPERATOR {} (INTEGER, INTEGER) SET ({})", + operator, options_sql + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: ObjectName::from(vec![Ident::new(operator)]), + left_type: Some(DataType::Integer(None)), + right_type: DataType::Integer(None), + operation: AlterOperatorOperation::Set { + options: options_ast + }, + }) + ); + } + + // Test ALTER OPERATOR ... 
SET with multiple options combined + let sql = + "ALTER OPERATOR + (INTEGER, INTEGER) SET (COMMUTATOR = +, NEGATOR = -, HASHES, MERGES)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperator(AlterOperator { + name: ObjectName::from(vec![Ident::new("+")]), + left_type: Some(DataType::Integer(None)), + right_type: DataType::Integer(None), + operation: AlterOperatorOperation::Set { + options: vec![ + OperatorOption::Commutator(ObjectName::from(vec![Ident::new("+")])), + OperatorOption::Negator(ObjectName::from(vec![Ident::new("-")])), + OperatorOption::Hashes, + OperatorOption::Merges, + ], + }, + }) + ); +} + +#[test] +fn parse_alter_operator_family() { + // Test ALTER OPERATOR FAMILY ... ADD OPERATOR + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree ADD OPERATOR 1 < (INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Add { + items: vec![OperatorFamilyItem::Operator { + strategy_number: 1, + operator_name: ObjectName::from(vec![Ident::new("<")]), + op_types: vec![DataType::Int4(None), DataType::Int2(None)], + purpose: None, + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... ADD OPERATOR with FOR SEARCH + let sql = + "ALTER OPERATOR FAMILY text_ops USING btree ADD OPERATOR 1 @@ (TEXT, TEXT) FOR SEARCH"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("text_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Add { + items: vec![OperatorFamilyItem::Operator { + strategy_number: 1, + operator_name: ObjectName::from(vec![Ident::new("@@")]), + op_types: vec![DataType::Text, DataType::Text], + purpose: Some(OperatorPurpose::ForSearch), + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... 
ADD FUNCTION + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree ADD FUNCTION 1 btint42cmp(INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Add { + items: vec![OperatorFamilyItem::Function { + support_number: 1, + op_types: None, + function_name: ObjectName::from(vec![Ident::new("btint42cmp")]), + argument_types: vec![DataType::Int4(None), DataType::Int2(None)], + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... DROP OPERATOR + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree DROP OPERATOR 1 (INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Drop { + items: vec![OperatorFamilyDropItem::Operator { + strategy_number: 1, + op_types: vec![DataType::Int4(None), DataType::Int2(None)], + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... DROP FUNCTION + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree DROP FUNCTION 1 (INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Drop { + items: vec![OperatorFamilyDropItem::Function { + support_number: 1, + op_types: vec![DataType::Int4(None), DataType::Int2(None)], + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... 
RENAME TO + let sql = "ALTER OPERATOR FAMILY old_ops USING btree RENAME TO new_ops"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("old_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::RenameTo { + new_name: ObjectName::from(vec![Ident::new("new_ops")]), + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... OWNER TO + let sql = "ALTER OPERATOR FAMILY my_ops USING btree OWNER TO joe"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("my_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::OwnerTo(Owner::Ident(Ident::new("joe"))), + }) + ); + + // Test ALTER OPERATOR FAMILY ... SET SCHEMA + let sql = "ALTER OPERATOR FAMILY my_ops USING btree SET SCHEMA new_schema"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("my_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::SetSchema { + schema_name: ObjectName::from(vec![Ident::new("new_schema")]), + }, + }) + ); + + // Test error cases + // Missing USING clause + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops ADD OPERATOR 1 < (INT4, INT2)") + .is_err()); + + // Invalid operation + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree INVALID_OPERATION") + .is_err()); + + // Missing operator name in ADD OPERATOR + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 (INT4, INT2)" + ) + .is_err()); + + // Missing function name in ADD FUNCTION + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4, INT2)" + ) + .is_err()); + + // Missing parentheses in DROP OPERATOR + assert!(pg() + 
.parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 INT4, INT2") + .is_err()); + + // Invalid operator name (empty) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 (INT4, INT2)" + ) + .is_err()); + + // Invalid operator name (special characters) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 @#$ (INT4, INT2)" + ) + .is_err()); + + // Negative strategy number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR -1 < (INT4, INT2)" + ) + .is_err()); + + // Non-integer strategy number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1.5 < (INT4, INT2)" + ) + .is_err()); + + // Missing closing parenthesis in operator types + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2" + ) + .is_err()); + + // Missing opening parenthesis in operator types + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < INT4, INT2)" + ) + .is_err()); + + // Empty operator types + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < ()") + .is_err()); + + // Invalid data type (using punctuation) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (@#$%, INT2)" + ) + .is_err()); + + // Incomplete FOR clause + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR" + ) + .is_err()); + + // Invalid FOR clause keyword + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR INVALID" + ) + .is_err()); + + // FOR ORDER BY without sort family + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR ORDER 
BY" + ) + .is_err()); + + // Missing function name in ADD FUNCTION + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4, INT2)" + ) + .is_err()); + + // Invalid function name + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 123invalid(INT4, INT2)" + ) + .is_err()); + + // Negative support number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION -1 func(INT4, INT2)" + ) + .is_err()); + + // Non-integer support number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1.5 func(INT4, INT2)" + ) + .is_err()); + + // Missing closing parenthesis in function operator types + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4, INT2 func()" + ) + .is_err()); + + // Missing closing parenthesis in function arguments + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 func(INT4, INT2" + ) + .is_err()); + + // Invalid data type in function arguments + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 func(@#$%, INT2)" + ) + .is_err()); + + // DROP OPERATOR with FOR clause (not allowed) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 (INT4, INT2) FOR SEARCH" + ) + .is_err()); + + // DROP FUNCTION with function arguments (not allowed) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree DROP FUNCTION 1 (INT4, INT2) func(INT4)" + ) + .is_err()); + + // Multiple ADD items with error in middle + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2), INVALID_ITEM" + ) + .is_err()); + + // Multiple DROP items with error in middle + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree 
DROP OPERATOR 1 (INT4, INT2), INVALID_ITEM" + ) + .is_err()); + + // RENAME TO with invalid new name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree RENAME TO 123invalid") + .is_err()); + + // OWNER TO with invalid owner + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree OWNER TO 123invalid") + .is_err()); + + // SET SCHEMA with invalid schema name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree SET SCHEMA 123invalid") + .is_err()); + + // Schema-qualified operator family name with invalid schema + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY 123invalid.my_ops USING btree ADD OPERATOR 1 < (INT4, INT2)" + ) + .is_err()); + + // Missing operator family name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY USING btree ADD OPERATOR 1 < (INT4, INT2)") + .is_err()); + + // Extra tokens at end + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) EXTRA" + ) + .is_err()); + + // Incomplete statement + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD") + .is_err()); + + // Very long numbers + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 999999999999999999999 < (INT4, INT2)") + .is_err()); + + // Multiple FOR clauses + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR SEARCH FOR ORDER BY sort_family") + .is_err()); + + // FOR SEARCH with extra tokens + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR SEARCH EXTRA") + .is_err()); + + // FOR ORDER BY with invalid sort family + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR ORDER BY 123invalid") + .is_err()); + + // Function with empty operator types but missing function args 
parens + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 () func") + .is_err()); + + // Function with mismatched parentheses + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4 func(INT2" + ) + .is_err()); + + // DROP with empty types + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 ()") + .is_err()); + + // DROP FUNCTION with empty types + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree DROP FUNCTION 1 ()") + .is_err()); +} + #[test] fn parse_drop_operator_family() { for if_exists in [true, false] { diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 22a63266..37e9f8cb 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -4635,3 +4635,12 @@ fn test_alter_dynamic_table() { snowflake().verified_stmt("ALTER DYNAMIC TABLE my_dyn_table SUSPEND"); snowflake().verified_stmt("ALTER DYNAMIC TABLE my_dyn_table RESUME"); } + +#[test] +fn test_alter_external_table() { + snowflake().verified_stmt("ALTER EXTERNAL TABLE some_table REFRESH"); + snowflake().verified_stmt("ALTER EXTERNAL TABLE some_table REFRESH 'year=2025/month=12/'"); + snowflake().verified_stmt("ALTER EXTERNAL TABLE IF EXISTS some_table REFRESH"); + snowflake() + .verified_stmt("ALTER EXTERNAL TABLE IF EXISTS some_table REFRESH 'year=2025/month=12/'"); +}