From 9b8a2d1e226a024758a4dbbaaf47fafe67a9619d Mon Sep 17 00:00:00 2001 From: xitep Date: Tue, 16 Dec 2025 12:30:30 +0100 Subject: [PATCH 1/5] Extract source comments (#2107) Co-authored-by: Ifeanyi Ubah --- src/ast/comments.rs | 329 ++++++++++++++++++++++++++++++++++++ src/ast/mod.rs | 1 + src/ast/spans.rs | 8 +- src/parser/mod.rs | 53 +++++- tests/sqlparser_comments.rs | 75 ++++++++ 5 files changed, 459 insertions(+), 7 deletions(-) create mode 100644 src/ast/comments.rs create mode 100644 tests/sqlparser_comments.rs diff --git a/src/ast/comments.rs b/src/ast/comments.rs new file mode 100644 index 00000000..1f5b3102 --- /dev/null +++ b/src/ast/comments.rs @@ -0,0 +1,329 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Provides a representation of source code comments in parsed SQL code. +//! +//! See [Comments::find] for an example. + +#[cfg(not(feature = "std"))] +use alloc::{string::String, vec::Vec}; + +use core::{ + ops::{Bound, Deref, RangeBounds}, + slice, +}; + +use crate::tokenizer::{Location, Span}; + +/// An opaque container for comments from parsed SQL source code. +#[derive(Default, Debug)] +pub struct Comments(Vec); + +impl Comments { + /// Accepts `comment` if it's the first or is located strictly after the + /// last accepted comment. In other words, this method will skip the + /// comment if it's coming out of order (as encountered in the parsed + /// source code.)
+ pub(crate) fn offer(&mut self, comment: CommentWithSpan) { + if self + .0 + .last() + .map(|last| last.span < comment.span) + .unwrap_or(true) + { + self.0.push(comment); + } + } + + /// Finds comments starting within the given location range. The order of + /// iterator reflects the order of the comments as encountered in the parsed + /// source code. + /// + /// # Example + /// ```rust + /// use sqlparser::{dialect::GenericDialect, parser::Parser, tokenizer::Location}; + /// + /// let sql = r#"/* + /// header comment ... + /// ... spanning multiple lines + /// */ + /// + /// -- first statement + /// SELECT 'hello' /* world */ FROM DUAL; + /// + /// -- second statement + /// SELECT 123 FROM DUAL; + /// + /// -- trailing comment + /// "#; + /// + /// let (ast, comments) = Parser::parse_sql_with_comments(&GenericDialect, sql).unwrap(); + /// + /// // all comments appearing before line seven, i.e. before the first statement itself + /// assert_eq!( + /// &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::>(), + /// &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]); + /// + /// // all comments appearing within the first statement + /// assert_eq!( + /// &comments.find(Location::new(7, 1)..Location::new(8,1)).map(|c| c.as_str()).collect::>(), + /// &[" world "]); + /// + /// // all comments appearing within or after the first statement + /// assert_eq!( + /// &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::>(), + /// &[" world ", " second statement\n", " trailing comment\n"]); + /// ``` + /// + /// The [Spanned](crate::ast::Spanned) trait allows you to access location + /// information for certain AST nodes. 
+ pub fn find>(&self, range: R) -> Iter<'_> { + let (start, end) = ( + self.start_index(range.start_bound()), + self.end_index(range.end_bound()), + ); + debug_assert!((0..=self.0.len()).contains(&start)); + debug_assert!((0..=self.0.len()).contains(&end)); + // in case the user specified a reverse range + Iter(if start <= end { + self.0[start..end].iter() + } else { + self.0[0..0].iter() + }) + } + + /// Find the index of the first comment starting "before" the given location. + /// + /// The returned index is _inclusive_ and within the range of `0..=self.0.len()`. + fn start_index(&self, location: Bound<&Location>) -> usize { + match location { + Bound::Included(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i, + Err(i) => i, + } + } + Bound::Excluded(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i + 1, + Err(i) => i, + } + } + Bound::Unbounded => 0, + } + } + + /// Find the index of the first comment starting "after" the given location. + /// + /// The returned index is _exclusive_ and within the range of `0..=self.0.len()`. + fn end_index(&self, location: Bound<&Location>) -> usize { + match location { + Bound::Included(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i + 1, + Err(i) => i, + } + } + Bound::Excluded(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i, + Err(i) => i, + } + } + Bound::Unbounded => self.0.len(), + } + } +} + +impl From for Vec { + fn from(comments: Comments) -> Self { + comments.0 + } +} + +/// A source code comment with information of its entire span. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CommentWithSpan { + /// The source code comment itself + pub comment: Comment, + /// The span of the comment including its markers + pub span: Span, +} + +impl Deref for CommentWithSpan { + type Target = Comment; + + fn deref(&self) -> &Self::Target { + &self.comment + } +} + +/// A unified type of the different source code comment formats. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Comment { + /// A single line comment, typically introduced with a prefix and spanning + /// until end-of-line or end-of-file in the source code. + /// + /// Note: `content` will include the terminating new-line character, if any. + SingleLine { content: String, prefix: String }, + + /// A multi-line comment, typically enclosed in `/* .. */` markers. The + /// string represents the content excluding the markers. + MultiLine(String), +} + +impl Comment { + /// Retrieves the content of the comment as string slice. + pub fn as_str(&self) -> &str { + match self { + Comment::SingleLine { content, prefix: _ } => content.as_str(), + Comment::MultiLine(content) => content.as_str(), + } + } +} + +impl Deref for Comment { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +/// An opaque iterator implementation over comments served by [Comments::find].
+pub struct Iter<'a>(slice::Iter<'a, CommentWithSpan>); + +impl<'a> Iterator for Iter<'a> { + type Item = &'a CommentWithSpan; + + fn next(&mut self) -> Option { + self.0.next() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_find() { + let comments = { + // ``` + // -- abc + // /* hello */--, world + // /* def + // ghi + // jkl + // */ + // ``` + let mut c = Comments(Vec::new()); + c.offer(CommentWithSpan { + comment: Comment::SingleLine { + content: " abc".into(), + prefix: "--".into(), + }, + span: Span::new((1, 1).into(), (1, 7).into()), + }); + c.offer(CommentWithSpan { + comment: Comment::MultiLine(" hello ".into()), + span: Span::new((2, 3).into(), (2, 14).into()), + }); + c.offer(CommentWithSpan { + comment: Comment::SingleLine { + content: ", world".into(), + prefix: "--".into(), + }, + span: Span::new((2, 14).into(), (2, 21).into()), + }); + c.offer(CommentWithSpan { + comment: Comment::MultiLine(" def\n ghi\n jkl\n".into()), + span: Span::new((3, 3).into(), (7, 1).into()), + }); + c + }; + + fn find>(comments: &Comments, range: R) -> Vec<&str> { + comments.find(range).map(|c| c.as_str()).collect::>() + } + + // ~ end-points only -------------------------------------------------- + assert_eq!(find(&comments, ..Location::new(0, 0)), Vec::<&str>::new()); + assert_eq!(find(&comments, ..Location::new(2, 1)), vec![" abc"]); + assert_eq!(find(&comments, ..Location::new(2, 3)), vec![" abc"]); + assert_eq!( + find(&comments, ..=Location::new(2, 3)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, ..=Location::new(2, 3)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, ..Location::new(2, 15)), + vec![" abc", " hello ", ", world"] + ); + + // ~ start-points only ------------------------------------------------ + assert_eq!( + find(&comments, Location::new(1000, 1000)..), + Vec::<&str>::new() + ); + assert_eq!( + find(&comments, Location::new(2, 14)..), + vec![", world", " def\n ghi\n jkl\n"] + ); + assert_eq!( 
+ find(&comments, Location::new(2, 15)..), + vec![" def\n ghi\n jkl\n"] + ); + assert_eq!( + find(&comments, Location::new(0, 0)..), + vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..), + vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"] + ); + + // ~ ranges ----------------------------------------------------------- + assert_eq!( + find(&comments, Location::new(2, 1)..Location::new(1, 1)), + Vec::<&str>::new() + ); + assert_eq!( + find(&comments, Location::new(1, 1)..Location::new(2, 3)), + vec![" abc"] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..=Location::new(2, 3)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..=Location::new(2, 10)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..=Location::new(2, 14)), + vec![" abc", " hello ", ", world"] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..Location::new(2, 15)), + vec![" abc", " hello ", ", world"] + ); + + // ~ find everything -------------------------------------------------- + assert_eq!( + find(&comments, ..), + vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"] + ); + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6cb4c336..23cde478 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -136,6 +136,7 @@ mod query; mod spans; pub use spans::Spanned; +pub mod comments; mod trigger; mod value; diff --git a/src/ast/spans.rs b/src/ast/spans.rs index de8fba75..2ec797db 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -28,7 +28,7 @@ use core::iter; use crate::tokenizer::Span; use super::{ - dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation, + comments, dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation, AlterIndexOperation, AlterTableOperation, Analyze, Array, Assignment, AssignmentTarget, AttachedToken, BeginEndStatements, CaseStatement, CloseCursor, ClusteredIndex, 
ColumnDef, ColumnOption, ColumnOptionDef, ConditionalStatementBlock, ConditionalStatements, @@ -2477,6 +2477,12 @@ impl Spanned for OutputClause { } } +impl Spanned for comments::CommentWithSpan { + fn span(&self) -> Span { + self.span + } +} + #[cfg(test)] pub mod tests { use crate::dialect::{Dialect, GenericDialect, SnowflakeDialect}; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 54fb3273..2b82d009 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -32,14 +32,17 @@ use recursion::RecursionCounter; use IsLateral::*; use IsOptional::*; -use crate::ast::helpers::{ - key_value_options::{ - KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter, - }, - stmt_create_table::{CreateTableBuilder, CreateTableConfiguration}, -}; use crate::ast::Statement::CreatePolicy; use crate::ast::*; +use crate::ast::{ + comments, + helpers::{ + key_value_options::{ + KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter, + }, + stmt_create_table::{CreateTableBuilder, CreateTableConfiguration}, + }, +}; use crate::dialect::*; use crate::keywords::{Keyword, ALL_KEYWORDS}; use crate::tokenizer::*; @@ -530,6 +533,44 @@ impl<'a> Parser<'a> { Parser::new(dialect).try_with_sql(sql)?.parse_statements() } + /// Parses the given `sql` into an Abstract Syntax Tree (AST), returning + /// also encountered source code comments. + /// + /// See [Parser::parse_sql]. + pub fn parse_sql_with_comments( + dialect: &'a dyn Dialect, + sql: &str, + ) -> Result<(Vec, comments::Comments), ParserError> { + let mut p = Parser::new(dialect).try_with_sql(sql)?; + p.parse_statements().map(|stmts| (stmts, p.into_comments())) + } + + /// Consumes this parser returning comments from the parsed token stream. 
+ fn into_comments(self) -> comments::Comments { + let mut comments = comments::Comments::default(); + for t in self.tokens.into_iter() { + match t.token { + Token::Whitespace(Whitespace::SingleLineComment { comment, prefix }) => { + comments.offer(comments::CommentWithSpan { + comment: comments::Comment::SingleLine { + content: comment, + prefix, + }, + span: t.span, + }); + } + Token::Whitespace(Whitespace::MultiLineComment(comment)) => { + comments.offer(comments::CommentWithSpan { + comment: comments::Comment::MultiLine(comment), + span: t.span, + }); + } + _ => {} + } + } + comments + } + /// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.), /// stopping before the statement separator, if any. pub fn parse_statement(&mut self) -> Result { diff --git a/tests/sqlparser_comments.rs b/tests/sqlparser_comments.rs new file mode 100644 index 00000000..34442ca3 --- /dev/null +++ b/tests/sqlparser_comments.rs @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#![warn(clippy::all)] +//! Test comment extraction from SQL source code. 
+ +#[cfg(test)] +use pretty_assertions::assert_eq; + +use sqlparser::{ + ast::comments::{Comment, CommentWithSpan}, + dialect::GenericDialect, + parser::Parser, + tokenizer::Span, +}; + +#[test] +fn parse_sql_with_comments() { + let sql = r#" +-- second line comment +select * from /* inline comment after `from` */ dual; + +/*select +some +more*/ + + -- end-of-script-with-no-newline"#; + + let comments = match Parser::parse_sql_with_comments(&GenericDialect, sql) { + Ok((_, comments)) => comments, + Err(e) => panic!("Invalid sql script: {e}"), + }; + + assert_eq!( + Vec::from(comments), + vec![ + CommentWithSpan { + comment: Comment::SingleLine { + content: " second line comment\n".into(), + prefix: "--".into() + }, + span: Span::new((2, 1).into(), (3, 1).into()), + }, + CommentWithSpan { + comment: Comment::MultiLine(" inline comment after `from` ".into()), + span: Span::new((3, 15).into(), (3, 48).into()), + }, + CommentWithSpan { + comment: Comment::MultiLine("select\nsome\nmore".into()), + span: Span::new((5, 1).into(), (7, 7).into()) + }, + CommentWithSpan { + comment: Comment::SingleLine { + content: " end-of-script-with-no-newline".into(), + prefix: "--".into() + }, + span: Span::new((9, 3).into(), (9, 35).into()), + } + ] + ); +} From cdeed32294609e31779c13b48bd19e505bf65ea8 Mon Sep 17 00:00:00 2001 From: Denis Goncharenko Date: Tue, 16 Dec 2025 13:07:11 +0100 Subject: [PATCH 2/5] PostgreSQL: Support schema-qualified operator classes in CREATE INDEX (#2131) --- src/ast/ddl.rs | 2 +- src/parser/mod.rs | 6 ++--- tests/sqlparser_postgres.rs | 44 +++++++++++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 8ccd533c..d0aed448 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -61,7 +61,7 @@ use crate::tokenizer::{Span, Token}; #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IndexColumn { pub column: OrderByExpr, - pub operator_class: Option, + pub operator_class: 
Option, } impl From for IndexColumn { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2b82d009..3ba4ba57 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -16933,10 +16933,10 @@ impl<'a> Parser<'a> { fn parse_order_by_expr_inner( &mut self, with_operator_class: bool, - ) -> Result<(OrderByExpr, Option), ParserError> { + ) -> Result<(OrderByExpr, Option), ParserError> { let expr = self.parse_expr()?; - let operator_class: Option = if with_operator_class { + let operator_class: Option = if with_operator_class { // We check that if non of the following keywords are present, then we parse an // identifier as operator class. if self @@ -16945,7 +16945,7 @@ impl<'a> Parser<'a> { { None } else { - self.maybe_parse(|parser| parser.parse_identifier())? + self.maybe_parse(|parser| parser.parse_object_name(false))? } } else { None diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 11512cf8..d595a0a2 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2572,11 +2572,17 @@ fn parse_create_indices_with_operator_classes() { IndexType::SPGiST, IndexType::Custom("CustomIndexType".into()), ]; - let operator_classes: [Option; 4] = [ + let operator_classes: [Option; 4] = [ None, - Some("gin_trgm_ops".into()), - Some("gist_trgm_ops".into()), - Some("totally_not_valid".into()), + Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new( + "gin_trgm_ops", + ))])), + Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new( + "gist_trgm_ops", + ))])), + Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new( + "totally_not_valid", + ))])), ]; for expected_index_type in indices { @@ -2713,6 +2719,36 @@ fn parse_create_indices_with_operator_classes() { } } +#[test] +fn parse_create_index_with_schema_qualified_operator_class() { + let sql = "CREATE INDEX my_index ON my_table USING HNSW (embedding public.vector_cosine_ops)"; + + match pg().verified_stmt(sql) { + Statement::CreateIndex(CreateIndex { columns, .. 
}) => { + assert_eq!(1, columns.len()); + let idx_col = &columns[0]; + + // Verify the column name + match &idx_col.column.expr { + Expr::Identifier(ident) => { + assert_eq!("embedding", ident.value); + } + _ => panic!("Expected identifier expression"), + } + + // Verify the schema-qualified operator class + assert_eq!( + Some(ObjectName(vec![ + ObjectNamePart::Identifier(Ident::new("public")), + ObjectNamePart::Identifier(Ident::new("vector_cosine_ops")), + ])), + idx_col.operator_class + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_bloom() { let sql = From f84887d0049105c7b84621d65b71e1ee640e18e9 Mon Sep 17 00:00:00 2001 From: xitep Date: Tue, 16 Dec 2025 19:04:11 +0100 Subject: [PATCH 3/5] Oracle: Support for quote delimited strings (#2130) --- src/ast/mod.rs | 2 +- src/ast/value.rs | 32 ++++++ src/dialect/generic.rs | 4 + src/dialect/mod.rs | 7 ++ src/dialect/oracle.rs | 4 + src/parser/merge.rs | 2 +- src/parser/mod.rs | 10 ++ src/tokenizer.rs | 92 ++++++++++++++- tests/sqlparser_oracle.rs | 234 +++++++++++++++++++++++++++++++++++++- 9 files changed, 381 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 23cde478..f1e79b0d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -110,7 +110,7 @@ pub use self::trigger::{ pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, - NormalizationForm, TrimWhereField, Value, ValueWithSpan, + NormalizationForm, QuoteDelimitedString, TrimWhereField, Value, ValueWithSpan, }; use crate::ast::helpers::key_value_options::KeyValueOptions; diff --git a/src/ast/value.rs b/src/ast/value.rs index fdfa6a67..ccbb12a3 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -167,6 +167,12 @@ pub enum Value { TripleDoubleQuotedRawStringLiteral(String), /// N'string value' NationalStringLiteral(String), + /// Quote delimited literal. 
Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + QuoteDelimitedStringLiteral(QuoteDelimitedString), + /// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + NationalQuoteDelimitedStringLiteral(QuoteDelimitedString), /// X'hex value' HexStringLiteral(String), @@ -207,6 +213,8 @@ impl Value { | Value::NationalStringLiteral(s) | Value::HexStringLiteral(s) => Some(s), Value::DollarQuotedString(s) => Some(s.value), + Value::QuoteDelimitedStringLiteral(s) => Some(s.value), + Value::NationalQuoteDelimitedStringLiteral(s) => Some(s.value), _ => None, } } @@ -242,6 +250,8 @@ impl fmt::Display for Value { Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{v}'"), + Value::QuoteDelimitedStringLiteral(v) => v.fmt(f), + Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"), Value::HexStringLiteral(v) => write!(f, "X'{v}'"), Value::Boolean(v) => write!(f, "{v}"), Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"), @@ -279,6 +289,28 @@ impl fmt::Display for DollarQuotedString { } } +/// A quote delimited string literal, e.g. `Q'_abc_'`. +/// +/// See [Value::QuoteDelimitedStringLiteral] and/or +/// [Value::NationalQuoteDelimitedStringLiteral]. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct QuoteDelimitedString { + /// the quote start character; i.e.
the character _after_ the opening `Q'` + pub start_quote: char, + /// the string literal value itself + pub value: String, + /// the quote end character; i.e. the character _before_ the closing `'` + pub end_quote: char, +} + +impl fmt::Display for QuoteDelimitedString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Q'{}{}{}'", self.start_quote, self.value, self.end_quote) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index dffc5b52..bbedbc05 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -195,4 +195,8 @@ impl Dialect for GenericDialect { fn supports_interval_options(&self) -> bool { true } + + fn supports_quote_delimited_string(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 1d99d863..1a416e4d 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1209,6 +1209,13 @@ pub trait Dialect: Debug + Any { fn supports_semantic_view_table_factor(&self) -> bool { false } + + /// Support quote delimited string literals, e.g. 
`Q'{...}'` + /// + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + fn supports_quote_delimited_string(&self) -> bool { + false + } } /// This represents the operators for which precedence must be defined diff --git a/src/dialect/oracle.rs b/src/dialect/oracle.rs index f8bb0e15..54c2ace5 100644 --- a/src/dialect/oracle.rs +++ b/src/dialect/oracle.rs @@ -95,4 +95,8 @@ impl Dialect for OracleDialect { fn supports_group_by_expr(&self) -> bool { true } + + fn supports_quote_delimited_string(&self) -> bool { + true + } } diff --git a/src/parser/merge.rs b/src/parser/merge.rs index b2283b67..2bc1544f 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -13,7 +13,7 @@ //! SQL Parser for a `MERGE` statement #[cfg(not(feature = "std"))] -use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec}; +use alloc::{boxed::Box, format, vec, vec::Vec}; use crate::{ ast::{ diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3ba4ba57..ade3c250 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1754,6 +1754,8 @@ impl<'a> Parser<'a> { | Token::TripleSingleQuotedRawStringLiteral(_) | Token::TripleDoubleQuotedRawStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_) + | Token::NationalQuoteDelimitedStringLiteral(_) | Token::HexStringLiteral(_) => { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) @@ -2770,6 +2772,8 @@ impl<'a> Parser<'a> { | Token::EscapedStringLiteral(_) | Token::UnicodeStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_) + | Token::NationalQuoteDelimitedStringLiteral(_) | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), _ => self.expected( "either filler, WITH, or WITHOUT in LISTAGG", @@ -10697,6 +10701,12 @@ impl<'a> Parser<'a> { Token::NationalStringLiteral(ref s) => { ok_value(Value::NationalStringLiteral(s.to_string())) } + 
Token::QuoteDelimitedStringLiteral(v) => { + ok_value(Value::QuoteDelimitedStringLiteral(v)) + } + Token::NationalQuoteDelimitedStringLiteral(v) => { + ok_value(Value::NationalQuoteDelimitedStringLiteral(v)) + } Token::EscapedStringLiteral(ref s) => { ok_value(Value::EscapedStringLiteral(s.to_string())) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 54a158c1..2ae17cf4 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -29,10 +29,10 @@ use alloc::{ vec, vec::Vec, }; -use core::iter::Peekable; use core::num::NonZeroU8; use core::str::Chars; use core::{cmp, fmt}; +use core::{iter::Peekable, str}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -46,7 +46,10 @@ use crate::dialect::{ SnowflakeDialect, }; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; -use crate::{ast::DollarQuotedString, dialect::HiveDialect}; +use crate::{ + ast::{DollarQuotedString, QuoteDelimitedString}, + dialect::HiveDialect, +}; /// SQL Token enumeration #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -98,6 +101,12 @@ pub enum Token { TripleDoubleQuotedRawStringLiteral(String), /// "National" string literal: i.e: N'string' NationalStringLiteral(String), + /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + QuoteDelimitedStringLiteral(QuoteDelimitedString), + /// "National" quote delimited literal.
Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + NationalQuoteDelimitedStringLiteral(QuoteDelimitedString), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' EscapedStringLiteral(String), /// Unicode string literal: i.e: U&'first \000A second' @@ -292,6 +301,8 @@ impl fmt::Display for Token { Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""), Token::DollarQuotedString(ref s) => write!(f, "{s}"), Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"), + Token::QuoteDelimitedStringLiteral(ref s) => s.fmt(f), + Token::NationalQuoteDelimitedStringLiteral(ref s) => write!(f, "N{s}"), Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"), Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"), Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"), @@ -1032,6 +1043,18 @@ impl<'a> Tokenizer<'a> { self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; Ok(Some(Token::NationalStringLiteral(s))) } + Some(&q @ 'q') | Some(&q @ 'Q') + if self.dialect.supports_quote_delimited_string() => + { + chars.next(); // consume and check the next char + if let Some('\'') = chars.peek() { + self.tokenize_quote_delimited_string(chars, &[n, q]) + .map(|s| Some(Token::NationalQuoteDelimitedStringLiteral(s))) + } else { + let s = self.tokenize_word(String::from_iter([n, q]), chars); + Ok(Some(Token::make_word(&s, None))) + } + } _ => { // regular identifier starting with an "N" let s = self.tokenize_word(n, chars); @@ -1039,6 +1062,16 @@ impl<'a> Tokenizer<'a> { } } } + q @ 'Q' | q @ 'q' if self.dialect.supports_quote_delimited_string() => { + chars.next(); // consume and check the next char + if let Some('\'') = chars.peek() { + self.tokenize_quote_delimited_string(chars, &[q]) + .map(|s| Some(Token::QuoteDelimitedStringLiteral(s))) + } 
else { + let s = self.tokenize_word(q, chars); + Ok(Some(Token::make_word(&s, None))) + } + } // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => { let starting_loc = chars.location(); @@ -1994,6 +2027,61 @@ impl<'a> Tokenizer<'a> { ) } + /// Reads a quote delimited string expecting `chars.next()` to deliver a quote. + /// + /// See + fn tokenize_quote_delimited_string( + &self, + chars: &mut State, + // the prefix that introduced the possible literal or word, + // e.g. "Q" or "nq" + literal_prefix: &[char], + ) -> Result { + let literal_start_loc = chars.location(); + chars.next(); + + let start_quote_loc = chars.location(); + let (start_quote, end_quote) = match chars.next() { + None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => { + return self.tokenizer_error( + start_quote_loc, + format!( + "Invalid space, tab, newline, or EOF after '{}''", + String::from_iter(literal_prefix) + ), + ); + } + Some(c) => ( + c, + match c { + '[' => ']', + '{' => '}', + '<' => '>', + '(' => ')', + c => c, + }, + ), + }; + + // read the string literal until the "quote character" followed by a literal quote + let mut value = String::new(); + while let Some(ch) = chars.next() { + if ch == end_quote { + if let Some('\'') = chars.peek() { + chars.next(); // ~ consume the quote + return Ok(QuoteDelimitedString { + start_quote, + value, + end_quote, + }); + } + } + value.push(ch); + } + + self.tokenizer_error(literal_start_loc, "Unterminated string literal") + } + /// Read a quoted string.
fn tokenize_quoted_string( &self, diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index 09fd4191..68366036 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -21,11 +21,12 @@ use pretty_assertions::assert_eq; use sqlparser::{ - ast::{BinaryOperator, Expr, Value, ValueWithSpan}, + ast::{BinaryOperator, Expr, Ident, QuoteDelimitedString, Value, ValueWithSpan}, dialect::OracleDialect, + parser::ParserError, tokenizer::Span, }; -use test_utils::{expr_from_projection, number, TestedDialects}; +use test_utils::{all_dialects_where, expr_from_projection, number, TestedDialects}; mod test_utils; @@ -33,6 +34,19 @@ fn oracle() -> TestedDialects { TestedDialects::new(vec![Box::new(OracleDialect)]) } +/// Convenience constructor for [QuoteDelimitedstring]. +fn quote_delimited_string( + start_quote: char, + value: &'static str, + end_quote: char, +) -> QuoteDelimitedString { + QuoteDelimitedString { + start_quote, + value: value.into(), + end_quote, + } +} + /// Oracle: `||` has a lower precedence than `*` and `/` #[test] fn muldiv_have_higher_precedence_than_strconcat() { @@ -103,3 +117,219 @@ fn plusminus_have_same_precedence_as_strconcat() { } ); } + +#[test] +fn parse_quote_delimited_string() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT Q'.abc.', \ + Q'Xab'cX', \ + Q'|abc'''|', \ + Q'{abc}d}', \ + Q'[]abc[]', \ + Q'', \ + Q'<<', \ + Q'('abc'('abc)', \ + Q'(abc'def))', \ + Q'(abc'def)))' \ + FROM dual"; + let select = dialect.verified_only_select(sql); + assert_eq!(10, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::QuoteDelimitedStringLiteral(quote_delimited_string('.', "abc", '.')) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('X', "ab'c", 'X'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + 
&Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('|', "abc'''", '|'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('{', "abc}d", '}'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[3]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('[', "]abc[", ']'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[4]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('<', "a'bc", '>'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[5]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('<', "<'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[6]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "'abc'('abc", ')'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[7]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "abc'def)", ')'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[8]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "abc'def))", ')'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[9]) + ); +} + +#[test] +fn parse_invalid_quote_delimited_strings() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + // ~ invalid quote delimiter + for q in [' ', '\t', '\r', '\n'] { + assert_eq!( + dialect.parse_sql_statements(&format!("SELECT Q'{q}abc{q}' FROM dual")), + Err(ParserError::TokenizerError( + "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into() + )), + "with quote char {q:?}" + ); + } + // ~ invalid eof after quote + assert_eq!( + 
dialect.parse_sql_statements("SELECT Q'"), + Err(ParserError::TokenizerError( + "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into() + )), + "with EOF quote char" + ); + // ~ unterminated string + assert_eq!( + dialect.parse_sql_statements("SELECT Q'|asdfa...."), + Err(ParserError::TokenizerError( + "Unterminated string literal at Line: 1, Column: 9".into() + )), + "with EOF quote char" + ); +} + +#[test] +fn parse_quote_delimited_string_lowercase() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "select q'!a'b'c!d!' from dual"; + let select = dialect.verified_only_select_with_canonical(sql, "SELECT Q'!a'b'c!d!' FROM dual"); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::QuoteDelimitedStringLiteral(quote_delimited_string('!', "a'b'c!d", '!')) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_quote_delimited_string_but_is_a_word() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT q, quux, q.abc FROM dual q"; + let select = dialect.verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "q")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "quux")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "q"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} + +#[test] +fn parse_national_quote_delimited_string() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT NQ'.abc.' 
FROM dual"; + let select = dialect.verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string('.', "abc", '.')) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_national_quote_delimited_string_lowercase() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + for prefix in ["nq", "Nq", "nQ", "NQ"] { + let select = dialect.verified_only_select_with_canonical( + &format!("select {prefix}'!a'b'c!d!' from dual"), + "SELECT NQ'!a'b'c!d!' FROM dual", + ); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string( + '!', "a'b'c!d", '!' + )) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); + } +} + +#[test] +fn parse_national_quote_delimited_string_but_is_a_word() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT nq, nqoo, nq.abc FROM dual q"; + let select = dialect.verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nq")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nqoo")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "nq"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} From d78dbc97a1f65ce7ee353fea181a6c3bb15a5050 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Thu, 18 Dec 2025 05:34:48 +0100 Subject: [PATCH 4/5] Added support for `ALTER OPERATOR FAMILY` syntax (#2125) --- src/ast/ddl.rs | 198 +++++++++++++++++- src/ast/mod.rs | 30 ++- src/ast/spans.rs | 1 + src/parser/mod.rs | 176 +++++++++++++++- tests/sqlparser_postgres.rs | 
398 +++++++++++++++++++++++++++++++++++- 5 files changed, 778 insertions(+), 25 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index d0aed448..4e042a36 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -4198,25 +4198,25 @@ impl fmt::Display for OperatorArgTypes { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum OperatorClassItem { - /// OPERATOR clause + /// `OPERATOR` clause Operator { - strategy_number: u32, + strategy_number: u64, operator_name: ObjectName, /// Optional operator argument types op_types: Option, - /// FOR SEARCH or FOR ORDER BY + /// `FOR SEARCH` or `FOR ORDER BY` purpose: Option, }, - /// FUNCTION clause + /// `FUNCTION` clause Function { - support_number: u32, + support_number: u64, /// Optional function argument types for the operator class op_types: Option>, function_name: ObjectName, /// Function argument types argument_types: Vec, }, - /// STORAGE clause + /// `STORAGE` clause Storage { storage_type: DataType }, } @@ -4413,3 +4413,189 @@ impl Spanned for DropOperatorClass { Span::empty() } } + +/// An item in an ALTER OPERATOR FAMILY ADD statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorFamilyItem { + /// `OPERATOR` clause + Operator { + strategy_number: u64, + operator_name: ObjectName, + /// Operator argument types + op_types: Vec, + /// `FOR SEARCH` or `FOR ORDER BY` + purpose: Option, + }, + /// `FUNCTION` clause + Function { + support_number: u64, + /// Optional operator argument types for the function + op_types: Option>, + function_name: ObjectName, + /// Function argument types + argument_types: Vec, + }, +} + +/// An item in an ALTER OPERATOR FAMILY DROP statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, 
Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorFamilyDropItem { + /// `OPERATOR` clause + Operator { + strategy_number: u64, + /// Operator argument types + op_types: Vec, + }, + /// `FUNCTION` clause + Function { + support_number: u64, + /// Operator argument types for the function + op_types: Vec, + }, +} + +impl fmt::Display for OperatorFamilyItem { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + OperatorFamilyItem::Operator { + strategy_number, + operator_name, + op_types, + purpose, + } => { + write!( + f, + "OPERATOR {strategy_number} {operator_name} ({})", + display_comma_separated(op_types) + )?; + if let Some(purpose) = purpose { + write!(f, " {purpose}")?; + } + Ok(()) + } + OperatorFamilyItem::Function { + support_number, + op_types, + function_name, + argument_types, + } => { + write!(f, "FUNCTION {support_number}")?; + if let Some(types) = op_types { + write!(f, " ({})", display_comma_separated(types))?; + } + write!(f, " {function_name}")?; + if !argument_types.is_empty() { + write!(f, "({})", display_comma_separated(argument_types))?; + } + Ok(()) + } + } + } +} + +impl fmt::Display for OperatorFamilyDropItem { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + OperatorFamilyDropItem::Operator { + strategy_number, + op_types, + } => { + write!( + f, + "OPERATOR {strategy_number} ({})", + display_comma_separated(op_types) + ) + } + OperatorFamilyDropItem::Function { + support_number, + op_types, + } => { + write!( + f, + "FUNCTION {support_number} ({})", + display_comma_separated(op_types) + ) + } + } + } +} + +/// `ALTER OPERATOR FAMILY` statement +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct AlterOperatorFamily { + /// Operator family name (can be schema-qualified) + pub name: ObjectName, + /// 
Index method (btree, hash, gist, gin, etc.) + pub using: Ident, + /// The operation to perform + pub operation: AlterOperatorFamilyOperation, +} + +/// An [AlterOperatorFamily] operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AlterOperatorFamilyOperation { + /// `ADD { OPERATOR ... | FUNCTION ... } [, ...]` + Add { + /// List of operator family items to add + items: Vec, + }, + /// `DROP { OPERATOR ... | FUNCTION ... } [, ...]` + Drop { + /// List of operator family items to drop + items: Vec, + }, + /// `RENAME TO new_name` + RenameTo { new_name: ObjectName }, + /// `OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` + OwnerTo(Owner), + /// `SET SCHEMA new_schema` + SetSchema { schema_name: ObjectName }, +} + +impl fmt::Display for AlterOperatorFamily { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "ALTER OPERATOR FAMILY {} USING {}", + self.name, self.using + )?; + write!(f, " {}", self.operation) + } +} + +impl fmt::Display for AlterOperatorFamilyOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AlterOperatorFamilyOperation::Add { items } => { + write!(f, "ADD {}", display_comma_separated(items)) + } + AlterOperatorFamilyOperation::Drop { items } => { + write!(f, "DROP {}", display_comma_separated(items)) + } + AlterOperatorFamilyOperation::RenameTo { new_name } => { + write!(f, "RENAME TO {new_name}") + } + AlterOperatorFamilyOperation::OwnerTo(owner) => { + write!(f, "OWNER TO {owner}") + } + AlterOperatorFamilyOperation::SetSchema { schema_name } => { + write!(f, "SET SCHEMA {schema_name}") + } + } + } +} + +impl Spanned for AlterOperatorFamily { + fn span(&self) -> Span { + Span::empty() + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f1e79b0d..46767860 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -60,22 
+60,24 @@ pub use self::dcl::{ }; pub use self::ddl::{ Alignment, AlterColumnOperation, AlterConnectorOwner, AlterIndexOperation, AlterOperator, - AlterOperatorOperation, AlterPolicyOperation, AlterSchema, AlterSchemaOperation, AlterTable, - AlterTableAlgorithm, AlterTableLock, AlterTableOperation, AlterTableType, AlterType, - AlterTypeAddValue, AlterTypeAddValuePosition, AlterTypeOperation, AlterTypeRename, - AlterTypeRenameValue, ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions, - ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, CreateDomain, + AlterOperatorFamily, AlterOperatorFamilyOperation, AlterOperatorOperation, + AlterPolicyOperation, AlterSchema, AlterSchemaOperation, AlterTable, AlterTableAlgorithm, + AlterTableLock, AlterTableOperation, AlterTableType, AlterType, AlterTypeAddValue, + AlterTypeAddValuePosition, AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue, + ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions, ColumnPolicy, + ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, CreateDomain, CreateExtension, CreateFunction, CreateIndex, CreateOperator, CreateOperatorClass, CreateOperatorFamily, CreateTable, CreateTrigger, CreateView, Deduplicate, DeferrableInitial, DropBehavior, DropExtension, DropFunction, DropOperator, DropOperatorClass, DropOperatorFamily, DropOperatorSignature, DropTrigger, GeneratedAs, GeneratedExpressionMode, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption, - OperatorArgTypes, OperatorClassItem, OperatorOption, OperatorPurpose, Owner, Partition, - ProcedureParam, ReferentialAction, RenameTableNameKind, ReplicaIdentity, TagsColumnOption, - TriggerObjectKind, Truncate, UserDefinedTypeCompositeAttributeDef, - UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, - 
UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef, + OperatorArgTypes, OperatorClassItem, OperatorFamilyDropItem, OperatorFamilyItem, + OperatorOption, OperatorPurpose, Owner, Partition, ProcedureParam, ReferentialAction, + RenameTableNameKind, ReplicaIdentity, TagsColumnOption, TriggerObjectKind, Truncate, + UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength, + UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption, + UserDefinedTypeStorage, ViewColumnDef, }; pub use self::dml::{ Delete, Insert, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, @@ -3411,6 +3413,11 @@ pub enum Statement { /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteroperator.html) AlterOperator(AlterOperator), /// ```sql + /// ALTER OPERATOR FAMILY + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteropfamily.html) + AlterOperatorFamily(AlterOperatorFamily), + /// ```sql /// ALTER ROLE /// ``` AlterRole { @@ -4972,6 +4979,9 @@ impl fmt::Display for Statement { write!(f, "ALTER TYPE {name} {operation}") } Statement::AlterOperator(alter_operator) => write!(f, "{alter_operator}"), + Statement::AlterOperatorFamily(alter_operator_family) => { + write!(f, "{alter_operator_family}") + } Statement::AlterRole { name, operation } => { write!(f, "ALTER ROLE {name} {operation}") } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 2ec797db..d4e84315 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -403,6 +403,7 @@ impl Spanned for Statement { // These statements need to be implemented Statement::AlterType { .. } => Span::empty(), Statement::AlterOperator { .. } => Span::empty(), + Statement::AlterOperatorFamily { .. } => Span::empty(), Statement::AlterRole { .. } => Span::empty(), Statement::AlterSession { .. } => Span::empty(), Statement::AttachDatabase { .. 
} => Span::empty(), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ade3c250..74b06ec8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6701,7 +6701,7 @@ impl<'a> Parser<'a> { let mut items = vec![]; loop { if self.parse_keyword(Keyword::OPERATOR) { - let strategy_number = self.parse_literal_uint()? as u32; + let strategy_number = self.parse_literal_uint()?; let operator_name = self.parse_operator_name()?; // Optional operator argument types @@ -6736,7 +6736,7 @@ impl<'a> Parser<'a> { purpose, }); } else if self.parse_keyword(Keyword::FUNCTION) { - let support_number = self.parse_literal_uint()? as u32; + let support_number = self.parse_literal_uint()?; // Optional operator types let op_types = @@ -9898,7 +9898,13 @@ impl<'a> Parser<'a> { operation, }) } - Keyword::OPERATOR => self.parse_alter_operator(), + Keyword::OPERATOR => { + if self.parse_keyword(Keyword::FAMILY) { + self.parse_alter_operator_family() + } else { + self.parse_alter_operator() + } + } Keyword::ROLE => self.parse_alter_role(), Keyword::POLICY => self.parse_alter_policy(), Keyword::CONNECTOR => self.parse_alter_connector(), @@ -10130,6 +10136,170 @@ impl<'a> Parser<'a> { })) } + /// Parse an operator item for ALTER OPERATOR FAMILY ADD operations + fn parse_operator_family_add_operator(&mut self) -> Result { + let strategy_number = self.parse_literal_uint()?; + let operator_name = self.parse_operator_name()?; + + // Operator argument types (required for ALTER OPERATOR FAMILY) + self.expect_token(&Token::LParen)?; + let op_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + + // Optional purpose + let purpose = if self.parse_keyword(Keyword::FOR) { + if self.parse_keyword(Keyword::SEARCH) { + Some(OperatorPurpose::ForSearch) + } else if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + let sort_family = self.parse_object_name(false)?; + Some(OperatorPurpose::ForOrderBy { sort_family }) + } else { + return 
self.expected("SEARCH or ORDER BY after FOR", self.peek_token()); + } + } else { + None + }; + + Ok(OperatorFamilyItem::Operator { + strategy_number, + operator_name, + op_types, + purpose, + }) + } + + /// Parse a function item for ALTER OPERATOR FAMILY ADD operations + fn parse_operator_family_add_function(&mut self) -> Result { + let support_number = self.parse_literal_uint()?; + + // Optional operator types + let op_types = if self.consume_token(&Token::LParen) && self.peek_token() != Token::RParen { + let types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + Some(types) + } else if self.consume_token(&Token::LParen) { + self.expect_token(&Token::RParen)?; + Some(vec![]) + } else { + None + }; + + let function_name = self.parse_object_name(false)?; + + // Function argument types + let argument_types = if self.consume_token(&Token::LParen) { + if self.peek_token() == Token::RParen { + self.expect_token(&Token::RParen)?; + vec![] + } else { + let types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + types + } + } else { + vec![] + }; + + Ok(OperatorFamilyItem::Function { + support_number, + op_types, + function_name, + argument_types, + }) + } + + /// Parse an operator item for ALTER OPERATOR FAMILY DROP operations + fn parse_operator_family_drop_operator( + &mut self, + ) -> Result { + let strategy_number = self.parse_literal_uint()?; + + // Operator argument types (required for DROP) + self.expect_token(&Token::LParen)?; + let op_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + + Ok(OperatorFamilyDropItem::Operator { + strategy_number, + op_types, + }) + } + + /// Parse a function item for ALTER OPERATOR FAMILY DROP operations + fn parse_operator_family_drop_function( + &mut self, + ) -> Result { + let support_number = self.parse_literal_uint()?; + + // Operator types (required for DROP) + 
self.expect_token(&Token::LParen)?; + let op_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + + Ok(OperatorFamilyDropItem::Function { + support_number, + op_types, + }) + } + + /// Parse an operator family item for ADD operations (dispatches to operator or function parsing) + fn parse_operator_family_add_item(&mut self) -> Result { + if self.parse_keyword(Keyword::OPERATOR) { + self.parse_operator_family_add_operator() + } else if self.parse_keyword(Keyword::FUNCTION) { + self.parse_operator_family_add_function() + } else { + self.expected("OPERATOR or FUNCTION", self.peek_token()) + } + } + + /// Parse an operator family item for DROP operations (dispatches to operator or function parsing) + fn parse_operator_family_drop_item(&mut self) -> Result { + if self.parse_keyword(Keyword::OPERATOR) { + self.parse_operator_family_drop_operator() + } else if self.parse_keyword(Keyword::FUNCTION) { + self.parse_operator_family_drop_function() + } else { + self.expected("OPERATOR or FUNCTION", self.peek_token()) + } + } + + /// Parse a [Statement::AlterOperatorFamily] + /// See + pub fn parse_alter_operator_family(&mut self) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::USING)?; + let using = self.parse_identifier()?; + + let operation = if self.parse_keyword(Keyword::ADD) { + let items = self.parse_comma_separated(Parser::parse_operator_family_add_item)?; + AlterOperatorFamilyOperation::Add { items } + } else if self.parse_keyword(Keyword::DROP) { + let items = self.parse_comma_separated(Parser::parse_operator_family_drop_item)?; + AlterOperatorFamilyOperation::Drop { items } + } else if self.parse_keywords(&[Keyword::RENAME, Keyword::TO]) { + let new_name = self.parse_object_name(false)?; + AlterOperatorFamilyOperation::RenameTo { new_name } + } else if self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) { + let owner = self.parse_owner()?; + 
AlterOperatorFamilyOperation::OwnerTo(owner) + } else if self.parse_keywords(&[Keyword::SET, Keyword::SCHEMA]) { + let schema_name = self.parse_object_name(false)?; + AlterOperatorFamilyOperation::SetSchema { schema_name } + } else { + return self.expected_ref( + "ADD, DROP, RENAME TO, OWNER TO, or SET SCHEMA after ALTER OPERATOR FAMILY", + self.peek_token_ref(), + ); + }; + + Ok(Statement::AlterOperatorFamily(AlterOperatorFamily { + name, + using, + operation, + })) + } + // Parse a [Statement::AlterSchema] // ALTER SCHEMA [ IF EXISTS ] schema_name pub fn parse_alter_schema(&mut self) -> Result { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index d595a0a2..9f4564ef 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -23,15 +23,11 @@ mod test_utils; use helpers::attached_token::AttachedToken; -use sqlparser::ast::{ - DataType, DropBehavior, DropOperator, DropOperatorClass, DropOperatorSignature, -}; -use sqlparser::tokenizer::Span; -use test_utils::*; - use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, PostgreSqlDialect}; use sqlparser::parser::ParserError; +use sqlparser::tokenizer::Span; +use test_utils::*; #[test] fn parse_create_table_generated_always_as_identity() { @@ -7145,6 +7141,396 @@ fn parse_alter_operator() { ); } +#[test] +fn parse_alter_operator_family() { + // Test ALTER OPERATOR FAMILY ... 
ADD OPERATOR + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree ADD OPERATOR 1 < (INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Add { + items: vec![OperatorFamilyItem::Operator { + strategy_number: 1, + operator_name: ObjectName::from(vec![Ident::new("<")]), + op_types: vec![DataType::Int4(None), DataType::Int2(None)], + purpose: None, + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... ADD OPERATOR with FOR SEARCH + let sql = + "ALTER OPERATOR FAMILY text_ops USING btree ADD OPERATOR 1 @@ (TEXT, TEXT) FOR SEARCH"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("text_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Add { + items: vec![OperatorFamilyItem::Operator { + strategy_number: 1, + operator_name: ObjectName::from(vec![Ident::new("@@")]), + op_types: vec![DataType::Text, DataType::Text], + purpose: Some(OperatorPurpose::ForSearch), + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... ADD FUNCTION + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree ADD FUNCTION 1 btint42cmp(INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Add { + items: vec![OperatorFamilyItem::Function { + support_number: 1, + op_types: None, + function_name: ObjectName::from(vec![Ident::new("btint42cmp")]), + argument_types: vec![DataType::Int4(None), DataType::Int2(None)], + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... 
DROP OPERATOR + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree DROP OPERATOR 1 (INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Drop { + items: vec![OperatorFamilyDropItem::Operator { + strategy_number: 1, + op_types: vec![DataType::Int4(None), DataType::Int2(None)], + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... DROP FUNCTION + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree DROP FUNCTION 1 (INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Drop { + items: vec![OperatorFamilyDropItem::Function { + support_number: 1, + op_types: vec![DataType::Int4(None), DataType::Int2(None)], + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... RENAME TO + let sql = "ALTER OPERATOR FAMILY old_ops USING btree RENAME TO new_ops"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("old_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::RenameTo { + new_name: ObjectName::from(vec![Ident::new("new_ops")]), + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... OWNER TO + let sql = "ALTER OPERATOR FAMILY my_ops USING btree OWNER TO joe"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("my_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::OwnerTo(Owner::Ident(Ident::new("joe"))), + }) + ); + + // Test ALTER OPERATOR FAMILY ... 
SET SCHEMA + let sql = "ALTER OPERATOR FAMILY my_ops USING btree SET SCHEMA new_schema"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("my_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::SetSchema { + schema_name: ObjectName::from(vec![Ident::new("new_schema")]), + }, + }) + ); + + // Test error cases + // Missing USING clause + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops ADD OPERATOR 1 < (INT4, INT2)") + .is_err()); + + // Invalid operation + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree INVALID_OPERATION") + .is_err()); + + // Missing operator name in ADD OPERATOR + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 (INT4, INT2)" + ) + .is_err()); + + // Missing function name in ADD FUNCTION + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4, INT2)" + ) + .is_err()); + + // Missing parentheses in DROP OPERATOR + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 INT4, INT2") + .is_err()); + + // Invalid operator name (empty) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 (INT4, INT2)" + ) + .is_err()); + + // Invalid operator name (special characters) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 @#$ (INT4, INT2)" + ) + .is_err()); + + // Negative strategy number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR -1 < (INT4, INT2)" + ) + .is_err()); + + // Non-integer strategy number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1.5 < (INT4, INT2)" + ) + .is_err()); + + // Missing closing parenthesis in operator types + assert!(pg() + 
.parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2" + ) + .is_err()); + + // Missing opening parenthesis in operator types + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < INT4, INT2)" + ) + .is_err()); + + // Empty operator types + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < ()") + .is_err()); + + // Invalid data type (using punctuation) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (@#$%, INT2)" + ) + .is_err()); + + // Incomplete FOR clause + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR" + ) + .is_err()); + + // Invalid FOR clause keyword + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR INVALID" + ) + .is_err()); + + // FOR ORDER BY without sort family + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR ORDER BY" + ) + .is_err()); + + // Missing function name in ADD FUNCTION + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4, INT2)" + ) + .is_err()); + + // Invalid function name + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 123invalid(INT4, INT2)" + ) + .is_err()); + + // Negative support number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION -1 func(INT4, INT2)" + ) + .is_err()); + + // Non-integer support number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1.5 func(INT4, INT2)" + ) + .is_err()); + + // Missing closing parenthesis in function operator types + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 
(INT4, INT2 func()" + ) + .is_err()); + + // Missing closing parenthesis in function arguments + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 func(INT4, INT2" + ) + .is_err()); + + // Invalid data type in function arguments + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 func(@#$%, INT2)" + ) + .is_err()); + + // DROP OPERATOR with FOR clause (not allowed) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 (INT4, INT2) FOR SEARCH" + ) + .is_err()); + + // DROP FUNCTION with function arguments (not allowed) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree DROP FUNCTION 1 (INT4, INT2) func(INT4)" + ) + .is_err()); + + // Multiple ADD items with error in middle + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2), INVALID_ITEM" + ) + .is_err()); + + // Multiple DROP items with error in middle + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 (INT4, INT2), INVALID_ITEM" + ) + .is_err()); + + // RENAME TO with invalid new name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree RENAME TO 123invalid") + .is_err()); + + // OWNER TO with invalid owner + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree OWNER TO 123invalid") + .is_err()); + + // SET SCHEMA with invalid schema name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree SET SCHEMA 123invalid") + .is_err()); + + // Schema-qualified operator family name with invalid schema + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY 123invalid.my_ops USING btree ADD OPERATOR 1 < (INT4, INT2)" + ) + .is_err()); + + // Missing operator family name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY USING btree ADD OPERATOR 1 
< (INT4, INT2)") + .is_err()); + + // Extra tokens at end + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) EXTRA" + ) + .is_err()); + + // Incomplete statement + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD") + .is_err()); + + // Very long numbers + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 999999999999999999999 < (INT4, INT2)") + .is_err()); + + // Multiple FOR clauses + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR SEARCH FOR ORDER BY sort_family") + .is_err()); + + // FOR SEARCH with extra tokens + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR SEARCH EXTRA") + .is_err()); + + // FOR ORDER BY with invalid sort family + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR ORDER BY 123invalid") + .is_err()); + + // Function with empty operator types but missing function args parens + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 () func") + .is_err()); + + // Function with mismatched parentheses + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4 func(INT2" + ) + .is_err()); + + // DROP with empty types + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 ()") + .is_err()); + + // DROP FUNCTION with empty types + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree DROP FUNCTION 1 ()") + .is_err()); +} + #[test] fn parse_drop_operator_family() { for if_exists in [true, false] { From 39418cfebbfe0c028d780614e8b60ef8f0c98ce7 Mon Sep 17 00:00:00 2001 From: jnlt3 Date: Thu, 18 Dec 2025 15:06:55 +0300 Subject: [PATCH 5/5] PostgreSQL Tokenization: Fix unexpected 
characters after question mark being silently ignored (#2129) --- src/tokenizer.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 2ae17cf4..8666563a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1717,7 +1717,7 @@ impl<'a> Tokenizer<'a> { } } Some('#') => self.consume_and_return(chars, Token::QuestionMarkSharp), - _ => self.consume_and_return(chars, Token::Question), + _ => Ok(Some(Token::Question)), } } '?' => { @@ -4147,4 +4147,23 @@ mod tests { panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}"); } } + + #[test] + fn tokenize_question_mark() { + let dialect = PostgreSqlDialect {}; + let sql = "SELECT x ? y"; + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); + compare( + tokens, + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::make_word("x", None), + Token::Whitespace(Whitespace::Space), + Token::Question, + Token::Whitespace(Whitespace::Space), + Token::make_word("y", None), + ], + ) + } }