Merge branch 'main' into feature/databricks-timestamp-timetravel

This commit is contained in:
James Vorderbruggen 2025-12-18 06:52:06 -06:00 committed by GitHub
commit d0bed07152
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 1682 additions and 47 deletions

329
src/ast/comments.rs Normal file
View file

@ -0,0 +1,329 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Provides a representation of source code comments in parsed SQL code.
//!
//! See [Comments::find] for an example.
#[cfg(not(feature = "std"))]
use alloc::{string::String, vec::Vec};
use core::{
ops::{Bound, Deref, RangeBounds},
slice,
};
use crate::tokenizer::{Location, Span};
/// An opaque container for comments from a parsed SQL source code.
///
/// Comments are added through `Comments::offer` and looked up by source
/// location through [Comments::find].
#[derive(Default, Debug)]
pub struct Comments(Vec<CommentWithSpan>);
impl Comments {
    /// Appends `comment` if it is the first one offered or if its span
    /// compares strictly greater than the span of the most recently accepted
    /// comment. A comment arriving out of order (relative to the order in the
    /// parsed source code) is silently dropped.
    pub(crate) fn offer(&mut self, comment: CommentWithSpan) {
        let in_order = match self.0.last() {
            Some(previous) => previous.span < comment.span,
            None => true,
        };
        if in_order {
            self.0.push(comment);
        }
    }

    /// Finds comments starting within the given location range. The order of
    /// iterator reflects the order of the comments as encountered in the parsed
    /// source code.
    ///
    /// # Example
    /// ```rust
    /// use sqlparser::{dialect::GenericDialect, parser::Parser, tokenizer::Location};
    ///
    /// let sql = r#"/*
    /// header comment ...
    /// ... spanning multiple lines
    /// */
    ///
    /// -- first statement
    /// SELECT 'hello' /* world */ FROM DUAL;
    ///
    /// -- second statement
    /// SELECT 123 FROM DUAL;
    ///
    /// -- trailing comment
    /// "#;
    ///
    /// let (ast, comments) = Parser::parse_sql_with_comments(&GenericDialect, sql).unwrap();
    ///
    /// // all comments appearing before line seven, i.e. before the first statement itself
    /// assert_eq!(
    /// &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::<Vec<_>>(),
    /// &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]);
    ///
    /// // all comments appearing within the first statement
    /// assert_eq!(
    /// &comments.find(Location::new(7, 1)..Location::new(8,1)).map(|c| c.as_str()).collect::<Vec<_>>(),
    /// &[" world "]);
    ///
    /// // all comments appearing within or after the first statement
    /// assert_eq!(
    /// &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::<Vec<_>>(),
    /// &[" world ", " second statement\n", " trailing comment\n"]);
    /// ```
    ///
    /// The [Spanned](crate::ast::Spanned) trait allows you to access location
    /// information for certain AST nodes.
    pub fn find<R: RangeBounds<Location>>(&self, range: R) -> Iter<'_> {
        let start = self.start_index(range.start_bound());
        let end = self.end_index(range.end_bound());
        debug_assert!((0..=self.0.len()).contains(&start));
        debug_assert!((0..=self.0.len()).contains(&end));
        // a reversed range (start index past the end index) selects nothing
        let selected = if start <= end {
            &self.0[start..end]
        } else {
            &self.0[0..0]
        };
        Iter(selected.iter())
    }

    /// Computes the index of the first stored comment that satisfies the
    /// lower `location` bound, i.e. the first comment starting at or after
    /// an included bound, or strictly after an excluded one.
    ///
    /// The returned index is _inclusive_ and within the range of `0..=self.0.len()`.
    fn start_index(&self, location: Bound<&Location>) -> usize {
        let (location, skip_exact_match) = match location {
            Bound::Unbounded => return 0,
            Bound::Included(location) => (location, false),
            Bound::Excluded(location) => (location, true),
        };
        match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
            // a comment starting exactly at an excluded bound is left out
            Ok(i) if skip_exact_match => i + 1,
            Ok(i) | Err(i) => i,
        }
    }

    /// Computes the index just past the last stored comment that satisfies
    /// the upper `location` bound, i.e. the last comment starting at or
    /// before an included bound, or strictly before an excluded one.
    ///
    /// The returned index is _exclusive_ and within the range of `0..=self.0.len()`.
    fn end_index(&self, location: Bound<&Location>) -> usize {
        let (location, keep_exact_match) = match location {
            Bound::Unbounded => return self.0.len(),
            Bound::Included(location) => (location, true),
            Bound::Excluded(location) => (location, false),
        };
        match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
            // a comment starting exactly at an included bound is kept
            Ok(i) if keep_exact_match => i + 1,
            Ok(i) | Err(i) => i,
        }
    }
}
impl From<Comments> for Vec<CommentWithSpan> {
fn from(comments: Comments) -> Self {
comments.0
}
}
/// A source code comment paired with the span it occupies.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommentWithSpan {
    /// The source code comment itself
    pub comment: Comment,
    /// The span of the comment including its markers
    pub span: Span,
}

/// Gives direct access to the wrapped [Comment] through a reference to the
/// spanned value.
impl Deref for CommentWithSpan {
    type Target = Comment;

    fn deref(&self) -> &Comment {
        let Self { comment, .. } = self;
        comment
    }
}
/// A unified type of the different source code comment formats.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Comment {
    /// A single line comment, typically introduced with a prefix and spanning
    /// until end-of-line or end-of-file in the source code.
    ///
    /// Note: `content` will include the terminating new-line character, if any.
    SingleLine { content: String, prefix: String },

    /// A multi-line comment, typically enclosed in `/* .. */` markers. The
    /// string represents the content excluding the markers.
    MultiLine(String),
}

impl Comment {
    /// Borrows the textual content of the comment, whichever format it has.
    ///
    /// For a single line comment this is `content`; the `prefix` is not part
    /// of the returned slice.
    pub fn as_str(&self) -> &str {
        let (Comment::SingleLine { content, .. } | Comment::MultiLine(content)) = self;
        content
    }
}

/// Dereferences to the comment's content, see [Comment::as_str].
impl Deref for Comment {
    type Target = str;

    fn deref(&self) -> &str {
        self.as_str()
    }
}
/// An opaque iterator implementation over comments served by [Comments::find].
///
/// Yields references to the selected comments in the order they are stored.
pub struct Iter<'a>(slice::Iter<'a, CommentWithSpan>);

impl<'a> Iterator for Iter<'a> {
    type Item = &'a CommentWithSpan;

    fn next(&mut self) -> Option<Self::Item> {
        self.0.next()
    }

    // Forward the exact bounds known by the underlying slice iterator; the
    // default implementation would report `(0, None)` and prevent consumers
    // such as `collect` from reserving the right capacity up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}
// Unit tests for `Comments::find`, driven by a hand-built set of comments.
#[cfg(test)]
mod tests {
use super::*;
// Exercises `Comments::find` with open-ended, half-open, inclusive and
// reversed location ranges over four fixture comments.
#[test]
fn test_find() {
let comments = {
// The fixture mirrors the comments of the following source text:
// ```
// -- abc
// /* hello */--, world
// /* def
// ghi
// jkl
// */
// ```
let mut c = Comments(Vec::new());
c.offer(CommentWithSpan {
comment: Comment::SingleLine {
content: " abc".into(),
prefix: "--".into(),
},
span: Span::new((1, 1).into(), (1, 7).into()),
});
c.offer(CommentWithSpan {
comment: Comment::MultiLine(" hello ".into()),
span: Span::new((2, 3).into(), (2, 14).into()),
});
c.offer(CommentWithSpan {
comment: Comment::SingleLine {
content: ", world".into(),
prefix: "--".into(),
},
span: Span::new((2, 14).into(), (2, 21).into()),
});
c.offer(CommentWithSpan {
comment: Comment::MultiLine(" def\n ghi\n jkl\n".into()),
span: Span::new((3, 3).into(), (7, 1).into()),
});
c
};
// Helper: collects the contents of all comments found in `range`.
fn find<R: RangeBounds<Location>>(comments: &Comments, range: R) -> Vec<&str> {
comments.find(range).map(|c| c.as_str()).collect::<Vec<_>>()
}
// ~ end-points only --------------------------------------------------
assert_eq!(find(&comments, ..Location::new(0, 0)), Vec::<&str>::new());
assert_eq!(find(&comments, ..Location::new(2, 1)), vec![" abc"]);
// an exclusive end bound leaves out the comment starting exactly there
assert_eq!(find(&comments, ..Location::new(2, 3)), vec![" abc"]);
// an inclusive end bound keeps the comment starting exactly there
assert_eq!(
find(&comments, ..=Location::new(2, 3)),
vec![" abc", " hello "]
);
// NOTE(review): this assertion is identical to the previous one; possibly
// a different end-point was intended here -- confirm.
assert_eq!(
find(&comments, ..=Location::new(2, 3)),
vec![" abc", " hello "]
);
assert_eq!(
find(&comments, ..Location::new(2, 15)),
vec![" abc", " hello ", ", world"]
);
// ~ start-points only ------------------------------------------------
assert_eq!(
find(&comments, Location::new(1000, 1000)..),
Vec::<&str>::new()
);
assert_eq!(
find(&comments, Location::new(2, 14)..),
vec![", world", " def\n ghi\n jkl\n"]
);
assert_eq!(
find(&comments, Location::new(2, 15)..),
vec![" def\n ghi\n jkl\n"]
);
assert_eq!(
find(&comments, Location::new(0, 0)..),
vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
);
assert_eq!(
find(&comments, Location::new(1, 1)..),
vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
);
// ~ ranges -----------------------------------------------------------
// a reversed range yields nothing
assert_eq!(
find(&comments, Location::new(2, 1)..Location::new(1, 1)),
Vec::<&str>::new()
);
assert_eq!(
find(&comments, Location::new(1, 1)..Location::new(2, 3)),
vec![" abc"]
);
assert_eq!(
find(&comments, Location::new(1, 1)..=Location::new(2, 3)),
vec![" abc", " hello "]
);
// only the start of a comment matters: (2, 10) lies inside " hello "
assert_eq!(
find(&comments, Location::new(1, 1)..=Location::new(2, 10)),
vec![" abc", " hello "]
);
assert_eq!(
find(&comments, Location::new(1, 1)..=Location::new(2, 14)),
vec![" abc", " hello ", ", world"]
);
assert_eq!(
find(&comments, Location::new(1, 1)..Location::new(2, 15)),
vec![" abc", " hello ", ", world"]
);
// ~ find everything --------------------------------------------------
assert_eq!(
find(&comments, ..),
vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
);
}
}

View file

@ -61,7 +61,7 @@ use crate::tokenizer::{Span, Token};
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct IndexColumn {
pub column: OrderByExpr,
pub operator_class: Option<Ident>,
pub operator_class: Option<ObjectName>,
}
impl From<Ident> for IndexColumn {
@ -4198,25 +4198,25 @@ impl fmt::Display for OperatorArgTypes {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum OperatorClassItem {
/// OPERATOR clause
/// `OPERATOR` clause
Operator {
strategy_number: u32,
strategy_number: u64,
operator_name: ObjectName,
/// Optional operator argument types
op_types: Option<OperatorArgTypes>,
/// FOR SEARCH or FOR ORDER BY
/// `FOR SEARCH` or `FOR ORDER BY`
purpose: Option<OperatorPurpose>,
},
/// FUNCTION clause
/// `FUNCTION` clause
Function {
support_number: u32,
support_number: u64,
/// Optional function argument types for the operator class
op_types: Option<Vec<DataType>>,
function_name: ObjectName,
/// Function argument types
argument_types: Vec<DataType>,
},
/// STORAGE clause
/// `STORAGE` clause
Storage { storage_type: DataType },
}
@ -4413,3 +4413,189 @@ impl Spanned for DropOperatorClass {
Span::empty()
}
}
/// An item in an ALTER OPERATOR FAMILY ADD statement
///
/// Its `Display` implementation renders the item back as SQL text.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum OperatorFamilyItem {
/// `OPERATOR` clause
///
/// Displayed as `OPERATOR <strategy_number> <operator_name> (<op_types>)`,
/// followed by the purpose when present.
Operator {
/// Number written directly after the `OPERATOR` keyword
strategy_number: u64,
/// Name of the operator
operator_name: ObjectName,
/// Operator argument types
op_types: Vec<DataType>,
/// `FOR SEARCH` or `FOR ORDER BY`
purpose: Option<OperatorPurpose>,
},
/// `FUNCTION` clause
///
/// Displayed as `FUNCTION <support_number>`, the optional parenthesized
/// operator types, the function name and, when non-empty, the
/// parenthesized argument types.
Function {
/// Number written directly after the `FUNCTION` keyword
support_number: u64,
/// Optional operator argument types for the function
op_types: Option<Vec<DataType>>,
/// Name of the function
function_name: ObjectName,
/// Function argument types
argument_types: Vec<DataType>,
},
}
/// An item in an ALTER OPERATOR FAMILY DROP statement
///
/// Unlike the items of the ADD form, a drop item carries only a number and
/// a list of operator argument types.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum OperatorFamilyDropItem {
/// `OPERATOR` clause, displayed as `OPERATOR <strategy_number> (<op_types>)`
Operator {
/// Number written directly after the `OPERATOR` keyword
strategy_number: u64,
/// Operator argument types
op_types: Vec<DataType>,
},
/// `FUNCTION` clause, displayed as `FUNCTION <support_number> (<op_types>)`
Function {
/// Number written directly after the `FUNCTION` keyword
support_number: u64,
/// Operator argument types for the function
op_types: Vec<DataType>,
},
}
impl fmt::Display for OperatorFamilyItem {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
OperatorFamilyItem::Operator {
strategy_number,
operator_name,
op_types,
purpose,
} => {
write!(
f,
"OPERATOR {strategy_number} {operator_name} ({})",
display_comma_separated(op_types)
)?;
if let Some(purpose) = purpose {
write!(f, " {purpose}")?;
}
Ok(())
}
OperatorFamilyItem::Function {
support_number,
op_types,
function_name,
argument_types,
} => {
write!(f, "FUNCTION {support_number}")?;
if let Some(types) = op_types {
write!(f, " ({})", display_comma_separated(types))?;
}
write!(f, " {function_name}")?;
if !argument_types.is_empty() {
write!(f, "({})", display_comma_separated(argument_types))?;
}
Ok(())
}
}
}
}
impl fmt::Display for OperatorFamilyDropItem {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
OperatorFamilyDropItem::Operator {
strategy_number,
op_types,
} => {
write!(
f,
"OPERATOR {strategy_number} ({})",
display_comma_separated(op_types)
)
}
OperatorFamilyDropItem::Function {
support_number,
op_types,
} => {
write!(
f,
"FUNCTION {support_number} ({})",
display_comma_separated(op_types)
)
}
}
}
}
/// `ALTER OPERATOR FAMILY` statement
/// See <https://www.postgresql.org/docs/current/sql-alteropfamily.html>
///
/// Displayed as `ALTER OPERATOR FAMILY <name> USING <using> <operation>`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct AlterOperatorFamily {
/// Operator family name (can be schema-qualified)
pub name: ObjectName,
/// Index method (btree, hash, gist, gin, etc.), written after `USING`
pub using: Ident,
/// The operation to perform
pub operation: AlterOperatorFamilyOperation,
}
/// An [AlterOperatorFamily] operation
///
/// Each variant corresponds to one of the clauses that may follow
/// `ALTER OPERATOR FAMILY <name> USING <index method>`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum AlterOperatorFamilyOperation {
/// `ADD { OPERATOR ... | FUNCTION ... } [, ...]`
Add {
/// List of operator family items to add
items: Vec<OperatorFamilyItem>,
},
/// `DROP { OPERATOR ... | FUNCTION ... } [, ...]`
Drop {
/// List of operator family items to drop
items: Vec<OperatorFamilyDropItem>,
},
/// `RENAME TO new_name`
RenameTo {
/// The name written after `RENAME TO`
new_name: ObjectName,
},
/// `OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER }`
OwnerTo(Owner),
/// `SET SCHEMA new_schema`
SetSchema {
/// The schema name written after `SET SCHEMA`
schema_name: ObjectName,
},
}
/// Renders the statement as `ALTER OPERATOR FAMILY <name> USING <using>`
/// followed by a space and the operation.
impl fmt::Display for AlterOperatorFamily {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self {
            name,
            using,
            operation,
        } = self;
        write!(f, "ALTER OPERATOR FAMILY {} USING {}", name, using)?;
        write!(f, " {}", operation)
    }
}
impl fmt::Display for AlterOperatorFamilyOperation {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
AlterOperatorFamilyOperation::Add { items } => {
write!(f, "ADD {}", display_comma_separated(items))
}
AlterOperatorFamilyOperation::Drop { items } => {
write!(f, "DROP {}", display_comma_separated(items))
}
AlterOperatorFamilyOperation::RenameTo { new_name } => {
write!(f, "RENAME TO {new_name}")
}
AlterOperatorFamilyOperation::OwnerTo(owner) => {
write!(f, "OWNER TO {owner}")
}
AlterOperatorFamilyOperation::SetSchema { schema_name } => {
write!(f, "SET SCHEMA {schema_name}")
}
}
}
}
/// No location information is derived for this statement: `span` always
/// returns `Span::empty()`.
impl Spanned for AlterOperatorFamily {
fn span(&self) -> Span {
Span::empty()
}
}

View file

@ -60,22 +60,24 @@ pub use self::dcl::{
};
pub use self::ddl::{
Alignment, AlterColumnOperation, AlterConnectorOwner, AlterIndexOperation, AlterOperator,
AlterOperatorOperation, AlterPolicyOperation, AlterSchema, AlterSchemaOperation, AlterTable,
AlterTableAlgorithm, AlterTableLock, AlterTableOperation, AlterTableType, AlterType,
AlterTypeAddValue, AlterTypeAddValuePosition, AlterTypeOperation, AlterTypeRename,
AlterTypeRenameValue, ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions,
ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, CreateDomain,
AlterOperatorFamily, AlterOperatorFamilyOperation, AlterOperatorOperation,
AlterPolicyOperation, AlterSchema, AlterSchemaOperation, AlterTable, AlterTableAlgorithm,
AlterTableLock, AlterTableOperation, AlterTableType, AlterType, AlterTypeAddValue,
AlterTypeAddValuePosition, AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue,
ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions, ColumnPolicy,
ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, CreateDomain,
CreateExtension, CreateFunction, CreateIndex, CreateOperator, CreateOperatorClass,
CreateOperatorFamily, CreateTable, CreateTrigger, CreateView, Deduplicate, DeferrableInitial,
DropBehavior, DropExtension, DropFunction, DropOperator, DropOperatorClass, DropOperatorFamily,
DropOperatorSignature, DropTrigger, GeneratedAs, GeneratedExpressionMode, IdentityParameters,
IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder,
IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption,
OperatorArgTypes, OperatorClassItem, OperatorOption, OperatorPurpose, Owner, Partition,
ProcedureParam, ReferentialAction, RenameTableNameKind, ReplicaIdentity, TagsColumnOption,
TriggerObjectKind, Truncate, UserDefinedTypeCompositeAttributeDef,
UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation,
UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef,
OperatorArgTypes, OperatorClassItem, OperatorFamilyDropItem, OperatorFamilyItem,
OperatorOption, OperatorPurpose, Owner, Partition, ProcedureParam, ReferentialAction,
RenameTableNameKind, ReplicaIdentity, TagsColumnOption, TriggerObjectKind, Truncate,
UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength,
UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption,
UserDefinedTypeStorage, ViewColumnDef,
};
pub use self::dml::{
Delete, Insert, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr,
@ -110,7 +112,7 @@ pub use self::trigger::{
pub use self::value::{
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
NormalizationForm, TrimWhereField, Value, ValueWithSpan,
NormalizationForm, QuoteDelimitedString, TrimWhereField, Value, ValueWithSpan,
};
use crate::ast::helpers::key_value_options::KeyValueOptions;
@ -136,6 +138,7 @@ mod query;
mod spans;
pub use spans::Spanned;
pub mod comments;
mod trigger;
mod value;
@ -3410,6 +3413,11 @@ pub enum Statement {
/// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteroperator.html)
AlterOperator(AlterOperator),
/// ```sql
/// ALTER OPERATOR FAMILY
/// ```
/// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteropfamily.html)
AlterOperatorFamily(AlterOperatorFamily),
/// ```sql
/// ALTER ROLE
/// ```
AlterRole {
@ -4971,6 +4979,9 @@ impl fmt::Display for Statement {
write!(f, "ALTER TYPE {name} {operation}")
}
Statement::AlterOperator(alter_operator) => write!(f, "{alter_operator}"),
Statement::AlterOperatorFamily(alter_operator_family) => {
write!(f, "{alter_operator_family}")
}
Statement::AlterRole { name, operation } => {
write!(f, "ALTER ROLE {name} {operation}")
}

View file

@ -28,7 +28,7 @@ use core::iter;
use crate::tokenizer::Span;
use super::{
dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation,
comments, dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation,
AlterIndexOperation, AlterTableOperation, Analyze, Array, Assignment, AssignmentTarget,
AttachedToken, BeginEndStatements, CaseStatement, CloseCursor, ClusteredIndex, ColumnDef,
ColumnOption, ColumnOptionDef, ConditionalStatementBlock, ConditionalStatements,
@ -403,6 +403,7 @@ impl Spanned for Statement {
// These statements need to be implemented
Statement::AlterType { .. } => Span::empty(),
Statement::AlterOperator { .. } => Span::empty(),
Statement::AlterOperatorFamily { .. } => Span::empty(),
Statement::AlterRole { .. } => Span::empty(),
Statement::AlterSession { .. } => Span::empty(),
Statement::AttachDatabase { .. } => Span::empty(),
@ -2477,6 +2478,12 @@ impl Spanned for OutputClause {
}
}
/// A comment reports the span stored in its `span` field.
impl Spanned for comments::CommentWithSpan {
fn span(&self) -> Span {
self.span
}
}
#[cfg(test)]
pub mod tests {
use crate::dialect::{Dialect, GenericDialect, SnowflakeDialect};

View file

@ -167,6 +167,12 @@ pub enum Value {
TripleDoubleQuotedRawStringLiteral(String),
/// N'string value'
NationalStringLiteral(String),
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
QuoteDelimitedStringLiteral(QuoteDelimitedString),
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
NationalQuoteDelimitedStringLiteral(QuoteDelimitedString),
/// X'hex value'
HexStringLiteral(String),
@ -207,6 +213,8 @@ impl Value {
| Value::NationalStringLiteral(s)
| Value::HexStringLiteral(s) => Some(s),
Value::DollarQuotedString(s) => Some(s.value),
Value::QuoteDelimitedStringLiteral(s) => Some(s.value),
Value::NationalQuoteDelimitedStringLiteral(s) => Some(s.value),
_ => None,
}
}
@ -242,6 +250,8 @@ impl fmt::Display for Value {
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)),
Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
Value::QuoteDelimitedStringLiteral(v) => v.fmt(f),
Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"),
Value::HexStringLiteral(v) => write!(f, "X'{v}'"),
Value::Boolean(v) => write!(f, "{v}"),
Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),
@ -279,6 +289,28 @@ impl fmt::Display for DollarQuotedString {
}
}
/// A quote delimited string literal, e.g. `Q'_abc_'`.
///
/// See [Value::QuoteDelimitedStringLiteral] and/or
/// [Value::NationalQuoteDelimitedStringLiteral].
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct QuoteDelimitedString {
    /// the quote start character; i.e. the character _after_ the opening `Q'`
    pub start_quote: char,
    /// the string literal value itself
    pub value: String,
    /// the quote end character; i.e. the character _before_ the closing `'`
    pub end_quote: char,
}

/// Renders the literal as `Q'`, the start quote character, the value, the
/// end quote character and a closing `'`. The value is written as is.
impl fmt::Display for QuoteDelimitedString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            start_quote,
            value,
            end_quote,
        } = self;
        write!(f, "Q'{}{}{}'", start_quote, value, end_quote)
    }
}
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]

View file

@ -195,4 +195,8 @@ impl Dialect for GenericDialect {
fn supports_interval_options(&self) -> bool {
true
}
/// The generic dialect accepts quote delimited string literals, e.g. `Q'{...}'`.
fn supports_quote_delimited_string(&self) -> bool {
true
}
}

View file

@ -1209,6 +1209,13 @@ pub trait Dialect: Debug + Any {
fn supports_semantic_view_table_factor(&self) -> bool {
false
}
/// Support quote delimited string literals, e.g. `Q'{...}'`
///
/// The default implementation returns `false`; a dialect opts in by
/// overriding this method.
///
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
fn supports_quote_delimited_string(&self) -> bool {
false
}
}
/// This represents the operators for which precedence must be defined

View file

@ -95,4 +95,8 @@ impl Dialect for OracleDialect {
fn supports_group_by_expr(&self) -> bool {
true
}
/// The Oracle dialect accepts quote delimited string literals, e.g. `Q'{...}'`.
fn supports_quote_delimited_string(&self) -> bool {
true
}
}

View file

@ -13,7 +13,7 @@
//! SQL Parser for a `MERGE` statement
#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec};
use alloc::{boxed::Box, format, vec, vec::Vec};
use crate::{
ast::{

View file

@ -32,14 +32,17 @@ use recursion::RecursionCounter;
use IsLateral::*;
use IsOptional::*;
use crate::ast::helpers::{
key_value_options::{
KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter,
},
stmt_create_table::{CreateTableBuilder, CreateTableConfiguration},
};
use crate::ast::Statement::CreatePolicy;
use crate::ast::*;
use crate::ast::{
comments,
helpers::{
key_value_options::{
KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter,
},
stmt_create_table::{CreateTableBuilder, CreateTableConfiguration},
},
};
use crate::dialect::*;
use crate::keywords::{Keyword, ALL_KEYWORDS};
use crate::tokenizer::*;
@ -530,6 +533,44 @@ impl<'a> Parser<'a> {
Parser::new(dialect).try_with_sql(sql)?.parse_statements()
}
/// Parses the given `sql` into an Abstract Syntax Tree (AST) and additionally
/// returns the source code comments encountered in the token stream.
///
/// Fails with the tokenizer or parser error if `sql` cannot be processed; in
/// that case no comments are returned.
///
/// See [Parser::parse_sql].
pub fn parse_sql_with_comments(
    dialect: &'a dyn Dialect,
    sql: &str,
) -> Result<(Vec<Statement>, comments::Comments), ParserError> {
    let mut parser = Parser::new(dialect).try_with_sql(sql)?;
    let statements = parser.parse_statements()?;
    // the parser is consumed only after all statements parsed successfully
    Ok((statements, parser.into_comments()))
}
/// Consumes this parser and gathers the comments of its token stream.
///
/// Single-line and multi-line comment tokens are offered, in token order,
/// to a fresh [comments::Comments]; every other token is ignored.
fn into_comments(self) -> comments::Comments {
    let mut collected = comments::Comments::default();
    for t in self.tokens {
        let comment = match t.token {
            Token::Whitespace(Whitespace::SingleLineComment { comment, prefix }) => {
                comments::Comment::SingleLine {
                    content: comment,
                    prefix,
                }
            }
            Token::Whitespace(Whitespace::MultiLineComment(comment)) => {
                comments::Comment::MultiLine(comment)
            }
            // not a comment token
            _ => continue,
        };
        collected.offer(comments::CommentWithSpan {
            comment,
            span: t.span,
        });
    }
    collected
}
/// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.),
/// stopping before the statement separator, if any.
pub fn parse_statement(&mut self) -> Result<Statement, ParserError> {
@ -1713,6 +1754,8 @@ impl<'a> Parser<'a> {
| Token::TripleSingleQuotedRawStringLiteral(_)
| Token::TripleDoubleQuotedRawStringLiteral(_)
| Token::NationalStringLiteral(_)
| Token::QuoteDelimitedStringLiteral(_)
| Token::NationalQuoteDelimitedStringLiteral(_)
| Token::HexStringLiteral(_) => {
self.prev_token();
Ok(Expr::Value(self.parse_value()?))
@ -2729,6 +2772,8 @@ impl<'a> Parser<'a> {
| Token::EscapedStringLiteral(_)
| Token::UnicodeStringLiteral(_)
| Token::NationalStringLiteral(_)
| Token::QuoteDelimitedStringLiteral(_)
| Token::NationalQuoteDelimitedStringLiteral(_)
| Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
_ => self.expected(
"either filler, WITH, or WITHOUT in LISTAGG",
@ -6656,7 +6701,7 @@ impl<'a> Parser<'a> {
let mut items = vec![];
loop {
if self.parse_keyword(Keyword::OPERATOR) {
let strategy_number = self.parse_literal_uint()? as u32;
let strategy_number = self.parse_literal_uint()?;
let operator_name = self.parse_operator_name()?;
// Optional operator argument types
@ -6691,7 +6736,7 @@ impl<'a> Parser<'a> {
purpose,
});
} else if self.parse_keyword(Keyword::FUNCTION) {
let support_number = self.parse_literal_uint()? as u32;
let support_number = self.parse_literal_uint()?;
// Optional operator types
let op_types =
@ -9853,7 +9898,13 @@ impl<'a> Parser<'a> {
operation,
})
}
Keyword::OPERATOR => self.parse_alter_operator(),
Keyword::OPERATOR => {
if self.parse_keyword(Keyword::FAMILY) {
self.parse_alter_operator_family()
} else {
self.parse_alter_operator()
}
}
Keyword::ROLE => self.parse_alter_role(),
Keyword::POLICY => self.parse_alter_policy(),
Keyword::CONNECTOR => self.parse_alter_connector(),
@ -10085,6 +10136,170 @@ impl<'a> Parser<'a> {
}))
}
/// Parse an operator item for ALTER OPERATOR FAMILY ADD operations
///
/// Expects the `OPERATOR` keyword to be consumed already and reads the
/// strategy number, the operator name, the parenthesized argument types and
/// an optional `FOR SEARCH` / `FOR ORDER BY <sort family>` purpose.
fn parse_operator_family_add_operator(&mut self) -> Result<OperatorFamilyItem, ParserError> {
    let strategy_number = self.parse_literal_uint()?;
    let operator_name = self.parse_operator_name()?;

    // Operator argument types (required for ALTER OPERATOR FAMILY)
    self.expect_token(&Token::LParen)?;
    let op_types = self.parse_comma_separated(Parser::parse_data_type)?;
    self.expect_token(&Token::RParen)?;

    // Optional purpose; stays `None` unless a `FOR` clause follows
    let mut purpose = None;
    if self.parse_keyword(Keyword::FOR) {
        if self.parse_keyword(Keyword::SEARCH) {
            purpose = Some(OperatorPurpose::ForSearch);
        } else if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
            let sort_family = self.parse_object_name(false)?;
            purpose = Some(OperatorPurpose::ForOrderBy { sort_family });
        } else {
            return self.expected("SEARCH or ORDER BY after FOR", self.peek_token());
        }
    }

    Ok(OperatorFamilyItem::Operator {
        strategy_number,
        operator_name,
        op_types,
        purpose,
    })
}
/// Parse a function item for ALTER OPERATOR FAMILY ADD operations
///
/// Expects the `FUNCTION` keyword to be consumed already and reads the
/// support number, an optional parenthesized list of operator types, the
/// function name and an optional parenthesized list of argument types.
///
/// An empty operator type list `()` yields `Some(vec![])`, whereas an absent
/// list yields `None`. For the argument types, both an empty and an absent
/// list yield an empty vector.
fn parse_operator_family_add_function(&mut self) -> Result<OperatorFamilyItem, ParserError> {
    let support_number = self.parse_literal_uint()?;

    // Optional operator types. The opening parenthesis is consumed exactly
    // once, so that an empty list `()` is recognized instead of leaving the
    // parser stranded in front of the closing parenthesis.
    let op_types = if self.consume_token(&Token::LParen) {
        if self.consume_token(&Token::RParen) {
            Some(vec![])
        } else {
            let types = self.parse_comma_separated(Parser::parse_data_type)?;
            self.expect_token(&Token::RParen)?;
            Some(types)
        }
    } else {
        None
    };

    let function_name = self.parse_object_name(false)?;

    // Function argument types
    let argument_types = if self.consume_token(&Token::LParen) {
        if self.consume_token(&Token::RParen) {
            vec![]
        } else {
            let types = self.parse_comma_separated(Parser::parse_data_type)?;
            self.expect_token(&Token::RParen)?;
            types
        }
    } else {
        vec![]
    };

    Ok(OperatorFamilyItem::Function {
        support_number,
        op_types,
        function_name,
        argument_types,
    })
}
/// Parse an operator item for ALTER OPERATOR FAMILY DROP operations
///
/// Expects the `OPERATOR` keyword to be consumed already and reads the
/// strategy number followed by the parenthesized operator argument types.
fn parse_operator_family_drop_operator(
    &mut self,
) -> Result<OperatorFamilyDropItem, ParserError> {
    let number = self.parse_literal_uint()?;

    // Operator argument types (required for DROP)
    self.expect_token(&Token::LParen)?;
    let types = self.parse_comma_separated(|parser| parser.parse_data_type())?;
    self.expect_token(&Token::RParen)?;

    Ok(OperatorFamilyDropItem::Operator {
        strategy_number: number,
        op_types: types,
    })
}
/// Parse a function item for ALTER OPERATOR FAMILY DROP operations
///
/// Expects the `FUNCTION` keyword to be consumed already and reads the
/// support number followed by the parenthesized operator types.
fn parse_operator_family_drop_function(
    &mut self,
) -> Result<OperatorFamilyDropItem, ParserError> {
    let number = self.parse_literal_uint()?;

    // Operator types (required for DROP)
    self.expect_token(&Token::LParen)?;
    let types = self.parse_comma_separated(|parser| parser.parse_data_type())?;
    self.expect_token(&Token::RParen)?;

    Ok(OperatorFamilyDropItem::Function {
        support_number: number,
        op_types: types,
    })
}
/// Parse an operator family item for ADD operations
///
/// Dispatches on the leading `OPERATOR` or `FUNCTION` keyword and reports an
/// error if neither is present.
fn parse_operator_family_add_item(&mut self) -> Result<OperatorFamilyItem, ParserError> {
    if self.parse_keyword(Keyword::OPERATOR) {
        return self.parse_operator_family_add_operator();
    }
    if self.parse_keyword(Keyword::FUNCTION) {
        return self.parse_operator_family_add_function();
    }
    self.expected("OPERATOR or FUNCTION", self.peek_token())
}
/// Parse an operator family item for DROP operations
///
/// Dispatches on the leading `OPERATOR` or `FUNCTION` keyword and reports an
/// error if neither is present.
fn parse_operator_family_drop_item(&mut self) -> Result<OperatorFamilyDropItem, ParserError> {
    if self.parse_keyword(Keyword::OPERATOR) {
        return self.parse_operator_family_drop_operator();
    }
    if self.parse_keyword(Keyword::FUNCTION) {
        return self.parse_operator_family_drop_function();
    }
    self.expected("OPERATOR or FUNCTION", self.peek_token())
}
/// Parse a [Statement::AlterOperatorFamily]
/// See <https://www.postgresql.org/docs/current/sql-alteropfamily.html>
///
/// Reads the family name, the `USING <index method>` part and then exactly
/// one of the `ADD`, `DROP`, `RENAME TO`, `OWNER TO` or `SET SCHEMA` clauses.
pub fn parse_alter_operator_family(&mut self) -> Result<Statement, ParserError> {
    let name = self.parse_object_name(false)?;
    self.expect_keyword(Keyword::USING)?;
    let using = self.parse_identifier()?;

    // assigned exactly once by whichever clause matches below
    let operation;
    if self.parse_keyword(Keyword::ADD) {
        operation = AlterOperatorFamilyOperation::Add {
            items: self.parse_comma_separated(Parser::parse_operator_family_add_item)?,
        };
    } else if self.parse_keyword(Keyword::DROP) {
        operation = AlterOperatorFamilyOperation::Drop {
            items: self.parse_comma_separated(Parser::parse_operator_family_drop_item)?,
        };
    } else if self.parse_keywords(&[Keyword::RENAME, Keyword::TO]) {
        operation = AlterOperatorFamilyOperation::RenameTo {
            new_name: self.parse_object_name(false)?,
        };
    } else if self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) {
        operation = AlterOperatorFamilyOperation::OwnerTo(self.parse_owner()?);
    } else if self.parse_keywords(&[Keyword::SET, Keyword::SCHEMA]) {
        operation = AlterOperatorFamilyOperation::SetSchema {
            schema_name: self.parse_object_name(false)?,
        };
    } else {
        return self.expected_ref(
            "ADD, DROP, RENAME TO, OWNER TO, or SET SCHEMA after ALTER OPERATOR FAMILY",
            self.peek_token_ref(),
        );
    }

    Ok(Statement::AlterOperatorFamily(AlterOperatorFamily {
        name,
        using,
        operation,
    }))
}
// Parse a [Statement::AlterSchema]
// ALTER SCHEMA [ IF EXISTS ] schema_name
pub fn parse_alter_schema(&mut self) -> Result<Statement, ParserError> {
@ -10656,6 +10871,12 @@ impl<'a> Parser<'a> {
Token::NationalStringLiteral(ref s) => {
ok_value(Value::NationalStringLiteral(s.to_string()))
}
Token::QuoteDelimitedStringLiteral(v) => {
ok_value(Value::QuoteDelimitedStringLiteral(v))
}
Token::NationalQuoteDelimitedStringLiteral(v) => {
ok_value(Value::NationalQuoteDelimitedStringLiteral(v))
}
Token::EscapedStringLiteral(ref s) => {
ok_value(Value::EscapedStringLiteral(s.to_string()))
}
@ -16897,10 +17118,10 @@ impl<'a> Parser<'a> {
fn parse_order_by_expr_inner(
&mut self,
with_operator_class: bool,
) -> Result<(OrderByExpr, Option<Ident>), ParserError> {
) -> Result<(OrderByExpr, Option<ObjectName>), ParserError> {
let expr = self.parse_expr()?;
let operator_class: Option<Ident> = if with_operator_class {
let operator_class: Option<ObjectName> = if with_operator_class {
// If none of the following keywords are present, we try to parse an
// operator class name.
if self
@ -16909,7 +17130,7 @@ impl<'a> Parser<'a> {
{
None
} else {
self.maybe_parse(|parser| parser.parse_identifier())?
self.maybe_parse(|parser| parser.parse_object_name(false))?
}
} else {
None

View file

@ -29,10 +29,10 @@ use alloc::{
vec,
vec::Vec,
};
use core::iter::Peekable;
use core::num::NonZeroU8;
use core::str::Chars;
use core::{cmp, fmt};
use core::{iter::Peekable, str};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
@ -46,7 +46,10 @@ use crate::dialect::{
SnowflakeDialect,
};
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
use crate::{ast::DollarQuotedString, dialect::HiveDialect};
use crate::{
ast::{DollarQuotedString, QuoteDelimitedString},
dialect::HiveDialect,
};
/// SQL Token enumeration
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
@ -98,6 +101,12 @@ pub enum Token {
TripleDoubleQuotedRawStringLiteral(String),
/// "National" string literal: i.e: N'string'
NationalStringLiteral(String),
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
QuoteDelimitedStringLiteral(QuoteDelimitedString),
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
NationalQuoteDelimitedStringLiteral(QuoteDelimitedString),
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
EscapedStringLiteral(String),
/// Unicode string literal: i.e: U&'first \000A second'
@ -292,6 +301,8 @@ impl fmt::Display for Token {
Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""),
Token::DollarQuotedString(ref s) => write!(f, "{s}"),
Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"),
Token::QuoteDelimitedStringLiteral(ref s) => s.fmt(f),
Token::NationalQuoteDelimitedStringLiteral(ref s) => write!(f, "N{s}"),
Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"),
Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"),
Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
@ -1032,6 +1043,18 @@ impl<'a> Tokenizer<'a> {
self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
Ok(Some(Token::NationalStringLiteral(s)))
}
Some(&q @ 'q') | Some(&q @ 'Q')
if self.dialect.supports_quote_delimited_string() =>
{
chars.next(); // consume and check the next char
if let Some('\'') = chars.peek() {
self.tokenize_quote_delimited_string(chars, &[n, q])
.map(|s| Some(Token::NationalQuoteDelimitedStringLiteral(s)))
} else {
let s = self.tokenize_word(String::from_iter([n, q]), chars);
Ok(Some(Token::make_word(&s, None)))
}
}
_ => {
// regular identifier starting with an "N"
let s = self.tokenize_word(n, chars);
@ -1039,6 +1062,16 @@ impl<'a> Tokenizer<'a> {
}
}
}
q @ 'Q' | q @ 'q' if self.dialect.supports_quote_delimited_string() => {
chars.next(); // consume and check the next char
if let Some('\'') = chars.peek() {
self.tokenize_quote_delimited_string(chars, &[q])
.map(|s| Some(Token::QuoteDelimitedStringLiteral(s)))
} else {
let s = self.tokenize_word(q, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
let starting_loc = chars.location();
@ -1684,7 +1717,7 @@ impl<'a> Tokenizer<'a> {
}
}
Some('#') => self.consume_and_return(chars, Token::QuestionMarkSharp),
_ => self.consume_and_return(chars, Token::Question),
_ => Ok(Some(Token::Question)),
}
}
'?' => {
@ -1994,6 +2027,61 @@ impl<'a> Tokenizer<'a> {
)
}
/// Reads the remainder of a quote delimited string literal (e.g. `Q'[it's]'`).
///
/// On entry, `chars.next()` is expected to deliver the single quote that follows the
/// literal prefix. The character after that quote is the opening delimiter: `[`, `{`,
/// `<` and `(` are closed by `]`, `}`, `>` and `)` respectively, any other character
/// closes itself. The literal ends at the first closing delimiter that is immediately
/// followed by a single quote; everything before that is the value, verbatim.
///
/// `literal_prefix` is the prefix that introduced the literal (e.g. "Q" or "nq") and is
/// only used to build the error message.
///
/// # Errors
///
/// * when the opening delimiter is missing (end of input) or is a space, tab, carriage
///   return or newline;
/// * when the input ends before the closing delimiter + quote sequence is found.
///
/// See <https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA>
fn tokenize_quote_delimited_string(
    &self,
    chars: &mut State,
    literal_prefix: &[char],
) -> Result<QuoteDelimitedString, TokenizerError> {
    // Location reported for an unterminated literal; captured before the quote is consumed.
    let literal_start_loc = chars.location();
    chars.next();
    // Location reported for a missing or invalid opening delimiter.
    let start_quote_loc = chars.location();

    let start_quote = match chars.next() {
        Some(c) if !matches!(c, ' ' | '\t' | '\r' | '\n') => c,
        _ => {
            return self.tokenizer_error(
                start_quote_loc,
                format!(
                    "Invalid space, tab, newline, or EOF after '{}''",
                    String::from_iter(literal_prefix)
                ),
            );
        }
    };
    let end_quote = match start_quote {
        '[' => ']',
        '{' => '}',
        '<' => '>',
        '(' => ')',
        same => same,
    };

    // Accumulate characters until the closing delimiter is directly followed by a
    // single quote; a closing delimiter on its own is ordinary content.
    let mut value = String::new();
    loop {
        match chars.next() {
            None => {
                return self.tokenizer_error(literal_start_loc, "Unterminated string literal")
            }
            Some(ch) if ch == end_quote && matches!(chars.peek(), Some('\'')) => {
                chars.next(); // consume the terminating quote
                return Ok(QuoteDelimitedString {
                    start_quote,
                    value,
                    end_quote,
                });
            }
            Some(ch) => value.push(ch),
        }
    }
}
/// Read a quoted string.
fn tokenize_quoted_string(
&self,
@ -4059,4 +4147,23 @@ mod tests {
panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}");
}
}
/// A lone `?` between two words must come out as a single [Token::Question], with the
/// surrounding whitespace tokens intact, when tokenizing with the PostgreSQL dialect.
#[test]
fn tokenize_question_mark() {
    let space = || Token::Whitespace(Whitespace::Space);
    let reference = vec![
        Token::make_keyword("SELECT"),
        space(),
        Token::make_word("x", None),
        space(),
        Token::Question,
        space(),
        Token::make_word("y", None),
    ];

    let produced = Tokenizer::new(&PostgreSqlDialect {}, "SELECT x ? y")
        .tokenize()
        .unwrap();

    compare(produced, reference)
}
}

View file

@ -0,0 +1,75 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#![warn(clippy::all)]
//! Test comment extraction from SQL source code.
#[cfg(test)]
use pretty_assertions::assert_eq;
use sqlparser::{
ast::comments::{Comment, CommentWithSpan},
dialect::GenericDialect,
parser::Parser,
tokenizer::Span,
};
/// Parses a small script with `Parser::parse_sql_with_comments` and checks that every
/// comment in it is returned, in source order, with its content and span.
///
/// The script mixes `--` single-line comments with `/* ... */` multi-line comments (one
/// inline, one spanning several lines) and ends with a single-line comment that has no
/// trailing newline.
#[test]
fn parse_sql_with_comments() {
// NOTE: the expected spans below are tied to the exact layout of this raw string;
// changing any whitespace in it requires updating them.
let sql = r#"
-- second line comment
select * from /* inline comment after `from` */ dual;
/*select
some
more*/
-- end-of-script-with-no-newline"#;
// Only the collected comments are of interest; the parsed statements are discarded.
let comments = match Parser::parse_sql_with_comments(&GenericDialect, sql) {
Ok((_, comments)) => comments,
Err(e) => panic!("Invalid sql script: {e}"),
};
assert_eq!(
Vec::from(comments),
vec![
// A single-line comment keeps its terminating newline in `content`; the `--`
// marker is reported separately as `prefix`.
CommentWithSpan {
comment: Comment::SingleLine {
content: " second line comment\n".into(),
prefix: "--".into()
},
span: Span::new((2, 1).into(), (3, 1).into()),
},
// Multi-line comments carry only the text between `/*` and `*/`.
CommentWithSpan {
comment: Comment::MultiLine(" inline comment after `from` ".into()),
span: Span::new((3, 15).into(), (3, 48).into()),
},
CommentWithSpan {
comment: Comment::MultiLine("select\nsome\nmore".into()),
span: Span::new((5, 1).into(), (7, 7).into())
},
// Last comment of the script: no newline follows, so none appears in `content`.
CommentWithSpan {
comment: Comment::SingleLine {
content: " end-of-script-with-no-newline".into(),
prefix: "--".into()
},
span: Span::new((9, 3).into(), (9, 35).into()),
}
]
);
}

View file

@ -21,11 +21,12 @@
use pretty_assertions::assert_eq;
use sqlparser::{
ast::{BinaryOperator, Expr, Value, ValueWithSpan},
ast::{BinaryOperator, Expr, Ident, QuoteDelimitedString, Value, ValueWithSpan},
dialect::OracleDialect,
parser::ParserError,
tokenizer::Span,
};
use test_utils::{expr_from_projection, number, TestedDialects};
use test_utils::{all_dialects_where, expr_from_projection, number, TestedDialects};
mod test_utils;
@ -33,6 +34,19 @@ fn oracle() -> TestedDialects {
TestedDialects::new(vec![Box::new(OracleDialect)])
}
/// Test helper: builds a [QuoteDelimitedString] from its opening delimiter, its text
/// and its closing delimiter.
fn quote_delimited_string(
    start_quote: char,
    value: &'static str,
    end_quote: char,
) -> QuoteDelimitedString {
    let value = value.to_owned();
    QuoteDelimitedString {
        start_quote,
        value,
        end_quote,
    }
}
/// Oracle: `||` has a lower precedence than `*` and `/`
#[test]
fn muldiv_have_higher_precedence_than_strconcat() {
@ -103,3 +117,219 @@ fn plusminus_have_same_precedence_as_strconcat() {
}
);
}
/// Parses a projection made of quote delimited literals using a variety of delimiters
/// (symmetric ones as well as the `[]`, `{}`, `<>` and `()` pairs) and checks the
/// delimiter/value split of each literal, in projection order.
#[test]
fn parse_quote_delimited_string() {
    let dialect = all_dialects_where(|d| d.supports_quote_delimited_string());
    let sql = "SELECT Q'.abc.', \
               Q'Xab'cX', \
               Q'|abc'''|', \
               Q'{abc}d}', \
               Q'[]abc[]', \
               Q'<a'bc>', \
               Q'<<<a'bc>', \
               Q'('abc'('abc)', \
               Q'(abc'def))', \
               Q'(abc'def)))' \
               FROM dual";
    // One `(start delimiter, value, end delimiter)` triple per projected literal.
    let expected: [(char, &'static str, char); 10] = [
        ('.', "abc", '.'),
        ('X', "ab'c", 'X'),
        ('|', "abc'''", '|'),
        ('{', "abc}d", '}'),
        ('[', "]abc[", ']'),
        ('<', "a'bc", '>'),
        ('<', "<<a'bc", '>'),
        ('(', "'abc'('abc", ')'),
        ('(', "abc'def)", ')'),
        ('(', "abc'def))", ')'),
    ];

    let select = dialect.verified_only_select(sql);
    assert_eq!(expected.len(), select.projection.len());

    for (item, &(open, text, close)) in select.projection.iter().zip(expected.iter()) {
        assert_eq!(
            &Expr::Value(
                Value::QuoteDelimitedStringLiteral(quote_delimited_string(open, text, close))
                    .with_empty_span()
            ),
            expr_from_projection(item)
        );
    }
}
/// Malformed quote delimited literals must be rejected by the tokenizer with a
/// precise message and location:
///
/// * a whitespace character used as delimiter,
/// * end of input right after the opening quote,
/// * a literal whose closing delimiter + quote never shows up.
#[test]
fn parse_invalid_quote_delimited_strings() {
    let dialect = all_dialects_where(|d| d.supports_quote_delimited_string());
    // Both "bad delimiter" flavours (whitespace and EOF) share one error message.
    let invalid_delimiter = || {
        Err(ParserError::TokenizerError(
            "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into(),
        ))
    };

    // ~ invalid quote delimiter
    for q in [' ', '\t', '\r', '\n'] {
        assert_eq!(
            dialect.parse_sql_statements(&format!("SELECT Q'{q}abc{q}' FROM dual")),
            invalid_delimiter(),
            "with quote char {q:?}"
        );
    }

    // ~ invalid eof after quote
    assert_eq!(
        dialect.parse_sql_statements("SELECT Q'"),
        invalid_delimiter(),
        "with EOF quote char"
    );

    // ~ unterminated string
    assert_eq!(
        dialect.parse_sql_statements("SELECT Q'|asdfa...."),
        Err(ParserError::TokenizerError(
            "Unterminated string literal at Line: 1, Column: 9".into()
        )),
        // This label used to be a copy of the EOF case above, which would have
        // pointed at the wrong scenario on failure.
        "with unterminated string"
    );
}
/// A lowercase `q` prefix introduces a quote delimited literal as well; the canonical
/// form used for the round trip spells the prefix as `Q`.
#[test]
fn parse_quote_delimited_string_lowercase() {
    let expected = Expr::Value(
        Value::QuoteDelimitedStringLiteral(quote_delimited_string('!', "a'b'c!d", '!'))
            .with_empty_span(),
    );

    let select = all_dialects_where(|d| d.supports_quote_delimited_string())
        .verified_only_select_with_canonical(
            "select q'!a'b'c!d!' from dual",
            "SELECT Q'!a'b'c!d!' FROM dual",
        );

    assert_eq!(1, select.projection.len());
    assert_eq!(&expected, expr_from_projection(&select.projection[0]));
}
/// Words that merely start with `q` (and are not followed by a single quote) must keep
/// parsing as plain or compound identifiers, including `q` used as a table alias.
#[test]
fn parse_quote_delimited_string_but_is_a_word() {
    let select = all_dialects_where(|d| d.supports_quote_delimited_string())
        .verified_only_select("SELECT q, quux, q.abc FROM dual q");

    let ident = |name: &'static str| Ident::with_span(Span::empty(), name);
    let expected = [
        Expr::Identifier(ident("q")),
        Expr::Identifier(ident("quux")),
        Expr::CompoundIdentifier(vec![ident("q"), ident("abc")]),
    ];

    assert_eq!(expected.len(), select.projection.len());
    for (want, item) in expected.iter().zip(&select.projection) {
        assert_eq!(want, expr_from_projection(item));
    }
}
/// An `NQ'...'` literal parses into the national flavour of the quote delimited
/// string value.
#[test]
fn parse_national_quote_delimited_string() {
    let expected = Expr::Value(
        Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string('.', "abc", '.'))
            .with_empty_span(),
    );

    let select = all_dialects_where(|d| d.supports_quote_delimited_string())
        .verified_only_select("SELECT NQ'.abc.' FROM dual");

    assert_eq!(1, select.projection.len());
    assert_eq!(&expected, expr_from_projection(&select.projection[0]));
}
/// Every upper/lower case combination of the `NQ` prefix introduces a national quote
/// delimited literal; the canonical form used for the round trip spells it `NQ`.
#[test]
fn parse_national_quote_delimited_string_lowercase() {
    let dialect = all_dialects_where(|d| d.supports_quote_delimited_string());
    let expected = Expr::Value(
        Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string(
            '!', "a'b'c!d", '!',
        ))
        .with_empty_span(),
    );

    for prefix in ["nq", "Nq", "nQ", "NQ"] {
        let sql = format!("select {prefix}'!a'b'c!d!' from dual");
        let select =
            dialect.verified_only_select_with_canonical(&sql, "SELECT NQ'!a'b'c!d!' FROM dual");

        assert_eq!(1, select.projection.len());
        assert_eq!(&expected, expr_from_projection(&select.projection[0]));
    }
}
/// Words that merely start with `nq` (and are not followed by a single quote) must keep
/// parsing as plain or compound identifiers.
#[test]
fn parse_national_quote_delimited_string_but_is_a_word() {
    let select = all_dialects_where(|d| d.supports_quote_delimited_string())
        .verified_only_select("SELECT nq, nqoo, nq.abc FROM dual q");

    let ident = |name: &'static str| Ident::with_span(Span::empty(), name);
    let expected = [
        Expr::Identifier(ident("nq")),
        Expr::Identifier(ident("nqoo")),
        Expr::CompoundIdentifier(vec![ident("nq"), ident("abc")]),
    ];

    assert_eq!(expected.len(), select.projection.len());
    for (want, item) in expected.iter().zip(&select.projection) {
        assert_eq!(want, expr_from_projection(item));
    }
}

View file

@ -23,15 +23,11 @@
mod test_utils;
use helpers::attached_token::AttachedToken;
use sqlparser::ast::{
DataType, DropBehavior, DropOperator, DropOperatorClass, DropOperatorSignature,
};
use sqlparser::tokenizer::Span;
use test_utils::*;
use sqlparser::ast::*;
use sqlparser::dialect::{GenericDialect, PostgreSqlDialect};
use sqlparser::parser::ParserError;
use sqlparser::tokenizer::Span;
use test_utils::*;
#[test]
fn parse_create_table_generated_always_as_identity() {
@ -2572,11 +2568,17 @@ fn parse_create_indices_with_operator_classes() {
IndexType::SPGiST,
IndexType::Custom("CustomIndexType".into()),
];
let operator_classes: [Option<Ident>; 4] = [
let operator_classes: [Option<ObjectName>; 4] = [
None,
Some("gin_trgm_ops".into()),
Some("gist_trgm_ops".into()),
Some("totally_not_valid".into()),
Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new(
"gin_trgm_ops",
))])),
Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new(
"gist_trgm_ops",
))])),
Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new(
"totally_not_valid",
))])),
];
for expected_index_type in indices {
@ -2713,6 +2715,36 @@ fn parse_create_indices_with_operator_classes() {
}
}
/// An index column may name its operator class with a schema qualifier
/// (`public.vector_cosine_ops`); both name parts must end up in `operator_class`.
#[test]
fn parse_create_index_with_schema_qualified_operator_class() {
    let stmt = pg().verified_stmt(
        "CREATE INDEX my_index ON my_table USING HNSW (embedding public.vector_cosine_ops)",
    );
    let columns = match stmt {
        Statement::CreateIndex(CreateIndex { columns, .. }) => columns,
        _ => unreachable!(),
    };
    assert_eq!(1, columns.len());
    let indexed = &columns[0];

    // The indexed expression is the bare column name ...
    match &indexed.column.expr {
        Expr::Identifier(ident) => assert_eq!("embedding", ident.value),
        _ => panic!("Expected identifier expression"),
    }

    // ... and the operator class keeps its schema qualifier.
    let expected_operator_class = ObjectName(vec![
        ObjectNamePart::Identifier(Ident::new("public")),
        ObjectNamePart::Identifier(Ident::new("vector_cosine_ops")),
    ]);
    assert_eq!(Some(expected_operator_class), indexed.operator_class);
}
#[test]
fn parse_create_bloom() {
let sql =
@ -7109,6 +7141,396 @@ fn parse_alter_operator() {
);
}
/// Exercises `ALTER OPERATOR FAMILY` parsing.
///
/// The first half round-trips one statement per supported action (`ADD OPERATOR`,
/// `ADD FUNCTION`, `DROP OPERATOR`, `DROP FUNCTION`, `RENAME TO`, `OWNER TO`,
/// `SET SCHEMA`) and compares the resulting AST. The second half feeds a table of
/// malformed statements to the PostgreSQL dialect and expects each one to be rejected;
/// every entry is labelled so that a failure names the offending case and SQL.
#[test]
fn parse_alter_operator_family() {
    // Expected statement for operator family `family` using the `btree` index method.
    fn expected(family: &'static str, operation: AlterOperatorFamilyOperation) -> Statement {
        Statement::AlterOperatorFamily(AlterOperatorFamily {
            name: ObjectName::from(vec![Ident::new(family)]),
            using: Ident::new("btree"),
            operation,
        })
    }
    // The `(INT4, INT2)` type list shared by most of the positive cases.
    let int4_int2 = || vec![DataType::Int4(None), DataType::Int2(None)];

    // Test ALTER OPERATOR FAMILY ... ADD OPERATOR
    let sql = "ALTER OPERATOR FAMILY integer_ops USING btree ADD OPERATOR 1 < (INT4, INT2)";
    assert_eq!(
        pg_and_generic().verified_stmt(sql),
        expected(
            "integer_ops",
            AlterOperatorFamilyOperation::Add {
                items: vec![OperatorFamilyItem::Operator {
                    strategy_number: 1,
                    operator_name: ObjectName::from(vec![Ident::new("<")]),
                    op_types: int4_int2(),
                    purpose: None,
                }],
            }
        )
    );

    // Test ALTER OPERATOR FAMILY ... ADD OPERATOR with FOR SEARCH
    let sql =
        "ALTER OPERATOR FAMILY text_ops USING btree ADD OPERATOR 1 @@ (TEXT, TEXT) FOR SEARCH";
    assert_eq!(
        pg_and_generic().verified_stmt(sql),
        expected(
            "text_ops",
            AlterOperatorFamilyOperation::Add {
                items: vec![OperatorFamilyItem::Operator {
                    strategy_number: 1,
                    operator_name: ObjectName::from(vec![Ident::new("@@")]),
                    op_types: vec![DataType::Text, DataType::Text],
                    purpose: Some(OperatorPurpose::ForSearch),
                }],
            }
        )
    );

    // Test ALTER OPERATOR FAMILY ... ADD FUNCTION
    let sql = "ALTER OPERATOR FAMILY integer_ops USING btree ADD FUNCTION 1 btint42cmp(INT4, INT2)";
    assert_eq!(
        pg_and_generic().verified_stmt(sql),
        expected(
            "integer_ops",
            AlterOperatorFamilyOperation::Add {
                items: vec![OperatorFamilyItem::Function {
                    support_number: 1,
                    op_types: None,
                    function_name: ObjectName::from(vec![Ident::new("btint42cmp")]),
                    argument_types: int4_int2(),
                }],
            }
        )
    );

    // Test ALTER OPERATOR FAMILY ... DROP OPERATOR
    let sql = "ALTER OPERATOR FAMILY integer_ops USING btree DROP OPERATOR 1 (INT4, INT2)";
    assert_eq!(
        pg_and_generic().verified_stmt(sql),
        expected(
            "integer_ops",
            AlterOperatorFamilyOperation::Drop {
                items: vec![OperatorFamilyDropItem::Operator {
                    strategy_number: 1,
                    op_types: int4_int2(),
                }],
            }
        )
    );

    // Test ALTER OPERATOR FAMILY ... DROP FUNCTION
    let sql = "ALTER OPERATOR FAMILY integer_ops USING btree DROP FUNCTION 1 (INT4, INT2)";
    assert_eq!(
        pg_and_generic().verified_stmt(sql),
        expected(
            "integer_ops",
            AlterOperatorFamilyOperation::Drop {
                items: vec![OperatorFamilyDropItem::Function {
                    support_number: 1,
                    op_types: int4_int2(),
                }],
            }
        )
    );

    // Test ALTER OPERATOR FAMILY ... RENAME TO
    let sql = "ALTER OPERATOR FAMILY old_ops USING btree RENAME TO new_ops";
    assert_eq!(
        pg_and_generic().verified_stmt(sql),
        expected(
            "old_ops",
            AlterOperatorFamilyOperation::RenameTo {
                new_name: ObjectName::from(vec![Ident::new("new_ops")]),
            }
        )
    );

    // Test ALTER OPERATOR FAMILY ... OWNER TO
    let sql = "ALTER OPERATOR FAMILY my_ops USING btree OWNER TO joe";
    assert_eq!(
        pg_and_generic().verified_stmt(sql),
        expected(
            "my_ops",
            AlterOperatorFamilyOperation::OwnerTo(Owner::Ident(Ident::new("joe")))
        )
    );

    // Test ALTER OPERATOR FAMILY ... SET SCHEMA
    let sql = "ALTER OPERATOR FAMILY my_ops USING btree SET SCHEMA new_schema";
    assert_eq!(
        pg_and_generic().verified_stmt(sql),
        expected(
            "my_ops",
            AlterOperatorFamilyOperation::SetSchema {
                schema_name: ObjectName::from(vec![Ident::new("new_schema")]),
            }
        )
    );

    // Test error cases: `(what is wrong, SQL that must fail to parse)`.
    // Statements that appeared twice under different labels are listed once.
    let rejected: &[(&str, &str)] = &[
        (
            "missing USING clause",
            "ALTER OPERATOR FAMILY my_ops ADD OPERATOR 1 < (INT4, INT2)",
        ),
        (
            "invalid operation",
            "ALTER OPERATOR FAMILY my_ops USING btree INVALID_OPERATION",
        ),
        (
            "missing (empty) operator name in ADD OPERATOR",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 (INT4, INT2)",
        ),
        (
            "missing function name in ADD FUNCTION",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4, INT2)",
        ),
        (
            "missing parentheses in DROP OPERATOR",
            "ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 INT4, INT2",
        ),
        (
            "invalid operator name (special characters)",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 @#$ (INT4, INT2)",
        ),
        (
            "negative strategy number",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR -1 < (INT4, INT2)",
        ),
        (
            "non-integer strategy number",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1.5 < (INT4, INT2)",
        ),
        (
            "missing closing parenthesis in operator types",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2",
        ),
        (
            "missing opening parenthesis in operator types",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < INT4, INT2)",
        ),
        (
            "empty operator types",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < ()",
        ),
        (
            "invalid data type (using punctuation)",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (@#$%, INT2)",
        ),
        (
            "incomplete FOR clause",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR",
        ),
        (
            "invalid FOR clause keyword",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR INVALID",
        ),
        (
            "FOR ORDER BY without sort family",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR ORDER BY",
        ),
        (
            "invalid function name",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 123invalid(INT4, INT2)",
        ),
        (
            "negative support number",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION -1 func(INT4, INT2)",
        ),
        (
            "non-integer support number",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1.5 func(INT4, INT2)",
        ),
        (
            "missing closing parenthesis in function operator types",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4, INT2 func()",
        ),
        (
            "missing closing parenthesis in function arguments",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 func(INT4, INT2",
        ),
        (
            "invalid data type in function arguments",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 func(@#$%, INT2)",
        ),
        (
            "DROP OPERATOR with FOR clause (not allowed)",
            "ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 (INT4, INT2) FOR SEARCH",
        ),
        (
            "DROP FUNCTION with function arguments (not allowed)",
            "ALTER OPERATOR FAMILY my_ops USING btree DROP FUNCTION 1 (INT4, INT2) func(INT4)",
        ),
        (
            "multiple ADD items with an invalid item",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2), INVALID_ITEM",
        ),
        (
            "multiple DROP items with an invalid item",
            "ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 (INT4, INT2), INVALID_ITEM",
        ),
        (
            "RENAME TO with invalid new name",
            "ALTER OPERATOR FAMILY my_ops USING btree RENAME TO 123invalid",
        ),
        (
            "OWNER TO with invalid owner",
            "ALTER OPERATOR FAMILY my_ops USING btree OWNER TO 123invalid",
        ),
        (
            "SET SCHEMA with invalid schema name",
            "ALTER OPERATOR FAMILY my_ops USING btree SET SCHEMA 123invalid",
        ),
        (
            "schema-qualified operator family name with invalid schema",
            "ALTER OPERATOR FAMILY 123invalid.my_ops USING btree ADD OPERATOR 1 < (INT4, INT2)",
        ),
        (
            "missing operator family name",
            "ALTER OPERATOR FAMILY USING btree ADD OPERATOR 1 < (INT4, INT2)",
        ),
        (
            "extra tokens at end",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) EXTRA",
        ),
        (
            "incomplete statement",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD",
        ),
        (
            "very long strategy number",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 999999999999999999999 < (INT4, INT2)",
        ),
        (
            "multiple FOR clauses",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR SEARCH FOR ORDER BY sort_family",
        ),
        (
            "FOR SEARCH with extra tokens",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR SEARCH EXTRA",
        ),
        (
            "FOR ORDER BY with invalid sort family",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR ORDER BY 123invalid",
        ),
        (
            "function with empty operator types but missing function args parens",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 () func",
        ),
        (
            "function with mismatched parentheses",
            "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4 func(INT2",
        ),
        (
            "DROP OPERATOR with empty types",
            "ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 ()",
        ),
        (
            "DROP FUNCTION with empty types",
            "ALTER OPERATOR FAMILY my_ops USING btree DROP FUNCTION 1 ()",
        ),
    ];

    for (what, sql) in rejected.iter() {
        assert!(
            pg().parse_sql_statements(sql).is_err(),
            "{what}: expected a parse error for {sql:?}"
        );
    }
}
#[test]
fn parse_drop_operator_family() {
for if_exists in [true, false] {