Oracle: Support for quote delimited strings (#2130)
Some checks failed
license / Release Audit Tool (RAT) (push) Has been cancelled
Rust / codestyle (push) Has been cancelled
Rust / lint (push) Has been cancelled
Rust / benchmark-lint (push) Has been cancelled
Rust / compile (push) Has been cancelled
Rust / docs (push) Has been cancelled
Rust / compile-no-std (push) Has been cancelled
Rust / test (beta) (push) Has been cancelled
Rust / test (nightly) (push) Has been cancelled
Rust / test (stable) (push) Has been cancelled

This commit is contained in:
xitep 2025-12-16 19:04:11 +01:00 committed by GitHub
parent cdeed32294
commit f84887d004
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 381 additions and 6 deletions

View file

@ -110,7 +110,7 @@ pub use self::trigger::{
pub use self::value::{
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
NormalizationForm, TrimWhereField, Value, ValueWithSpan,
NormalizationForm, QuoteDelimitedString, TrimWhereField, Value, ValueWithSpan,
};
use crate::ast::helpers::key_value_options::KeyValueOptions;

View file

@ -167,6 +167,12 @@ pub enum Value {
TripleDoubleQuotedRawStringLiteral(String),
/// N'string value'
NationalStringLiteral(String),
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
QuoteDelimitedStringLiteral(QuoteDelimitedString),
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
NationalQuoteDelimitedStringLiteral(QuoteDelimitedString),
/// X'hex value'
HexStringLiteral(String),
@ -207,6 +213,8 @@ impl Value {
| Value::NationalStringLiteral(s)
| Value::HexStringLiteral(s) => Some(s),
Value::DollarQuotedString(s) => Some(s.value),
Value::QuoteDelimitedStringLiteral(s) => Some(s.value),
Value::NationalQuoteDelimitedStringLiteral(s) => Some(s.value),
_ => None,
}
}
@ -242,6 +250,8 @@ impl fmt::Display for Value {
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)),
Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
Value::QuoteDelimitedStringLiteral(v) => v.fmt(f),
Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"),
Value::HexStringLiteral(v) => write!(f, "X'{v}'"),
Value::Boolean(v) => write!(f, "{v}"),
Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),
@ -279,6 +289,28 @@ impl fmt::Display for DollarQuotedString {
}
}
/// Payload of a quote delimited string literal such as `Q'_abc_'`.
///
/// Shared by [Value::QuoteDelimitedStringLiteral] and
/// [Value::NationalQuoteDelimitedStringLiteral].
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct QuoteDelimitedString {
    /// Opening delimiter, i.e. the character right _after_ the leading `Q'`.
    pub start_quote: char,
    /// The text of the literal found between the two delimiters.
    pub value: String,
    /// Closing delimiter, i.e. the character right _before_ the trailing `'`.
    pub end_quote: char,
}

impl fmt::Display for QuoteDelimitedString {
    /// Renders the literal as `Q'<start_quote><value><end_quote>'`; the value
    /// is written verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            start_quote,
            value,
            end_quote,
        } = self;
        write!(f, "Q'{start_quote}{value}{end_quote}'")
    }
}
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]

View file

@ -195,4 +195,8 @@ impl Dialect for GenericDialect {
fn supports_interval_options(&self) -> bool {
true
}
/// The generic dialect accepts quote delimited string literals, e.g. `Q'{...}'`.
fn supports_quote_delimited_string(&self) -> bool {
true
}
}

View file

@ -1209,6 +1209,13 @@ pub trait Dialect: Debug + Any {
fn supports_semantic_view_table_factor(&self) -> bool {
false
}
/// Returns true if the dialect supports quote delimited string literals,
/// e.g. `Q'{...}'`, including their "national" form, e.g. `NQ'{...}'`.
///
/// Defaults to `false`; dialects opt in by overriding this method.
///
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
fn supports_quote_delimited_string(&self) -> bool {
false
}
}
/// This represents the operators for which precedence must be defined

View file

@ -95,4 +95,8 @@ impl Dialect for OracleDialect {
fn supports_group_by_expr(&self) -> bool {
true
}
/// Oracle supports quote delimited string literals, e.g. `Q'{...}'`.
///
/// See <https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA>
fn supports_quote_delimited_string(&self) -> bool {
true
}
}

View file

@ -13,7 +13,7 @@
//! SQL Parser for a `MERGE` statement
#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec};
use alloc::{boxed::Box, format, vec, vec::Vec};
use crate::{
ast::{

View file

@ -1754,6 +1754,8 @@ impl<'a> Parser<'a> {
| Token::TripleSingleQuotedRawStringLiteral(_)
| Token::TripleDoubleQuotedRawStringLiteral(_)
| Token::NationalStringLiteral(_)
| Token::QuoteDelimitedStringLiteral(_)
| Token::NationalQuoteDelimitedStringLiteral(_)
| Token::HexStringLiteral(_) => {
self.prev_token();
Ok(Expr::Value(self.parse_value()?))
@ -2770,6 +2772,8 @@ impl<'a> Parser<'a> {
| Token::EscapedStringLiteral(_)
| Token::UnicodeStringLiteral(_)
| Token::NationalStringLiteral(_)
| Token::QuoteDelimitedStringLiteral(_)
| Token::NationalQuoteDelimitedStringLiteral(_)
| Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
_ => self.expected(
"either filler, WITH, or WITHOUT in LISTAGG",
@ -10697,6 +10701,12 @@ impl<'a> Parser<'a> {
Token::NationalStringLiteral(ref s) => {
ok_value(Value::NationalStringLiteral(s.to_string()))
}
Token::QuoteDelimitedStringLiteral(v) => {
ok_value(Value::QuoteDelimitedStringLiteral(v))
}
Token::NationalQuoteDelimitedStringLiteral(v) => {
ok_value(Value::NationalQuoteDelimitedStringLiteral(v))
}
Token::EscapedStringLiteral(ref s) => {
ok_value(Value::EscapedStringLiteral(s.to_string()))
}

View file

@ -29,10 +29,10 @@ use alloc::{
vec,
vec::Vec,
};
use core::iter::Peekable;
use core::num::NonZeroU8;
use core::str::Chars;
use core::{cmp, fmt};
use core::{iter::Peekable, str};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
@ -46,7 +46,10 @@ use crate::dialect::{
SnowflakeDialect,
};
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
use crate::{ast::DollarQuotedString, dialect::HiveDialect};
use crate::{
ast::{DollarQuotedString, QuoteDelimitedString},
dialect::HiveDialect,
};
/// SQL Token enumeration
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
@ -98,6 +101,12 @@ pub enum Token {
TripleDoubleQuotedRawStringLiteral(String),
/// "National" string literal: i.e: N'string'
NationalStringLiteral(String),
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
QuoteDelimitedStringLiteral(QuoteDelimitedString),
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
NationalQuoteDelimitedStringLiteral(QuoteDelimitedString),
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
EscapedStringLiteral(String),
/// Unicode string literal: i.e: U&'first \000A second'
@ -292,6 +301,8 @@ impl fmt::Display for Token {
Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""),
Token::DollarQuotedString(ref s) => write!(f, "{s}"),
Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"),
Token::QuoteDelimitedStringLiteral(ref s) => s.fmt(f),
Token::NationalQuoteDelimitedStringLiteral(ref s) => write!(f, "N{s}"),
Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"),
Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"),
Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
@ -1032,6 +1043,18 @@ impl<'a> Tokenizer<'a> {
self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
Ok(Some(Token::NationalStringLiteral(s)))
}
Some(&q @ 'q') | Some(&q @ 'Q')
if self.dialect.supports_quote_delimited_string() =>
{
chars.next(); // consume and check the next char
if let Some('\'') = chars.peek() {
self.tokenize_quote_delimited_string(chars, &[n, q])
.map(|s| Some(Token::NationalQuoteDelimitedStringLiteral(s)))
} else {
let s = self.tokenize_word(String::from_iter([n, q]), chars);
Ok(Some(Token::make_word(&s, None)))
}
}
_ => {
// regular identifier starting with an "N"
let s = self.tokenize_word(n, chars);
@ -1039,6 +1062,16 @@ impl<'a> Tokenizer<'a> {
}
}
}
q @ 'Q' | q @ 'q' if self.dialect.supports_quote_delimited_string() => {
chars.next(); // consume and check the next char
if let Some('\'') = chars.peek() {
self.tokenize_quote_delimited_string(chars, &[q])
.map(|s| Some(Token::QuoteDelimitedStringLiteral(s)))
} else {
let s = self.tokenize_word(q, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
let starting_loc = chars.location();
@ -1994,6 +2027,61 @@ impl<'a> Tokenizer<'a> {
)
}
/// Tokenizes the body of a quote delimited string literal.
///
/// The literal prefix (e.g. `Q` or `NQ`) has already been consumed by the
/// caller, so the next character delivered by `chars` is expected to be the
/// opening single quote. The character after it is the start delimiter; for
/// `[`, `{`, `<` and `(` the end delimiter is the matching closing bracket,
/// for any other character the end delimiter is that same character. The
/// literal ends at the first end delimiter that is immediately followed by
/// a single quote.
///
/// `literal_prefix` is the prefix that introduced the literal; it is only
/// used to build the error message.
///
/// # Errors
///
/// Fails if the delimiter is a space, tab, carriage return, newline or
/// missing (EOF), or if the input ends before the literal is terminated.
///
/// See <https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA>
fn tokenize_quote_delimited_string(
    &self,
    chars: &mut State,
    // the prefix that introduced the possible literal or word,
    // e.g. "Q" or "nq"
    literal_prefix: &[char],
) -> Result<QuoteDelimitedString, TokenizerError> {
    let literal_start_loc = chars.location();
    chars.next(); // the opening single quote

    let start_quote_loc = chars.location();
    let start_quote = match chars.next() {
        Some(c) if !matches!(c, ' ' | '\t' | '\r' | '\n') => c,
        _ => {
            let prefix: String = literal_prefix.iter().collect();
            return self.tokenizer_error(
                start_quote_loc,
                format!("Invalid space, tab, newline, or EOF after '{}''", prefix),
            );
        }
    };
    // bracket-like delimiters are closed by their counterpart, any other
    // delimiter closes itself
    let end_quote = match start_quote {
        '[' => ']',
        '{' => '}',
        '<' => '>',
        '(' => ')',
        same => same,
    };

    // collect characters until the end delimiter is directly followed by
    // a single quote
    let mut value = String::new();
    loop {
        match chars.next() {
            None => {
                return self.tokenizer_error(literal_start_loc, "Unterminated string literal");
            }
            Some(ch) if ch == end_quote && chars.peek() == Some(&'\'') => {
                chars.next(); // the closing single quote
                return Ok(QuoteDelimitedString {
                    start_quote,
                    value,
                    end_quote,
                });
            }
            Some(ch) => value.push(ch),
        }
    }
}
/// Read a quoted string.
fn tokenize_quoted_string(
&self,

View file

@ -21,11 +21,12 @@
use pretty_assertions::assert_eq;
use sqlparser::{
ast::{BinaryOperator, Expr, Value, ValueWithSpan},
ast::{BinaryOperator, Expr, Ident, QuoteDelimitedString, Value, ValueWithSpan},
dialect::OracleDialect,
parser::ParserError,
tokenizer::Span,
};
use test_utils::{expr_from_projection, number, TestedDialects};
use test_utils::{all_dialects_where, expr_from_projection, number, TestedDialects};
mod test_utils;
@ -33,6 +34,19 @@ fn oracle() -> TestedDialects {
TestedDialects::new(vec![Box::new(OracleDialect)])
}
/// Builds a [QuoteDelimitedString] from its two delimiters and its text.
fn quote_delimited_string(
    start_quote: char,
    value: &'static str,
    end_quote: char,
) -> QuoteDelimitedString {
    let value = String::from(value);
    QuoteDelimitedString {
        start_quote,
        value,
        end_quote,
    }
}
/// Oracle: `||` has a lower precedence than `*` and `/`
#[test]
fn muldiv_have_higher_precedence_than_strconcat() {
@ -103,3 +117,219 @@ fn plusminus_have_same_precedence_as_strconcat() {
}
);
}
/// Quote delimited literals round-trip and yield the expected delimiters and
/// values, including values containing quotes and delimiter characters.
#[test]
fn parse_quote_delimited_string() {
    let dialect = all_dialects_where(|d| d.supports_quote_delimited_string());
    let sql = "SELECT Q'.abc.', \
               Q'Xab'cX', \
               Q'|abc'''|', \
               Q'{abc}d}', \
               Q'[]abc[]', \
               Q'<a'bc>', \
               Q'<<<a'bc>', \
               Q'('abc'('abc)', \
               Q'(abc'def))', \
               Q'(abc'def)))' \
               FROM dual";
    let select = dialect.verified_only_select(sql);

    // (start quote, value, end quote) per projected expression, in order
    let expected: [(char, &'static str, char); 10] = [
        ('.', "abc", '.'),
        ('X', "ab'c", 'X'),
        ('|', "abc'''", '|'),
        ('{', "abc}d", '}'),
        ('[', "]abc[", ']'),
        ('<', "a'bc", '>'),
        ('<', "<<a'bc", '>'),
        ('(', "'abc'('abc", ')'),
        ('(', "abc'def)", ')'),
        ('(', "abc'def))", ')'),
    ];
    assert_eq!(expected.len(), select.projection.len());

    for (item, &(start_quote, value, end_quote)) in select.projection.iter().zip(expected.iter()) {
        let literal = quote_delimited_string(start_quote, value, end_quote);
        assert_eq!(
            &Expr::Value(Value::QuoteDelimitedStringLiteral(literal).with_empty_span()),
            expr_from_projection(item)
        );
    }
}
/// Malformed quote delimited literals are reported as tokenizer errors:
/// whitespace or EOF in place of the delimiter, and a literal that is never
/// terminated.
#[test]
fn parse_invalid_quote_delimited_strings() {
    let dialect = all_dialects_where(|d| d.supports_quote_delimited_string());
    // ~ invalid quote delimiter
    for q in [' ', '\t', '\r', '\n'] {
        assert_eq!(
            dialect.parse_sql_statements(&format!("SELECT Q'{q}abc{q}' FROM dual")),
            Err(ParserError::TokenizerError(
                "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into()
            )),
            "with quote char {q:?}"
        );
    }
    // ~ invalid eof after quote
    assert_eq!(
        dialect.parse_sql_statements("SELECT Q'"),
        Err(ParserError::TokenizerError(
            "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into()
        )),
        "with EOF quote char"
    );
    // ~ unterminated string; the reported location is the opening quote
    assert_eq!(
        dialect.parse_sql_statements("SELECT Q'|asdfa...."),
        Err(ParserError::TokenizerError(
            "Unterminated string literal at Line: 1, Column: 9".into()
        )),
        "with unterminated string"
    );
}
/// A lowercase `q` prefix is accepted and rendered back as `Q`.
#[test]
fn parse_quote_delimited_string_lowercase() {
    let dialect = all_dialects_where(|d| d.supports_quote_delimited_string());
    let select = dialect.verified_only_select_with_canonical(
        "select q'!a'b'c!d!' from dual",
        "SELECT Q'!a'b'c!d!' FROM dual",
    );
    assert_eq!(1, select.projection.len());

    let literal = quote_delimited_string('!', "a'b'c!d", '!');
    let expected = Expr::Value(Value::QuoteDelimitedStringLiteral(literal).with_empty_span());
    assert_eq!(&expected, expr_from_projection(&select.projection[0]));
}
/// Words starting with `q` that are not followed by a quote stay identifiers.
#[test]
fn parse_quote_delimited_string_but_is_a_word() {
    let dialect = all_dialects_where(|d| d.supports_quote_delimited_string());
    let select = dialect.verified_only_select("SELECT q, quux, q.abc FROM dual q");

    let ident = |name: &'static str| Ident::with_span(Span::empty(), name);
    let expected = [
        Expr::Identifier(ident("q")),
        Expr::Identifier(ident("quux")),
        Expr::CompoundIdentifier(vec![ident("q"), ident("abc")]),
    ];
    assert_eq!(expected.len(), select.projection.len());

    for (want, item) in expected.iter().zip(&select.projection) {
        assert_eq!(want, expr_from_projection(item));
    }
}
/// An `NQ'...'` literal parses into the national quote delimited variant.
#[test]
fn parse_national_quote_delimited_string() {
    let dialect = all_dialects_where(|d| d.supports_quote_delimited_string());
    let select = dialect.verified_only_select("SELECT NQ'.abc.' FROM dual");
    assert_eq!(1, select.projection.len());

    let literal = quote_delimited_string('.', "abc", '.');
    let expected =
        Expr::Value(Value::NationalQuoteDelimitedStringLiteral(literal).with_empty_span());
    assert_eq!(&expected, expr_from_projection(&select.projection[0]));
}
/// Every case combination of the `NQ` prefix is accepted and rendered back
/// as uppercase `NQ`.
#[test]
fn parse_national_quote_delimited_string_lowercase() {
    let dialect = all_dialects_where(|d| d.supports_quote_delimited_string());
    let canonical = "SELECT NQ'!a'b'c!d!' FROM dual";
    let expected = Expr::Value(
        Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string('!', "a'b'c!d", '!'))
            .with_empty_span(),
    );

    for prefix in ["nq", "Nq", "nQ", "NQ"] {
        let sql = format!("select {prefix}'!a'b'c!d!' from dual");
        let select = dialect.verified_only_select_with_canonical(&sql, canonical);
        assert_eq!(1, select.projection.len());
        assert_eq!(&expected, expr_from_projection(&select.projection[0]));
    }
}
/// Words starting with `nq` that are not followed by a quote stay identifiers.
#[test]
fn parse_national_quote_delimited_string_but_is_a_word() {
    let dialect = all_dialects_where(|d| d.supports_quote_delimited_string());
    let select = dialect.verified_only_select("SELECT nq, nqoo, nq.abc FROM dual q");

    let ident = |name: &'static str| Ident::with_span(Span::empty(), name);
    let expected = [
        Expr::Identifier(ident("nq")),
        Expr::Identifier(ident("nqoo")),
        Expr::CompoundIdentifier(vec![ident("nq"), ident("abc")]),
    ];
    assert_eq!(expected.len(), select.projection.len());

    for (want, item) in expected.iter().zip(&select.projection) {
        assert_eq!(want, expr_from_projection(item));
    }
}