Support quote delimited strings

This commit is contained in:
Petr Novotnik 2025-11-30 19:30:35 +01:00
parent cdeed32294
commit 53af27d99e
4 changed files with 279 additions and 3 deletions

View file

@ -167,6 +167,12 @@ pub enum Value {
TripleDoubleQuotedRawStringLiteral(String),
/// N'string value'
NationalStringLiteral(String),
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
QuoteDelimitedStringLiteral(char, String, char),
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
NationalQuoteDelimitedStringLiteral(char, String, char),
/// X'hex value'
HexStringLiteral(String),
@ -205,6 +211,8 @@ impl Value {
| Value::EscapedStringLiteral(s)
| Value::UnicodeStringLiteral(s)
| Value::NationalStringLiteral(s)
| Value::QuoteDelimitedStringLiteral(_, s, _)
| Value::NationalQuoteDelimitedStringLiteral(_, s, _)
| Value::HexStringLiteral(s) => Some(s),
Value::DollarQuotedString(s) => Some(s.value),
_ => None,
@ -242,6 +250,8 @@ impl fmt::Display for Value {
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)),
Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
Value::QuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "Q'{q1}{s}{q2}'"),
Value::NationalQuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "NQ'{q1}{s}{q2}'"),
Value::HexStringLiteral(v) => write!(f, "X'{v}'"),
Value::Boolean(v) => write!(f, "{v}"),
Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),

View file

@ -1754,6 +1754,8 @@ impl<'a> Parser<'a> {
| Token::TripleSingleQuotedRawStringLiteral(_)
| Token::TripleDoubleQuotedRawStringLiteral(_)
| Token::NationalStringLiteral(_)
| Token::QuoteDelimitedStringLiteral(_, _, _)
| Token::NationalQuoteDelimitedStringLiteral(_, _, _)
| Token::HexStringLiteral(_) => {
self.prev_token();
Ok(Expr::Value(self.parse_value()?))
@ -2770,6 +2772,8 @@ impl<'a> Parser<'a> {
| Token::EscapedStringLiteral(_)
| Token::UnicodeStringLiteral(_)
| Token::NationalStringLiteral(_)
| Token::QuoteDelimitedStringLiteral(_, _, _)
| Token::NationalQuoteDelimitedStringLiteral(_, _, _)
| Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
_ => self.expected(
"either filler, WITH, or WITHOUT in LISTAGG",
@ -10697,6 +10701,12 @@ impl<'a> Parser<'a> {
Token::NationalStringLiteral(ref s) => {
ok_value(Value::NationalStringLiteral(s.to_string()))
}
Token::QuoteDelimitedStringLiteral(q1, s, q2) => {
ok_value(Value::QuoteDelimitedStringLiteral(q1, s, q2))
}
Token::NationalQuoteDelimitedStringLiteral(q1, s, q2) => {
ok_value(Value::NationalQuoteDelimitedStringLiteral(q1, s, q2))
}
Token::EscapedStringLiteral(ref s) => {
ok_value(Value::EscapedStringLiteral(s.to_string()))
}

View file

@ -29,10 +29,10 @@ use alloc::{
vec,
vec::Vec,
};
use core::iter::Peekable;
use core::num::NonZeroU8;
use core::str::Chars;
use core::{cmp, fmt};
use core::{iter::Peekable, str};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
@ -40,11 +40,11 @@ use serde::{Deserialize, Serialize};
#[cfg(feature = "visitor")]
use sqlparser_derive::{Visit, VisitMut};
use crate::dialect::Dialect;
use crate::dialect::{
BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect,
SnowflakeDialect,
};
use crate::dialect::{Dialect, OracleDialect};
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
use crate::{ast::DollarQuotedString, dialect::HiveDialect};
@ -98,6 +98,12 @@ pub enum Token {
TripleDoubleQuotedRawStringLiteral(String),
/// "National" string literal: i.e: N'string'
NationalStringLiteral(String),
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
QuoteDelimitedStringLiteral(char, String, char),
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
NationalQuoteDelimitedStringLiteral(char, String, char),
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
EscapedStringLiteral(String),
/// Unicode string literal: i.e: U&'first \000A second'
@ -292,6 +298,10 @@ impl fmt::Display for Token {
Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""),
Token::DollarQuotedString(ref s) => write!(f, "{s}"),
Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"),
Token::QuoteDelimitedStringLiteral(q1, ref s, q2) => write!(f, "Q'{q1}{s}{q2}'"),
Token::NationalQuoteDelimitedStringLiteral(q1, ref s, q2) => {
write!(f, "NQ'{q1}{s}{q2}'")
}
Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"),
Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"),
Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
@ -1032,6 +1042,16 @@ impl<'a> Tokenizer<'a> {
self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
Ok(Some(Token::NationalStringLiteral(s)))
}
Some(&q @ 'q') | Some(&q @ 'Q') if dialect_of!(self is OracleDialect | GenericDialect) =>
{
chars.next(); // consume and check the next char
self.tokenize_word_or_quote_delimited_string(
chars,
&[n, q],
Token::NationalQuoteDelimitedStringLiteral,
)
.map(Some)
}
_ => {
// regular identifier starting with an "N"
let s = self.tokenize_word(n, chars);
@ -1039,6 +1059,15 @@ impl<'a> Tokenizer<'a> {
}
}
}
q @ 'Q' | q @ 'q' if dialect_of!(self is OracleDialect | GenericDialect) => {
chars.next(); // consume and check the next char
self.tokenize_word_or_quote_delimited_string(
chars,
&[q],
Token::QuoteDelimitedStringLiteral,
)
.map(Some)
}
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
let starting_loc = chars.location();
@ -1994,6 +2023,70 @@ impl<'a> Tokenizer<'a> {
)
}
/// Tokenizes what follows an already consumed `word_prefix` (e.g. `Q` or
/// `nq`): a quote delimited string literal when the next character is a
/// `'`, an ordinary word otherwise. No "backslash escaping" is applied
/// inside the literal.
///
/// The character right after the opening `'` is the start delimiter. The
/// literal runs until the matching end delimiter that is immediately
/// followed by a closing `'`.
///
/// See <https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html>
fn tokenize_word_or_quote_delimited_string(
    &self,
    chars: &mut State,
    // the characters which introduced the possible literal or word,
    // e.g. "Q" or "nq"
    word_prefix: &[char],
    // builds the resulting token out of
    // `(start-quote-char, string-literal, end-quote-char)`
    as_literal: fn(char, String, char) -> Token,
) -> Result<Token, TokenizerError> {
    // Without an opening quote the prefix merely starts a regular word.
    if !matches!(chars.peek(), Some('\'')) {
        let word = self.tokenize_word(String::from_iter(word_prefix), chars);
        return Ok(Token::make_word(&word, None));
    }
    chars.next(); // consume the opening `'`

    // Both errors below are reported at the position of the delimiter.
    let error_loc = chars.location();

    // Whitespace or end of input cannot serve as the delimiter. (A note
    // in the original code says sql*plus tolerates a newline here even
    // though Oracle's SQL Reference forbids it; it is rejected anyway.)
    let start_quote_char = match chars.next() {
        Some(c) if !matches!(c, ' ' | '\t' | '\r' | '\n') => c,
        _ => {
            return self.tokenizer_error(
                error_loc,
                format!(
                    "Invalid space, tab, newline, or EOF after '{}''.",
                    String::from_iter(word_prefix)
                ),
            );
        }
    };

    // Opening brackets are closed by their counterpart; every other
    // delimiter closes itself.
    let end_quote_char = match start_quote_char {
        '[' => ']',
        '{' => '}',
        '<' => '>',
        '(' => ')',
        other => other,
    };

    // Accumulate characters until the end delimiter is directly followed
    // by a single quote; an end delimiter on its own is literal content.
    let mut literal = String::new();
    loop {
        match chars.next() {
            None => return self.tokenizer_error(error_loc, "Unterminated string literal"),
            Some(ch) => {
                if ch == end_quote_char && matches!(chars.peek(), Some('\'')) {
                    chars.next(); // consume the closing `'`
                    return Ok(as_literal(start_quote_char, literal, end_quote_char));
                }
                literal.push(ch);
            }
        }
    }
}
/// Read a quoted string.
fn tokenize_quoted_string(
&self,

View file

@ -21,7 +21,7 @@
use pretty_assertions::assert_eq;
use sqlparser::{
ast::{BinaryOperator, Expr, Value, ValueWithSpan},
ast::{BinaryOperator, Expr, Ident, Value, ValueWithSpan},
dialect::OracleDialect,
tokenizer::Span,
};
@ -103,3 +103,166 @@ fn plusminus_have_same_precedence_as_strconcat() {
}
);
}
/// Round-trips a selection of `Q'...'` literals with various delimiters and
/// checks the `(start delimiter, content, end delimiter)` parsed for each.
#[test]
fn parse_quote_delimited_string() {
    let sql = "SELECT Q'.abc.', \
               Q'Xab'cX', \
               Q'|abc'''|', \
               Q'{abc}d}', \
               Q'[]abc[]', \
               Q'<a'bc>', \
               Q'<<<a'bc>', \
               Q'('abc'('abc)', \
               Q'(abc'def))', \
               Q'(abc'def)))' \
               FROM dual";
    let select = oracle().verified_only_select(sql);
    assert_eq!(10, select.projection.len());

    // one entry per projected literal, in the order of the SQL above
    let expected: [(char, &str, char); 10] = [
        ('.', "abc", '.'),
        ('X', "ab'c", 'X'),
        ('|', "abc'''", '|'),
        ('{', "abc}d", '}'),
        ('[', "]abc[", ']'),
        ('<', "a'bc", '>'),
        ('<', "<<a'bc", '>'),
        ('(', "'abc'('abc", ')'),
        ('(', "abc'def)", ')'),
        ('(', "abc'def))", ')'),
    ];
    for (idx, &(start, content, end)) in expected.iter().enumerate() {
        let literal = Value::QuoteDelimitedStringLiteral(start, content.into(), end);
        assert_eq!(
            &Expr::Value(literal.with_empty_span()),
            expr_from_projection(&select.projection[idx])
        );
    }
}
/// A lowercase `q'...'` literal is accepted and serialized back with an
/// uppercase `Q`.
#[test]
fn parse_quote_delimited_string_lowercase() {
    let input = "select q'!a'b'c!d!' from dual";
    let canonical = "SELECT Q'!a'b'c!d!' FROM dual";
    let select = oracle().verified_only_select_with_canonical(input, canonical);
    assert_eq!(1, select.projection.len());

    let expected =
        Expr::Value(Value::QuoteDelimitedStringLiteral('!', "a'b'c!d".into(), '!').with_empty_span());
    assert_eq!(&expected, expr_from_projection(&select.projection[0]));
}
/// A `q` that is not followed by a single quote stays a plain identifier,
/// whether standalone, as a word prefix, or as a qualifier.
#[test]
fn parse_quote_delimited_string_but_is_a_word() {
    let select = oracle().verified_only_select("SELECT q, quux, q.abc FROM dual q");
    assert_eq!(3, select.projection.len());

    let ident = |name: &str| Ident::with_span(Span::empty(), name);
    let expected = [
        Expr::Identifier(ident("q")),
        Expr::Identifier(ident("quux")),
        Expr::CompoundIdentifier(vec![ident("q"), ident("abc")]),
    ];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(want, expr_from_projection(&select.projection[idx]));
    }
}
/// An `NQ'...'` literal parses into the national variant and round-trips
/// unchanged.
#[test]
fn parse_national_quote_delimited_string() {
    let select = oracle().verified_only_select("SELECT NQ'.abc.' FROM dual");
    assert_eq!(1, select.projection.len());

    let literal = Value::NationalQuoteDelimitedStringLiteral('.', "abc".into(), '.');
    let expected = Expr::Value(literal.with_empty_span());
    assert_eq!(&expected, expr_from_projection(&select.projection[0]));
}
/// Every upper/lower-case spelling of the `NQ` prefix is accepted and is
/// serialized back as uppercase `NQ`.
#[test]
fn parse_national_quote_delimited_string_lowercase() {
    let canonical = "SELECT NQ'!a'b'c!d!' FROM dual";
    let expected = Expr::Value(
        Value::NationalQuoteDelimitedStringLiteral('!', "a'b'c!d".into(), '!').with_empty_span(),
    );

    for prefix in ["nq", "Nq", "nQ", "NQ"] {
        let input = format!("select {prefix}'!a'b'c!d!' from dual");
        let select = oracle().verified_only_select_with_canonical(&input, canonical);
        assert_eq!(1, select.projection.len());
        assert_eq!(&expected, expr_from_projection(&select.projection[0]));
    }
}
/// An `nq` that is not followed by a single quote stays a plain identifier,
/// whether standalone, as a word prefix, or as a qualifier.
#[test]
fn parse_national_quote_delimited_string_but_is_a_word() {
    let select = oracle().verified_only_select("SELECT nq, nqoo, nq.abc FROM dual q");
    assert_eq!(3, select.projection.len());

    let ident = |name: &str| Ident::with_span(Span::empty(), name);
    let expected = [
        Expr::Identifier(ident("nq")),
        Expr::Identifier(ident("nqoo")),
        Expr::CompoundIdentifier(vec![ident("nq"), ident("abc")]),
    ];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(want, expr_from_projection(&select.projection[idx]));
    }
}