mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-12-23 11:12:51 +00:00
Support quote delimited strings
This commit is contained in:
parent
cdeed32294
commit
53af27d99e
4 changed files with 279 additions and 3 deletions
|
|
@ -167,6 +167,12 @@ pub enum Value {
|
|||
TripleDoubleQuotedRawStringLiteral(String),
|
||||
/// N'string value'
|
||||
NationalStringLiteral(String),
|
||||
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
|
||||
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
|
||||
QuoteDelimitedStringLiteral(char, String, char),
|
||||
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
|
||||
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
|
||||
NationalQuoteDelimitedStringLiteral(char, String, char),
|
||||
/// X'hex value'
|
||||
HexStringLiteral(String),
|
||||
|
||||
|
|
@ -205,6 +211,8 @@ impl Value {
|
|||
| Value::EscapedStringLiteral(s)
|
||||
| Value::UnicodeStringLiteral(s)
|
||||
| Value::NationalStringLiteral(s)
|
||||
| Value::QuoteDelimitedStringLiteral(_, s, _)
|
||||
| Value::NationalQuoteDelimitedStringLiteral(_, s, _)
|
||||
| Value::HexStringLiteral(s) => Some(s),
|
||||
Value::DollarQuotedString(s) => Some(s.value),
|
||||
_ => None,
|
||||
|
|
@ -242,6 +250,8 @@ impl fmt::Display for Value {
|
|||
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
|
||||
Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)),
|
||||
Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
|
||||
Value::QuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "Q'{q1}{s}{q2}'"),
|
||||
Value::NationalQuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "NQ'{q1}{s}{q2}'"),
|
||||
Value::HexStringLiteral(v) => write!(f, "X'{v}'"),
|
||||
Value::Boolean(v) => write!(f, "{v}"),
|
||||
Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),
|
||||
|
|
|
|||
|
|
@ -1754,6 +1754,8 @@ impl<'a> Parser<'a> {
|
|||
| Token::TripleSingleQuotedRawStringLiteral(_)
|
||||
| Token::TripleDoubleQuotedRawStringLiteral(_)
|
||||
| Token::NationalStringLiteral(_)
|
||||
| Token::QuoteDelimitedStringLiteral(_, _, _)
|
||||
| Token::NationalQuoteDelimitedStringLiteral(_, _, _)
|
||||
| Token::HexStringLiteral(_) => {
|
||||
self.prev_token();
|
||||
Ok(Expr::Value(self.parse_value()?))
|
||||
|
|
@ -2770,6 +2772,8 @@ impl<'a> Parser<'a> {
|
|||
| Token::EscapedStringLiteral(_)
|
||||
| Token::UnicodeStringLiteral(_)
|
||||
| Token::NationalStringLiteral(_)
|
||||
| Token::QuoteDelimitedStringLiteral(_, _, _)
|
||||
| Token::NationalQuoteDelimitedStringLiteral(_, _, _)
|
||||
| Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
|
||||
_ => self.expected(
|
||||
"either filler, WITH, or WITHOUT in LISTAGG",
|
||||
|
|
@ -10697,6 +10701,12 @@ impl<'a> Parser<'a> {
|
|||
Token::NationalStringLiteral(ref s) => {
|
||||
ok_value(Value::NationalStringLiteral(s.to_string()))
|
||||
}
|
||||
Token::QuoteDelimitedStringLiteral(q1, s, q2) => {
|
||||
ok_value(Value::QuoteDelimitedStringLiteral(q1, s, q2))
|
||||
}
|
||||
Token::NationalQuoteDelimitedStringLiteral(q1, s, q2) => {
|
||||
ok_value(Value::NationalQuoteDelimitedStringLiteral(q1, s, q2))
|
||||
}
|
||||
Token::EscapedStringLiteral(ref s) => {
|
||||
ok_value(Value::EscapedStringLiteral(s.to_string()))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,10 +29,10 @@ use alloc::{
|
|||
vec,
|
||||
vec::Vec,
|
||||
};
|
||||
use core::iter::Peekable;
|
||||
use core::num::NonZeroU8;
|
||||
use core::str::Chars;
|
||||
use core::{cmp, fmt};
|
||||
use core::{iter::Peekable, str};
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
@ -40,11 +40,11 @@ use serde::{Deserialize, Serialize};
|
|||
#[cfg(feature = "visitor")]
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
use crate::dialect::Dialect;
|
||||
use crate::dialect::{
|
||||
BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect,
|
||||
SnowflakeDialect,
|
||||
};
|
||||
use crate::dialect::{Dialect, OracleDialect};
|
||||
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
|
||||
use crate::{ast::DollarQuotedString, dialect::HiveDialect};
|
||||
|
||||
|
|
@ -98,6 +98,12 @@ pub enum Token {
|
|||
TripleDoubleQuotedRawStringLiteral(String),
|
||||
/// "National" string literal: i.e: N'string'
|
||||
NationalStringLiteral(String),
|
||||
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
|
||||
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
|
||||
QuoteDelimitedStringLiteral(char, String, char),
|
||||
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
|
||||
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
|
||||
NationalQuoteDelimitedStringLiteral(char, String, char),
|
||||
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
|
||||
EscapedStringLiteral(String),
|
||||
/// Unicode string literal: i.e: U&'first \000A second'
|
||||
|
|
@ -292,6 +298,10 @@ impl fmt::Display for Token {
|
|||
Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""),
|
||||
Token::DollarQuotedString(ref s) => write!(f, "{s}"),
|
||||
Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"),
|
||||
Token::QuoteDelimitedStringLiteral(q1, ref s, q2) => write!(f, "Q'{q1}{s}{q2}'"),
|
||||
Token::NationalQuoteDelimitedStringLiteral(q1, ref s, q2) => {
|
||||
write!(f, "NQ'{q1}{s}{q2}'")
|
||||
}
|
||||
Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"),
|
||||
Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"),
|
||||
Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
|
||||
|
|
@ -1032,6 +1042,16 @@ impl<'a> Tokenizer<'a> {
|
|||
self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
|
||||
Ok(Some(Token::NationalStringLiteral(s)))
|
||||
}
|
||||
Some(&q @ 'q') | Some(&q @ 'Q') if dialect_of!(self is OracleDialect | GenericDialect) =>
|
||||
{
|
||||
chars.next(); // consume and check the next char
|
||||
self.tokenize_word_or_quote_delimited_string(
|
||||
chars,
|
||||
&[n, q],
|
||||
Token::NationalQuoteDelimitedStringLiteral,
|
||||
)
|
||||
.map(Some)
|
||||
}
|
||||
_ => {
|
||||
// regular identifier starting with an "N"
|
||||
let s = self.tokenize_word(n, chars);
|
||||
|
|
@ -1039,6 +1059,15 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
}
|
||||
}
|
||||
q @ 'Q' | q @ 'q' if dialect_of!(self is OracleDialect | GenericDialect) => {
|
||||
chars.next(); // consume and check the next char
|
||||
self.tokenize_word_or_quote_delimited_string(
|
||||
chars,
|
||||
&[q],
|
||||
Token::QuoteDelimitedStringLiteral,
|
||||
)
|
||||
.map(Some)
|
||||
}
|
||||
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
|
||||
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
|
||||
let starting_loc = chars.location();
|
||||
|
|
@ -1994,6 +2023,70 @@ impl<'a> Tokenizer<'a> {
|
|||
)
|
||||
}
|
||||
|
||||
/// Reads a quote delimited string without "backslash escaping" or a word
|
||||
/// depending on whether the next character peeked from `chars` is a `'`.
|
||||
///
|
||||
/// See <https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html>
|
||||
fn tokenize_word_or_quote_delimited_string(
|
||||
&self,
|
||||
chars: &mut State,
|
||||
// the prefix that introduced the possible literal or word,
|
||||
// e.g. "Q" or "nq"
|
||||
word_prefix: &[char],
|
||||
// turns an identified quote string literal,
|
||||
// ie. `(start-quote-char, string-literal, end-quote-char)`
|
||||
// into a token
|
||||
as_literal: fn(char, String, char) -> Token,
|
||||
) -> Result<Token, TokenizerError> {
|
||||
match chars.peek() {
|
||||
Some('\'') => {
|
||||
chars.next();
|
||||
// ~ determine the "quote character(s)"
|
||||
let error_loc = chars.location();
|
||||
let (start_quote_char, end_quote_char) = match chars.next() {
|
||||
// ~ "newline" is not allowed by Oracle's SQL Reference,
|
||||
// but works with sql*plus nevertheless
|
||||
None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => {
|
||||
return self.tokenizer_error(
|
||||
error_loc,
|
||||
format!(
|
||||
"Invalid space, tab, newline, or EOF after '{}''.",
|
||||
String::from_iter(word_prefix)
|
||||
),
|
||||
);
|
||||
}
|
||||
Some(c) => (
|
||||
c,
|
||||
match c {
|
||||
'[' => ']',
|
||||
'{' => '}',
|
||||
'<' => '>',
|
||||
'(' => ')',
|
||||
c => c,
|
||||
},
|
||||
),
|
||||
};
|
||||
// read the string literal until the closing "quote character" that is immediately followed by a literal single quote
|
||||
let mut s = String::new();
|
||||
while let Some(ch) = chars.next() {
|
||||
if ch == end_quote_char {
|
||||
if let Some('\'') = chars.peek() {
|
||||
chars.next(); // ~ consume the quote
|
||||
return Ok(as_literal(start_quote_char, s, end_quote_char));
|
||||
}
|
||||
}
|
||||
s.push(ch);
|
||||
}
|
||||
self.tokenizer_error(error_loc, "Unterminated string literal")
|
||||
}
|
||||
// ~ not a literal introduced with `word_prefix`; tokenize it as a regular word instead
|
||||
_ => {
|
||||
let s = self.tokenize_word(String::from_iter(word_prefix), chars);
|
||||
Ok(Token::make_word(&s, None))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Read a quoted string.
|
||||
fn tokenize_quoted_string(
|
||||
&self,
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@
|
|||
use pretty_assertions::assert_eq;
|
||||
|
||||
use sqlparser::{
|
||||
ast::{BinaryOperator, Expr, Value, ValueWithSpan},
|
||||
ast::{BinaryOperator, Expr, Ident, Value, ValueWithSpan},
|
||||
dialect::OracleDialect,
|
||||
tokenizer::Span,
|
||||
};
|
||||
|
|
@ -103,3 +103,166 @@ fn plusminus_have_same_precedence_as_strconcat() {
|
|||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_quote_delimited_string() {
|
||||
let sql = "SELECT Q'.abc.', \
|
||||
Q'Xab'cX', \
|
||||
Q'|abc'''|', \
|
||||
Q'{abc}d}', \
|
||||
Q'[]abc[]', \
|
||||
Q'<a'bc>', \
|
||||
Q'<<<a'bc>', \
|
||||
Q'('abc'('abc)', \
|
||||
Q'(abc'def))', \
|
||||
Q'(abc'def)))' \
|
||||
FROM dual";
|
||||
let select = oracle().verified_only_select(sql);
|
||||
assert_eq!(10, select.projection.len());
|
||||
assert_eq!(
|
||||
&Expr::Value(Value::QuoteDelimitedStringLiteral('.', "abc".into(), '.').with_empty_span()),
|
||||
expr_from_projection(&select.projection[0])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::Value(
|
||||
(Value::QuoteDelimitedStringLiteral('X', "ab'c".into(), 'X')).with_empty_span()
|
||||
),
|
||||
expr_from_projection(&select.projection[1])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::Value(
|
||||
(Value::QuoteDelimitedStringLiteral('|', "abc'''".into(), '|')).with_empty_span()
|
||||
),
|
||||
expr_from_projection(&select.projection[2])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::Value(
|
||||
(Value::QuoteDelimitedStringLiteral('{', "abc}d".into(), '}')).with_empty_span()
|
||||
),
|
||||
expr_from_projection(&select.projection[3])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::Value(
|
||||
(Value::QuoteDelimitedStringLiteral('[', "]abc[".into(), ']')).with_empty_span()
|
||||
),
|
||||
expr_from_projection(&select.projection[4])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::Value(
|
||||
(Value::QuoteDelimitedStringLiteral('<', "a'bc".into(), '>')).with_empty_span()
|
||||
),
|
||||
expr_from_projection(&select.projection[5])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::Value(
|
||||
(Value::QuoteDelimitedStringLiteral('<', "<<a'bc".into(), '>')).with_empty_span()
|
||||
),
|
||||
expr_from_projection(&select.projection[6])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::Value(
|
||||
(Value::QuoteDelimitedStringLiteral('(', "'abc'('abc".into(), ')')).with_empty_span()
|
||||
),
|
||||
expr_from_projection(&select.projection[7])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::Value(
|
||||
(Value::QuoteDelimitedStringLiteral('(', "abc'def)".into(), ')')).with_empty_span()
|
||||
),
|
||||
expr_from_projection(&select.projection[8])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::Value(
|
||||
(Value::QuoteDelimitedStringLiteral('(', "abc'def))".into(), ')')).with_empty_span()
|
||||
),
|
||||
expr_from_projection(&select.projection[9])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_quote_delimited_string_lowercase() {
|
||||
let sql = "select q'!a'b'c!d!' from dual";
|
||||
let select = oracle().verified_only_select_with_canonical(sql, "SELECT Q'!a'b'c!d!' FROM dual");
|
||||
assert_eq!(1, select.projection.len());
|
||||
assert_eq!(
|
||||
&Expr::Value(
|
||||
Value::QuoteDelimitedStringLiteral('!', "a'b'c!d".into(), '!').with_empty_span()
|
||||
),
|
||||
expr_from_projection(&select.projection[0])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_quote_delimited_string_but_is_a_word() {
|
||||
let sql = "SELECT q, quux, q.abc FROM dual q";
|
||||
let select = oracle().verified_only_select(sql);
|
||||
assert_eq!(3, select.projection.len());
|
||||
assert_eq!(
|
||||
&Expr::Identifier(Ident::with_span(Span::empty(), "q")),
|
||||
expr_from_projection(&select.projection[0])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::Identifier(Ident::with_span(Span::empty(), "quux")),
|
||||
expr_from_projection(&select.projection[1])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::CompoundIdentifier(vec![
|
||||
Ident::with_span(Span::empty(), "q"),
|
||||
Ident::with_span(Span::empty(), "abc")
|
||||
]),
|
||||
expr_from_projection(&select.projection[2])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_national_quote_delimited_string() {
|
||||
let sql = "SELECT NQ'.abc.' FROM dual";
|
||||
let select = oracle().verified_only_select(sql);
|
||||
assert_eq!(1, select.projection.len());
|
||||
assert_eq!(
|
||||
&Expr::Value(
|
||||
Value::NationalQuoteDelimitedStringLiteral('.', "abc".into(), '.').with_empty_span()
|
||||
),
|
||||
expr_from_projection(&select.projection[0])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_national_quote_delimited_string_lowercase() {
|
||||
for prefix in ["nq", "Nq", "nQ", "NQ"] {
|
||||
let select = oracle().verified_only_select_with_canonical(
|
||||
&format!("select {prefix}'!a'b'c!d!' from dual"),
|
||||
"SELECT NQ'!a'b'c!d!' FROM dual",
|
||||
);
|
||||
assert_eq!(1, select.projection.len());
|
||||
assert_eq!(
|
||||
&Expr::Value(
|
||||
Value::NationalQuoteDelimitedStringLiteral('!', "a'b'c!d".into(), '!')
|
||||
.with_empty_span()
|
||||
),
|
||||
expr_from_projection(&select.projection[0])
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_national_quote_delimited_string_but_is_a_word() {
|
||||
let sql = "SELECT nq, nqoo, nq.abc FROM dual q";
|
||||
let select = oracle().verified_only_select(sql);
|
||||
assert_eq!(3, select.projection.len());
|
||||
assert_eq!(
|
||||
&Expr::Identifier(Ident::with_span(Span::empty(), "nq")),
|
||||
expr_from_projection(&select.projection[0])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::Identifier(Ident::with_span(Span::empty(), "nqoo")),
|
||||
expr_from_projection(&select.projection[1])
|
||||
);
|
||||
assert_eq!(
|
||||
&Expr::CompoundIdentifier(vec![
|
||||
Ident::with_span(Span::empty(), "nq"),
|
||||
Ident::with_span(Span::empty(), "abc")
|
||||
]),
|
||||
expr_from_projection(&select.projection[2])
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue