diff --git a/src/ast/value.rs b/src/ast/value.rs index fdfa6a67..f4d05c31 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -167,6 +167,12 @@ pub enum Value { TripleDoubleQuotedRawStringLiteral(String), /// N'string value' NationalStringLiteral(String), + /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + QuoteDelimitedStringLiteral(char, String, char), + /// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + NationalQuoteDelimitedStringLiteral(char, String, char), /// X'hex value' HexStringLiteral(String), @@ -205,6 +211,8 @@ impl Value { | Value::EscapedStringLiteral(s) | Value::UnicodeStringLiteral(s) | Value::NationalStringLiteral(s) + | Value::QuoteDelimitedStringLiteral(_, s, _) + | Value::NationalQuoteDelimitedStringLiteral(_, s, _) | Value::HexStringLiteral(s) => Some(s), Value::DollarQuotedString(s) => Some(s.value), _ => None, @@ -242,6 +250,8 @@ impl fmt::Display for Value { Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{v}'"), + Value::QuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "Q'{q1}{s}{q2}'"), + Value::NationalQuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "NQ'{q1}{s}{q2}'"), Value::HexStringLiteral(v) => write!(f, "X'{v}'"), Value::Boolean(v) => write!(f, "{v}"), Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3ba4ba57..a89589f0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1754,6 +1754,8 @@ impl<'a> Parser<'a> { | Token::TripleSingleQuotedRawStringLiteral(_) | Token::TripleDoubleQuotedRawStringLiteral(_) | 
Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_, _, _) + | Token::NationalQuoteDelimitedStringLiteral(_, _, _) | Token::HexStringLiteral(_) => { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) @@ -2770,6 +2772,8 @@ impl<'a> Parser<'a> { | Token::EscapedStringLiteral(_) | Token::UnicodeStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_, _, _) + | Token::NationalQuoteDelimitedStringLiteral(_, _, _) | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), _ => self.expected( "either filler, WITH, or WITHOUT in LISTAGG", @@ -10697,6 +10701,12 @@ impl<'a> Parser<'a> { Token::NationalStringLiteral(ref s) => { ok_value(Value::NationalStringLiteral(s.to_string())) } + Token::QuoteDelimitedStringLiteral(q1, s, q2) => { + ok_value(Value::QuoteDelimitedStringLiteral(q1, s, q2)) + } + Token::NationalQuoteDelimitedStringLiteral(q1, s, q2) => { + ok_value(Value::NationalQuoteDelimitedStringLiteral(q1, s, q2)) + } Token::EscapedStringLiteral(ref s) => { ok_value(Value::EscapedStringLiteral(s.to_string())) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 54a158c1..fe5002b7 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -29,10 +29,10 @@ use alloc::{ vec, vec::Vec, }; -use core::iter::Peekable; use core::num::NonZeroU8; use core::str::Chars; use core::{cmp, fmt}; +use core::{iter::Peekable, str}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -40,11 +40,11 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::dialect::Dialect; use crate::dialect::{ BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect, SnowflakeDialect, }; +use crate::dialect::{Dialect, OracleDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; use crate::{ast::DollarQuotedString, dialect::HiveDialect}; @@ -98,6 +98,12 @@ pub enum Token { TripleDoubleQuotedRawStringLiteral(String), 
/// "National" string literal: i.e: N'string' NationalStringLiteral(String), + /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + QuoteDelimitedStringLiteral(char, String, char), + /// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + NationalQuoteDelimitedStringLiteral(char, String, char), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' EscapedStringLiteral(String), /// Unicode string literal: i.e: U&'first \000A second' @@ -292,6 +298,10 @@ impl fmt::Display for Token { Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""), Token::DollarQuotedString(ref s) => write!(f, "{s}"), Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"), + Token::QuoteDelimitedStringLiteral(q1, ref s, q2) => write!(f, "Q'{q1}{s}{q2}'"), + Token::NationalQuoteDelimitedStringLiteral(q1, ref s, q2) => { + write!(f, "NQ'{q1}{s}{q2}'") + } Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"), Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"), Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"), @@ -1032,6 +1042,16 @@ impl<'a> Tokenizer<'a> { self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; Ok(Some(Token::NationalStringLiteral(s))) } + Some(&q @ 'q') | Some(&q @ 'Q') if dialect_of!(self is OracleDialect | GenericDialect) => + { + chars.next(); // consume and check the next char + self.tokenize_word_or_quote_delimited_string( + chars, + &[n, q], + Token::NationalQuoteDelimitedStringLiteral, + ) + .map(Some) + } _ => { // regular identifier starting with an "N" let s = self.tokenize_word(n, chars); @@ -1039,6 +1059,15 @@ impl<'a> Tokenizer<'a> { } } } + q @ 'Q' | q @ 'q' if dialect_of!(self is OracleDialect | 
GenericDialect) => { + chars.next(); // consume and check the next char + self.tokenize_word_or_quote_delimited_string( + chars, + &[q], + Token::QuoteDelimitedStringLiteral, + ) + .map(Some) + } // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => { let starting_loc = chars.location(); @@ -1994,6 +2023,70 @@ impl<'a> Tokenizer<'a> { ) } + /// Reads a quote delimited string without "backslash escaping" or a word + /// depending on whether the next character peeked from `chars` is a `'`. + /// + /// See [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + fn tokenize_word_or_quote_delimited_string( + &self, + chars: &mut State, + // the prefix that introduced the possible literal or word, + // e.g. "Q" or "nq" + word_prefix: &[char], + // turns an identified quote string literal, + // ie. `(start-quote-char, string-literal, end-quote-char)` + // into a token + as_literal: fn(char, String, char) -> Token, + ) -> Result { + match chars.peek() { + Some('\'') => { + chars.next(); + // ~ determine the "quote character(s)" + let error_loc = chars.location(); + let (start_quote_char, end_quote_char) = match chars.next() { + // ~ "newline" is not allowed by Oracle's SQL Reference, + // but works with sql*plus nevertheless + None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => { + return self.tokenizer_error( + error_loc, + format!( + "Invalid space, tab, newline, or EOF after '{}''.", + String::from_iter(word_prefix) + ), + ); + } + Some(c) => ( + c, + match c { + '[' => ']', + '{' => '}', + '<' => '>', + '(' => ')', + c => c, + }, + ), + }; + // read the string literal up to the closing "quote character" that is immediately followed by a literal quote + let mut s = String::new(); + while let Some(ch) = chars.next() { + if ch == end_quote_char { + if let Some('\'') = chars.peek() { + chars.next(); // ~ consume the quote + return Ok(as_literal(start_quote_char, s, end_quote_char)); + } + } + s.push(ch); + } + self.tokenizer_error(error_loc, "Unterminated 
string literal") + } + // ~ not a literal introduced with `word_prefix`; tokenize it as a regular word + _ => { + let s = self.tokenize_word(String::from_iter(word_prefix), chars); + Ok(Token::make_word(&s, None)) + } + } + } + /// Read a quoted string. fn tokenize_quoted_string( &self, diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index 09fd4191..6308e1b9 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -21,7 +21,7 @@ use pretty_assertions::assert_eq; use sqlparser::{ - ast::{BinaryOperator, Expr, Value, ValueWithSpan}, + ast::{BinaryOperator, Expr, Ident, Value, ValueWithSpan}, dialect::OracleDialect, tokenizer::Span, }; @@ -103,3 +103,166 @@ fn plusminus_have_same_precedence_as_strconcat() { } ); } + +#[test] +fn parse_quote_delimited_string() { + let sql = "SELECT Q'.abc.', \ + Q'Xab'cX', \ + Q'|abc'''|', \ + Q'{abc}d}', \ + Q'[]abc[]', \ + Q'', \ + Q'<<', \ + Q'('abc'('abc)', \ + Q'(abc'def))', \ + Q'(abc'def)))' \ + FROM dual"; + let select = oracle().verified_only_select(sql); + assert_eq!(10, select.projection.len()); + assert_eq!( + &Expr::Value(Value::QuoteDelimitedStringLiteral('.', "abc".into(), '.').with_empty_span()), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('X', "ab'c".into(), 'X')).with_empty_span() + ), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('|', "abc'''".into(), '|')).with_empty_span() + ), + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('{', "abc}d".into(), '}')).with_empty_span() + ), + expr_from_projection(&select.projection[3]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('[', "]abc[".into(), ']')).with_empty_span() + ), + expr_from_projection(&select.projection[4]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('<', "a'bc".into(), 
'>')).with_empty_span() + ), + expr_from_projection(&select.projection[5]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('<', "<')).with_empty_span() + ), + expr_from_projection(&select.projection[6]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('(', "'abc'('abc".into(), ')')).with_empty_span() + ), + expr_from_projection(&select.projection[7]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('(', "abc'def)".into(), ')')).with_empty_span() + ), + expr_from_projection(&select.projection[8]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('(', "abc'def))".into(), ')')).with_empty_span() + ), + expr_from_projection(&select.projection[9]) + ); +} + +#[test] +fn parse_quote_delimited_string_lowercase() { + let sql = "select q'!a'b'c!d!' from dual"; + let select = oracle().verified_only_select_with_canonical(sql, "SELECT Q'!a'b'c!d!' FROM dual"); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::QuoteDelimitedStringLiteral('!', "a'b'c!d".into(), '!').with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_quote_delimited_string_but_is_a_word() { + let sql = "SELECT q, quux, q.abc FROM dual q"; + let select = oracle().verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "q")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "quux")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "q"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} + +#[test] +fn parse_national_quote_delimited_string() { + let sql = "SELECT NQ'.abc.' 
FROM dual"; + let select = oracle().verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral('.', "abc".into(), '.').with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_national_quote_delimited_string_lowercase() { + for prefix in ["nq", "Nq", "nQ", "NQ"] { + let select = oracle().verified_only_select_with_canonical( + &format!("select {prefix}'!a'b'c!d!' from dual"), + "SELECT NQ'!a'b'c!d!' FROM dual", + ); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral('!', "a'b'c!d".into(), '!') + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); + } +} + +#[test] +fn parse_national_quote_delimited_string_but_is_a_word() { + let sql = "SELECT nq, nqoo, nq.abc FROM dual q"; + let select = oracle().verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nq")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nqoo")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "nq"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +}