Mirror of https://github.com/apache/datafusion-sqlparser-rs.git (synced 2025-10-09 21:42:05 +00:00)

Commit 036a4120b4 — Support triple quoted strings (#1262)
Parent: e3692f4681
6 changed files with 592 additions and 62 deletions
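The change teaches the tokenizer and parser about BigQuery-style triple quoted literals ('''abc''', """abc""") and their byte (B) and raw (R) prefixed forms, each with its own Token and Value variant so the original delimiters survive re-serialization. Not part of the commit itself: a minimal sketch of the resulting behaviour through the crate's public API, assuming the AST shapes shown in the diff below.

use sqlparser::ast::{Expr, SelectItem, SetExpr, Statement, Value};
use sqlparser::dialect::BigQueryDialect;
use sqlparser::parser::Parser;

fn main() {
    // A triple quoted literal may contain the single form of its own quote.
    let sql = "SELECT '''it's fine''', \"\"\"abc\"\"\"";
    let statements = Parser::parse_sql(&BigQueryDialect {}, sql).unwrap();
    if let Statement::Query(query) = statements.into_iter().next().unwrap() {
        if let SetExpr::Select(select) = *query.body {
            // Expected values: Value::TripleSingleQuotedString("it's fine")
            // and Value::TripleDoubleQuotedString("abc").
            for item in &select.projection {
                if let SelectItem::UnnamedExpr(Expr::Value(v)) = item {
                    println!("{v}");
                }
            }
        }
    }
}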
src/ast/value.rs

@@ -42,6 +42,12 @@ pub enum Value {
     SingleQuotedString(String),
     // $<tag_name>$string value$<tag_name>$ (postgres syntax)
     DollarQuotedString(DollarQuotedString),
+    /// Triple single quoted strings: Example '''abc'''
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    TripleSingleQuotedString(String),
+    /// Triple double quoted strings: Example """abc"""
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    TripleDoubleQuotedString(String),
     /// e'string value' (postgres extension)
     /// See [Postgres docs](https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS)
     /// for more details.
@@ -50,9 +56,24 @@ pub enum Value {
     SingleQuotedByteStringLiteral(String),
     /// B"string value"
     DoubleQuotedByteStringLiteral(String),
-    /// R'string value' or r'string value' or r"string value"
-    /// <https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals>
-    RawStringLiteral(String),
+    /// Triple single quoted literal with byte string prefix. Example `B'''abc'''`
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    TripleSingleQuotedByteStringLiteral(String),
+    /// Triple double quoted literal with byte string prefix. Example `B"""abc"""`
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    TripleDoubleQuotedByteStringLiteral(String),
+    /// Single quoted literal with raw string prefix. Example `R'abc'`
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    SingleQuotedRawStringLiteral(String),
+    /// Double quoted literal with raw string prefix. Example `R"abc"`
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    DoubleQuotedRawStringLiteral(String),
+    /// Triple single quoted literal with raw string prefix. Example `R'''abc'''`
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    TripleSingleQuotedRawStringLiteral(String),
+    /// Triple double quoted literal with raw string prefix. Example `R"""abc"""`
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    TripleDoubleQuotedRawStringLiteral(String),
     /// N'string value'
     NationalStringLiteral(String),
     /// X'hex value'
@@ -73,6 +94,12 @@ impl fmt::Display for Value {
             Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
             Value::DoubleQuotedString(v) => write!(f, "\"{}\"", escape_double_quote_string(v)),
             Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
+            Value::TripleSingleQuotedString(v) => {
+                write!(f, "'''{v}'''")
+            }
+            Value::TripleDoubleQuotedString(v) => {
+                write!(f, r#""""{v}""""#)
+            }
             Value::DollarQuotedString(v) => write!(f, "{v}"),
             Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
             Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
@@ -80,7 +107,12 @@ impl fmt::Display for Value {
             Value::Boolean(v) => write!(f, "{v}"),
             Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),
             Value::DoubleQuotedByteStringLiteral(v) => write!(f, "B\"{v}\""),
-            Value::RawStringLiteral(v) => write!(f, "R'{v}'"),
+            Value::TripleSingleQuotedByteStringLiteral(v) => write!(f, "B'''{v}'''"),
+            Value::TripleDoubleQuotedByteStringLiteral(v) => write!(f, r#"B"""{v}""""#),
+            Value::SingleQuotedRawStringLiteral(v) => write!(f, "R'{v}'"),
+            Value::DoubleQuotedRawStringLiteral(v) => write!(f, "R\"{v}\""),
+            Value::TripleSingleQuotedRawStringLiteral(v) => write!(f, "R'''{v}'''"),
+            Value::TripleDoubleQuotedRawStringLiteral(v) => write!(f, r#"R"""{v}""""#),
             Value::Null => write!(f, "NULL"),
             Value::Placeholder(v) => write!(f, "{v}"),
         }
     }
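Not part of the diff: given the Display arms above, the new variants print their delimiters back verbatim. A small illustration, assuming the variants land exactly as shown:

use sqlparser::ast::Value;

fn main() {
    // Triple quoted variants keep their delimiters when re-serialized.
    assert_eq!(
        Value::TripleSingleQuotedString("abc".to_string()).to_string(),
        "'''abc'''"
    );
    assert_eq!(
        Value::TripleDoubleQuotedRawStringLiteral("abc".to_string()).to_string(),
        r#"R"""abc""""#
    );
}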
src/dialect/bigquery.rs

@@ -30,6 +30,11 @@ impl Dialect for BigQueryDialect {
         ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_'
     }
 
+    /// See [doc](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    fn supports_triple_quoted_string(&self) -> bool {
+        true
+    }
+
     /// See [doc](https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#first_value)
     fn supports_window_function_null_treatment_arg(&self) -> bool {
        true
src/dialect/mod.rs

@@ -231,6 +231,11 @@ pub trait Dialect: Debug + Any {
     fn convert_type_before_value(&self) -> bool {
         false
     }
+
+    /// Returns true if the dialect supports triple quoted string
+    /// e.g. `"""abc"""`
+    fn supports_triple_quoted_string(&self) -> bool {
+        false
+    }
     /// Dialect-specific prefix parser override
     fn parse_prefix(&self, _parser: &mut Parser) -> Option<Result<Expr, ParserError>> {
         // return None to fall back to the default behavior
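Not part of the diff: `supports_triple_quoted_string` defaults to false, so only dialects that opt in (BigQueryDialect above) change behaviour. A sketch of a third-party dialect opting in; it assumes the trait's only required methods are the two identifier hooks:

use sqlparser::dialect::Dialect;

/// A hypothetical dialect that opts in to triple quoted literals.
#[derive(Debug)]
struct MyDialect;

impl Dialect for MyDialect {
    fn is_identifier_start(&self, ch: char) -> bool {
        ch.is_ascii_alphabetic() || ch == '_'
    }

    fn is_identifier_part(&self, ch: char) -> bool {
        ch.is_ascii_alphanumeric() || ch == '_'
    }

    // New hook added by this commit; everything else keeps its default.
    fn supports_triple_quoted_string(&self) -> bool {
        true
    }
}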
src/parser/mod.rs

@@ -1187,10 +1187,17 @@ impl<'a> Parser<'a> {
             Token::Number(_, _)
             | Token::SingleQuotedString(_)
             | Token::DoubleQuotedString(_)
+            | Token::TripleSingleQuotedString(_)
+            | Token::TripleDoubleQuotedString(_)
             | Token::DollarQuotedString(_)
             | Token::SingleQuotedByteStringLiteral(_)
             | Token::DoubleQuotedByteStringLiteral(_)
-            | Token::RawStringLiteral(_)
+            | Token::TripleSingleQuotedByteStringLiteral(_)
+            | Token::TripleDoubleQuotedByteStringLiteral(_)
+            | Token::SingleQuotedRawStringLiteral(_)
+            | Token::DoubleQuotedRawStringLiteral(_)
+            | Token::TripleSingleQuotedRawStringLiteral(_)
+            | Token::TripleDoubleQuotedRawStringLiteral(_)
             | Token::NationalStringLiteral(_)
             | Token::HexStringLiteral(_) => {
                 self.prev_token();
@@ -6425,6 +6432,12 @@ impl<'a> Parser<'a> {
             },
             Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
             Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
+            Token::TripleSingleQuotedString(ref s) => {
+                Ok(Value::TripleSingleQuotedString(s.to_string()))
+            }
+            Token::TripleDoubleQuotedString(ref s) => {
+                Ok(Value::TripleDoubleQuotedString(s.to_string()))
+            }
             Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())),
             Token::SingleQuotedByteStringLiteral(ref s) => {
                 Ok(Value::SingleQuotedByteStringLiteral(s.clone()))
@@ -6432,7 +6445,24 @@ impl<'a> Parser<'a> {
             Token::DoubleQuotedByteStringLiteral(ref s) => {
                 Ok(Value::DoubleQuotedByteStringLiteral(s.clone()))
             }
-            Token::RawStringLiteral(ref s) => Ok(Value::RawStringLiteral(s.clone())),
+            Token::TripleSingleQuotedByteStringLiteral(ref s) => {
+                Ok(Value::TripleSingleQuotedByteStringLiteral(s.clone()))
+            }
+            Token::TripleDoubleQuotedByteStringLiteral(ref s) => {
+                Ok(Value::TripleDoubleQuotedByteStringLiteral(s.clone()))
+            }
+            Token::SingleQuotedRawStringLiteral(ref s) => {
+                Ok(Value::SingleQuotedRawStringLiteral(s.clone()))
+            }
+            Token::DoubleQuotedRawStringLiteral(ref s) => {
+                Ok(Value::DoubleQuotedRawStringLiteral(s.clone()))
+            }
+            Token::TripleSingleQuotedRawStringLiteral(ref s) => {
+                Ok(Value::TripleSingleQuotedRawStringLiteral(s.clone()))
+            }
+            Token::TripleDoubleQuotedRawStringLiteral(ref s) => {
+                Ok(Value::TripleDoubleQuotedRawStringLiteral(s.clone()))
+            }
             Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
             Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
             Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
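Not part of the diff: the parser keeps the mapping from token to value one-to-one, so the quoting style the user wrote is preserved end to end. A rough check of that wiring via the tokenizer (API as used in the tests further down):

use sqlparser::dialect::BigQueryDialect;
use sqlparser::tokenizer::{Token, Tokenizer};

fn main() {
    let dialect = BigQueryDialect {};
    let tokens = Tokenizer::new(&dialect, "R'''abc'''").tokenize().unwrap();
    // A single raw triple quoted token, which the value match above turns
    // into Value::TripleSingleQuotedRawStringLiteral("abc").
    assert!(matches!(
        tokens.as_slice(),
        [Token::TripleSingleQuotedRawStringLiteral(s)] if s == "abc"
    ));
}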
src/tokenizer.rs (394 changed lines)
@@ -26,6 +26,7 @@ use alloc::{
 };
 use core::fmt;
 use core::iter::Peekable;
+use core::num::NonZeroU8;
 use core::str::Chars;
 
 #[cfg(feature = "serde")]
@@ -58,6 +59,12 @@ pub enum Token {
     SingleQuotedString(String),
     /// Double quoted string: i.e: "string"
     DoubleQuotedString(String),
+    /// Triple single quoted strings: Example '''abc'''
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    TripleSingleQuotedString(String),
+    /// Triple double quoted strings: Example """abc"""
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    TripleDoubleQuotedString(String),
     /// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$
     DollarQuotedString(DollarQuotedString),
     /// Byte string literal: i.e: b'string' or B'string' (note that some backends, such as
@@ -65,8 +72,24 @@ pub enum Token {
     SingleQuotedByteStringLiteral(String),
     /// Byte string literal: i.e: b"string" or B"string"
     DoubleQuotedByteStringLiteral(String),
-    /// Raw string literal: i.e: r'string' or R'string' or r"string" or R"string"
-    RawStringLiteral(String),
+    /// Triple single quoted literal with byte string prefix. Example `B'''abc'''`
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    TripleSingleQuotedByteStringLiteral(String),
+    /// Triple double quoted literal with byte string prefix. Example `B"""abc"""`
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    TripleDoubleQuotedByteStringLiteral(String),
+    /// Single quoted literal with raw string prefix. Example `R'abc'`
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    SingleQuotedRawStringLiteral(String),
+    /// Double quoted literal with raw string prefix. Example `R"abc"`
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    DoubleQuotedRawStringLiteral(String),
+    /// Triple single quoted literal with raw string prefix. Example `R'''abc'''`
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    TripleSingleQuotedRawStringLiteral(String),
+    /// Triple double quoted literal with raw string prefix. Example `R"""abc"""`
+    /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
+    TripleDoubleQuotedRawStringLiteral(String),
     /// "National" string literal: i.e: N'string'
     NationalStringLiteral(String),
     /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
@@ -218,14 +241,21 @@ impl fmt::Display for Token {
             Token::Number(ref n, l) => write!(f, "{}{long}", n, long = if *l { "L" } else { "" }),
             Token::Char(ref c) => write!(f, "{c}"),
             Token::SingleQuotedString(ref s) => write!(f, "'{s}'"),
+            Token::TripleSingleQuotedString(ref s) => write!(f, "'''{s}'''"),
             Token::DoubleQuotedString(ref s) => write!(f, "\"{s}\""),
+            Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""),
             Token::DollarQuotedString(ref s) => write!(f, "{s}"),
             Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"),
             Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"),
             Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
             Token::SingleQuotedByteStringLiteral(ref s) => write!(f, "B'{s}'"),
+            Token::TripleSingleQuotedByteStringLiteral(ref s) => write!(f, "B'''{s}'''"),
             Token::DoubleQuotedByteStringLiteral(ref s) => write!(f, "B\"{s}\""),
-            Token::RawStringLiteral(ref s) => write!(f, "R'{s}'"),
+            Token::TripleDoubleQuotedByteStringLiteral(ref s) => write!(f, "B\"\"\"{s}\"\"\""),
+            Token::SingleQuotedRawStringLiteral(ref s) => write!(f, "R'{s}'"),
+            Token::DoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"{s}\""),
+            Token::TripleSingleQuotedRawStringLiteral(ref s) => write!(f, "R'''{s}'''"),
+            Token::TripleDoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"\"\"{s}\"\"\""),
             Token::Comma => f.write_str(","),
             Token::Whitespace(ws) => write!(f, "{ws}"),
             Token::DoubleEq => f.write_str("=="),
@@ -490,6 +520,32 @@ impl<'a> State<'a> {
     }
 }
 
+/// Represents how many quote characters enclose a string literal.
+#[derive(Copy, Clone)]
+enum NumStringQuoteChars {
+    /// e.g. `"abc"`, `'abc'`, `r'abc'`
+    One,
+    /// e.g. `"""abc"""`, `'''abc'''`, `r'''abc'''`
+    Many(NonZeroU8),
+}
+
+/// Settings for tokenizing a quoted string literal.
+struct TokenizeQuotedStringSettings {
+    /// The character used to quote the string.
+    quote_style: char,
+    /// Represents how many quote characters enclose the string literal.
+    num_quote_chars: NumStringQuoteChars,
+    /// The number of opening quotes left to consume before parsing the
+    /// remaining string literal.
+    /// For example: given the initial string `"""abc"""`, if the caller has
+    /// already consumed the first quote for some reason, this value is set
+    /// to 1, flagging that only 2 leading quotes remain to be consumed.
+    num_opening_quotes_to_consume: u8,
+    /// True if the string uses backslash escaping of special characters,
+    /// e.g. `'abc\ndef\'ghi'`
+    backslash_escape: bool,
+}
+
 /// SQL Tokenizer
 pub struct Tokenizer<'a> {
     dialect: &'a dyn Dialect,
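Not part of the diff: `NumStringQuoteChars` and `TokenizeQuotedStringSettings` encode a small decision rule on the number of leading quote characters (one quote: ordinary literal; two: empty literal; three: triple quoted literal). A standalone sketch of that rule using a hypothetical helper, independent of the tokenizer types:

/// What the tokenizer decides after looking at up to three leading quotes.
#[derive(Debug, PartialEq)]
enum QuoteOpening {
    /// `'abc'` — an ordinary single-quoted literal follows.
    Single,
    /// `''` — two quotes with no third means an empty single-quoted literal.
    Empty,
    /// `'''abc'''` — a triple-quoted literal follows.
    Triple,
}

fn classify_opening(input: &str, quote: char) -> Option<QuoteOpening> {
    // Count how many of the first three characters are the quote character.
    let n = input.chars().take(3).take_while(|c| *c == quote).count();
    match n {
        1 => Some(QuoteOpening::Single),
        2 => Some(QuoteOpening::Empty),
        3 => Some(QuoteOpening::Triple),
        _ => None, // no opening quote at all
    }
}

fn main() {
    assert_eq!(classify_opening("'abc'", '\''), Some(QuoteOpening::Single));
    assert_eq!(classify_opening("''", '\''), Some(QuoteOpening::Empty));
    assert_eq!(classify_opening("'''abc'''", '\''), Some(QuoteOpening::Triple));
}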
@@ -639,11 +695,31 @@ impl<'a> Tokenizer<'a> {
                 chars.next(); // consume
                 match chars.peek() {
                     Some('\'') => {
-                        let s = self.tokenize_quoted_string(chars, '\'', false)?;
+                        if self.dialect.supports_triple_quoted_string() {
+                            return self
+                                .tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
+                                    chars,
+                                    '\'',
+                                    false,
+                                    Token::SingleQuotedByteStringLiteral,
+                                    Token::TripleSingleQuotedByteStringLiteral,
+                                );
+                        }
+                        let s = self.tokenize_single_quoted_string(chars, '\'', false)?;
                         Ok(Some(Token::SingleQuotedByteStringLiteral(s)))
                     }
                     Some('\"') => {
-                        let s = self.tokenize_quoted_string(chars, '\"', false)?;
+                        if self.dialect.supports_triple_quoted_string() {
+                            return self
+                                .tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
+                                    chars,
+                                    '"',
+                                    false,
+                                    Token::DoubleQuotedByteStringLiteral,
+                                    Token::TripleDoubleQuotedByteStringLiteral,
+                                );
+                        }
+                        let s = self.tokenize_single_quoted_string(chars, '\"', false)?;
                         Ok(Some(Token::DoubleQuotedByteStringLiteral(s)))
                     }
                     _ => {
@@ -657,14 +733,22 @@ impl<'a> Tokenizer<'a> {
             b @ 'R' | b @ 'r' if dialect_of!(self is BigQueryDialect | GenericDialect) => {
                 chars.next(); // consume
                 match chars.peek() {
-                    Some('\'') => {
-                        let s = self.tokenize_quoted_string(chars, '\'', false)?;
-                        Ok(Some(Token::RawStringLiteral(s)))
-                    }
-                    Some('\"') => {
-                        let s = self.tokenize_quoted_string(chars, '\"', false)?;
-                        Ok(Some(Token::RawStringLiteral(s)))
-                    }
+                    Some('\'') => self
+                        .tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
+                            chars,
+                            '\'',
+                            false,
+                            Token::SingleQuotedRawStringLiteral,
+                            Token::TripleSingleQuotedRawStringLiteral,
+                        ),
+                    Some('\"') => self
+                        .tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
+                            chars,
+                            '"',
+                            false,
+                            Token::DoubleQuotedRawStringLiteral,
+                            Token::TripleDoubleQuotedRawStringLiteral,
+                        ),
                     _ => {
                         // regular identifier starting with an "r" or "R"
                         let s = self.tokenize_word(b, chars);
@@ -678,7 +762,7 @@ impl<'a> Tokenizer<'a> {
                 match chars.peek() {
                     Some('\'') => {
                         // N'...' - a <national character string literal>
-                        let s = self.tokenize_quoted_string(chars, '\'', true)?;
+                        let s = self.tokenize_single_quoted_string(chars, '\'', true)?;
                         Ok(Some(Token::NationalStringLiteral(s)))
                     }
                     _ => {
@@ -712,7 +796,7 @@ impl<'a> Tokenizer<'a> {
                 match chars.peek() {
                     Some('\'') => {
                         // X'...' - a <binary string literal>
-                        let s = self.tokenize_quoted_string(chars, '\'', true)?;
+                        let s = self.tokenize_single_quoted_string(chars, '\'', true)?;
                         Ok(Some(Token::HexStringLiteral(s)))
                     }
                     _ => {
@@ -724,7 +808,17 @@ impl<'a> Tokenizer<'a> {
             }
             // single quoted string
             '\'' => {
-                let s = self.tokenize_quoted_string(
+                if self.dialect.supports_triple_quoted_string() {
+                    return self
+                        .tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
+                            chars,
+                            '\'',
+                            self.dialect.supports_string_literal_backslash_escape(),
+                            Token::SingleQuotedString,
+                            Token::TripleSingleQuotedString,
+                        );
+                }
+                let s = self.tokenize_single_quoted_string(
                     chars,
                     '\'',
                     self.dialect.supports_string_literal_backslash_escape(),
@@ -736,7 +830,17 @@ impl<'a> Tokenizer<'a> {
             '\"' if !self.dialect.is_delimited_identifier_start(ch)
                 && !self.dialect.is_identifier_start(ch) =>
             {
-                let s = self.tokenize_quoted_string(
+                if self.dialect.supports_triple_quoted_string() {
+                    return self
+                        .tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
+                            chars,
+                            '"',
+                            self.dialect.supports_string_literal_backslash_escape(),
+                            Token::DoubleQuotedString,
+                            Token::TripleDoubleQuotedString,
+                        );
+                }
+                let s = self.tokenize_single_quoted_string(
                     chars,
                     '"',
                     self.dialect.supports_string_literal_backslash_escape(),
@@ -1246,23 +1350,128 @@ impl<'a> Tokenizer<'a> {
         self.tokenizer_error(starting_loc, "Unterminated encoded string literal")
     }
 
-    /// Read a single quoted string, starting with the opening quote.
-    fn tokenize_quoted_string(
+    /// Reads a string literal quoted by a single or triple quote characters.
+    /// Examples: `'abc'`, `'''abc'''`, `"""abc"""`.
+    fn tokenize_single_or_triple_quoted_string<F>(
         &self,
         chars: &mut State,
         quote_style: char,
-        allow_escape: bool,
+        backslash_escape: bool,
+        single_quote_token: F,
+        triple_quote_token: F,
+    ) -> Result<Option<Token>, TokenizerError>
+    where
+        F: Fn(String) -> Token,
+    {
+        let error_loc = chars.location();
+
+        let mut num_opening_quotes = 0u8;
+        for _ in 0..3 {
+            if Some(&quote_style) == chars.peek() {
+                chars.next(); // Consume quote.
+                num_opening_quotes += 1;
+            } else {
+                break;
+            }
+        }
+
+        let (token_fn, num_quote_chars) = match num_opening_quotes {
+            1 => (single_quote_token, NumStringQuoteChars::One),
+            2 => {
+                // If we matched double quotes, then this is an empty string.
+                return Ok(Some(single_quote_token("".into())));
+            }
+            3 => {
+                let Some(num_quote_chars) = NonZeroU8::new(3) else {
+                    return self.tokenizer_error(error_loc, "invalid number of opening quotes");
+                };
+                (
+                    triple_quote_token,
+                    NumStringQuoteChars::Many(num_quote_chars),
+                )
+            }
+            _ => {
+                return self.tokenizer_error(error_loc, "invalid string literal opening");
+            }
+        };
+
+        let settings = TokenizeQuotedStringSettings {
+            quote_style,
+            num_quote_chars,
+            num_opening_quotes_to_consume: 0,
+            backslash_escape,
+        };
+
+        self.tokenize_quoted_string(chars, settings)
+            .map(token_fn)
+            .map(Some)
+    }
+
+    /// Reads a string literal quoted by a single quote character.
+    fn tokenize_single_quoted_string(
+        &self,
+        chars: &mut State,
+        quote_style: char,
+        backslash_escape: bool,
+    ) -> Result<String, TokenizerError> {
+        self.tokenize_quoted_string(
+            chars,
+            TokenizeQuotedStringSettings {
+                quote_style,
+                num_quote_chars: NumStringQuoteChars::One,
+                num_opening_quotes_to_consume: 1,
+                backslash_escape,
+            },
+        )
+    }
+
+    /// Read a quoted string.
+    fn tokenize_quoted_string(
+        &self,
+        chars: &mut State,
+        settings: TokenizeQuotedStringSettings,
     ) -> Result<String, TokenizerError> {
         let mut s = String::new();
         let error_loc = chars.location();
 
-        chars.next(); // consume the opening quote
+        // Consume any opening quotes.
+        for _ in 0..settings.num_opening_quotes_to_consume {
+            if Some(settings.quote_style) != chars.next() {
+                return self.tokenizer_error(error_loc, "invalid string literal opening");
+            }
+        }
+
+        let mut num_consecutive_quotes = 0;
         while let Some(&ch) = chars.peek() {
+            let pending_final_quote = match settings.num_quote_chars {
+                NumStringQuoteChars::One => Some(NumStringQuoteChars::One),
+                n @ NumStringQuoteChars::Many(count)
+                    if num_consecutive_quotes + 1 == count.get() =>
+                {
+                    Some(n)
+                }
+                NumStringQuoteChars::Many(_) => None,
+            };
+
             match ch {
-                char if char == quote_style => {
+                char if char == settings.quote_style && pending_final_quote.is_some() => {
                     chars.next(); // consume
-                    if chars.peek().map(|c| *c == quote_style).unwrap_or(false) {
+
+                    if let Some(NumStringQuoteChars::Many(count)) = pending_final_quote {
+                        // For an initial string like `"""abc"""`, at this point we have
+                        // `abc""` in the buffer and have now matched the final `"`.
+                        // However, the string to return is simply `abc`, so we strip off
+                        // the trailing quotes before returning.
+                        let mut buf = s.chars();
+                        for _ in 1..count.get() {
+                            buf.next_back();
+                        }
+                        return Ok(buf.as_str().to_string());
+                    } else if chars
+                        .peek()
+                        .map(|c| *c == settings.quote_style)
+                        .unwrap_or(false)
+                    {
                         s.push(ch);
                         if !self.unescape {
                             // In no-escape mode, the given query has to be saved completely
@@ -1273,10 +1482,12 @@ impl<'a> Tokenizer<'a> {
                         return Ok(s);
                     }
                 }
-                '\\' if allow_escape => {
+                '\\' if settings.backslash_escape => {
                     // consume backslash
                     chars.next();
 
+                    num_consecutive_quotes = 0;
+
                     if let Some(next) = chars.peek() {
                         if !self.unescape {
                             // In no-escape mode, the given query has to be saved completely including backslashes.
@@ -1300,8 +1511,15 @@ impl<'a> Tokenizer<'a> {
                         }
                     }
                 }
-                _ => {
-                    chars.next(); // consume
+                ch => {
+                    chars.next(); // consume ch
+
+                    if ch == settings.quote_style {
+                        num_consecutive_quotes += 1;
+                    } else {
+                        num_consecutive_quotes = 0;
+                    }
+
                     s.push(ch);
                 }
             }
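Not part of the diff: the rewritten `tokenize_quoted_string` counts consecutive quote characters and, for triple quoted literals, only treats a quote as a terminator when it is the third in a row. By that point the buffer already holds the two preceding quotes, so they are stripped before returning, as the inline comment in the hunk above explains. A tiny standalone illustration of that strip step, using a hypothetical helper:

/// Drop the trailing quote characters that were buffered before the final
/// closing quote of a triple-quoted literal was recognized.
fn strip_buffered_quotes(buf: &str, num_quote_chars: u8) -> String {
    let mut chars = buf.chars();
    for _ in 1..num_quote_chars {
        chars.next_back();
    }
    chars.as_str().to_string()
}

fn main() {
    // For '''abc''' the loop has consumed `abc''` plus the final quote.
    assert_eq!(strip_buffered_quotes("abc''", 3), "abc");
    // Single-quoted literals (num_quote_chars == 1) are left untouched.
    assert_eq!(strip_buffered_quotes("abc", 1), "abc");
}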
@@ -2466,6 +2684,7 @@ mod tests {
 
     #[test]
     fn tokenize_quoted_string_escape() {
+        let dialect = SnowflakeDialect {};
         for (sql, expected, expected_unescaped) in [
             (r#"'%a\'%b'"#, r#"%a\'%b"#, r#"%a'%b"#),
             (r#"'a\'\'b\'c\'d'"#, r#"a\'\'b\'c\'d"#, r#"a''b'c'd"#),
@@ -2480,8 +2699,6 @@ mod tests {
             (r#"'\'abcd'"#, r#"\'abcd"#, r#"'abcd"#),
             (r#"'''a''b'"#, r#"''a''b"#, r#"'a'b"#),
         ] {
-            let dialect = BigQueryDialect {};
-
             let tokens = Tokenizer::new(&dialect, sql)
                 .with_unescape(false)
                 .tokenize()
@@ -2498,7 +2715,6 @@ mod tests {
         }
 
        for sql in [r#"'\'"#, r#"'ab\'"#] {
-            let dialect = BigQueryDialect {};
             let mut tokenizer = Tokenizer::new(&dialect, sql);
             assert_eq!(
                 "Unterminated string literal",
@@ -2516,4 +2732,124 @@ mod tests {
             compare(expected, tokens);
         }
     }
+
+    #[test]
+    fn tokenize_triple_quoted_string() {
+        fn check<F>(
+            q: char, // The quote character to test
+            r: char, // An alternate quote character.
+            quote_token: F,
+        ) where
+            F: Fn(String) -> Token,
+        {
+            let dialect = BigQueryDialect {};
+
+            for (sql, expected, expected_unescaped) in [
+                // Empty string
+                (format!(r#"{q}{q}{q}{q}{q}{q}"#), "".into(), "".into()),
+                // Should not count escaped quote as end of string.
+                (
+                    format!(r#"{q}{q}{q}ab{q}{q}\{q}{q}cd{q}{q}{q}"#),
+                    format!(r#"ab{q}{q}\{q}{q}cd"#),
+                    format!(r#"ab{q}{q}{q}{q}cd"#),
+                ),
+                // Simple string
+                (
+                    format!(r#"{q}{q}{q}abc{q}{q}{q}"#),
+                    "abc".into(),
+                    "abc".into(),
+                ),
+                // Mix single-double quotes unescaped.
+                (
+                    format!(r#"{q}{q}{q}ab{r}{r}{r}c{r}def{r}{r}{r}{q}{q}{q}"#),
+                    format!("ab{r}{r}{r}c{r}def{r}{r}{r}"),
+                    format!("ab{r}{r}{r}c{r}def{r}{r}{r}"),
+                ),
+                // Escaped quote.
+                (
+                    format!(r#"{q}{q}{q}ab{q}{q}c{q}{q}\{q}de{q}{q}f{q}{q}{q}"#),
+                    format!(r#"ab{q}{q}c{q}{q}\{q}de{q}{q}f"#),
+                    format!(r#"ab{q}{q}c{q}{q}{q}de{q}{q}f"#),
+                ),
+                // backslash-escaped quote characters.
+                (
+                    format!(r#"{q}{q}{q}a\'\'b\'c\'d{q}{q}{q}"#),
+                    r#"a\'\'b\'c\'d"#.into(),
+                    r#"a''b'c'd"#.into(),
+                ),
+                // backslash-escaped characters
+                (
+                    format!(r#"{q}{q}{q}abc\0\n\rdef{q}{q}{q}"#),
+                    r#"abc\0\n\rdef"#.into(),
+                    "abc\0\n\rdef".into(),
+                ),
+            ] {
+                let tokens = Tokenizer::new(&dialect, sql.as_str())
+                    .with_unescape(false)
+                    .tokenize()
+                    .unwrap();
+                let expected = vec![quote_token(expected.to_string())];
+                compare(expected, tokens);
+
+                let tokens = Tokenizer::new(&dialect, sql.as_str())
+                    .with_unescape(true)
+                    .tokenize()
+                    .unwrap();
+                let expected = vec![quote_token(expected_unescaped.to_string())];
+                compare(expected, tokens);
+            }
+
+            for sql in [
+                format!(r#"{q}{q}{q}{q}{q}\{q}"#),
+                format!(r#"{q}{q}{q}abc{q}{q}\{q}"#),
+                format!(r#"{q}{q}{q}{q}"#),
+                format!(r#"{q}{q}{q}{r}{r}"#),
+                format!(r#"{q}{q}{q}abc{q}"#),
+                format!(r#"{q}{q}{q}abc{q}{q}"#),
+                format!(r#"{q}{q}{q}abc"#),
+            ] {
+                let dialect = BigQueryDialect {};
+                let mut tokenizer = Tokenizer::new(&dialect, sql.as_str());
+                assert_eq!(
+                    "Unterminated string literal",
+                    tokenizer.tokenize().unwrap_err().message.as_str(),
+                );
+            }
+        }
+
+        check('"', '\'', Token::TripleDoubleQuotedString);
+        check('\'', '"', Token::TripleSingleQuotedString);
+
+        let dialect = BigQueryDialect {};
+
+        let sql = r#"""''"#;
+        let tokens = Tokenizer::new(&dialect, sql)
+            .with_unescape(true)
+            .tokenize()
+            .unwrap();
+        let expected = vec![
+            Token::DoubleQuotedString("".to_string()),
+            Token::SingleQuotedString("".to_string()),
+        ];
+        compare(expected, tokens);
+
+        let sql = r#"''"""#;
+        let tokens = Tokenizer::new(&dialect, sql)
+            .with_unescape(true)
+            .tokenize()
+            .unwrap();
+        let expected = vec![
+            Token::SingleQuotedString("".to_string()),
+            Token::DoubleQuotedString("".to_string()),
+        ];
+        compare(expected, tokens);
+
+        // Non-triple quoted string dialect
+        let dialect = SnowflakeDialect {};
+        let sql = r#"''''''"#;
+        let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
+        let expected = vec![Token::SingleQuotedString("''".to_string())];
+        compare(expected, tokens);
+    }
 }
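Not part of the diff: the test above exercises both tokenizer modes. With `with_unescape(false)` the literal body is kept byte-for-byte; with `with_unescape(true)` (the default, as far as I can tell) backslash escapes are resolved. A shortened sketch mirroring one of the table entries:

use sqlparser::dialect::BigQueryDialect;
use sqlparser::tokenizer::{Token, Tokenizer};

fn main() {
    let dialect = BigQueryDialect {};
    let sql = r#"'''a\'b'''"#;

    // unescape(false): the backslash escape is kept verbatim.
    let raw = Tokenizer::new(&dialect, sql)
        .with_unescape(false)
        .tokenize()
        .unwrap();
    assert_eq!(raw, vec![Token::TripleSingleQuotedString(r#"a\'b"#.into())]);

    // unescape(true): the escape sequence is resolved to a plain quote.
    let unescaped = Tokenizer::new(&dialect, sql)
        .with_unescape(true)
        .tokenize()
        .unwrap();
    assert_eq!(unescaped, vec![Token::TripleSingleQuotedString("a'b".into())]);
}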
tests/sqlparser_bigquery.rs

@@ -18,14 +18,30 @@ use std::ops::Deref;
 
 use sqlparser::ast::*;
 use sqlparser::dialect::{BigQueryDialect, GenericDialect};
-use sqlparser::parser::ParserError;
+use sqlparser::parser::{ParserError, ParserOptions};
 use test_utils::*;
 
 #[test]
 fn parse_literal_string() {
-    let sql = r#"SELECT 'single', "double""#;
-    let select = bigquery().verified_only_select(sql);
-    assert_eq!(2, select.projection.len());
+    let sql = concat!(
+        "SELECT ",
+        "'single', ",
+        r#""double", "#,
+        "'''triple-single''', ",
+        r#""""triple-double""", "#,
+        r#"'single\'escaped', "#,
+        r#"'''triple-single\'escaped''', "#,
+        r#"'''triple-single'unescaped''', "#,
+        r#""double\"escaped", "#,
+        r#""""triple-double\"escaped""", "#,
+        r#""""triple-double"unescaped""""#,
+    );
+    let dialect = TestedDialects {
+        dialects: vec![Box::new(BigQueryDialect {})],
+        options: Some(ParserOptions::new().with_unescape(false)),
+    };
+    let select = dialect.verified_only_select(sql);
+    assert_eq!(10, select.projection.len());
     assert_eq!(
         &Expr::Value(Value::SingleQuotedString("single".to_string())),
         expr_from_projection(&select.projection[0])
@@ -34,56 +50,162 @@ fn parse_literal_string() {
         &Expr::Value(Value::DoubleQuotedString("double".to_string())),
         expr_from_projection(&select.projection[1])
     );
+    assert_eq!(
+        &Expr::Value(Value::TripleSingleQuotedString("triple-single".to_string())),
+        expr_from_projection(&select.projection[2])
+    );
+    assert_eq!(
+        &Expr::Value(Value::TripleDoubleQuotedString("triple-double".to_string())),
+        expr_from_projection(&select.projection[3])
+    );
+    assert_eq!(
+        &Expr::Value(Value::SingleQuotedString(r#"single\'escaped"#.to_string())),
+        expr_from_projection(&select.projection[4])
+    );
+    assert_eq!(
+        &Expr::Value(Value::TripleSingleQuotedString(
+            r#"triple-single\'escaped"#.to_string()
+        )),
+        expr_from_projection(&select.projection[5])
+    );
+    assert_eq!(
+        &Expr::Value(Value::TripleSingleQuotedString(
+            r#"triple-single'unescaped"#.to_string()
+        )),
+        expr_from_projection(&select.projection[6])
+    );
+    assert_eq!(
+        &Expr::Value(Value::DoubleQuotedString(r#"double\"escaped"#.to_string())),
+        expr_from_projection(&select.projection[7])
+    );
+    assert_eq!(
+        &Expr::Value(Value::TripleDoubleQuotedString(
+            r#"triple-double\"escaped"#.to_string()
+        )),
+        expr_from_projection(&select.projection[8])
+    );
+    assert_eq!(
+        &Expr::Value(Value::TripleDoubleQuotedString(
+            r#"triple-double"unescaped"#.to_string()
+        )),
+        expr_from_projection(&select.projection[9])
+    );
 }
 
 #[test]
 fn parse_byte_literal() {
-    let sql = r#"SELECT B'abc', B"abc""#;
-    let select = bigquery().verified_only_select(sql);
-    assert_eq!(2, select.projection.len());
-    assert_eq!(
-        &Expr::Value(Value::SingleQuotedByteStringLiteral("abc".to_string())),
-        expr_from_projection(&select.projection[0])
-    );
-    assert_eq!(
-        &Expr::Value(Value::DoubleQuotedByteStringLiteral("abc".to_string())),
-        expr_from_projection(&select.projection[1])
-    );
+    let sql = concat!(
+        "SELECT ",
+        "B'abc', ",
+        r#"B"abc", "#,
+        r#"B'f\(abc,(.*),def\)', "#,
+        r#"B"f\(abc,(.*),def\)", "#,
+        r#"B'''abc''', "#,
+        r#"B"""abc""""#,
+    );
+    let stmt = bigquery().verified_stmt(sql);
+    if let Statement::Query(query) = stmt {
+        if let SetExpr::Select(select) = *query.body {
+            assert_eq!(6, select.projection.len());
+            assert_eq!(
+                &Expr::Value(Value::SingleQuotedByteStringLiteral("abc".to_string())),
+                expr_from_projection(&select.projection[0])
+            );
+            assert_eq!(
+                &Expr::Value(Value::DoubleQuotedByteStringLiteral("abc".to_string())),
+                expr_from_projection(&select.projection[1])
+            );
+            assert_eq!(
+                &Expr::Value(Value::SingleQuotedByteStringLiteral(
+                    r"f\(abc,(.*),def\)".to_string()
+                )),
+                expr_from_projection(&select.projection[2])
+            );
+            assert_eq!(
+                &Expr::Value(Value::DoubleQuotedByteStringLiteral(
+                    r"f\(abc,(.*),def\)".to_string()
+                )),
+                expr_from_projection(&select.projection[3])
+            );
+            assert_eq!(
+                &Expr::Value(Value::TripleSingleQuotedByteStringLiteral(
+                    r"abc".to_string()
+                )),
+                expr_from_projection(&select.projection[4])
+            );
+            assert_eq!(
+                &Expr::Value(Value::TripleDoubleQuotedByteStringLiteral(
+                    r"abc".to_string()
+                )),
+                expr_from_projection(&select.projection[5])
+            );
+        }
+    } else {
+        panic!("invalid query");
+    }
 
-    let sql = r#"SELECT b'abc', b"abc""#;
-    bigquery().one_statement_parses_to(sql, r#"SELECT B'abc', B"abc""#);
+    bigquery().one_statement_parses_to(
+        r#"SELECT b'123', b"123", b'''123''', b"""123""""#,
+        r#"SELECT B'123', B"123", B'''123''', B"""123""""#,
+    );
 }
 
 #[test]
 fn parse_raw_literal() {
-    let sql = r#"SELECT R'abc', R"abc", R'f\(abc,(.*),def\)', R"f\(abc,(.*),def\)""#;
-    let stmt = bigquery().one_statement_parses_to(
-        sql,
-        r"SELECT R'abc', R'abc', R'f\(abc,(.*),def\)', R'f\(abc,(.*),def\)'",
+    let sql = concat!(
+        "SELECT ",
+        "R'abc', ",
+        r#"R"abc", "#,
+        r#"R'f\(abc,(.*),def\)', "#,
+        r#"R"f\(abc,(.*),def\)", "#,
+        r#"R'''abc''', "#,
+        r#"R"""abc""""#,
     );
+    let stmt = bigquery().verified_stmt(sql);
     if let Statement::Query(query) = stmt {
         if let SetExpr::Select(select) = *query.body {
-            assert_eq!(4, select.projection.len());
+            assert_eq!(6, select.projection.len());
             assert_eq!(
-                &Expr::Value(Value::RawStringLiteral("abc".to_string())),
+                &Expr::Value(Value::SingleQuotedRawStringLiteral("abc".to_string())),
                 expr_from_projection(&select.projection[0])
             );
             assert_eq!(
-                &Expr::Value(Value::RawStringLiteral("abc".to_string())),
+                &Expr::Value(Value::DoubleQuotedRawStringLiteral("abc".to_string())),
                 expr_from_projection(&select.projection[1])
             );
             assert_eq!(
-                &Expr::Value(Value::RawStringLiteral(r"f\(abc,(.*),def\)".to_string())),
+                &Expr::Value(Value::SingleQuotedRawStringLiteral(
+                    r"f\(abc,(.*),def\)".to_string()
+                )),
                 expr_from_projection(&select.projection[2])
             );
             assert_eq!(
-                &Expr::Value(Value::RawStringLiteral(r"f\(abc,(.*),def\)".to_string())),
+                &Expr::Value(Value::DoubleQuotedRawStringLiteral(
+                    r"f\(abc,(.*),def\)".to_string()
+                )),
                 expr_from_projection(&select.projection[3])
             );
-            return;
+            assert_eq!(
+                &Expr::Value(Value::TripleSingleQuotedRawStringLiteral(
+                    r"abc".to_string()
+                )),
+                expr_from_projection(&select.projection[4])
+            );
+            assert_eq!(
+                &Expr::Value(Value::TripleDoubleQuotedRawStringLiteral(
+                    r"abc".to_string()
+                )),
+                expr_from_projection(&select.projection[5])
+            );
         }
+    } else {
+        panic!("invalid query");
     }
-    panic!("invalid query")
+
+    bigquery().one_statement_parses_to(
+        r#"SELECT r'123', r"123", r'''123''', r"""123""""#,
+        r#"SELECT R'123', R"123", R'''123''', R"""123""""#,
+    );
 }
 
 #[test]