Mirror of https://github.com/apache/datafusion-sqlparser-rs.git
feat: mysql no-escape mode (#870)
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
Commit f98a2f9dca (parent eb288487a6)
7 changed files with 485 additions and 121 deletions
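
The upshot of this change: with `unescape = false`, string literals and quoted identifiers are kept exactly as written, so a parsed statement serializes back to its original text. A minimal end-to-end sketch of the API added in this commit:

    use sqlparser::dialect::MySqlDialect;
    use sqlparser::parser::{Parser, ParserOptions};

    fn main() {
        let dialect = MySqlDialect {};
        // In no-escape mode the tokenizer keeps `\'` as the two
        // characters backslash + quote instead of decoding them.
        let sql = r#"SELECT 'I\'m fine'"#;
        let ast = Parser::new(&dialect)
            .with_options(ParserOptions::new().with_unescape(false))
            .try_with_sql(sql)
            .unwrap()
            .parse_statements()
            .unwrap();
        // Serializing the AST reproduces the original query text.
        assert_eq!(ast[0].to_string(), sql);
    }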

@@ -71,7 +71,7 @@ impl fmt::Display for Value {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self {
             Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
-            Value::DoubleQuotedString(v) => write!(f, "\"{v}\""),
+            Value::DoubleQuotedString(v) => write!(f, "\"{}\"", escape_double_quote_string(v)),
             Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
             Value::DollarQuotedString(v) => write!(f, "{v}"),
             Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
@@ -187,12 +187,49 @@ pub struct EscapeQuotedString<'a> {
 
 impl<'a> fmt::Display for EscapeQuotedString<'a> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        for c in self.string.chars() {
-            if c == self.quote {
-                write!(f, "{q}{q}", q = self.quote)?;
-            } else {
-                write!(f, "{c}")?;
+        // EscapeQuotedString doesn't know which mode of escape was
+        // chosen by the user. So this code must correctly display
+        // strings without knowing if the strings are already escaped
+        // or not.
+        //
+        // If the quote symbol in the string is repeated twice, OR, if
+        // the quote symbol is after backslash, display all the chars
+        // without any escape. However, if the quote symbol is used
+        // just between usual chars, `fmt()` should display it twice.
+        //
+        // The following table has examples
+        //
+        // | original query | mode      | AST Node                                          | serialized   |
+        // | -------------- | --------- | ------------------------------------------------- | ------------ |
+        // | `"A""B""A"`    | no-escape | `DoubleQuotedString(String::from("A\"\"B\"\"A"))` | `"A""B""A"`  |
+        // | `"A""B""A"`    | default   | `DoubleQuotedString(String::from("A\"B\"A"))`     | `"A""B""A"`  |
+        // | `"A\"B\"A"`    | no-escape | `DoubleQuotedString(String::from("A\\\"B\\\"A"))` | `"A\"B\"A"`  |
+        // | `"A\"B\"A"`    | default   | `DoubleQuotedString(String::from("A\"B\"A"))`     | `"A""B""A"`  |
+        let quote = self.quote;
+        let mut previous_char = char::default();
+        let mut peekable_chars = self.string.chars().peekable();
+        while let Some(&ch) = peekable_chars.peek() {
+            match ch {
+                char if char == quote => {
+                    if previous_char == '\\' {
+                        write!(f, "{char}")?;
+                        peekable_chars.next();
+                        continue;
+                    }
+                    peekable_chars.next();
+                    if peekable_chars.peek().map(|c| *c == quote).unwrap_or(false) {
+                        write!(f, "{char}{char}")?;
+                        peekable_chars.next();
+                    } else {
+                        write!(f, "{char}{char}")?;
+                    }
+                }
+                _ => {
+                    write!(f, "{ch}")?;
+                    peekable_chars.next();
+                }
             }
+            previous_char = ch;
         }
         Ok(())
     }
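
In both modes the serializer doubles a bare quote and passes through quotes that are already doubled or backslash-escaped, which is why the first two rows of the table above serialize identically. A quick illustration (a sketch, not part of the diff):

    use sqlparser::ast::Value;

    fn main() {
        // No-escape mode stored the doubled quotes verbatim.
        let already_escaped = Value::DoubleQuotedString(String::from("A\"\"B\"\"A"));
        assert_eq!(already_escaped.to_string(), "\"A\"\"B\"\"A\"");
        // Default mode stored the unescaped string; quotes double on output.
        let unescaped = Value::DoubleQuotedString(String::from("A\"B\"A"));
        assert_eq!(unescaped.to_string(), "\"A\"\"B\"\"A\"");
    }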
@@ -206,6 +243,10 @@ pub fn escape_single_quote_string(s: &str) -> EscapeQuotedString<'_> {
     escape_quoted_string(s, '\'')
 }
 
+pub fn escape_double_quote_string(s: &str) -> EscapeQuotedString<'_> {
+    escape_quoted_string(s, '\"')
+}
+
 pub struct EscapeEscapedStringLiteral<'a>(&'a str);
 
 impl<'a> fmt::Display for EscapeEscapedStringLiteral<'a> {
@@ -632,8 +632,7 @@ mod tests {
 
     fn do_visit(sql: &str) -> Vec<String> {
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
         let s = Parser::new(&dialect)
             .with_tokens(tokens)
             .parse_statement()
@@ -195,9 +195,52 @@ impl std::error::Error for ParserError {}
 // By default, allow expressions up to this deep before erroring
 const DEFAULT_REMAINING_DEPTH: usize = 50;
 
-#[derive(Debug, Default, Clone, PartialEq, Eq)]
+/// Options that control how the [`Parser`] parses SQL text
+#[derive(Debug, Clone, PartialEq, Eq)]
 pub struct ParserOptions {
     pub trailing_commas: bool,
+    /// Controls how literal values are unescaped. See
+    /// [`Tokenizer::with_unescape`] for more details.
+    pub unescape: bool,
+}
+
+impl Default for ParserOptions {
+    fn default() -> Self {
+        Self {
+            trailing_commas: false,
+            unescape: true,
+        }
+    }
+}
+
+impl ParserOptions {
+    /// Create a new [`ParserOptions`]
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    /// Set if trailing commas are allowed.
+    ///
+    /// If this option is `false` (the default), the following SQL will
+    /// not parse. If the option is `true`, the SQL will parse.
+    ///
+    /// ```sql
+    ///  SELECT
+    ///    foo,
+    ///    bar,
+    ///  FROM baz
+    /// ```
+    pub fn with_trailing_commas(mut self, trailing_commas: bool) -> Self {
+        self.trailing_commas = trailing_commas;
+        self
+    }
+
+    /// Set if literal values are unescaped. Defaults to true. See
+    /// [`Tokenizer::with_unescape`] for more details.
+    pub fn with_unescape(mut self, unescape: bool) -> Self {
+        self.unescape = unescape;
+        self
+    }
 }
 
 pub struct Parser<'a> {
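
ParserOptions drops `#[derive(Default)]` in favor of a hand-written impl so that `unescape` can default to true. A sketch of how the builder composes (fields as shown in the hunk above):

    use sqlparser::parser::ParserOptions;

    fn main() {
        // new() delegates to Default: trailing_commas = false, unescape = true.
        assert_eq!(ParserOptions::new(), ParserOptions::default());
        let opts = ParserOptions::new()
            .with_trailing_commas(true)
            .with_unescape(false);
        assert!(opts.trailing_commas);
        assert!(!opts.unescape);
    }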
@@ -206,8 +249,9 @@ pub struct Parser<'a> {
     index: usize,
     /// The current dialect to use
     dialect: &'a dyn Dialect,
-    /// Additional options that allow you to mix & match behavior otherwise
-    /// constrained to certain dialects (e.g. trailing commas)
+    /// Additional options that allow you to mix & match behavior
+    /// otherwise constrained to certain dialects (e.g. trailing
+    /// commas) and/or format of parse (e.g. unescaping)
     options: ParserOptions,
     /// ensure the stack does not overflow by limiting recursion depth
     recursion_counter: RecursionCounter,
@@ -267,17 +311,20 @@ impl<'a> Parser<'a> {
     /// Specify additional parser options
     ///
     ///
-    /// [`Parser`] supports additional options ([`ParserOptions`]) that allow you to
-    /// mix & match behavior otherwise constrained to certain dialects (e.g. trailing
-    /// commas).
+    /// [`Parser`] supports additional options ([`ParserOptions`])
+    /// that allow you to mix & match behavior otherwise constrained
+    /// to certain dialects (e.g. trailing commas).
     ///
     /// Example:
     /// ```
     /// # use sqlparser::{parser::{Parser, ParserError, ParserOptions}, dialect::GenericDialect};
     /// # fn main() -> Result<(), ParserError> {
     /// let dialect = GenericDialect{};
+    /// let options = ParserOptions::new()
+    ///     .with_trailing_commas(true)
+    ///     .with_unescape(false);
     /// let result = Parser::new(&dialect)
-    ///   .with_options(ParserOptions { trailing_commas: true })
+    ///   .with_options(options)
     ///   .try_with_sql("SELECT a, b, COUNT(*), FROM foo GROUP BY a, b,")?
     ///   .parse_statements();
     /// assert!(matches!(result, Ok(_)));
@@ -317,8 +364,9 @@ impl<'a> Parser<'a> {
     /// See example on [`Parser::new()`] for an example
     pub fn try_with_sql(self, sql: &str) -> Result<Self, ParserError> {
         debug!("Parsing sql '{}'...", sql);
-        let mut tokenizer = Tokenizer::new(self.dialect, sql);
-        let tokens = tokenizer.tokenize()?;
+        let tokens = Tokenizer::new(self.dialect, sql)
+            .with_unescape(self.options.unescape)
+            .tokenize()?;
         Ok(self.with_tokens(tokens))
     }
@@ -3654,7 +3702,7 @@ impl<'a> Parser<'a> {
             self.expect_token(&Token::RParen)?;
             Ok(Some(ColumnOption::Check(expr)))
         } else if self.parse_keyword(Keyword::AUTO_INCREMENT)
             && dialect_of!(self is MySqlDialect | GenericDialect)
         {
             // Support AUTO_INCREMENT for MySQL
             Ok(Some(ColumnOption::DialectSpecific(vec![

src/tokenizer.rs (245 changed lines)
@@ -455,12 +455,69 @@ impl<'a> State<'a> {
 pub struct Tokenizer<'a> {
     dialect: &'a dyn Dialect,
     query: &'a str,
+    /// If true (the default), the tokenizer will un-escape literal
+    /// SQL strings. See [`Tokenizer::with_unescape`] for more details.
+    unescape: bool,
 }
 
 impl<'a> Tokenizer<'a> {
     /// Create a new SQL tokenizer for the specified SQL statement
+    ///
+    /// ```
+    /// # use sqlparser::tokenizer::{Token, Whitespace, Tokenizer};
+    /// # use sqlparser::dialect::GenericDialect;
+    /// # let dialect = GenericDialect{};
+    /// let query = r#"SELECT 'foo'"#;
+    ///
+    /// // Parsing the query
+    /// let tokens = Tokenizer::new(&dialect, &query).tokenize().unwrap();
+    ///
+    /// assert_eq!(tokens, vec![
+    ///   Token::make_word("SELECT", None),
+    ///   Token::Whitespace(Whitespace::Space),
+    ///   Token::SingleQuotedString("foo".to_string()),
+    /// ]);
+    /// ```
     pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self {
-        Self { dialect, query }
+        Self {
+            dialect,
+            query,
+            unescape: true,
+        }
+    }
+
+    /// Set unescape mode
+    ///
+    /// When true (default) the tokenizer unescapes literal values
+    /// (for example, `""` in SQL is unescaped to the literal `"`).
+    ///
+    /// When false, the tokenizer provides the raw strings as provided
+    /// in the query. This can be helpful for programs that wish to
+    /// recover the *exact* original query text without normalizing
+    /// the escaping.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use sqlparser::tokenizer::{Token, Tokenizer};
+    /// # use sqlparser::dialect::GenericDialect;
+    /// # let dialect = GenericDialect{};
+    /// let query = r#""Foo "" Bar""#;
+    /// let unescaped = Token::make_word(r#"Foo " Bar"#, Some('"'));
+    /// let original = Token::make_word(r#"Foo "" Bar"#, Some('"'));
+    ///
+    /// // Parsing with unescaping (default)
+    /// let tokens = Tokenizer::new(&dialect, &query).tokenize().unwrap();
+    /// assert_eq!(tokens, vec![unescaped]);
+    ///
+    /// // Parsing with unescape = false
+    /// let tokens = Tokenizer::new(&dialect, &query)
+    ///     .with_unescape(false)
+    ///     .tokenize().unwrap();
+    /// assert_eq!(tokens, vec![original]);
+    /// ```
+    pub fn with_unescape(mut self, unescape: bool) -> Self {
+        self.unescape = unescape;
+        self
     }
 
     /// Tokenize the statement and produce a vector of tokens
@@ -650,7 +707,7 @@ impl<'a> Tokenizer<'a> {
             let error_loc = chars.location();
             chars.next(); // consume the opening quote
             let quote_end = Word::matching_end_quote(quote_start);
-            let (s, last_char) = parse_quoted_ident(chars, quote_end);
+            let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
 
             if last_char == Some(quote_end) {
                 Ok(Some(Token::make_word(&s, Some(quote_start))))
@@ -1168,6 +1225,10 @@ impl<'a> Tokenizer<'a> {
                         chars.next(); // consume
                         if chars.peek().map(|c| *c == quote_style).unwrap_or(false) {
                             s.push(ch);
+                            if !self.unescape {
+                                // In no-escape mode, the given query has to be saved completely
+                                s.push(ch);
+                            }
                             chars.next();
                         } else {
                             return Ok(s);
@@ -1176,22 +1237,29 @@ impl<'a> Tokenizer<'a> {
                 '\\' => {
                     // consume
                     chars.next();
-                    // slash escaping is specific to MySQL dialect
+                    // slash escaping is specific to MySQL dialect.
                     if dialect_of!(self is MySqlDialect) {
                         if let Some(next) = chars.peek() {
-                            // See https://dev.mysql.com/doc/refman/8.0/en/string-literals.html#character-escape-sequences
-                            let n = match next {
-                                '\'' | '\"' | '\\' | '%' | '_' => *next,
-                                '0' => '\0',
-                                'b' => '\u{8}',
-                                'n' => '\n',
-                                'r' => '\r',
-                                't' => '\t',
-                                'Z' => '\u{1a}',
-                                _ => *next,
-                            };
-                            s.push(n);
-                            chars.next(); // consume next
+                            if !self.unescape {
+                                // In no-escape mode, the given query has to be saved completely including backslashes.
+                                s.push(ch);
+                                s.push(*next);
+                                chars.next(); // consume next
+                            } else {
+                                // See https://dev.mysql.com/doc/refman/8.0/en/string-literals.html#character-escape-sequences
+                                let n = match next {
+                                    '\'' | '\"' | '\\' | '%' | '_' => *next,
+                                    '0' => '\0',
+                                    'b' => '\u{8}',
+                                    'n' => '\n',
+                                    'r' => '\r',
+                                    't' => '\t',
+                                    'Z' => '\u{1a}',
+                                    _ => *next,
+                                };
+                                s.push(n);
+                                chars.next(); // consume next
+                            }
                         }
                     } else {
                         s.push(ch);
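
The effect of the two branches above, sketched with the tokenizer (not part of the diff):

    use sqlparser::dialect::MySqlDialect;
    use sqlparser::tokenizer::{Token, Tokenizer};

    fn main() {
        let dialect = MySqlDialect {};
        let sql = r#"SELECT 'a\nb'"#;
        // Default: the backslash escape is decoded to a newline.
        let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
        assert!(tokens.contains(&Token::SingleQuotedString("a\nb".to_string())));
        // No-escape: the two characters `\` and `n` are kept verbatim.
        let tokens = Tokenizer::new(&dialect, sql)
            .with_unescape(false)
            .tokenize()
            .unwrap();
        assert!(tokens.contains(&Token::SingleQuotedString(r"a\nb".to_string())));
    }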
|
@ -1239,6 +1307,29 @@ impl<'a> Tokenizer<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_quoted_ident(&self, chars: &mut State, quote_end: char) -> (String, Option<char>) {
|
||||||
|
let mut last_char = None;
|
||||||
|
let mut s = String::new();
|
||||||
|
while let Some(ch) = chars.next() {
|
||||||
|
if ch == quote_end {
|
||||||
|
if chars.peek() == Some("e_end) {
|
||||||
|
chars.next();
|
||||||
|
s.push(ch);
|
||||||
|
if !self.unescape {
|
||||||
|
// In no-escape mode, the given query has to be saved completely
|
||||||
|
s.push(ch);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
last_char = Some(quote_end);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
s.push(ch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(s, last_char)
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(clippy::unnecessary_wraps)]
|
#[allow(clippy::unnecessary_wraps)]
|
||||||
fn consume_and_return(
|
fn consume_and_return(
|
||||||
&self,
|
&self,
|
||||||
|
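
parse_quoted_ident is now a method so it can consult self.unescape: a doubled closing quote inside a quoted identifier is either collapsed (default) or preserved (no-escape). For instance (a sketch, not part of the diff):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::tokenizer::{Token, Tokenizer};

    fn main() {
        let dialect = GenericDialect {};
        let sql = r#"SELECT "a "" b""#;
        // Default mode collapses the doubled quote in the identifier.
        let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
        assert!(tokens.contains(&Token::make_word(r#"a " b"#, Some('"'))));
        // No-escape mode keeps the doubled quote as written.
        let tokens = Tokenizer::new(&dialect, sql)
            .with_unescape(false)
            .tokenize()
            .unwrap();
        assert!(tokens.contains(&Token::make_word(r#"a "" b"#, Some('"'))));
    }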
@@ -1266,25 +1357,6 @@ fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool
     s
 }
 
-fn parse_quoted_ident(chars: &mut State, quote_end: char) -> (String, Option<char>) {
-    let mut last_char = None;
-    let mut s = String::new();
-    while let Some(ch) = chars.next() {
-        if ch == quote_end {
-            if chars.peek() == Some(&quote_end) {
-                chars.next();
-                s.push(ch);
-            } else {
-                last_char = Some(quote_end);
-                break;
-            }
-        } else {
-            s.push(ch);
-        }
-    }
-    (s, last_char)
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -1309,8 +1381,7 @@ mod tests {
     fn tokenize_select_1() {
         let sql = String::from("SELECT 1");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
 
         let expected = vec![
             Token::make_keyword("SELECT"),

@@ -1325,8 +1396,7 @@ mod tests {
     fn tokenize_select_float() {
         let sql = String::from("SELECT .1");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
 
         let expected = vec![
             Token::make_keyword("SELECT"),

@@ -1341,8 +1411,7 @@ mod tests {
     fn tokenize_select_exponent() {
         let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
 
         let expected = vec![
             Token::make_keyword("SELECT"),

@@ -1376,8 +1445,7 @@ mod tests {
     fn tokenize_scalar_function() {
         let sql = String::from("SELECT sqrt(1)");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
 
         let expected = vec![
             Token::make_keyword("SELECT"),

@@ -1395,8 +1463,7 @@ mod tests {
     fn tokenize_string_string_concat() {
         let sql = String::from("SELECT 'a' || 'b'");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
 
         let expected = vec![
             Token::make_keyword("SELECT"),

@@ -1414,8 +1481,7 @@ mod tests {
     fn tokenize_bitwise_op() {
         let sql = String::from("SELECT one | two ^ three");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
 
         let expected = vec![
             Token::make_keyword("SELECT"),

@@ -1438,8 +1504,7 @@ mod tests {
         let sql =
             String::from("SELECT true XOR true, false XOR false, true XOR false, false XOR true");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
 
         let expected = vec![
             Token::make_keyword("SELECT"),

@@ -1478,8 +1543,7 @@ mod tests {
     fn tokenize_simple_select() {
         let sql = String::from("SELECT * FROM customer WHERE id = 1 LIMIT 5");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
 
         let expected = vec![
             Token::make_keyword("SELECT"),

@@ -1510,8 +1574,7 @@ mod tests {
     fn tokenize_explain_select() {
         let sql = String::from("EXPLAIN SELECT * FROM customer WHERE id = 1");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
 
         let expected = vec![
             Token::make_keyword("EXPLAIN"),

@@ -1540,8 +1603,7 @@ mod tests {
     fn tokenize_explain_analyze_select() {
         let sql = String::from("EXPLAIN ANALYZE SELECT * FROM customer WHERE id = 1");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
 
         let expected = vec![
             Token::make_keyword("EXPLAIN"),

@@ -1572,8 +1634,7 @@ mod tests {
     fn tokenize_string_predicate() {
         let sql = String::from("SELECT * FROM customer WHERE salary != 'Not Provided'");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
 
         let expected = vec![
             Token::make_keyword("SELECT"),

@@ -1601,8 +1662,7 @@ mod tests {
         let sql = String::from("\n💝مصطفىh");
 
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         // println!("tokens: {:#?}", tokens);
         let expected = vec![
             Token::Whitespace(Whitespace::Newline),

@@ -1617,8 +1677,7 @@ mod tests {
         let sql = String::from("'foo\r\nbar\nbaz'");
 
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())];
         compare(expected, tokens);
     }

@@ -1660,8 +1719,7 @@ mod tests {
         let sql = String::from("\n\nSELECT * FROM table\t💝مصطفىh");
 
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         // println!("tokens: {:#?}", tokens);
         let expected = vec![
             Token::Whitespace(Whitespace::Newline),

@@ -1684,8 +1742,7 @@ mod tests {
     fn tokenize_right_arrow() {
         let sql = String::from("FUNCTION(key=>value)");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         let expected = vec![
             Token::make_word("FUNCTION", None),
             Token::LParen,

@@ -1701,8 +1758,7 @@ mod tests {
     fn tokenize_is_null() {
         let sql = String::from("a IS NULL");
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
 
         let expected = vec![
             Token::make_word("a", None),

@@ -1720,8 +1776,7 @@ mod tests {
         let sql = String::from("0--this is a comment\n1");
 
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         let expected = vec![
             Token::Number("0".to_string(), false),
             Token::Whitespace(Whitespace::SingleLineComment {

@@ -1738,8 +1793,7 @@ mod tests {
         let sql = String::from("--this is a comment");
 
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         let expected = vec![Token::Whitespace(Whitespace::SingleLineComment {
             prefix: "--".to_string(),
             comment: "this is a comment".to_string(),

@@ -1752,8 +1806,7 @@ mod tests {
         let sql = String::from("0/*multi-line\n* /comment*/1");
 
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         let expected = vec![
             Token::Number("0".to_string(), false),
             Token::Whitespace(Whitespace::MultiLineComment(

@@ -1769,8 +1822,7 @@ mod tests {
         let sql = String::from("0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1");
 
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         let expected = vec![
             Token::Number("0".to_string(), false),
             Token::Whitespace(Whitespace::MultiLineComment(

@@ -1786,8 +1838,7 @@ mod tests {
         let sql = String::from("\n/** Comment **/\n");
 
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         let expected = vec![
             Token::Whitespace(Whitespace::Newline),
             Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())),

@@ -1801,8 +1852,7 @@ mod tests {
         let sql = String::from(" \u{2003}\n");
 
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         let expected = vec![
             Token::Whitespace(Whitespace::Space),
             Token::Whitespace(Whitespace::Space),

@@ -1832,8 +1882,7 @@ mod tests {
         let sql = String::from("line1\nline2\rline3\r\nline4\r");
 
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, &sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         let expected = vec![
             Token::make_word("line1", None),
             Token::Whitespace(Whitespace::Newline),

@@ -1851,8 +1900,7 @@ mod tests {
     fn tokenize_mssql_top() {
         let sql = "SELECT TOP 5 [bar] FROM foo";
         let dialect = MsSqlDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
         let expected = vec![
             Token::make_keyword("SELECT"),
             Token::Whitespace(Whitespace::Space),

@@ -1873,8 +1921,7 @@ mod tests {
     fn tokenize_pg_regex_match() {
         let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'";
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
         let expected = vec![
             Token::make_keyword("SELECT"),
             Token::Whitespace(Whitespace::Space),

@@ -1912,8 +1959,7 @@ mod tests {
     fn tokenize_quoted_identifier() {
        let sql = r#" "a "" b" "a """ "c """"" "#;
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
         let expected = vec![
             Token::Whitespace(Whitespace::Space),
             Token::make_word(r#"a " b"#, Some('"')),
|
@ -1926,12 +1972,33 @@ mod tests {
|
||||||
compare(expected, tokens);
|
compare(expected, tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tokenize_quoted_identifier_with_no_escape() {
|
||||||
|
let sql = r#" "a "" b" "a """ "c """"" "#;
|
||||||
|
let dialect = GenericDialect {};
|
||||||
|
let tokens = Tokenizer::new(&dialect, sql)
|
||||||
|
.with_unescape(false)
|
||||||
|
.tokenize()
|
||||||
|
.unwrap();
|
||||||
|
let expected = vec![
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::make_word(r#"a "" b"#, Some('"')),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::make_word(r#"a """#, Some('"')),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::make_word(r#"c """""#, Some('"')),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
];
|
||||||
|
compare(expected, tokens);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tokenize_with_location() {
|
fn tokenize_with_location() {
|
||||||
let sql = "SELECT a,\n b";
|
let sql = "SELECT a,\n b";
|
||||||
let dialect = GenericDialect {};
|
let dialect = GenericDialect {};
|
||||||
let mut tokenizer = Tokenizer::new(&dialect, sql);
|
let tokens = Tokenizer::new(&dialect, sql)
|
||||||
let tokens = tokenizer.tokenize_with_location().unwrap();
|
.tokenize_with_location()
|
||||||
|
.unwrap();
|
||||||
let expected = vec![
|
let expected = vec![
|
||||||
TokenWithLocation::new(Token::make_keyword("SELECT"), 1, 1),
|
TokenWithLocation::new(Token::make_keyword("SELECT"), 1, 1),
|
||||||
TokenWithLocation::new(Token::Whitespace(Whitespace::Space), 1, 7),
|
TokenWithLocation::new(Token::Whitespace(Whitespace::Space), 1, 7),
|
||||||
|
|
|
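
For location-aware consumers the builder composes the same way (a sketch, not part of the diff):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::tokenizer::{Token, TokenWithLocation, Tokenizer};

    fn main() {
        let dialect = GenericDialect {};
        // tokenize_with_location attaches 1-based line/column to each token.
        let tokens = Tokenizer::new(&dialect, "SELECT a")
            .tokenize_with_location()
            .unwrap();
        assert_eq!(
            tokens[0],
            TokenWithLocation::new(Token::make_keyword("SELECT"), 1, 1)
        );
    }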
@@ -1004,11 +1004,13 @@ fn parse_select_with_date_column_name() {
 }
 
 #[test]
-fn parse_escaped_single_quote_string_predicate() {
+fn parse_escaped_single_quote_string_predicate_with_escape() {
     use self::BinaryOperator::*;
     let sql = "SELECT id, fname, lname FROM customer \
                WHERE salary <> 'Jim''s salary'";
+
     let ast = verified_only_select(sql);
+
     assert_eq!(
         Some(Expr::BinaryOp {
             left: Box::new(Expr::Identifier(Ident::new("salary"))),
@@ -1021,6 +1023,34 @@ fn parse_escaped_single_quote_string_predicate() {
     );
 }
 
+#[test]
+fn parse_escaped_single_quote_string_predicate_with_no_escape() {
+    use self::BinaryOperator::*;
+    let sql = "SELECT id, fname, lname FROM customer \
+               WHERE salary <> 'Jim''s salary'";
+
+    let ast = TestedDialects {
+        dialects: vec![Box::new(MySqlDialect {})],
+        options: Some(
+            ParserOptions::new()
+                .with_trailing_commas(true)
+                .with_unescape(false),
+        ),
+    }
+    .verified_only_select(sql);
+
+    assert_eq!(
+        Some(Expr::BinaryOp {
+            left: Box::new(Expr::Identifier(Ident::new("salary"))),
+            op: NotEq,
+            right: Box::new(Expr::Value(Value::SingleQuotedString(
+                "Jim''s salary".to_string()
+            ))),
+        }),
+        ast.selection,
+    );
+}
+
 #[test]
 fn parse_number() {
     let expr = verified_expr("1.0");
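
With unescape disabled the AST keeps the doubled quote from the source ('Jim''s salary' stays "Jim''s salary"), so serializing reproduces the input. Outside the test harness that looks like (a sketch, not part of the diff):

    use sqlparser::dialect::MySqlDialect;
    use sqlparser::parser::{Parser, ParserOptions};

    fn main() {
        let dialect = MySqlDialect {};
        let sql = "SELECT 'Jim''s salary'";
        let stmts = Parser::new(&dialect)
            .with_options(ParserOptions::new().with_unescape(false))
            .try_with_sql(sql)
            .unwrap()
            .parse_statements()
            .unwrap();
        // The literal is not unescaped to `Jim's salary`.
        assert_eq!(stmts[0].to_string(), sql);
    }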
@@ -7264,9 +7294,7 @@ fn parse_non_latin_identifiers() {
 fn parse_trailing_comma() {
     let trailing_commas = TestedDialects {
         dialects: vec![Box::new(GenericDialect {})],
-        options: Some(ParserOptions {
-            trailing_commas: true,
-        }),
+        options: Some(ParserOptions::new().with_trailing_commas(true)),
     };
 
     trailing_commas.one_statement_parses_to(
@@ -18,6 +18,7 @@ use sqlparser::ast::Expr;
 use sqlparser::ast::Value;
 use sqlparser::ast::*;
 use sqlparser::dialect::{GenericDialect, MySqlDialect};
+use sqlparser::parser::ParserOptions;
 use sqlparser::tokenizer::Token;
 use test_utils::*;
@@ -432,10 +433,14 @@ fn parse_quote_identifiers() {
 }
 
 #[test]
-fn parse_quote_identifiers_2() {
+fn parse_escaped_quote_identifiers_with_escape() {
     let sql = "SELECT `quoted `` identifier`";
     assert_eq!(
-        mysql().verified_stmt(sql),
+        TestedDialects {
+            dialects: vec![Box::new(MySqlDialect {})],
+            options: None,
+        }
+        .verified_stmt(sql),
         Statement::Query(Box::new(Query {
             with: None,
             body: Box::new(SetExpr::Select(Box::new(Select {
@@ -467,10 +472,56 @@ fn parse_quote_identifiers_2() {
 }
 
 #[test]
-fn parse_quote_identifiers_3() {
+fn parse_escaped_quote_identifiers_with_no_escape() {
+    let sql = "SELECT `quoted `` identifier`";
+    assert_eq!(
+        TestedDialects {
+            dialects: vec![Box::new(MySqlDialect {})],
+            options: Some(ParserOptions {
+                trailing_commas: false,
+                unescape: false,
+            }),
+        }
+        .verified_stmt(sql),
+        Statement::Query(Box::new(Query {
+            with: None,
+            body: Box::new(SetExpr::Select(Box::new(Select {
+                distinct: None,
+                top: None,
+                projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
+                    value: "quoted `` identifier".into(),
+                    quote_style: Some('`'),
+                }))],
+                into: None,
+                from: vec![],
+                lateral_views: vec![],
+                selection: None,
+                group_by: vec![],
+                cluster_by: vec![],
+                distribute_by: vec![],
+                sort_by: vec![],
+                having: None,
+                named_window: vec![],
+                qualify: None
+            }))),
+            order_by: vec![],
+            limit: None,
+            offset: None,
+            fetch: None,
+            locks: vec![],
+        }))
+    );
+}
+
+#[test]
+fn parse_escaped_backticks_with_escape() {
     let sql = "SELECT ```quoted identifier```";
     assert_eq!(
-        mysql().verified_stmt(sql),
+        TestedDialects {
+            dialects: vec![Box::new(MySqlDialect {})],
+            options: None,
+        }
+        .verified_stmt(sql),
         Statement::Query(Box::new(Query {
             with: None,
             body: Box::new(SetExpr::Select(Box::new(Select {
@@ -501,6 +552,45 @@ fn parse_quote_identifiers_3() {
     );
 }
 
+#[test]
+fn parse_escaped_backticks_with_no_escape() {
+    let sql = "SELECT ```quoted identifier```";
+    assert_eq!(
+        TestedDialects {
+            dialects: vec![Box::new(MySqlDialect {})],
+            options: Some(ParserOptions::new().with_unescape(false)),
+        }
+        .verified_stmt(sql),
+        Statement::Query(Box::new(Query {
+            with: None,
+            body: Box::new(SetExpr::Select(Box::new(Select {
+                distinct: None,
+                top: None,
+                projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
+                    value: "``quoted identifier``".into(),
+                    quote_style: Some('`'),
+                }))],
+                into: None,
+                from: vec![],
+                lateral_views: vec![],
+                selection: None,
+                group_by: vec![],
+                cluster_by: vec![],
+                distribute_by: vec![],
+                sort_by: vec![],
+                having: None,
+                named_window: vec![],
+                qualify: None
+            }))),
+            order_by: vec![],
+            limit: None,
+            offset: None,
+            fetch: None,
+            locks: vec![],
+        }))
+    );
+}
+
 #[test]
 fn parse_unterminated_escape() {
     let sql = r#"SELECT 'I\'m not fine\'"#;
@@ -513,9 +603,13 @@ fn parse_unterminated_escape() {
 }
 
 #[test]
-fn parse_escaped_string() {
+fn parse_escaped_string_with_escape() {
     fn assert_mysql_query_value(sql: &str, quoted: &str) {
-        let stmt = mysql().one_statement_parses_to(sql, "");
+        let stmt = TestedDialects {
+            dialects: vec![Box::new(MySqlDialect {})],
+            options: None,
+        }
+        .one_statement_parses_to(sql, "");
 
         match stmt {
             Statement::Query(query) => match *query.body {
@@ -544,6 +638,95 @@ fn parse_escaped_string() {
     assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} a ");
 }
 
+#[test]
+fn parse_escaped_string_with_no_escape() {
+    fn assert_mysql_query_value(sql: &str, quoted: &str) {
+        let stmt = TestedDialects {
+            dialects: vec![Box::new(MySqlDialect {})],
+            options: Some(ParserOptions::new().with_unescape(false)),
+        }
+        .one_statement_parses_to(sql, "");
+
+        match stmt {
+            Statement::Query(query) => match *query.body {
+                SetExpr::Select(value) => {
+                    let expr = expr_from_projection(only(&value.projection));
+                    assert_eq!(
+                        *expr,
+                        Expr::Value(Value::SingleQuotedString(quoted.to_string()))
+                    );
+                }
+                _ => unreachable!(),
+            },
+            _ => unreachable!(),
+        };
+    }
+    let sql = r#"SELECT 'I\'m fine'"#;
+    assert_mysql_query_value(sql, r#"I\'m fine"#);
+
+    let sql = r#"SELECT 'I''m fine'"#;
+    assert_mysql_query_value(sql, r#"I''m fine"#);
+
+    let sql = r#"SELECT 'I\"m fine'"#;
+    assert_mysql_query_value(sql, r#"I\"m fine"#);
+
+    let sql = r#"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"#;
+    assert_mysql_query_value(sql, r#"Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ "#);
+}
+
+#[test]
+fn check_roundtrip_of_escaped_string() {
+    let options = Some(ParserOptions::new().with_unescape(false));
+
+    TestedDialects {
+        dialects: vec![Box::new(MySqlDialect {})],
+        options: options.clone(),
+    }
+    .verified_stmt(r#"SELECT 'I\'m fine'"#);
+    TestedDialects {
+        dialects: vec![Box::new(MySqlDialect {})],
+        options: options.clone(),
+    }
+    .verified_stmt(r#"SELECT 'I''m fine'"#);
+    TestedDialects {
+        dialects: vec![Box::new(MySqlDialect {})],
+        options: options.clone(),
+    }
+    .verified_stmt(r#"SELECT 'I\\\'m fine'"#);
+    TestedDialects {
+        dialects: vec![Box::new(MySqlDialect {})],
+        options: options.clone(),
+    }
+    .verified_stmt(r#"SELECT 'I\\\'m fine'"#);
+
+    TestedDialects {
+        dialects: vec![Box::new(MySqlDialect {})],
+        options: options.clone(),
+    }
+    .verified_stmt(r#"SELECT "I\"m fine""#);
+    TestedDialects {
+        dialects: vec![Box::new(MySqlDialect {})],
+        options: options.clone(),
+    }
+    .verified_stmt(r#"SELECT "I""m fine""#);
+    TestedDialects {
+        dialects: vec![Box::new(MySqlDialect {})],
+        options: options.clone(),
+    }
+    .verified_stmt(r#"SELECT "I\\\"m fine""#);
+    TestedDialects {
+        dialects: vec![Box::new(MySqlDialect {})],
+        options: options.clone(),
+    }
+    .verified_stmt(r#"SELECT "I\\\"m fine""#);
+
+    TestedDialects {
+        dialects: vec![Box::new(MySqlDialect {})],
+        options,
+    }
+    .verified_stmt(r#"SELECT "I'm ''fine''""#);
+}
+
 #[test]
 fn parse_create_table_with_minimum_display_width() {
     let sql = "CREATE TABLE foo (bar_tinyint TINYINT(3), bar_smallint SMALLINT(5), bar_mediumint MEDIUMINT(6), bar_int INT(11), bar_bigint BIGINT(20))";
@@ -55,8 +55,7 @@ fn test_snowflake_create_transient_table() {
 fn test_snowflake_single_line_tokenize() {
     let sql = "CREATE TABLE# this is a comment \ntable_1";
     let dialect = SnowflakeDialect {};
-    let mut tokenizer = Tokenizer::new(&dialect, sql);
-    let tokens = tokenizer.tokenize().unwrap();
+    let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
 
     let expected = vec![
         Token::make_keyword("CREATE"),
@@ -72,8 +71,7 @@ fn test_snowflake_single_line_tokenize() {
     assert_eq!(expected, tokens);
 
     let sql = "CREATE TABLE // this is a comment \ntable_1";
-    let mut tokenizer = Tokenizer::new(&dialect, sql);
-    let tokens = tokenizer.tokenize().unwrap();
+    let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
 
     let expected = vec![
         Token::make_keyword("CREATE"),