Mirror of https://github.com/apache/datafusion-sqlparser-rs.git, synced 2025-09-22 13:42:31 +00:00
fix: Handle double quotes inside quoted identifiers correctly (#411)
* fix: Handle double quotes inside quoted identifiers correctly

  This fixes #410 for standard SQL; I don't know enough about other dialects to say whether they handle this differently, so it may need more extensive testing as well.

* refactor: Make quoted identifier parsing a separate function

* test: Check that quoted identifier tokenization works

  Added `pretty_assertions` so that the `assert_eq!` in the tokenization test is readable.

* test: Check that quoted identifiers work in mysql

* chore: cargo clippy
parent 2614576dbf
commit 34fedf311d

6 changed files with 94 additions and 4 deletions
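In standard SQL, a double quote inside a delimited identifier is escaped by doubling it, so `"quoted "" ident"` names the identifier `quoted " ident`. Before this commit the tokenizer stopped at the first closing quote and the `Display` impl wrote the raw value back unescaped. A minimal round-trip sketch of the fixed behaviour, assuming the crate's public `Parser::parse_sql` entry point (not part of this diff):

use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    // "quoted "" ident" is the escaped form of the identifier `quoted " ident`.
    let sql = r#"SELECT "quoted "" ident""#;
    let ast = Parser::parse_sql(&GenericDialect {}, sql).unwrap();
    // After the fix, the statement serializes back with the quote re-doubled.
    assert_eq!(ast[0].to_string(), sql);
}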
@@ -36,6 +36,7 @@ serde_json = { version = "1.0", optional = true }
 [dev-dependencies]
 simple_logger = "2.1"
 matches = "0.1"
+pretty_assertions = "1"

 [package.metadata.release]
 # Instruct `cargo release` to not run `cargo publish` locally:

@@ -23,7 +23,7 @@ use alloc::{
     string::{String, ToString},
     vec::Vec,
 };
-use core::fmt;
+use core::fmt::{self, Write};

 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};

@@ -127,7 +127,18 @@ impl From<&str> for Ident {
 impl fmt::Display for Ident {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self.quote_style {
-            Some(q) if q == '"' || q == '\'' || q == '`' => write!(f, "{}{}{}", q, self.value, q),
+            Some(q) if q == '"' || q == '\'' || q == '`' => {
+                f.write_char(q)?;
+                let mut first = true;
+                for s in self.value.split_inclusive(q) {
+                    if !first {
+                        f.write_char(q)?;
+                    }
+                    first = false;
+                    f.write_str(s)?;
+                }
+                f.write_char(q)
+            }
             Some(q) if q == '[' => write!(f, "[{}]", self.value),
             None => f.write_str(&self.value),
             _ => panic!("unexpected quote style"),

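The `split_inclusive` loop above writes the quote character twice wherever it occurs in the value: each segment already ends in one `q`, and an extra `q` is emitted before every segment after the first. A standalone sketch of that escaping idea, with a hypothetical `escape_quoted` helper (not part of the diff):

// Hypothetical helper mirroring the escaping used in the Display impl above:
// every quote character inside `value` comes out doubled.
fn escape_quoted(value: &str, q: char) -> String {
    let mut out = String::new();
    out.push(q);
    let mut first = true;
    for segment in value.split_inclusive(q) {
        if !first {
            out.push(q); // a segment boundary means `value` contained `q`; write it twice
        }
        first = false;
        out.push_str(segment);
    }
    out.push(q);
    out
}

fn main() {
    assert_eq!(escape_quoted(r#"a " b"#, '"'), r#""a "" b""#);
}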
@@ -39,6 +39,10 @@
 #[cfg(not(feature = "std"))]
 extern crate alloc;

+#[macro_use]
+#[cfg(test)]
+extern crate pretty_assertions;
+
 pub mod ast;
 #[macro_use]
 pub mod dialect;

@@ -418,8 +418,9 @@ impl<'a> Tokenizer<'a> {
             quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
                 chars.next(); // consume the opening quote
                 let quote_end = Word::matching_end_quote(quote_start);
-                let s = peeking_take_while(chars, |ch| ch != quote_end);
-                if chars.next() == Some(quote_end) {
+                let (s, last_char) = parse_quoted_ident(chars, quote_end);
+
+                if last_char == Some(quote_end) {
                     Ok(Some(Token::make_word(&s, Some(quote_start))))
                 } else {
                     self.tokenizer_error(format!(

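The old code read the identifier with `peeking_take_while(chars, |ch| ch != quote_end)`, which stops at the very first quote character and so cannot tell an escaped `""` from the closing quote. A tiny illustration of that failure mode, using the standard library's `take_while` as a stand-in (not part of the diff):

fn main() {
    // Input after the opening quote of `"a "" b"` has been consumed.
    let rest = r#"a "" b" AS x"#;
    // Cutting at the first quote loses everything after the escaped `""`.
    let inner: String = rest.chars().take_while(|&c| c != '"').collect();
    assert_eq!(inner, "a ");
}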
@@ -728,6 +729,25 @@ fn peeking_take_while(
     s
 }

+fn parse_quoted_ident(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> (String, Option<char>) {
+    let mut last_char = None;
+    let mut s = String::new();
+    while let Some(ch) = chars.next() {
+        if ch == quote_end {
+            if chars.peek() == Some(&quote_end) {
+                chars.next();
+                s.push(ch);
+            } else {
+                last_char = Some(quote_end);
+                break;
+            }
+        } else {
+            s.push(ch);
+        }
+    }
+    (s, last_char)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

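Unlike `peeking_take_while`, the helper above consumes the closing quote itself, so it reports what it stopped on: `Some(quote_end)` means the identifier was properly terminated, while `None` means the input ran out and the caller raises a tokenizer error. A self-contained sketch of the same collapse-doubled-quotes logic, with a hypothetical `take_quoted` mirror of the helper (not part of the diff):

use std::iter::Peekable;
use std::str::Chars;

// Hypothetical mirror of parse_quoted_ident: doubled quotes collapse to one
// character of the value; a single quote terminates the identifier.
fn take_quoted(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> (String, Option<char>) {
    let mut s = String::new();
    let mut last_char = None;
    while let Some(ch) = chars.next() {
        if ch == quote_end {
            if chars.peek() == Some(&quote_end) {
                chars.next(); // escaped quote: keep one copy and continue
                s.push(ch);
            } else {
                last_char = Some(quote_end); // closing quote found
                break;
            }
        } else {
            s.push(ch);
        }
    }
    (s, last_char)
}

fn main() {
    // Input after the opening quote of `"a "" b"` has been consumed.
    let mut chars = r#"a "" b" AS x"#.chars().peekable();
    assert_eq!(take_quoted(&mut chars, '"'), ("a \" b".to_string(), Some('"')));
    assert_eq!(chars.collect::<String>(), " AS x"); // the rest is left for the tokenizer
}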
@@ -1276,6 +1296,24 @@ mod tests {
         compare(expected, tokens);
     }

+    #[test]
+    fn tokenize_quoted_identifier() {
+        let sql = r#" "a "" b" "a """ "c """"" "#;
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word(r#"a " b"#, Some('"')),
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word(r#"a ""#, Some('"')),
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word(r#"c """#, Some('"')),
+            Token::Whitespace(Whitespace::Space),
+        ];
+        compare(expected, tokens);
+    }
+
     fn compare(expected: Vec<Token>, actual: Vec<Token>) {
         //println!("------------------------------");
         //println!("tokens = {:?}", actual);

@@ -178,6 +178,37 @@ fn parse_quote_identifiers() {
     }
 }

+#[test]
+fn parse_quote_identifiers_2() {
+    let sql = "SELECT `quoted `` identifier`";
+    assert_eq!(
+        mysql().verified_stmt(sql),
+        Statement::Query(Box::new(Query {
+            with: None,
+            body: SetExpr::Select(Box::new(Select {
+                distinct: false,
+                top: None,
+                projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
+                    value: "quoted ` identifier".into(),
+                    quote_style: Some('`'),
+                }))],
+                from: vec![],
+                lateral_views: vec![],
+                selection: None,
+                group_by: vec![],
+                cluster_by: vec![],
+                distribute_by: vec![],
+                sort_by: vec![],
+                having: None,
+            })),
+            order_by: vec![],
+            limit: None,
+            offset: None,
+            fetch: None,
+        }))
+    );
+}
+
 #[test]
 fn parse_unterminated_escape() {
     let sql = r#"SELECT 'I\'m not fine\'"#;

@@ -891,6 +891,11 @@ fn parse_comments() {
     }
 }

+#[test]
+fn parse_quoted_identifier() {
+    pg_and_generic().verified_stmt(r#"SELECT "quoted "" ident""#);
+}
+
 fn pg() -> TestedDialects {
     TestedDialects {
         dialects: vec![Box::new(PostgreSqlDialect {})],