mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-04 06:18:17 +00:00
fix: Handle double quotes inside quoted identifiers correctly (#411)
* fix: Handle double quotes inside quoted identifiers correctly. This fixes #410 for standard SQL; however, I don't know enough about other dialects to know whether they handle this differently. May need more extensive testing as well. * refactor: Make quoted identifier parsing a separate function * test: Check that quoted identifier tokenization works. Added `pretty_assertions` so that the `assert_eq!` in the tokenization is readable * test: Check that quoted identifiers work in MySQL * chore: cargo clippy
This commit is contained in:
parent
2614576dbf
commit
34fedf311d
6 changed files with 94 additions and 4 deletions
|
@ -36,6 +36,7 @@ serde_json = { version = "1.0", optional = true }
|
|||
[dev-dependencies]
|
||||
simple_logger = "2.1"
|
||||
matches = "0.1"
|
||||
pretty_assertions = "1"
|
||||
|
||||
[package.metadata.release]
|
||||
# Instruct `cargo release` to not run `cargo publish` locally:
|
||||
|
|
|
@ -23,7 +23,7 @@ use alloc::{
|
|||
string::{String, ToString},
|
||||
vec::Vec,
|
||||
};
|
||||
use core::fmt;
|
||||
use core::fmt::{self, Write};
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
@ -127,7 +127,18 @@ impl From<&str> for Ident {
|
|||
impl fmt::Display for Ident {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.quote_style {
|
||||
Some(q) if q == '"' || q == '\'' || q == '`' => write!(f, "{}{}{}", q, self.value, q),
|
||||
Some(q) if q == '"' || q == '\'' || q == '`' => {
|
||||
f.write_char(q)?;
|
||||
let mut first = true;
|
||||
for s in self.value.split_inclusive(q) {
|
||||
if !first {
|
||||
f.write_char(q)?;
|
||||
}
|
||||
first = false;
|
||||
f.write_str(s)?;
|
||||
}
|
||||
f.write_char(q)
|
||||
}
|
||||
Some(q) if q == '[' => write!(f, "[{}]", self.value),
|
||||
None => f.write_str(&self.value),
|
||||
_ => panic!("unexpected quote style"),
|
||||
|
|
|
@ -39,6 +39,10 @@
|
|||
#[cfg(not(feature = "std"))]
|
||||
extern crate alloc;
|
||||
|
||||
#[macro_use]
|
||||
#[cfg(test)]
|
||||
extern crate pretty_assertions;
|
||||
|
||||
pub mod ast;
|
||||
#[macro_use]
|
||||
pub mod dialect;
|
||||
|
|
|
@ -418,8 +418,9 @@ impl<'a> Tokenizer<'a> {
|
|||
quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
|
||||
chars.next(); // consume the opening quote
|
||||
let quote_end = Word::matching_end_quote(quote_start);
|
||||
let s = peeking_take_while(chars, |ch| ch != quote_end);
|
||||
if chars.next() == Some(quote_end) {
|
||||
let (s, last_char) = parse_quoted_ident(chars, quote_end);
|
||||
|
||||
if last_char == Some(quote_end) {
|
||||
Ok(Some(Token::make_word(&s, Some(quote_start))))
|
||||
} else {
|
||||
self.tokenizer_error(format!(
|
||||
|
@ -728,6 +729,25 @@ fn peeking_take_while(
|
|||
s
|
||||
}
|
||||
|
||||
/// Reads the body of a delimited (quoted) identifier from `chars`.
///
/// The opening quote is expected to have been consumed by the caller already.
/// Characters are collected until a lone `quote_end` is found. A doubled
/// `quote_end` is treated as an escape: both characters are consumed and a
/// single `quote_end` is appended to the result.
///
/// Returns the unescaped identifier text together with:
/// * `Some(quote_end)` when the closing quote was found (it is consumed), or
/// * `None` when the input ended first, i.e. the identifier is unterminated.
fn parse_quoted_ident(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> (String, Option<char>) {
    let mut last_char = None;
    let mut s = String::new();
    // `chars` is advanced inside the loop body as well, so a `for` loop over
    // the iterator is not an option here.
    while let Some(ch) = chars.next() {
        if ch != quote_end {
            s.push(ch);
        } else if chars.peek() == Some(&quote_end) {
            // Doubled quote: an escaped quote character inside the identifier.
            chars.next();
            s.push(ch);
        } else {
            // Lone quote: the identifier ends here.
            last_char = Some(quote_end);
            break;
        }
    }
    (s, last_char)
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
@ -1276,6 +1296,24 @@ mod tests {
|
|||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
// Checks that a doubled double-quote inside a double-quoted identifier is
// tokenized as a single literal quote character in the resulting word.
#[test]
|
||||
fn tokenize_quoted_identifier() {
|
||||
// Three quoted identifiers separated by single spaces; every `""` pair
// inside the outer quotes is an escaped quote.
let sql = r#" "a "" b" "a """ "c """"" "#;
|
||||
let dialect = GenericDialect {};
|
||||
let mut tokenizer = Tokenizer::new(&dialect, sql);
|
||||
let tokens = tokenizer.tokenize().unwrap();
|
||||
// Each word is expected with its doubled quotes collapsed to one, and with
// `"` recorded as the quote style.
let expected = vec![
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::make_word(r#"a " b"#, Some('"')),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::make_word(r#"a ""#, Some('"')),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::make_word(r#"c """#, Some('"')),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
|
||||
//println!("------------------------------");
|
||||
//println!("tokens = {:?}", actual);
|
||||
|
|
|
@ -178,6 +178,37 @@ fn parse_quote_identifiers() {
|
|||
}
|
||||
}
|
||||
|
||||
// Checks that, for the MySQL dialect, a doubled backtick inside a
// backtick-quoted identifier is parsed as one literal backtick.
// NOTE(review): `verified_stmt` presumably also checks that the statement
// serializes back to the same SQL text — confirm against the test utilities.
#[test]
|
||||
fn parse_quote_identifiers_2() {
|
||||
// The `` pair in the middle is the escaped backtick under test.
let sql = "SELECT `quoted `` identifier`";
|
||||
assert_eq!(
|
||||
mysql().verified_stmt(sql),
|
||||
Statement::Query(Box::new(Query {
|
||||
with: None,
|
||||
body: SetExpr::Select(Box::new(Select {
|
||||
distinct: false,
|
||||
top: None,
|
||||
// The only projected item is the identifier itself.
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
|
||||
// Expected value holds a single backtick where the SQL had two.
value: "quoted ` identifier".into(),
|
||||
quote_style: Some('`'),
|
||||
}))],
|
||||
from: vec![],
|
||||
lateral_views: vec![],
|
||||
selection: None,
|
||||
group_by: vec![],
|
||||
cluster_by: vec![],
|
||||
distribute_by: vec![],
|
||||
sort_by: vec![],
|
||||
having: None,
|
||||
})),
|
||||
order_by: vec![],
|
||||
limit: None,
|
||||
offset: None,
|
||||
fetch: None,
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_unterminated_escape() {
|
||||
let sql = r#"SELECT 'I\'m not fine\'"#;
|
||||
|
|
|
@ -891,6 +891,11 @@ fn parse_comments() {
|
|||
}
|
||||
}
|
||||
|
||||
// Feeds a SELECT whose double-quoted identifier contains an escaped (doubled)
// quote through `pg_and_generic().verified_stmt`. The returned statement is
// not inspected, so the test relies entirely on the checks `verified_stmt`
// performs itself.
// NOTE(review): presumably it panics on a parse failure or a round-trip
// mismatch — confirm against the test utilities.
#[test]
|
||||
fn parse_quoted_identifier() {
|
||||
pg_and_generic().verified_stmt(r#"SELECT "quoted "" ident""#);
|
||||
}
|
||||
|
||||
fn pg() -> TestedDialects {
|
||||
TestedDialects {
|
||||
dialects: vec![Box::new(PostgreSqlDialect {})],
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue