fix: Handle double quotes inside quoted identifiers correctly (#411)

* fix: Handle double quotes inside quoted identifiers correctly

This fixes #410 for standard SQL; however, I don't know enough about other dialects to know whether
they handle this differently. This may need more extensive testing as well.

* refactor: Make quoted identifier parsing a separate function

* test: Check that quoted identifier tokenization works

Added `pretty_assertions` so that the `assert_eq!` in the tokenization is readable

* test: Check that quoted identifiers work in mysql

* chore: cargo clippy
This commit is contained in:
Markus Westerlind 2022-02-07 16:05:17 +01:00 committed by GitHub
parent 2614576dbf
commit 34fedf311d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 94 additions and 4 deletions

View file

@ -36,6 +36,7 @@ serde_json = { version = "1.0", optional = true }
[dev-dependencies]
simple_logger = "2.1"
matches = "0.1"
pretty_assertions = "1"
[package.metadata.release]
# Instruct `cargo release` to not run `cargo publish` locally:

View file

@ -23,7 +23,7 @@ use alloc::{
string::{String, ToString},
vec::Vec,
};
use core::fmt;
use core::fmt::{self, Write};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
@ -127,7 +127,18 @@ impl From<&str> for Ident {
impl fmt::Display for Ident {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.quote_style {
Some(q) if q == '"' || q == '\'' || q == '`' => write!(f, "{}{}{}", q, self.value, q),
Some(q) if q == '"' || q == '\'' || q == '`' => {
f.write_char(q)?;
let mut first = true;
for s in self.value.split_inclusive(q) {
if !first {
f.write_char(q)?;
}
first = false;
f.write_str(s)?;
}
f.write_char(q)
}
Some(q) if q == '[' => write!(f, "[{}]", self.value),
None => f.write_str(&self.value),
_ => panic!("unexpected quote style"),

View file

@ -39,6 +39,10 @@
#[cfg(not(feature = "std"))]
extern crate alloc;
#[macro_use]
#[cfg(test)]
extern crate pretty_assertions;
pub mod ast;
#[macro_use]
pub mod dialect;

View file

@ -418,8 +418,9 @@ impl<'a> Tokenizer<'a> {
quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
chars.next(); // consume the opening quote
let quote_end = Word::matching_end_quote(quote_start);
let s = peeking_take_while(chars, |ch| ch != quote_end);
if chars.next() == Some(quote_end) {
let (s, last_char) = parse_quoted_ident(chars, quote_end);
if last_char == Some(quote_end) {
Ok(Some(Token::make_word(&s, Some(quote_start))))
} else {
self.tokenizer_error(format!(
@ -728,6 +729,25 @@ fn peeking_take_while(
s
}
/// Reads the body of a delimited identifier from `chars`, starting at the
/// current position and stopping at the closing `quote_end`.
///
/// Two consecutive `quote_end` characters are treated as an escaped quote:
/// both are consumed and a single `quote_end` is appended to the result.
///
/// Returns the unescaped text together with `Some(quote_end)` when a closing
/// quote was consumed, or `None` when the input ended before one was found.
fn parse_quoted_ident(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> (String, Option<char>) {
    let mut ident = String::new();
    loop {
        match chars.next() {
            // Input exhausted without seeing a closing quote.
            None => return (ident, None),
            // Ordinary character: part of the identifier.
            Some(c) if c != quote_end => ident.push(c),
            Some(c) => {
                // A lone quote terminates the identifier.
                if chars.peek() != Some(&quote_end) {
                    return (ident, Some(quote_end));
                }
                // Doubled quote: swallow the second one, keep a single quote.
                chars.next();
                ident.push(c);
            }
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
@ -1276,6 +1296,24 @@ mod tests {
compare(expected, tokens);
}
#[test]
fn tokenize_quoted_identifier() {
    // Three delimited identifiers, each containing doubled (escaped) quotes,
    // separated and surrounded by single spaces.
    let input = r#" "a "" b" "a """ "c """"" "#;
    let generic = GenericDialect {};
    let mut lexer = Tokenizer::new(&generic, input);
    let actual = lexer.tokenize().unwrap();

    // Small constructors keep the expected token list compact.
    let space = || Token::Whitespace(Whitespace::Space);
    let quoted = |text: &str| Token::make_word(text, Some('"'));

    let expected = vec![
        space(),
        quoted(r#"a " b"#),
        space(),
        quoted(r#"a ""#),
        space(),
        quoted(r#"c """#),
        space(),
    ];
    compare(expected, actual);
}
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
//println!("------------------------------");
//println!("tokens = {:?}", actual);

View file

@ -178,6 +178,37 @@ fn parse_quote_identifiers() {
}
}
#[test]
fn parse_quote_identifiers_2() {
    // A doubled backtick inside a backtick-quoted identifier stands for one
    // literal backtick in the identifier's value.
    let sql = "SELECT `quoted `` identifier`";

    let ident = Ident {
        value: "quoted ` identifier".into(),
        quote_style: Some('`'),
    };
    let select = Select {
        distinct: false,
        top: None,
        projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(ident))],
        from: vec![],
        lateral_views: vec![],
        selection: None,
        group_by: vec![],
        cluster_by: vec![],
        distribute_by: vec![],
        sort_by: vec![],
        having: None,
    };
    let query = Query {
        with: None,
        body: SetExpr::Select(Box::new(select)),
        order_by: vec![],
        limit: None,
        offset: None,
        fetch: None,
    };

    assert_eq!(
        mysql().verified_stmt(sql),
        Statement::Query(Box::new(query))
    );
}
#[test]
fn parse_unterminated_escape() {
let sql = r#"SELECT 'I\'m not fine\'"#;

View file

@ -891,6 +891,11 @@ fn parse_comments() {
}
}
#[test]
fn parse_quoted_identifier() {
    // The identifier contains a doubled double quote; the statement is run
    // through `verified_stmt` on the PostgreSQL and generic dialects.
    let sql = r#"SELECT "quoted "" ident""#;
    pg_and_generic().verified_stmt(sql);
}
fn pg() -> TestedDialects {
TestedDialects {
dialects: vec![Box::new(PostgreSqlDialect {})],