fix: Handle double quotes inside quoted identifiers correctly (#411)

* fix: Handle double quotes inside quoted identifiers correctly

This fixes #410 for standard SQL, however I don't know enough about other dialects to know if they
handle this differently. May need more extensive testing as well.

* refactor: Make quoted identifier parsing a separate function

* test: Check that quoted identifier tokenization works

Added `pretty_assertions` so that the `assert_eq!` in the tokenization is readable

* test: Check that quoted identifiers work in mysql

* chore: cargo clippy
This commit is contained in:
Markus Westerlind 2022-02-07 16:05:17 +01:00 committed by GitHub
parent 2614576dbf
commit 34fedf311d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 94 additions and 4 deletions

View file

@ -418,8 +418,9 @@ impl<'a> Tokenizer<'a> {
quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
chars.next(); // consume the opening quote
let quote_end = Word::matching_end_quote(quote_start);
let s = peeking_take_while(chars, |ch| ch != quote_end);
if chars.next() == Some(quote_end) {
let (s, last_char) = parse_quoted_ident(chars, quote_end);
if last_char == Some(quote_end) {
Ok(Some(Token::make_word(&s, Some(quote_start))))
} else {
self.tokenizer_error(format!(
@ -728,6 +729,25 @@ fn peeking_take_while(
s
}
/// Reads a delimited identifier body from `chars`, which must be positioned
/// just past the opening quote.
///
/// A doubled `quote_end` inside the identifier is the SQL escape for a single
/// literal quote character and is collapsed into one. Returns the identifier
/// text together with `Some(quote_end)` when a closing quote was consumed, or
/// `None` when the input ran out first (unterminated identifier — the caller
/// reports the error).
fn parse_quoted_ident(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> (String, Option<char>) {
    let mut ident = String::new();
    let mut terminator = None;
    loop {
        match chars.next() {
            None => break,
            Some(c) if c == quote_end => {
                // Two consecutive end-quotes encode one literal quote;
                // a lone end-quote terminates the identifier.
                if chars.next_if_eq(&quote_end).is_some() {
                    ident.push(quote_end);
                } else {
                    terminator = Some(quote_end);
                    break;
                }
            }
            Some(c) => ident.push(c),
        }
    }
    (ident, terminator)
}
#[cfg(test)]
mod tests {
use super::*;
@ -1276,6 +1296,24 @@ mod tests {
compare(expected, tokens);
}
#[test]
fn tokenize_quoted_identifier() {
    // Doubled double-quotes inside a delimited identifier must collapse
    // into a single literal quote character in the resulting word.
    let sql = r#" "a "" b" "a """ "c """"" "#;
    let dialect = GenericDialect {};
    let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
    compare(
        vec![
            Token::Whitespace(Whitespace::Space),
            Token::make_word(r#"a " b"#, Some('"')),
            Token::Whitespace(Whitespace::Space),
            Token::make_word(r#"a ""#, Some('"')),
            Token::Whitespace(Whitespace::Space),
            Token::make_word(r#"c """#, Some('"')),
            Token::Whitespace(Whitespace::Space),
        ],
        tokens,
    );
}
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
//println!("------------------------------");
//println!("tokens = {:?}", actual);