fix: Handle double quotes inside quoted identifiers correctly (#411)

* fix: Handle double quotes inside quoted identifiers correctly

This fixes #410 for standard SQL, however I don't know enough about other dialects to know if they
handle this differently. May need more extensive testing as well.

* refactor: Make quoted identifier parsing a separate function

* test: Check that quoted identifier tokenization works

Added `pretty_assertions` so that the `assert_eq!` in the tokenization is readable

* test: Check that quoted identifiers work in mysql

* chore: cargo clippy
This commit is contained in:
Markus Westerlind 2022-02-07 16:05:17 +01:00 committed by GitHub
parent 2614576dbf
commit 34fedf311d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 94 additions and 4 deletions

View file

@ -418,8 +418,9 @@ impl<'a> Tokenizer<'a> {
quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
chars.next(); // consume the opening quote
let quote_end = Word::matching_end_quote(quote_start);
let s = peeking_take_while(chars, |ch| ch != quote_end);
if chars.next() == Some(quote_end) {
let (s, last_char) = parse_quoted_ident(chars, quote_end);
if last_char == Some(quote_end) {
Ok(Some(Token::make_word(&s, Some(quote_start))))
} else {
self.tokenizer_error(format!(
@ -728,6 +729,25 @@ fn peeking_take_while(
s
}
/// Reads a delimited identifier body from `chars`, which must be positioned
/// just past the opening quote.
///
/// A doubled `quote_end` inside the identifier is the SQL escape for a single
/// literal quote character and is collapsed into one. Returns the identifier
/// text together with `Some(quote_end)` when a closing quote was consumed, or
/// `None` when the input ran out first (unterminated identifier — the caller
/// reports the error).
fn parse_quoted_ident(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> (String, Option<char>) {
    let mut ident = String::new();
    let mut terminator = None;
    loop {
        match chars.next() {
            None => break,
            Some(c) if c == quote_end => {
                // Two consecutive end-quotes encode one literal quote;
                // a lone end-quote terminates the identifier.
                if chars.next_if_eq(&quote_end).is_some() {
                    ident.push(quote_end);
                } else {
                    terminator = Some(quote_end);
                    break;
                }
            }
            Some(c) => ident.push(c),
        }
    }
    (ident, terminator)
}
#[cfg(test)]
mod tests {
use super::*;
@ -1276,6 +1296,24 @@ mod tests {
compare(expected, tokens);
}
#[test]
fn tokenize_quoted_identifier() {
    // Doubled double-quotes inside a delimited identifier must collapse
    // into a single literal quote character in the resulting word.
    let sql = r#" "a "" b" "a """ "c """"" "#;
    let dialect = GenericDialect {};
    let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
    compare(
        vec![
            Token::Whitespace(Whitespace::Space),
            Token::make_word(r#"a " b"#, Some('"')),
            Token::Whitespace(Whitespace::Space),
            Token::make_word(r#"a ""#, Some('"')),
            Token::Whitespace(Whitespace::Space),
            Token::make_word(r#"c """#, Some('"')),
            Token::Whitespace(Whitespace::Space),
        ],
        tokens,
    );
}
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
//println!("------------------------------");
//println!("tokens = {:?}", actual);