Mirror of https://github.com/apache/datafusion-sqlparser-rs.git (synced 2025-08-17)
fix: Handle double quotes inside quoted identifiers correctly (#411)
* fix: Handle double quotes inside quoted identifiers correctly

  This fixes #410 for standard SQL; however, I don't know enough about other dialects to know whether they handle this differently. May need more extensive testing as well.

* refactor: Make quoted identifier parsing a separate function

* test: Check that quoted identifier tokenization works

  Added `pretty_assertions` so that the `assert_eq!` output in the tokenization test is readable.

* test: Check that quoted identifiers work in mysql

* chore: cargo clippy
This commit is contained in:
parent 2614576dbf
commit 34fedf311d
6 changed files with 94 additions and 4 deletions
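For context: standard SQL escapes a double quote inside a delimited identifier by doubling it, so "a "" b" names the identifier a " b. Before this fix the tokenizer stopped at the first closing quote. A minimal sketch of the fixed behavior, using the same public Tokenizer API that the new test below exercises (the exact module paths are assumed from the crate layout):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::tokenizer::{Token, Tokenizer};

    fn main() {
        // Per the SQL standard, "" inside a quoted identifier is one literal ".
        let sql = r#"SELECT "a "" b" FROM t"#;
        let dialect = GenericDialect {};
        let mut tokenizer = Tokenizer::new(&dialect, sql);
        let tokens = tokenizer.tokenize().unwrap();
        // With this fix, the word token carries the unescaped name: a " b
        assert!(tokens.contains(&Token::make_word(r#"a " b"#, Some('"'))));
    }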
@@ -418,8 +418,9 @@ impl<'a> Tokenizer<'a> {
             quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
                 chars.next(); // consume the opening quote
                 let quote_end = Word::matching_end_quote(quote_start);
-                let s = peeking_take_while(chars, |ch| ch != quote_end);
-                if chars.next() == Some(quote_end) {
+                let (s, last_char) = parse_quoted_ident(chars, quote_end);
+
+                if last_char == Some(quote_end) {
                     Ok(Some(Token::make_word(&s, Some(quote_start))))
                 } else {
                     self.tokenizer_error(format!(
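Previously, peeking_take_while read up to the first quote_end, so an escaped "" inside the identifier was mistaken for the closing quote followed by a stray character. The new parse_quoted_ident helper (added in the next hunk) consumes escaped pairs and reports whether a genuine closing quote was seen, which the caller checks via last_char.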
@@ -728,6 +729,25 @@ fn peeking_take_while(
     s
 }
 
+fn parse_quoted_ident(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> (String, Option<char>) {
+    let mut last_char = None;
+    let mut s = String::new();
+    while let Some(ch) = chars.next() {
+        if ch == quote_end {
+            if chars.peek() == Some(&quote_end) {
+                chars.next();
+                s.push(ch);
+            } else {
+                last_char = Some(quote_end);
+                break;
+            }
+        } else {
+            s.push(ch);
+        }
+    }
+    (s, last_char)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
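Because parse_quoted_ident is a private free function, it cannot be called from outside the tokenizer module. The sketch below copies its body verbatim so the escape handling can be exercised in isolation; the sample input and main are illustrative only:

    use std::iter::Peekable;
    use std::str::Chars;

    // Verbatim copy of the helper added in the hunk above.
    fn parse_quoted_ident(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> (String, Option<char>) {
        let mut last_char = None;
        let mut s = String::new();
        while let Some(ch) = chars.next() {
            if ch == quote_end {
                if chars.peek() == Some(&quote_end) {
                    chars.next(); // consume the second quote of the escaped pair
                    s.push(ch); // keep a single literal quote in the identifier
                } else {
                    last_char = Some(quote_end); // a genuine closing quote
                    break;
                }
            } else {
                s.push(ch);
            }
        }
        (s, last_char)
    }

    fn main() {
        // The tokenizer has already consumed the opening quote at this point.
        let mut chars = r#"a "" b" AND more"#.chars().peekable();
        let (ident, last) = parse_quoted_ident(&mut chars, '"');
        assert_eq!(ident, r#"a " b"#); // the "" pair collapsed to one "
        assert_eq!(last, Some('"')); // the closing quote was found
        assert_eq!(chars.collect::<String>(), " AND more"); // rest untouched
    }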
@@ -1276,6 +1296,24 @@ mod tests {
         compare(expected, tokens);
     }
 
+    #[test]
+    fn tokenize_quoted_identifier() {
+        let sql = r#" "a "" b" "a """ "c """"" "#;
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word(r#"a " b"#, Some('"')),
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word(r#"a ""#, Some('"')),
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word(r#"c """#, Some('"')),
+            Token::Whitespace(Whitespace::Space),
+        ];
+        compare(expected, tokens);
+    }
+
     fn compare(expected: Vec<Token>, actual: Vec<Token>) {
         //println!("------------------------------");
         //println!("tokens = {:?}", actual);
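For reference, the three delimited identifiers in the test input decode as: "a "" b" to a " b (an escaped quote mid-identifier), "a """ to a " (an escaped quote just before the closer), and "c """"" to c "" (two escaped pairs); each "" collapses to a single literal quote.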