From e5c860213b1d1c73a0090f4ca16023cbdbf81b58 Mon Sep 17 00:00:00 2001 From: ZacJW Date: Fri, 12 Apr 2024 11:38:04 +0100 Subject: [PATCH] Fix dollar quoted string tokenizer (#1193) --- src/tokenizer.rs | 130 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 105 insertions(+), 25 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 1ceec705..b239d990 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1119,38 +1119,49 @@ impl<'a> Tokenizer<'a> { if let Some('$') = chars.peek() { chars.next(); - s.push_str(&peeking_take_while(chars, |ch| ch != '$')); - match chars.peek() { - Some('$') => { - chars.next(); - for c in value.chars() { - let next_char = chars.next(); - if Some(c) != next_char { - return self.tokenizer_error( - chars.location(), - format!( - "Unterminated dollar-quoted string at or near \"{value}\"" - ), - ); + 'searching_for_end: loop { + s.push_str(&peeking_take_while(chars, |ch| ch != '$')); + match chars.peek() { + Some('$') => { + chars.next(); + let mut maybe_s = String::from("$"); + for c in value.chars() { + if let Some(next_char) = chars.next() { + maybe_s.push(next_char); + if next_char != c { + // This doesn't match the dollar quote delimiter so this + // is not the end of the string. + s.push_str(&maybe_s); + continue 'searching_for_end; + } + } else { + return self.tokenizer_error( + chars.location(), + "Unterminated dollar-quoted, expected $", + ); + } + } + if chars.peek() == Some(&'$') { + chars.next(); + maybe_s.push('$'); + // maybe_s matches the end delimiter + break 'searching_for_end; + } else { + // This also doesn't match the dollar quote delimiter as there are + // more characters before the second dollar so this is not the end + // of the string. + s.push_str(&maybe_s); + continue 'searching_for_end; } } - - if let Some('$') = chars.peek() { - chars.next(); - } else { + _ => { return self.tokenizer_error( chars.location(), - "Unterminated dollar-quoted string, expected $", - ); + "Unterminated dollar-quoted, expected $", + ) } } - _ => { - return self.tokenizer_error( - chars.location(), - "Unterminated dollar-quoted, expected $", - ); - } } } else { return Ok(Token::Placeholder(String::from("$") + &value)); @@ -1906,6 +1917,75 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_dollar_quoted_string_tagged() { + let sql = String::from( + "SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$", + ); + let dialect = GenericDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::DollarQuotedString(DollarQuotedString { + value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(), + tag: Some("tag".into()), + }), + ]; + compare(expected, tokens); + } + + #[test] + fn tokenize_dollar_quoted_string_tagged_unterminated() { + let sql = String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$different tag$"); + let dialect = GenericDialect {}; + assert_eq!( + Tokenizer::new(&dialect, &sql).tokenize(), + Err(TokenizerError { + message: "Unterminated dollar-quoted, expected $".into(), + location: Location { + line: 1, + column: 91 + } + }) + ); + } + + #[test] + fn tokenize_dollar_quoted_string_untagged() { + let sql = + String::from("SELECT $$within dollar '$' quoted strings have $tags like this$ $$"); + let dialect = GenericDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::DollarQuotedString(DollarQuotedString { + value: "within dollar '$' quoted strings have $tags like this$ ".into(), + tag: None, + }), + ]; + compare(expected, tokens); + } + + #[test] + fn tokenize_dollar_quoted_string_untagged_unterminated() { + let sql = String::from( + "SELECT $$dollar '$' quoted strings have $tags like this$ or like this $different tag$", + ); + let dialect = GenericDialect {}; + assert_eq!( + Tokenizer::new(&dialect, &sql).tokenize(), + Err(TokenizerError { + message: "Unterminated dollar-quoted string".into(), + location: Location { + line: 1, + column: 86 + } + }) + ); + } + #[test] fn tokenize_right_arrow() { let sql = String::from("FUNCTION(key=>value)");