mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
Correctly look for end delimiter dollar quoted string (#1650)
This commit is contained in:
parent
3b4dc0f227
commit
c808c4e4fd
1 changed files with 129 additions and 47 deletions
176
src/tokenizer.rs
176
src/tokenizer.rs
|
@ -1566,46 +1566,33 @@ impl<'a> Tokenizer<'a> {
|
|||
if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() {
|
||||
chars.next();
|
||||
|
||||
'searching_for_end: loop {
|
||||
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
|
||||
match chars.peek() {
|
||||
Some('$') => {
|
||||
chars.next();
|
||||
let mut maybe_s = String::from("$");
|
||||
for c in value.chars() {
|
||||
if let Some(next_char) = chars.next() {
|
||||
maybe_s.push(next_char);
|
||||
if next_char != c {
|
||||
// This doesn't match the dollar quote delimiter so this
|
||||
// is not the end of the string.
|
||||
s.push_str(&maybe_s);
|
||||
continue 'searching_for_end;
|
||||
}
|
||||
} else {
|
||||
return self.tokenizer_error(
|
||||
chars.location(),
|
||||
"Unterminated dollar-quoted, expected $",
|
||||
);
|
||||
let mut temp = String::new();
|
||||
let end_delimiter = format!("${}$", value);
|
||||
|
||||
loop {
|
||||
match chars.next() {
|
||||
Some(ch) => {
|
||||
temp.push(ch);
|
||||
|
||||
if temp.ends_with(&end_delimiter) {
|
||||
if let Some(temp) = temp.strip_suffix(&end_delimiter) {
|
||||
s.push_str(temp);
|
||||
}
|
||||
}
|
||||
if chars.peek() == Some(&'$') {
|
||||
chars.next();
|
||||
maybe_s.push('$');
|
||||
// maybe_s matches the end delimiter
|
||||
break 'searching_for_end;
|
||||
} else {
|
||||
// This also doesn't match the dollar quote delimiter as there are
|
||||
// more characters before the second dollar so this is not the end
|
||||
// of the string.
|
||||
s.push_str(&maybe_s);
|
||||
continue 'searching_for_end;
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
None => {
|
||||
if temp.ends_with(&end_delimiter) {
|
||||
if let Some(temp) = temp.strip_suffix(&end_delimiter) {
|
||||
s.push_str(temp);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return self.tokenizer_error(
|
||||
chars.location(),
|
||||
"Unterminated dollar-quoted, expected $",
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2569,20 +2556,67 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn tokenize_dollar_quoted_string_tagged() {
|
||||
let sql = String::from(
|
||||
"SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$",
|
||||
);
|
||||
let dialect = GenericDialect {};
|
||||
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
|
||||
let expected = vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::DollarQuotedString(DollarQuotedString {
|
||||
value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
|
||||
tag: Some("tag".into()),
|
||||
}),
|
||||
let test_cases = vec![
|
||||
(
|
||||
String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$"),
|
||||
vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::DollarQuotedString(DollarQuotedString {
|
||||
value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
|
||||
tag: Some("tag".into()),
|
||||
})
|
||||
]
|
||||
),
|
||||
(
|
||||
String::from("SELECT $abc$x$ab$abc$"),
|
||||
vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::DollarQuotedString(DollarQuotedString {
|
||||
value: "x$ab".into(),
|
||||
tag: Some("abc".into()),
|
||||
})
|
||||
]
|
||||
),
|
||||
(
|
||||
String::from("SELECT $abc$$abc$"),
|
||||
vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::DollarQuotedString(DollarQuotedString {
|
||||
value: "".into(),
|
||||
tag: Some("abc".into()),
|
||||
})
|
||||
]
|
||||
),
|
||||
(
|
||||
String::from("0$abc$$abc$1"),
|
||||
vec![
|
||||
Token::Number("0".into(), false),
|
||||
Token::DollarQuotedString(DollarQuotedString {
|
||||
value: "".into(),
|
||||
tag: Some("abc".into()),
|
||||
}),
|
||||
Token::Number("1".into(), false),
|
||||
]
|
||||
),
|
||||
(
|
||||
String::from("$function$abc$q$data$q$$function$"),
|
||||
vec![
|
||||
Token::DollarQuotedString(DollarQuotedString {
|
||||
value: "abc$q$data$q$".into(),
|
||||
tag: Some("function".into()),
|
||||
}),
|
||||
]
|
||||
),
|
||||
];
|
||||
compare(expected, tokens);
|
||||
|
||||
let dialect = GenericDialect {};
|
||||
for (sql, expected) in test_cases {
|
||||
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
|
||||
compare(expected, tokens);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -2601,6 +2635,22 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_dollar_quoted_string_tagged_unterminated_mirror() {
|
||||
let sql = String::from("SELECT $abc$abc$");
|
||||
let dialect = GenericDialect {};
|
||||
assert_eq!(
|
||||
Tokenizer::new(&dialect, &sql).tokenize(),
|
||||
Err(TokenizerError {
|
||||
message: "Unterminated dollar-quoted, expected $".into(),
|
||||
location: Location {
|
||||
line: 1,
|
||||
column: 17
|
||||
}
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_dollar_placeholder() {
|
||||
let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC");
|
||||
|
@ -2625,6 +2675,38 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_nested_dollar_quoted_strings() {
|
||||
let sql = String::from("SELECT $tag$dollar $nested$ string$tag$");
|
||||
let dialect = GenericDialect {};
|
||||
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
|
||||
let expected = vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::DollarQuotedString(DollarQuotedString {
|
||||
value: "dollar $nested$ string".into(),
|
||||
tag: Some("tag".into()),
|
||||
}),
|
||||
];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_dollar_quoted_string_untagged_empty() {
|
||||
let sql = String::from("SELECT $$$$");
|
||||
let dialect = GenericDialect {};
|
||||
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
|
||||
let expected = vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::DollarQuotedString(DollarQuotedString {
|
||||
value: "".into(),
|
||||
tag: None,
|
||||
}),
|
||||
];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_dollar_quoted_string_untagged() {
|
||||
let sql =
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue