mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-31 19:27:21 +00:00
Fix dollar quoted string tokenizer (#1193)
This commit is contained in:
parent
a0ed14ce02
commit
e5c860213b
1 changed files with 105 additions and 25 deletions
130
src/tokenizer.rs
130
src/tokenizer.rs
|
@ -1119,38 +1119,49 @@ impl<'a> Tokenizer<'a> {
|
|||
|
||||
if let Some('$') = chars.peek() {
|
||||
chars.next();
|
||||
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
|
||||
|
||||
match chars.peek() {
|
||||
Some('$') => {
|
||||
chars.next();
|
||||
for c in value.chars() {
|
||||
let next_char = chars.next();
|
||||
if Some(c) != next_char {
|
||||
return self.tokenizer_error(
|
||||
chars.location(),
|
||||
format!(
|
||||
"Unterminated dollar-quoted string at or near \"{value}\""
|
||||
),
|
||||
);
|
||||
'searching_for_end: loop {
|
||||
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
|
||||
match chars.peek() {
|
||||
Some('$') => {
|
||||
chars.next();
|
||||
let mut maybe_s = String::from("$");
|
||||
for c in value.chars() {
|
||||
if let Some(next_char) = chars.next() {
|
||||
maybe_s.push(next_char);
|
||||
if next_char != c {
|
||||
// This doesn't match the dollar quote delimiter so this
|
||||
// is not the end of the string.
|
||||
s.push_str(&maybe_s);
|
||||
continue 'searching_for_end;
|
||||
}
|
||||
} else {
|
||||
return self.tokenizer_error(
|
||||
chars.location(),
|
||||
"Unterminated dollar-quoted, expected $",
|
||||
);
|
||||
}
|
||||
}
|
||||
if chars.peek() == Some(&'$') {
|
||||
chars.next();
|
||||
maybe_s.push('$');
|
||||
// maybe_s matches the end delimiter
|
||||
break 'searching_for_end;
|
||||
} else {
|
||||
// This also doesn't match the dollar quote delimiter as there are
|
||||
// more characters before the second dollar so this is not the end
|
||||
// of the string.
|
||||
s.push_str(&maybe_s);
|
||||
continue 'searching_for_end;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some('$') = chars.peek() {
|
||||
chars.next();
|
||||
} else {
|
||||
_ => {
|
||||
return self.tokenizer_error(
|
||||
chars.location(),
|
||||
"Unterminated dollar-quoted string, expected $",
|
||||
);
|
||||
"Unterminated dollar-quoted, expected $",
|
||||
)
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return self.tokenizer_error(
|
||||
chars.location(),
|
||||
"Unterminated dollar-quoted, expected $",
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Ok(Token::Placeholder(String::from("$") + &value));
|
||||
|
@ -1906,6 +1917,75 @@ mod tests {
|
|||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
// Tokenizes a `$tag$ ... $tag$` dollar-quoted string whose body itself contains
// `$` characters, and expects a single `DollarQuotedString` token with tag `tag`.
// The expected value keeps the lone `'$'`, the `$tags like this$` look-alike and
// the `$$` that directly precedes the closing `$tag$`.
#[test]
|
||||
fn tokenize_dollar_quoted_string_tagged() {
|
||||
// Only the final `$tag$` is the closing delimiter; every earlier `$` is body text.
let sql = String::from(
|
||||
"SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$",
|
||||
);
|
||||
let dialect = GenericDialect {};
|
||||
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
|
||||
let expected = vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::DollarQuotedString(DollarQuotedString {
|
||||
// Body text with the opening and closing `$tag$` delimiters removed.
value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
|
||||
tag: Some("tag".into()),
|
||||
}),
|
||||
];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
// Tokenizes a string opened with `$tag$` that is never closed by `$tag$` (the
// input ends in `$different tag$` instead) and expects `tokenize()` to return
// a `TokenizerError` rather than a token list.
#[test]
|
||||
fn tokenize_dollar_quoted_string_tagged_unterminated() {
|
||||
let sql = String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$different tag$");
|
||||
let dialect = GenericDialect {};
|
||||
assert_eq!(
|
||||
Tokenizer::new(&dialect, &sql).tokenize(),
|
||||
Err(TokenizerError {
|
||||
message: "Unterminated dollar-quoted, expected $".into(),
|
||||
location: Location {
|
||||
line: 1,
|
||||
// The input is 90 characters long, so column 91 is one past its last character.
|
||||
column: 91
|
||||
}
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
// Tokenizes an untagged `$$ ... $$` dollar-quoted string whose body contains
// single `$` characters and a `$tags like this$` look-alike, and expects one
// `DollarQuotedString` token with `tag: None`.
#[test]
|
||||
fn tokenize_dollar_quoted_string_untagged() {
|
||||
let sql =
|
||||
String::from("SELECT $$within dollar '$' quoted strings have $tags like this$ $$");
|
||||
let dialect = GenericDialect {};
|
||||
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
|
||||
let expected = vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::DollarQuotedString(DollarQuotedString {
|
||||
// The trailing space before the closing `$$` is part of the expected value.
value: "within dollar '$' quoted strings have $tags like this$ ".into(),
|
||||
tag: None,
|
||||
}),
|
||||
];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
// Tokenizes a string opened with `$$` that contains only single `$` characters
// afterwards (no closing `$$`) and expects `tokenize()` to return a
// `TokenizerError` rather than a token list.
#[test]
|
||||
fn tokenize_dollar_quoted_string_untagged_unterminated() {
|
||||
let sql = String::from(
|
||||
"SELECT $$dollar '$' quoted strings have $tags like this$ or like this $different tag$",
|
||||
);
|
||||
let dialect = GenericDialect {};
|
||||
assert_eq!(
|
||||
Tokenizer::new(&dialect, &sql).tokenize(),
|
||||
Err(TokenizerError {
|
||||
// Note: this message differs from the one asserted for the tagged case.
message: "Unterminated dollar-quoted string".into(),
|
||||
location: Location {
|
||||
line: 1,
|
||||
// The input is 85 characters long, so column 86 is one past its last character.
|
||||
column: 86
|
||||
}
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_right_arrow() {
|
||||
let sql = String::from("FUNCTION(key=>value)");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue