Mirror of https://github.com/apache/datafusion-sqlparser-rs.git (synced 2025-08-16 12:10:15 +00:00)
feat: dollar-quoted strings support (#772)
* feat: support dollar-quoted strings
* remove comment
* unused code
* removed debugging
* added tests
* fmt
* clippy
* updated tests
This commit is contained in:
parent
0c9ec40082
commit
072ccc0d76
5 changed files with 210 additions and 33 deletions
111
src/tokenizer.rs
111
src/tokenizer.rs
|
@ -34,6 +34,7 @@ use serde::{Deserialize, Serialize};
|
|||
#[cfg(feature = "visitor")]
|
||||
use sqlparser_derive::Visit;
|
||||
|
||||
use crate::ast::DollarQuotedString;
|
||||
use crate::dialect::SnowflakeDialect;
|
||||
use crate::dialect::{Dialect, MySqlDialect};
|
||||
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
|
||||
|
@ -55,6 +56,8 @@ pub enum Token {
|
|||
SingleQuotedString(String),
|
||||
/// Double quoted string: i.e: "string"
|
||||
DoubleQuotedString(String),
|
||||
/// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$
|
||||
DollarQuotedString(DollarQuotedString),
|
||||
/// "National" string literal: i.e: N'string'
|
||||
NationalStringLiteral(String),
|
||||
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
|
||||
|
@ -149,8 +152,6 @@ pub enum Token {
|
|||
PGCubeRoot,
|
||||
/// `?` or `$` , a prepared statement arg placeholder
|
||||
Placeholder(String),
|
||||
/// `$$`, used for PostgreSQL create function definition
|
||||
DoubleDollarQuoting,
|
||||
/// ->, used as a operator to extract json field in PostgreSQL
|
||||
Arrow,
|
||||
/// ->>, used as a operator to extract json field as text in PostgreSQL
|
||||
|
@ -184,6 +185,7 @@ impl fmt::Display for Token {
|
|||
Token::Char(ref c) => write!(f, "{}", c),
|
||||
Token::SingleQuotedString(ref s) => write!(f, "'{}'", s),
|
||||
Token::DoubleQuotedString(ref s) => write!(f, "\"{}\"", s),
|
||||
Token::DollarQuotedString(ref s) => write!(f, "{}", s),
|
||||
Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s),
|
||||
Token::EscapedStringLiteral(ref s) => write!(f, "E'{}'", s),
|
||||
Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s),
|
||||
|
@ -236,7 +238,6 @@ impl fmt::Display for Token {
|
|||
Token::HashArrow => write!(f, "#>"),
|
||||
Token::HashLongArrow => write!(f, "#>>"),
|
||||
Token::AtArrow => write!(f, "@>"),
|
||||
Token::DoubleDollarQuoting => write!(f, "$$"),
|
||||
Token::ArrowAt => write!(f, "<@"),
|
||||
Token::HashMinus => write!(f, "#-"),
|
||||
Token::AtQuestion => write!(f, "@?"),
|
||||
|
@ -837,17 +838,8 @@ impl<'a> Tokenizer<'a> {
|
|||
let s = peeking_take_while(chars, |ch| ch.is_numeric());
|
||||
Ok(Some(Token::Placeholder(String::from("?") + &s)))
|
||||
}
|
||||
'$' => {
|
||||
chars.next();
|
||||
match chars.peek() {
|
||||
Some('$') => self.consume_and_return(chars, Token::DoubleDollarQuoting),
|
||||
_ => {
|
||||
let s =
|
||||
peeking_take_while(chars, |ch| ch.is_alphanumeric() || ch == '_');
|
||||
Ok(Some(Token::Placeholder(String::from("$") + &s)))
|
||||
}
|
||||
}
|
||||
}
|
||||
'$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)),
|
||||
|
||||
//whitespace check (including unicode chars) should be last as it covers some of the chars above
|
||||
ch if ch.is_whitespace() => {
|
||||
self.consume_and_return(chars, Token::Whitespace(Whitespace::Space))
|
||||
|
@ -858,6 +850,97 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Tokenize dollar preceded value (i.e: a string/placeholder)
|
||||
fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result<Token, TokenizerError> {
|
||||
let mut s = String::new();
|
||||
let mut value = String::new();
|
||||
|
||||
chars.next();
|
||||
|
||||
if let Some('$') = chars.peek() {
|
||||
chars.next();
|
||||
|
||||
let mut is_terminated = false;
|
||||
let mut prev: Option<char> = None;
|
||||
|
||||
while let Some(&ch) = chars.peek() {
|
||||
if prev == Some('$') {
|
||||
if ch == '$' {
|
||||
chars.next();
|
||||
is_terminated = true;
|
||||
break;
|
||||
} else {
|
||||
s.push('$');
|
||||
s.push(ch);
|
||||
}
|
||||
} else if ch != '$' {
|
||||
s.push(ch);
|
||||
}
|
||||
|
||||
prev = Some(ch);
|
||||
chars.next();
|
||||
}
|
||||
|
||||
return if chars.peek().is_none() && !is_terminated {
|
||||
self.tokenizer_error(chars.location(), "Unterminated dollar-quoted string")
|
||||
} else {
|
||||
Ok(Token::DollarQuotedString(DollarQuotedString {
|
||||
value: s,
|
||||
tag: None,
|
||||
}))
|
||||
};
|
||||
} else {
|
||||
value.push_str(&peeking_take_while(chars, |ch| {
|
||||
ch.is_alphanumeric() || ch == '_'
|
||||
}));
|
||||
|
||||
if let Some('$') = chars.peek() {
|
||||
chars.next();
|
||||
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
|
||||
|
||||
match chars.peek() {
|
||||
Some('$') => {
|
||||
chars.next();
|
||||
for (_, c) in value.chars().enumerate() {
|
||||
let next_char = chars.next();
|
||||
if Some(c) != next_char {
|
||||
return self.tokenizer_error(
|
||||
chars.location(),
|
||||
format!(
|
||||
"Unterminated dollar-quoted string at or near \"{}\"",
|
||||
value
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some('$') = chars.peek() {
|
||||
chars.next();
|
||||
} else {
|
||||
return self.tokenizer_error(
|
||||
chars.location(),
|
||||
"Unterminated dollar-quoted string, expected $",
|
||||
);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return self.tokenizer_error(
|
||||
chars.location(),
|
||||
"Unterminated dollar-quoted, expected $",
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Ok(Token::Placeholder(String::from("$") + &value));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Token::DollarQuotedString(DollarQuotedString {
|
||||
value: s,
|
||||
tag: if value.is_empty() { None } else { Some(value) },
|
||||
}))
|
||||
}
|
||||
|
||||
fn tokenizer_error<R>(
|
||||
&self,
|
||||
loc: Location,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue