mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-30 18:57:21 +00:00
feat: dollar-quoted strings support (#772)
* feat: support dollar-quoted strings * remove comment * unused code * removed debugging * added tests * fmt * clippy * updated tests
This commit is contained in:
parent
0c9ec40082
commit
072ccc0d76
5 changed files with 210 additions and 33 deletions
|
@ -39,7 +39,9 @@ pub use self::query::{
|
||||||
SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor,
|
SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor,
|
||||||
TableWithJoins, Top, Values, WildcardAdditionalOptions, With,
|
TableWithJoins, Top, Values, WildcardAdditionalOptions, With,
|
||||||
};
|
};
|
||||||
pub use self::value::{escape_quoted_string, DateTimeField, TrimWhereField, Value};
|
pub use self::value::{
|
||||||
|
escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value,
|
||||||
|
};
|
||||||
|
|
||||||
#[cfg(feature = "visitor")]
|
#[cfg(feature = "visitor")]
|
||||||
pub use visitor::*;
|
pub use visitor::*;
|
||||||
|
|
|
@ -35,6 +35,8 @@ pub enum Value {
|
||||||
Number(BigDecimal, bool),
|
Number(BigDecimal, bool),
|
||||||
/// 'string value'
|
/// 'string value'
|
||||||
SingleQuotedString(String),
|
SingleQuotedString(String),
|
||||||
|
/// $<tag_name>$string value$<tag_name>$ (postgres syntax)
|
||||||
|
DollarQuotedString(DollarQuotedString),
|
||||||
/// e'string value' (postgres extension)
|
/// e'string value' (postgres extension)
|
||||||
/// <https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
|
/// <https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
|
||||||
EscapedStringLiteral(String),
|
EscapedStringLiteral(String),
|
||||||
|
@ -60,6 +62,7 @@ impl fmt::Display for Value {
|
||||||
Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
|
Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
|
||||||
Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v),
|
Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v),
|
||||||
Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
|
Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
|
||||||
|
Value::DollarQuotedString(v) => write!(f, "{}", v),
|
||||||
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
|
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
|
||||||
Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
|
Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
|
||||||
Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
|
Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
|
||||||
|
@ -71,6 +74,27 @@ impl fmt::Display for Value {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A dollar-quoted string literal (postgres syntax): `$$value$$` or
/// `$tag$value$tag$`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit))]
pub struct DollarQuotedString {
    /// The text found between the opening and closing delimiters.
    pub value: String,
    /// The tag placed between the `$` signs of each delimiter, or `None`
    /// for the untagged `$$` form.
    pub tag: Option<String>,
}

impl fmt::Display for DollarQuotedString {
    /// Writes the literal back out with its delimiters: `$tag$value$tag$`
    /// when a tag is present, `$$value$$` otherwise. The value is emitted
    /// as-is, without any escaping.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match &self.tag {
            Some(tag) => write!(f, "${}${}${}$", tag, self.value, tag),
            None => write!(f, "$${}$$", self.value),
        }
    }
}
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
|
#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[cfg_attr(feature = "visitor", derive(Visit))]
|
#[cfg_attr(feature = "visitor", derive(Visit))]
|
||||||
|
|
|
@ -778,6 +778,7 @@ impl<'a> Parser<'a> {
|
||||||
Token::Number(_, _)
|
Token::Number(_, _)
|
||||||
| Token::SingleQuotedString(_)
|
| Token::SingleQuotedString(_)
|
||||||
| Token::DoubleQuotedString(_)
|
| Token::DoubleQuotedString(_)
|
||||||
|
| Token::DollarQuotedString(_)
|
||||||
| Token::NationalStringLiteral(_)
|
| Token::NationalStringLiteral(_)
|
||||||
| Token::HexStringLiteral(_) => {
|
| Token::HexStringLiteral(_) => {
|
||||||
self.prev_token();
|
self.prev_token();
|
||||||
|
@ -4104,6 +4105,7 @@ impl<'a> Parser<'a> {
|
||||||
},
|
},
|
||||||
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
|
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
|
||||||
Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
|
Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
|
||||||
|
Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())),
|
||||||
Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
|
Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
|
||||||
Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
|
Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
|
||||||
Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
|
Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
|
||||||
|
@ -4148,24 +4150,9 @@ impl<'a> Parser<'a> {
|
||||||
pub fn parse_function_definition(&mut self) -> Result<FunctionDefinition, ParserError> {
|
pub fn parse_function_definition(&mut self) -> Result<FunctionDefinition, ParserError> {
|
||||||
let peek_token = self.peek_token();
|
let peek_token = self.peek_token();
|
||||||
match peek_token.token {
|
match peek_token.token {
|
||||||
Token::DoubleDollarQuoting if dialect_of!(self is PostgreSqlDialect) => {
|
Token::DollarQuotedString(value) if dialect_of!(self is PostgreSqlDialect) => {
|
||||||
self.next_token();
|
self.next_token();
|
||||||
let mut func_desc = String::new();
|
Ok(FunctionDefinition::DoubleDollarDef(value.value))
|
||||||
loop {
|
|
||||||
if let Some(next_token) = self.next_token_no_skip() {
|
|
||||||
match &next_token.token {
|
|
||||||
Token::DoubleDollarQuoting => break,
|
|
||||||
Token::EOF => {
|
|
||||||
return self.expected(
|
|
||||||
"literal string",
|
|
||||||
TokenWithLocation::wrap(Token::EOF),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
token => func_desc.push_str(token.to_string().as_str()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(FunctionDefinition::DoubleDollarDef(func_desc))
|
|
||||||
}
|
}
|
||||||
_ => Ok(FunctionDefinition::SingleQuotedDef(
|
_ => Ok(FunctionDefinition::SingleQuotedDef(
|
||||||
self.parse_literal_string()?,
|
self.parse_literal_string()?,
|
||||||
|
@ -4712,7 +4699,7 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a query expression, i.e. a `SELECT` statement optionally
|
/// Parse a query expression, i.e. a `SELECT` statement optionally
|
||||||
/// preceeded with some `WITH` CTE declarations and optionally followed
|
/// preceded with some `WITH` CTE declarations and optionally followed
|
||||||
/// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't
|
/// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't
|
||||||
/// expect the initial keyword to be already consumed
|
/// expect the initial keyword to be already consumed
|
||||||
pub fn parse_query(&mut self) -> Result<Query, ParserError> {
|
pub fn parse_query(&mut self) -> Result<Query, ParserError> {
|
||||||
|
|
111
src/tokenizer.rs
111
src/tokenizer.rs
|
@ -34,6 +34,7 @@ use serde::{Deserialize, Serialize};
|
||||||
#[cfg(feature = "visitor")]
|
#[cfg(feature = "visitor")]
|
||||||
use sqlparser_derive::Visit;
|
use sqlparser_derive::Visit;
|
||||||
|
|
||||||
|
use crate::ast::DollarQuotedString;
|
||||||
use crate::dialect::SnowflakeDialect;
|
use crate::dialect::SnowflakeDialect;
|
||||||
use crate::dialect::{Dialect, MySqlDialect};
|
use crate::dialect::{Dialect, MySqlDialect};
|
||||||
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
|
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
|
||||||
|
@ -55,6 +56,8 @@ pub enum Token {
|
||||||
SingleQuotedString(String),
|
SingleQuotedString(String),
|
||||||
/// Double quoted string: i.e: "string"
|
/// Double quoted string: i.e: "string"
|
||||||
DoubleQuotedString(String),
|
DoubleQuotedString(String),
|
||||||
|
/// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$
|
||||||
|
DollarQuotedString(DollarQuotedString),
|
||||||
/// "National" string literal: i.e: N'string'
|
/// "National" string literal: i.e: N'string'
|
||||||
NationalStringLiteral(String),
|
NationalStringLiteral(String),
|
||||||
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
|
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
|
||||||
|
@ -149,8 +152,6 @@ pub enum Token {
|
||||||
PGCubeRoot,
|
PGCubeRoot,
|
||||||
/// `?` or `$` , a prepared statement arg placeholder
|
/// `?` or `$` , a prepared statement arg placeholder
|
||||||
Placeholder(String),
|
Placeholder(String),
|
||||||
/// `$$`, used for PostgreSQL create function definition
|
|
||||||
DoubleDollarQuoting,
|
|
||||||
/// ->, used as a operator to extract json field in PostgreSQL
|
/// ->, used as a operator to extract json field in PostgreSQL
|
||||||
Arrow,
|
Arrow,
|
||||||
/// ->>, used as a operator to extract json field as text in PostgreSQL
|
/// ->>, used as a operator to extract json field as text in PostgreSQL
|
||||||
|
@ -184,6 +185,7 @@ impl fmt::Display for Token {
|
||||||
Token::Char(ref c) => write!(f, "{}", c),
|
Token::Char(ref c) => write!(f, "{}", c),
|
||||||
Token::SingleQuotedString(ref s) => write!(f, "'{}'", s),
|
Token::SingleQuotedString(ref s) => write!(f, "'{}'", s),
|
||||||
Token::DoubleQuotedString(ref s) => write!(f, "\"{}\"", s),
|
Token::DoubleQuotedString(ref s) => write!(f, "\"{}\"", s),
|
||||||
|
Token::DollarQuotedString(ref s) => write!(f, "{}", s),
|
||||||
Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s),
|
Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s),
|
||||||
Token::EscapedStringLiteral(ref s) => write!(f, "E'{}'", s),
|
Token::EscapedStringLiteral(ref s) => write!(f, "E'{}'", s),
|
||||||
Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s),
|
Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s),
|
||||||
|
@ -236,7 +238,6 @@ impl fmt::Display for Token {
|
||||||
Token::HashArrow => write!(f, "#>"),
|
Token::HashArrow => write!(f, "#>"),
|
||||||
Token::HashLongArrow => write!(f, "#>>"),
|
Token::HashLongArrow => write!(f, "#>>"),
|
||||||
Token::AtArrow => write!(f, "@>"),
|
Token::AtArrow => write!(f, "@>"),
|
||||||
Token::DoubleDollarQuoting => write!(f, "$$"),
|
|
||||||
Token::ArrowAt => write!(f, "<@"),
|
Token::ArrowAt => write!(f, "<@"),
|
||||||
Token::HashMinus => write!(f, "#-"),
|
Token::HashMinus => write!(f, "#-"),
|
||||||
Token::AtQuestion => write!(f, "@?"),
|
Token::AtQuestion => write!(f, "@?"),
|
||||||
|
@ -837,17 +838,8 @@ impl<'a> Tokenizer<'a> {
|
||||||
let s = peeking_take_while(chars, |ch| ch.is_numeric());
|
let s = peeking_take_while(chars, |ch| ch.is_numeric());
|
||||||
Ok(Some(Token::Placeholder(String::from("?") + &s)))
|
Ok(Some(Token::Placeholder(String::from("?") + &s)))
|
||||||
}
|
}
|
||||||
'$' => {
|
'$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)),
|
||||||
chars.next();
|
|
||||||
match chars.peek() {
|
|
||||||
Some('$') => self.consume_and_return(chars, Token::DoubleDollarQuoting),
|
|
||||||
_ => {
|
|
||||||
let s =
|
|
||||||
peeking_take_while(chars, |ch| ch.is_alphanumeric() || ch == '_');
|
|
||||||
Ok(Some(Token::Placeholder(String::from("$") + &s)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//whitespace check (including unicode chars) should be last as it covers some of the chars above
|
//whitespace check (including unicode chars) should be last as it covers some of the chars above
|
||||||
ch if ch.is_whitespace() => {
|
ch if ch.is_whitespace() => {
|
||||||
self.consume_and_return(chars, Token::Whitespace(Whitespace::Space))
|
self.consume_and_return(chars, Token::Whitespace(Whitespace::Space))
|
||||||
|
@ -858,6 +850,97 @@ impl<'a> Tokenizer<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tokenize dollar preceded value (i.e: a string/placeholder)
|
||||||
|
fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result<Token, TokenizerError> {
|
||||||
|
let mut s = String::new();
|
||||||
|
let mut value = String::new();
|
||||||
|
|
||||||
|
chars.next();
|
||||||
|
|
||||||
|
if let Some('$') = chars.peek() {
|
||||||
|
chars.next();
|
||||||
|
|
||||||
|
let mut is_terminated = false;
|
||||||
|
let mut prev: Option<char> = None;
|
||||||
|
|
||||||
|
while let Some(&ch) = chars.peek() {
|
||||||
|
if prev == Some('$') {
|
||||||
|
if ch == '$' {
|
||||||
|
chars.next();
|
||||||
|
is_terminated = true;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
s.push('$');
|
||||||
|
s.push(ch);
|
||||||
|
}
|
||||||
|
} else if ch != '$' {
|
||||||
|
s.push(ch);
|
||||||
|
}
|
||||||
|
|
||||||
|
prev = Some(ch);
|
||||||
|
chars.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
return if chars.peek().is_none() && !is_terminated {
|
||||||
|
self.tokenizer_error(chars.location(), "Unterminated dollar-quoted string")
|
||||||
|
} else {
|
||||||
|
Ok(Token::DollarQuotedString(DollarQuotedString {
|
||||||
|
value: s,
|
||||||
|
tag: None,
|
||||||
|
}))
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
value.push_str(&peeking_take_while(chars, |ch| {
|
||||||
|
ch.is_alphanumeric() || ch == '_'
|
||||||
|
}));
|
||||||
|
|
||||||
|
if let Some('$') = chars.peek() {
|
||||||
|
chars.next();
|
||||||
|
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
|
||||||
|
|
||||||
|
match chars.peek() {
|
||||||
|
Some('$') => {
|
||||||
|
chars.next();
|
||||||
|
for (_, c) in value.chars().enumerate() {
|
||||||
|
let next_char = chars.next();
|
||||||
|
if Some(c) != next_char {
|
||||||
|
return self.tokenizer_error(
|
||||||
|
chars.location(),
|
||||||
|
format!(
|
||||||
|
"Unterminated dollar-quoted string at or near \"{}\"",
|
||||||
|
value
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some('$') = chars.peek() {
|
||||||
|
chars.next();
|
||||||
|
} else {
|
||||||
|
return self.tokenizer_error(
|
||||||
|
chars.location(),
|
||||||
|
"Unterminated dollar-quoted string, expected $",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
return self.tokenizer_error(
|
||||||
|
chars.location(),
|
||||||
|
"Unterminated dollar-quoted, expected $",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return Ok(Token::Placeholder(String::from("$") + &value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Token::DollarQuotedString(DollarQuotedString {
|
||||||
|
value: s,
|
||||||
|
tag: if value.is_empty() { None } else { Some(value) },
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
fn tokenizer_error<R>(
|
fn tokenizer_error<R>(
|
||||||
&self,
|
&self,
|
||||||
loc: Location,
|
loc: Location,
|
||||||
|
|
|
@ -2507,3 +2507,84 @@ fn parse_drop_function() {
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn parse_dollar_quoted_string() {
    let sql = "SELECT $$hello$$, $tag_name$world$tag_name$, $$Foo$Bar$$, $$Foo$Bar$$col_name, $$$$, $tag_name$$tag_name$";

    let statements = pg().parse_sql_statements(sql).unwrap();

    // Dig the projection list out of the single SELECT statement.
    let query = match statements.get(0).unwrap() {
        Statement::Query(query) => query,
        _ => unreachable!(),
    };
    let projection = match &*query.body {
        SetExpr::Select(select) => &select.projection,
        _ => unreachable!(),
    };

    // Builds the expression expected for one dollar-quoted literal.
    let literal = |tag: Option<&str>, value: &str| {
        Expr::Value(Value::DollarQuotedString(DollarQuotedString {
            tag: tag.map(String::from),
            value: value.into(),
        }))
    };

    assert_eq!(
        &literal(None, "hello"),
        expr_from_projection(&projection[0])
    );

    assert_eq!(
        &literal(Some("tag_name"), "world"),
        expr_from_projection(&projection[1])
    );

    // A lone `$` inside an untagged string is part of the value.
    assert_eq!(
        &literal(None, "Foo$Bar"),
        expr_from_projection(&projection[2])
    );

    // An identifier directly after the closing `$$` is parsed as an alias.
    assert_eq!(
        projection[3],
        SelectItem::ExprWithAlias {
            expr: literal(None, "Foo$Bar"),
            alias: Ident {
                value: "col_name".into(),
                quote_style: None,
            },
        }
    );

    // Empty bodies, untagged and tagged.
    assert_eq!(expr_from_projection(&projection[4]), &literal(None, ""));

    assert_eq!(
        expr_from_projection(&projection[5]),
        &literal(Some("tag_name"), "")
    );
}
|
||||||
|
|
||||||
|
#[test]
fn parse_incorrect_dollar_quoted_string() {
    // Each statement opens a dollar-quoted string that is never closed by a
    // matching delimiter, so parsing must fail.
    let malformed = ["SELECT $x$hello$$", "SELECT $hello$$", "SELECT $$$"];

    for sql in malformed.iter() {
        assert!(pg().parse_sql_statements(sql).is_err());
    }
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue