mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-10-17 09:17:14 +00:00
Redshift: Fix parsing for quoted numbered columns (#1576)
This commit is contained in:
parent
316bb14135
commit
7867ba3cf0
4 changed files with 204 additions and 40 deletions
|
@ -128,14 +128,39 @@ pub trait Dialect: Debug + Any {
|
||||||
ch == '"' || ch == '`'
|
ch == '"' || ch == '`'
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the character used to quote identifiers.
|
/// Determine if a character starts a potential nested quoted identifier.
|
||||||
fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
|
/// Example: RedShift supports the following quote styles to all mean the same thing:
|
||||||
|
/// ```sql
|
||||||
|
/// SELECT 1 AS foo;
|
||||||
|
/// SELECT 1 AS "foo";
|
||||||
|
/// SELECT 1 AS [foo];
|
||||||
|
/// SELECT 1 AS ["foo"];
|
||||||
|
/// ```
|
||||||
|
fn is_nested_delimited_identifier_start(&self, _ch: char) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Only applicable whenever [`Self::is_nested_delimited_identifier_start`] returns true
|
||||||
|
/// If the next sequence of tokens potentially represent a nested identifier, then this method
|
||||||
|
/// returns a tuple containing the outer quote style, and if present, the inner (nested) quote style.
|
||||||
|
///
|
||||||
|
/// Example (Redshift):
|
||||||
|
/// ```text
|
||||||
|
/// `["foo"]` => Some(`[`, Some(`"`))
|
||||||
|
/// `[foo]` => Some(`[`, None)
|
||||||
|
/// `[0]` => None
|
||||||
|
/// `"foo"` => None
|
||||||
|
/// ```
|
||||||
|
fn peek_nested_delimited_identifier_quotes(
|
||||||
|
&self,
|
||||||
|
mut _chars: Peekable<Chars<'_>>,
|
||||||
|
) -> Option<(char, Option<char>)> {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine if quoted characters are proper for identifier
|
/// Return the character used to quote identifiers.
|
||||||
fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable<Chars<'_>>) -> bool {
|
fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
|
||||||
true
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine if a character is a valid start character for an unquoted identifier
|
/// Determine if a character is a valid start character for an unquoted identifier
|
||||||
|
@ -869,6 +894,17 @@ mod tests {
|
||||||
self.0.is_delimited_identifier_start(ch)
|
self.0.is_delimited_identifier_start(ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_nested_delimited_identifier_start(&self, ch: char) -> bool {
|
||||||
|
self.0.is_nested_delimited_identifier_start(ch)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn peek_nested_delimited_identifier_quotes(
|
||||||
|
&self,
|
||||||
|
chars: std::iter::Peekable<std::str::Chars<'_>>,
|
||||||
|
) -> Option<(char, Option<char>)> {
|
||||||
|
self.0.peek_nested_delimited_identifier_quotes(chars)
|
||||||
|
}
|
||||||
|
|
||||||
fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
|
fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
|
||||||
self.0.identifier_quote_style(identifier)
|
self.0.identifier_quote_style(identifier)
|
||||||
}
|
}
|
||||||
|
@ -877,13 +913,6 @@ mod tests {
|
||||||
self.0.supports_string_literal_backslash_escape()
|
self.0.supports_string_literal_backslash_escape()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_proper_identifier_inside_quotes(
|
|
||||||
&self,
|
|
||||||
chars: std::iter::Peekable<std::str::Chars<'_>>,
|
|
||||||
) -> bool {
|
|
||||||
self.0.is_proper_identifier_inside_quotes(chars)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn supports_filter_during_aggregation(&self) -> bool {
|
fn supports_filter_during_aggregation(&self) -> bool {
|
||||||
self.0.supports_filter_during_aggregation()
|
self.0.supports_filter_during_aggregation()
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,21 +32,51 @@ pub struct RedshiftSqlDialect {}
|
||||||
// in the Postgres dialect, the query will be parsed as an array, while in the Redshift dialect it will
|
// in the Postgres dialect, the query will be parsed as an array, while in the Redshift dialect it will
|
||||||
// be a json path
|
// be a json path
|
||||||
impl Dialect for RedshiftSqlDialect {
|
impl Dialect for RedshiftSqlDialect {
|
||||||
fn is_delimited_identifier_start(&self, ch: char) -> bool {
|
/// Determine if a character starts a potential nested quoted identifier.
|
||||||
ch == '"' || ch == '['
|
/// Example: RedShift supports the following quote styles to all mean the same thing:
|
||||||
|
/// ```sql
|
||||||
|
/// SELECT 1 AS foo;
|
||||||
|
/// SELECT 1 AS "foo";
|
||||||
|
/// SELECT 1 AS [foo];
|
||||||
|
/// SELECT 1 AS ["foo"];
|
||||||
|
/// ```
|
||||||
|
fn is_nested_delimited_identifier_start(&self, ch: char) -> bool {
|
||||||
|
ch == '['
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine if quoted characters are proper for identifier
|
/// Only applicable whenever [`Self::is_nested_delimited_identifier_start`] returns true
|
||||||
/// It's needed to distinguish treating square brackets as quotes from
|
/// If the next sequence of tokens potentially represent a nested identifier, then this method
|
||||||
/// treating them as json path. If there is identifier then we assume
|
/// returns a tuple containing the outer quote style, and if present, the inner (nested) quote style.
|
||||||
/// there is no json path.
|
///
|
||||||
fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable<Chars<'_>>) -> bool {
|
/// Example (Redshift):
|
||||||
chars.next();
|
/// ```text
|
||||||
let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable();
|
/// `["foo"]` => Some(`[`, Some(`"`))
|
||||||
if let Some(&ch) = not_white_chars.peek() {
|
/// `[foo]` => Some(`[`, None)
|
||||||
return self.is_identifier_start(ch);
|
/// `[0]` => None
|
||||||
|
/// `"foo"` => None
|
||||||
|
/// ```
|
||||||
|
fn peek_nested_delimited_identifier_quotes(
|
||||||
|
&self,
|
||||||
|
mut chars: Peekable<Chars<'_>>,
|
||||||
|
) -> Option<(char, Option<char>)> {
|
||||||
|
if chars.peek() != Some(&'[') {
|
||||||
|
return None;
|
||||||
}
|
}
|
||||||
false
|
|
||||||
|
chars.next();
|
||||||
|
|
||||||
|
let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable();
|
||||||
|
|
||||||
|
if let Some(&ch) = not_white_chars.peek() {
|
||||||
|
if ch == '"' {
|
||||||
|
return Some(('[', Some('"')));
|
||||||
|
}
|
||||||
|
if self.is_identifier_start(ch) {
|
||||||
|
return Some(('[', None));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_identifier_start(&self, ch: char) -> bool {
|
fn is_identifier_start(&self, ch: char) -> bool {
|
||||||
|
|
|
@ -1075,25 +1075,61 @@ impl<'a> Tokenizer<'a> {
|
||||||
Ok(Some(Token::DoubleQuotedString(s)))
|
Ok(Some(Token::DoubleQuotedString(s)))
|
||||||
}
|
}
|
||||||
// delimited (quoted) identifier
|
// delimited (quoted) identifier
|
||||||
|
quote_start if self.dialect.is_delimited_identifier_start(ch) => {
|
||||||
|
let word = self.tokenize_quoted_identifier(quote_start, chars)?;
|
||||||
|
Ok(Some(Token::make_word(&word, Some(quote_start))))
|
||||||
|
}
|
||||||
|
// Potentially nested delimited (quoted) identifier
|
||||||
quote_start
|
quote_start
|
||||||
if self.dialect.is_delimited_identifier_start(ch)
|
if self
|
||||||
|
.dialect
|
||||||
|
.is_nested_delimited_identifier_start(quote_start)
|
||||||
&& self
|
&& self
|
||||||
.dialect
|
.dialect
|
||||||
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
|
.peek_nested_delimited_identifier_quotes(chars.peekable.clone())
|
||||||
|
.is_some() =>
|
||||||
{
|
{
|
||||||
let error_loc = chars.location();
|
let Some((quote_start, nested_quote_start)) = self
|
||||||
chars.next(); // consume the opening quote
|
.dialect
|
||||||
let quote_end = Word::matching_end_quote(quote_start);
|
.peek_nested_delimited_identifier_quotes(chars.peekable.clone())
|
||||||
let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
|
else {
|
||||||
|
return self.tokenizer_error(
|
||||||
|
chars.location(),
|
||||||
|
format!("Expected nested delimiter '{quote_start}' before EOF."),
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
if last_char == Some(quote_end) {
|
let Some(nested_quote_start) = nested_quote_start else {
|
||||||
Ok(Some(Token::make_word(&s, Some(quote_start))))
|
let word = self.tokenize_quoted_identifier(quote_start, chars)?;
|
||||||
} else {
|
return Ok(Some(Token::make_word(&word, Some(quote_start))));
|
||||||
self.tokenizer_error(
|
};
|
||||||
|
|
||||||
|
let mut word = vec![];
|
||||||
|
let quote_end = Word::matching_end_quote(quote_start);
|
||||||
|
let nested_quote_end = Word::matching_end_quote(nested_quote_start);
|
||||||
|
let error_loc = chars.location();
|
||||||
|
|
||||||
|
chars.next(); // skip the first delimiter
|
||||||
|
peeking_take_while(chars, |ch| ch.is_whitespace());
|
||||||
|
if chars.peek() != Some(&nested_quote_start) {
|
||||||
|
return self.tokenizer_error(
|
||||||
|
error_loc,
|
||||||
|
format!("Expected nested delimiter '{nested_quote_start}' before EOF."),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
word.push(nested_quote_start.into());
|
||||||
|
word.push(self.tokenize_quoted_identifier(nested_quote_end, chars)?);
|
||||||
|
word.push(nested_quote_end.into());
|
||||||
|
peeking_take_while(chars, |ch| ch.is_whitespace());
|
||||||
|
if chars.peek() != Some("e_end) {
|
||||||
|
return self.tokenizer_error(
|
||||||
error_loc,
|
error_loc,
|
||||||
format!("Expected close delimiter '{quote_end}' before EOF."),
|
format!("Expected close delimiter '{quote_end}' before EOF."),
|
||||||
)
|
);
|
||||||
}
|
}
|
||||||
|
chars.next(); // skip close delimiter
|
||||||
|
|
||||||
|
Ok(Some(Token::make_word(&word.concat(), Some(quote_start))))
|
||||||
}
|
}
|
||||||
// numbers and period
|
// numbers and period
|
||||||
'0'..='9' | '.' => {
|
'0'..='9' | '.' => {
|
||||||
|
@ -1597,6 +1633,27 @@ impl<'a> Tokenizer<'a> {
|
||||||
s
|
s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Read a quoted identifier
|
||||||
|
fn tokenize_quoted_identifier(
|
||||||
|
&self,
|
||||||
|
quote_start: char,
|
||||||
|
chars: &mut State,
|
||||||
|
) -> Result<String, TokenizerError> {
|
||||||
|
let error_loc = chars.location();
|
||||||
|
chars.next(); // consume the opening quote
|
||||||
|
let quote_end = Word::matching_end_quote(quote_start);
|
||||||
|
let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
|
||||||
|
|
||||||
|
if last_char == Some(quote_end) {
|
||||||
|
Ok(s)
|
||||||
|
} else {
|
||||||
|
self.tokenizer_error(
|
||||||
|
error_loc,
|
||||||
|
format!("Expected close delimiter '{quote_end}' before EOF."),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Read a single quoted string, starting with the opening quote.
|
/// Read a single quoted string, starting with the opening quote.
|
||||||
fn tokenize_escaped_single_quoted_string(
|
fn tokenize_escaped_single_quoted_string(
|
||||||
&self,
|
&self,
|
||||||
|
|
|
@ -157,6 +157,8 @@ fn parse_delimited_identifiers() {
|
||||||
}
|
}
|
||||||
|
|
||||||
redshift().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#);
|
redshift().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#);
|
||||||
|
// An alias starting with a number
|
||||||
|
redshift().verified_stmt(r#"CREATE TABLE "foo" ("1" INT)"#);
|
||||||
redshift().verified_stmt(r#"ALTER TABLE foo ADD CONSTRAINT "bar" PRIMARY KEY (baz)"#);
|
redshift().verified_stmt(r#"ALTER TABLE foo ADD CONSTRAINT "bar" PRIMARY KEY (baz)"#);
|
||||||
//TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#);
|
//TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#);
|
||||||
}
|
}
|
||||||
|
@ -203,7 +205,7 @@ fn test_redshift_json_path() {
|
||||||
path: JsonPath {
|
path: JsonPath {
|
||||||
path: vec![
|
path: vec![
|
||||||
JsonPathElem::Bracket {
|
JsonPathElem::Bracket {
|
||||||
key: Expr::Value(Value::Number("0".parse().unwrap(), false))
|
key: Expr::Value(number("0"))
|
||||||
},
|
},
|
||||||
JsonPathElem::Dot {
|
JsonPathElem::Dot {
|
||||||
key: "o_orderkey".to_string(),
|
key: "o_orderkey".to_string(),
|
||||||
|
@ -226,7 +228,7 @@ fn test_redshift_json_path() {
|
||||||
path: JsonPath {
|
path: JsonPath {
|
||||||
path: vec![
|
path: vec![
|
||||||
JsonPathElem::Bracket {
|
JsonPathElem::Bracket {
|
||||||
key: Expr::Value(Value::Number("0".parse().unwrap(), false))
|
key: Expr::Value(number("0"))
|
||||||
},
|
},
|
||||||
JsonPathElem::Bracket {
|
JsonPathElem::Bracket {
|
||||||
key: Expr::Value(Value::SingleQuotedString("id".to_owned()))
|
key: Expr::Value(Value::SingleQuotedString("id".to_owned()))
|
||||||
|
@ -250,7 +252,7 @@ fn test_redshift_json_path() {
|
||||||
path: JsonPath {
|
path: JsonPath {
|
||||||
path: vec![
|
path: vec![
|
||||||
JsonPathElem::Bracket {
|
JsonPathElem::Bracket {
|
||||||
key: Expr::Value(Value::Number("0".parse().unwrap(), false))
|
key: Expr::Value(number("0"))
|
||||||
},
|
},
|
||||||
JsonPathElem::Bracket {
|
JsonPathElem::Bracket {
|
||||||
key: Expr::Value(Value::SingleQuotedString("id".to_owned()))
|
key: Expr::Value(Value::SingleQuotedString("id".to_owned()))
|
||||||
|
@ -260,6 +262,31 @@ fn test_redshift_json_path() {
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let sql = r#"SELECT db1.sc1.tbl1.col1[0]."id" FROM customer_orders_lineitem"#;
|
||||||
|
let select = dialects.verified_only_select(sql);
|
||||||
|
assert_eq!(
|
||||||
|
&Expr::JsonAccess {
|
||||||
|
value: Box::new(Expr::CompoundIdentifier(vec![
|
||||||
|
Ident::new("db1"),
|
||||||
|
Ident::new("sc1"),
|
||||||
|
Ident::new("tbl1"),
|
||||||
|
Ident::new("col1")
|
||||||
|
])),
|
||||||
|
path: JsonPath {
|
||||||
|
path: vec![
|
||||||
|
JsonPathElem::Bracket {
|
||||||
|
key: Expr::Value(number("0"))
|
||||||
|
},
|
||||||
|
JsonPathElem::Dot {
|
||||||
|
key: "id".to_string(),
|
||||||
|
quoted: true,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
expr_from_projection(only(&select.projection))
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -276,7 +303,7 @@ fn test_parse_json_path_from() {
|
||||||
&Some(JsonPath {
|
&Some(JsonPath {
|
||||||
path: vec![
|
path: vec![
|
||||||
JsonPathElem::Bracket {
|
JsonPathElem::Bracket {
|
||||||
key: Expr::Value(Value::Number("0".parse().unwrap(), false))
|
key: Expr::Value(number("0"))
|
||||||
},
|
},
|
||||||
JsonPathElem::Dot {
|
JsonPathElem::Dot {
|
||||||
key: "a".to_string(),
|
key: "a".to_string(),
|
||||||
|
@ -300,7 +327,7 @@ fn test_parse_json_path_from() {
|
||||||
&Some(JsonPath {
|
&Some(JsonPath {
|
||||||
path: vec![
|
path: vec![
|
||||||
JsonPathElem::Bracket {
|
JsonPathElem::Bracket {
|
||||||
key: Expr::Value(Value::Number("0".parse().unwrap(), false))
|
key: Expr::Value(number("0"))
|
||||||
},
|
},
|
||||||
JsonPathElem::Dot {
|
JsonPathElem::Dot {
|
||||||
key: "a".to_string(),
|
key: "a".to_string(),
|
||||||
|
@ -334,3 +361,24 @@ fn test_parse_json_path_from() {
|
||||||
_ => panic!(),
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parse_select_numbered_columns() {
|
||||||
|
// An alias starting with a number
|
||||||
|
redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#);
|
||||||
|
redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1abc" FROM a"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parse_nested_quoted_identifier() {
|
||||||
|
redshift().verified_stmt(r#"SELECT 1 AS ["1"] FROM a"#);
|
||||||
|
redshift().verified_stmt(r#"SELECT 1 AS ["[="] FROM a"#);
|
||||||
|
redshift().verified_stmt(r#"SELECT 1 AS ["=]"] FROM a"#);
|
||||||
|
redshift().verified_stmt(r#"SELECT 1 AS ["a[b]"] FROM a"#);
|
||||||
|
// trim spaces
|
||||||
|
redshift().one_statement_parses_to(r#"SELECT 1 AS [ " 1 " ]"#, r#"SELECT 1 AS [" 1 "]"#);
|
||||||
|
// invalid query
|
||||||
|
assert!(redshift()
|
||||||
|
.parse_sql_statements(r#"SELECT 1 AS ["1]"#)
|
||||||
|
.is_err());
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue