mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
Add Redshift dialect, handle square brackets properly (#471)
* Redshift square bracket handling We need to detect `[` or `"` as Redshift quotes around identifiers and at the same time avoid treating JSON paths as identifiers * RedshiftSqlDialect documentation update Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org> * Renamed _chars to chars * Fixed warnings * Missing license Co-authored-by: Maciej Skrzypkowski <maciej.skrzypkowski@satoricyber.com> Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
This commit is contained in:
parent
a9d7f7af1f
commit
7fc6361fe8
8 changed files with 173 additions and 3 deletions
|
@ -43,6 +43,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname]
|
|||
"--mysql" => Box::new(MySqlDialect {}),
|
||||
"--snowflake" => Box::new(SnowflakeDialect {}),
|
||||
"--hive" => Box::new(HiveDialect {}),
|
||||
"--redshift" => Box::new(RedshiftSqlDialect {}),
|
||||
"--generic" | "" => Box::new(GenericDialect {}),
|
||||
s => panic!("Unexpected parameter: {}", s),
|
||||
};
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
//! (commonly referred to as Data Definition Language, or DDL)
|
||||
|
||||
#[cfg(not(feature = "std"))]
|
||||
use alloc::{boxed::Box, string::String, string::ToString, vec::Vec};
|
||||
use alloc::{boxed::Box, string::String, vec::Vec};
|
||||
use core::fmt;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
|
|
|
@ -17,11 +17,14 @@ mod hive;
|
|||
mod mssql;
|
||||
mod mysql;
|
||||
mod postgresql;
|
||||
mod redshift;
|
||||
mod snowflake;
|
||||
mod sqlite;
|
||||
|
||||
use core::any::{Any, TypeId};
|
||||
use core::fmt::Debug;
|
||||
use core::iter::Peekable;
|
||||
use core::str::Chars;
|
||||
|
||||
pub use self::ansi::AnsiDialect;
|
||||
pub use self::clickhouse::ClickHouseDialect;
|
||||
|
@ -30,6 +33,7 @@ pub use self::hive::HiveDialect;
|
|||
pub use self::mssql::MsSqlDialect;
|
||||
pub use self::mysql::MySqlDialect;
|
||||
pub use self::postgresql::PostgreSqlDialect;
|
||||
pub use self::redshift::RedshiftSqlDialect;
|
||||
pub use self::snowflake::SnowflakeDialect;
|
||||
pub use self::sqlite::SQLiteDialect;
|
||||
pub use crate::keywords;
|
||||
|
@ -51,6 +55,10 @@ pub trait Dialect: Debug + Any {
|
|||
fn is_delimited_identifier_start(&self, ch: char) -> bool {
|
||||
ch == '"'
|
||||
}
|
||||
/// Determine if the characters inside the quotes form a proper identifier
|
||||
fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable<Chars<'_>>) -> bool {
|
||||
true
|
||||
}
|
||||
/// Determine if a character is a valid start character for an unquoted identifier
|
||||
fn is_identifier_start(&self, ch: char) -> bool;
|
||||
/// Determine if a character is a valid unquoted identifier character
|
||||
|
|
53
src/dialect/redshift.rs
Normal file
53
src/dialect/redshift.rs
Normal file
|
@ -0,0 +1,53 @@
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::dialect::Dialect;
|
||||
use core::iter::Peekable;
|
||||
use core::str::Chars;
|
||||
|
||||
use super::PostgreSqlDialect;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RedshiftSqlDialect {}
|
||||
|
||||
// In most cases the Redshift dialect is identical to [`PostgreSqlDialect`].
|
||||
//
|
||||
// Notable differences:
|
||||
// 1. Redshift treats brackets `[` and `]` differently. For example, given the SQL `SELECT a[1][2] FROM b`,
|
||||
// in the Postgres dialect the query will be parsed as an array, while in the Redshift dialect it will
|
||||
// be parsed as a JSON path.
|
||||
impl Dialect for RedshiftSqlDialect {
|
||||
fn is_delimited_identifier_start(&self, ch: char) -> bool {
|
||||
ch == '"' || ch == '['
|
||||
}
|
||||
|
||||
/// Determine if the characters inside the quotes form a proper identifier.
|
||||
/// This is needed to distinguish square brackets used as quotes from
|
||||
/// square brackets used as a JSON path. If there is an identifier, we assume
|
||||
/// it is not a JSON path.
|
||||
fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable<Chars<'_>>) -> bool {
|
||||
chars.next();
|
||||
let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable();
|
||||
if let Some(&ch) = not_white_chars.peek() {
|
||||
return self.is_identifier_start(ch);
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn is_identifier_start(&self, ch: char) -> bool {
|
||||
PostgreSqlDialect {}.is_identifier_start(ch)
|
||||
}
|
||||
|
||||
fn is_identifier_part(&self, ch: char) -> bool {
|
||||
PostgreSqlDialect {}.is_identifier_part(ch)
|
||||
}
|
||||
}
|
|
@ -141,6 +141,7 @@ pub fn all_dialects() -> TestedDialects {
|
|||
Box::new(AnsiDialect {}),
|
||||
Box::new(SnowflakeDialect {}),
|
||||
Box::new(HiveDialect {}),
|
||||
Box::new(RedshiftSqlDialect {}),
|
||||
],
|
||||
}
|
||||
}
|
||||
|
|
|
@ -431,7 +431,12 @@ impl<'a> Tokenizer<'a> {
|
|||
Ok(Some(Token::SingleQuotedString(s)))
|
||||
}
|
||||
// delimited (quoted) identifier
|
||||
quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
|
||||
quote_start
|
||||
if self.dialect.is_delimited_identifier_start(ch)
|
||||
&& self
|
||||
.dialect
|
||||
.is_proper_identifier_inside_quotes(chars.clone()) =>
|
||||
{
|
||||
chars.next(); // consume the opening quote
|
||||
let quote_end = Word::matching_end_quote(quote_start);
|
||||
let (s, last_char) = parse_quoted_ident(chars, quote_end);
|
||||
|
|
|
@ -590,7 +590,7 @@ fn test_copy_to() {
|
|||
|
||||
#[test]
|
||||
fn parse_copy_from() {
|
||||
let sql = "COPY table (a, b) FROM 'file.csv' WITH
|
||||
let sql = "COPY table (a, b) FROM 'file.csv' WITH
|
||||
(
|
||||
FORMAT CSV,
|
||||
FREEZE,
|
||||
|
|
102
tests/sqlparser_redshift.rs
Normal file
102
tests/sqlparser_redshift.rs
Normal file
|
@ -0,0 +1,102 @@
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#[macro_use]
|
||||
mod test_utils;
|
||||
|
||||
use test_utils::*;
|
||||
|
||||
use sqlparser::ast::*;
|
||||
use sqlparser::dialect::RedshiftSqlDialect;
|
||||
|
||||
#[test]
|
||||
fn test_square_brackets_over_db_schema_table_name() {
|
||||
let select = redshift().verified_only_select("SELECT [col1] FROM [test_schema].[test_table]");
|
||||
assert_eq!(
|
||||
select.projection[0],
|
||||
SelectItem::UnnamedExpr(Expr::Identifier(Ident {
|
||||
value: "col1".to_string(),
|
||||
quote_style: Some('[')
|
||||
})),
|
||||
);
|
||||
assert_eq!(
|
||||
select.from[0],
|
||||
TableWithJoins {
|
||||
relation: TableFactor::Table {
|
||||
name: ObjectName(vec![
|
||||
Ident {
|
||||
value: "test_schema".to_string(),
|
||||
quote_style: Some('[')
|
||||
},
|
||||
Ident {
|
||||
value: "test_table".to_string(),
|
||||
quote_style: Some('[')
|
||||
}
|
||||
]),
|
||||
alias: None,
|
||||
args: vec![],
|
||||
with_hints: vec![],
|
||||
},
|
||||
joins: vec![],
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn brackets_over_db_schema_table_name_with_whites_paces() {
|
||||
match redshift().parse_sql_statements("SELECT [ col1 ] FROM [ test_schema].[ test_table]") {
|
||||
Ok(statements) => {
|
||||
assert_eq!(statements.len(), 1);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_double_quotes_over_db_schema_table_name() {
|
||||
let select =
|
||||
redshift().verified_only_select("SELECT \"col1\" FROM \"test_schema\".\"test_table\"");
|
||||
assert_eq!(
|
||||
select.projection[0],
|
||||
SelectItem::UnnamedExpr(Expr::Identifier(Ident {
|
||||
value: "col1".to_string(),
|
||||
quote_style: Some('"')
|
||||
})),
|
||||
);
|
||||
assert_eq!(
|
||||
select.from[0],
|
||||
TableWithJoins {
|
||||
relation: TableFactor::Table {
|
||||
name: ObjectName(vec![
|
||||
Ident {
|
||||
value: "test_schema".to_string(),
|
||||
quote_style: Some('"')
|
||||
},
|
||||
Ident {
|
||||
value: "test_table".to_string(),
|
||||
quote_style: Some('"')
|
||||
}
|
||||
]),
|
||||
alias: None,
|
||||
args: vec![],
|
||||
with_hints: vec![],
|
||||
},
|
||||
joins: vec![],
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
fn redshift() -> TestedDialects {
|
||||
TestedDialects {
|
||||
dialects: vec![Box::new(RedshiftSqlDialect {})],
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue