Add Redshift dialect, handle square brackets properly (#471)

* Redshift square bracket handling

We need to detect `[` or `"` as Redshift quotes around an identifier and, at the same time,
avoid treating JSON paths as identifiers

* RedshiftSqlDialect documentation update

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

* Renamed _chars to chars

* Fixed warnings

* Missing license

Co-authored-by: Maciej Skrzypkowski <maciej.skrzypkowski@satoricyber.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
This commit is contained in:
Maciej Skrzypkowski 2022-05-04 17:11:00 +02:00 committed by GitHub
parent a9d7f7af1f
commit 7fc6361fe8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 173 additions and 3 deletions

View file

@ -43,6 +43,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname]
"--mysql" => Box::new(MySqlDialect {}),
"--snowflake" => Box::new(SnowflakeDialect {}),
"--hive" => Box::new(HiveDialect {}),
"--redshift" => Box::new(RedshiftSqlDialect {}),
"--generic" | "" => Box::new(GenericDialect {}),
s => panic!("Unexpected parameter: {}", s),
};

View file

@ -14,7 +14,7 @@
//! (commonly referred to as Data Definition Language, or DDL)
#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, string::String, string::ToString, vec::Vec};
use alloc::{boxed::Box, string::String, vec::Vec};
use core::fmt;
#[cfg(feature = "serde")]

View file

@ -17,11 +17,14 @@ mod hive;
mod mssql;
mod mysql;
mod postgresql;
mod redshift;
mod snowflake;
mod sqlite;
use core::any::{Any, TypeId};
use core::fmt::Debug;
use core::iter::Peekable;
use core::str::Chars;
pub use self::ansi::AnsiDialect;
pub use self::clickhouse::ClickHouseDialect;
@ -30,6 +33,7 @@ pub use self::hive::HiveDialect;
pub use self::mssql::MsSqlDialect;
pub use self::mysql::MySqlDialect;
pub use self::postgresql::PostgreSqlDialect;
pub use self::redshift::RedshiftSqlDialect;
pub use self::snowflake::SnowflakeDialect;
pub use self::sqlite::SQLiteDialect;
pub use crate::keywords;
@ -51,6 +55,10 @@ pub trait Dialect: Debug + Any {
fn is_delimited_identifier_start(&self, ch: char) -> bool {
    // By default only the double quote opens a delimited identifier;
    // dialects with other quote characters override this method.
    matches!(ch, '"')
}
/// Determine whether the characters following an opening quote form a proper
/// delimited identifier.
///
/// The tokenizer calls this only after `is_delimited_identifier_start` has
/// accepted the current character, and hands over a clone of its character
/// stream that is still positioned at that opening quote. Implementations may
/// therefore consume `chars` freely without disturbing tokenization.
///
/// The default implementation accepts everything, so the quote character
/// alone decides. Dialects in which a quote character is ambiguous can
/// override this to look ahead and return `false`, in which case the tokenizer
/// does not treat the character as the start of a delimited identifier.
fn is_proper_identifier_inside_quotes(&self, _chars: Peekable<Chars<'_>>) -> bool {
    true
}
/// Determine if a character is a valid start character for an unquoted identifier
fn is_identifier_start(&self, ch: char) -> bool;
/// Determine if a character is a valid unquoted identifier character

53
src/dialect/redshift.rs Normal file
View file

@ -0,0 +1,53 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::dialect::Dialect;
use core::iter::Peekable;
use core::str::Chars;
use super::PostgreSqlDialect;
/// A [`Dialect`] for Redshift SQL.
///
/// In most cases the Redshift dialect is identical to [`PostgreSqlDialect`].
///
/// Notable differences:
/// 1. Redshift treats brackets `[` and `]` differently. For example, given
///    `SELECT a[1][2] FROM b`, the Postgres dialect parses `a[1][2]` as an
///    array, while the Redshift dialect parses it as a JSON path.
#[derive(Debug)]
pub struct RedshiftSqlDialect {}
impl Dialect for RedshiftSqlDialect {
    /// Redshift opens a delimited identifier with either `"` or `[`.
    fn is_delimited_identifier_start(&self, ch: char) -> bool {
        matches!(ch, '"' | '[')
    }

    /// Determine if quoted characters are proper for identifier.
    ///
    /// `chars` is positioned at the opening quote character. The look-ahead is
    /// needed to distinguish square brackets used as identifier quotes from
    /// square brackets used in a JSON path: if the first non-whitespace
    /// character after `[` can start an identifier, we assume a quoted
    /// identifier; otherwise we assume a JSON path.
    ///
    /// The check is applied to `[` only. A double quote always opens a
    /// delimited identifier, whatever its first character is, so it is never
    /// subjected to the JSON-path heuristic.
    fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable<Chars<'_>>) -> bool {
        match chars.next() {
            // `[` is ambiguous: inspect the first non-whitespace character
            // that follows it. Nothing after the bracket means no identifier.
            Some('[') => chars
                .find(|ch| !ch.is_whitespace())
                .map_or(false, |ch| self.is_identifier_start(ch)),
            // Any other opening quote (i.e. `"`) is unambiguous.
            Some(_) => true,
            // Empty input: there is no quote, hence no identifier.
            None => false,
        }
    }

    /// Unquoted identifiers start with the same characters as in PostgreSQL.
    fn is_identifier_start(&self, ch: char) -> bool {
        PostgreSqlDialect {}.is_identifier_start(ch)
    }

    /// Unquoted identifiers continue with the same characters as in PostgreSQL.
    fn is_identifier_part(&self, ch: char) -> bool {
        PostgreSqlDialect {}.is_identifier_part(ch)
    }
}

View file

@ -141,6 +141,7 @@ pub fn all_dialects() -> TestedDialects {
Box::new(AnsiDialect {}),
Box::new(SnowflakeDialect {}),
Box::new(HiveDialect {}),
Box::new(RedshiftSqlDialect {}),
],
}
}

View file

@ -431,7 +431,12 @@ impl<'a> Tokenizer<'a> {
Ok(Some(Token::SingleQuotedString(s)))
}
// delimited (quoted) identifier
quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
quote_start
if self.dialect.is_delimited_identifier_start(ch)
&& self
.dialect
.is_proper_identifier_inside_quotes(chars.clone()) =>
{
chars.next(); // consume the opening quote
let quote_end = Word::matching_end_quote(quote_start);
let (s, last_char) = parse_quoted_ident(chars, quote_end);

View file

@ -590,7 +590,7 @@ fn test_copy_to() {
#[test]
fn parse_copy_from() {
let sql = "COPY table (a, b) FROM 'file.csv' WITH
let sql = "COPY table (a, b) FROM 'file.csv' WITH
(
FORMAT CSV,
FREEZE,

102
tests/sqlparser_redshift.rs Normal file
View file

@ -0,0 +1,102 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[macro_use]
mod test_utils;
use test_utils::*;
use sqlparser::ast::*;
use sqlparser::dialect::RedshiftSqlDialect;
/// Square brackets quote the column, schema and table names, and the quote
/// style is recorded on every resulting identifier.
#[test]
fn test_square_brackets_over_db_schema_table_name() {
    // Builds the identifier expected for a `[name]` token.
    let bracketed = |name: &str| Ident {
        value: name.to_string(),
        quote_style: Some('['),
    };

    let select = redshift().verified_only_select("SELECT [col1] FROM [test_schema].[test_table]");

    let expected_projection = SelectItem::UnnamedExpr(Expr::Identifier(bracketed("col1")));
    assert_eq!(select.projection[0], expected_projection);

    let expected_from = TableWithJoins {
        relation: TableFactor::Table {
            name: ObjectName(vec![bracketed("test_schema"), bracketed("test_table")]),
            alias: None,
            args: vec![],
            with_hints: vec![],
        },
        joins: vec![],
    };
    assert_eq!(select.from[0], expected_from);
}
/// Bracket-quoted identifiers padded with whitespace inside the brackets must
/// still parse as a single statement.
#[test]
fn brackets_over_db_schema_table_name_with_whites_paces() {
    // `expect` surfaces the parser error on failure instead of hiding it
    // behind an uninformative `unreachable!()` panic.
    let statements = redshift()
        .parse_sql_statements("SELECT [ col1 ] FROM [ test_schema].[ test_table]")
        .expect("bracket-quoted identifiers with inner whitespace should parse");
    assert_eq!(statements.len(), 1);
}
/// Double quotes quote the column, schema and table names, and the quote
/// style is recorded on every resulting identifier.
#[test]
fn test_double_quotes_over_db_schema_table_name() {
    // Builds the identifier expected for a `"name"` token.
    let double_quoted = |name: &str| Ident {
        value: name.to_string(),
        quote_style: Some('"'),
    };

    let select =
        redshift().verified_only_select("SELECT \"col1\" FROM \"test_schema\".\"test_table\"");

    let expected_projection = SelectItem::UnnamedExpr(Expr::Identifier(double_quoted("col1")));
    assert_eq!(select.projection[0], expected_projection);

    let expected_from = TableWithJoins {
        relation: TableFactor::Table {
            name: ObjectName(vec![
                double_quoted("test_schema"),
                double_quoted("test_table"),
            ]),
            alias: None,
            args: vec![],
            with_hints: vec![],
        },
        joins: vec![],
    };
    assert_eq!(select.from[0], expected_from);
}
fn redshift() -> TestedDialects {
TestedDialects {
dialects: vec![Box::new(RedshiftSqlDialect {})],
}
}