Add BigQuery dialect (#490)

This commit is contained in:
Yoshiyuki Komazaki 2022-05-10 23:25:27 +09:00 committed by GitHub
parent 484a7b6da4
commit 97a148aee4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 127 additions and 1 deletions

View file

@ -38,6 +38,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname]
let dialect: Box<dyn Dialect> = match std::env::args().nth(2).unwrap_or_default().as_ref() {
"--ansi" => Box::new(AnsiDialect {}),
"--bigquery" => Box::new(BigQueryDialect {}),
"--postgres" => Box::new(PostgreSqlDialect {}),
"--ms" => Box::new(MsSqlDialect {}),
"--mysql" => Box::new(MySqlDialect {}),

35
src/dialect/bigquery.rs Normal file
View file

@ -0,0 +1,35 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::dialect::Dialect;
/// A [`Dialect`] implementation describing BigQuery identifier lexing rules.
#[derive(Debug, Default)]
pub struct BigQueryDialect;

impl Dialect for BigQueryDialect {
    // See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers

    /// Returns `true` only for a backtick, the character that opens a quoted identifier.
    fn is_delimited_identifier_start(&self, ch: char) -> bool {
        matches!(ch, '`')
    }

    /// Returns `true` when `ch` is an ASCII letter or an underscore.
    fn is_identifier_start(&self, ch: char) -> bool {
        ch == '_' || ch.is_ascii_alphabetic()
    }

    /// Returns `true` when `ch` is an ASCII letter, an ASCII digit, an underscore, or a dash.
    fn is_identifier_part(&self, ch: char) -> bool {
        match ch {
            '_' | '-' => true,
            other => other.is_ascii_alphanumeric(),
        }
    }
}

View file

@ -11,6 +11,7 @@
// limitations under the License.
mod ansi;
mod bigquery;
mod clickhouse;
mod generic;
mod hive;
@ -27,6 +28,7 @@ use core::iter::Peekable;
use core::str::Chars;
pub use self::ansi::AnsiDialect;
pub use self::bigquery::BigQueryDialect;
pub use self::clickhouse::ClickHouseDialect;
pub use self::generic::GenericDialect;
pub use self::hive::HiveDialect;

View file

@ -0,0 +1,86 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[macro_use]
mod test_utils;
use test_utils::*;
use sqlparser::ast::*;
use sqlparser::dialect::BigQueryDialect;
#[test]
fn parse_table_identifiers() {
    /// Builds an identifier quoted with backticks.
    fn quoted(value: &str) -> Ident {
        Ident::with_quote('`', value)
    }

    /// Builds an unquoted identifier.
    fn plain(value: &str) -> Ident {
        Ident::new(value)
    }

    /// Feeds `SELECT 1 FROM <table>` to `verified_only_select` and asserts that the
    /// resulting FROM clause is exactly one table, without alias, args, hints or joins,
    /// whose object name is made of `parts`.
    fn assert_table_name(table: &str, parts: Vec<Ident>) {
        let query = format!("SELECT 1 FROM {}", table);
        let parsed = bigquery().verified_only_select(&query);
        let wanted = TableWithJoins {
            relation: TableFactor::Table {
                name: ObjectName(parts),
                alias: None,
                args: vec![],
                with_hints: vec![],
            },
            joins: vec![],
        };
        assert_eq!(parsed.from, vec![wanted]);
    }

    /// Asserts that `SELECT 1 FROM <table>` is rejected by `parse_sql_statements`.
    fn assert_table_name_rejected(table: &str) {
        let query = format!("SELECT 1 FROM {}", table);
        let outcome = bigquery().parse_sql_statements(&query);
        assert!(outcome.is_err());
    }

    // Dashes are accepted inside an unquoted identifier.
    assert_table_name("da-sh-es", vec![plain("da-sh-es")]);

    // Backtick-quoted identifiers may carry spaces and punctuation.
    assert_table_name("`spa ce`", vec![quoted("spa ce")]);
    assert_table_name("`!@#$%^&*()-=_+`", vec![quoted("!@#$%^&*()-=_+")]);

    // A leading underscore is fine; a leading digit is only fine when quoted.
    assert_table_name("_5abc.dataField", vec![plain("_5abc"), plain("dataField")]);
    assert_table_name("`5abc`.dataField", vec![quoted("5abc"), plain("dataField")]);
    assert_table_name_rejected("5abc.dataField");

    // Digits after the first character are accepted; `!` is not.
    assert_table_name("abc5.dataField", vec![plain("abc5"), plain("dataField")]);
    assert_table_name_rejected("abc5!.dataField");

    // Keyword-named path components.
    assert_table_name("`GROUP`.dataField", vec![quoted("GROUP"), plain("dataField")]);
    // TODO: an unquoted keyword as the first path component should be rejected;
    // enable this case once it is:
    // assert_table_name_rejected("GROUP.dataField");
    assert_table_name("abc5.GROUP", vec![plain("abc5"), plain("GROUP")]);
}
fn bigquery() -> TestedDialects {
TestedDialects {
dialects: vec![Box::new(BigQueryDialect {})],
}
}

View file

@ -23,7 +23,8 @@ mod test_utils;
use matches::assert_matches;
use sqlparser::ast::*;
use sqlparser::dialect::{
AnsiDialect, GenericDialect, MsSqlDialect, PostgreSqlDialect, SQLiteDialect, SnowflakeDialect,
AnsiDialect, BigQueryDialect, GenericDialect, MsSqlDialect, PostgreSqlDialect, SQLiteDialect,
SnowflakeDialect,
};
use sqlparser::keywords::ALL_KEYWORDS;
use sqlparser::parser::{Parser, ParserError};
@ -4556,6 +4557,7 @@ fn test_placeholder() {
Box::new(PostgreSqlDialect {}),
Box::new(MsSqlDialect {}),
Box::new(AnsiDialect {}),
Box::new(BigQueryDialect {}),
Box::new(SnowflakeDialect {}),
// Note: `$` is the starting word for the HiveDialect identifier
// Box::new(sqlparser::dialect::HiveDialect {}),