Implement Hive QL Parsing (#235)

This commit is contained in:
Stephen Carman 2021-02-04 14:53:20 -05:00 committed by GitHub
parent 17f8eb9c5a
commit 8a214f9919
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 1388 additions and 176 deletions

View file

@ -40,6 +40,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname]
"--postgres" => Box::new(PostgreSqlDialect {}),
"--ms" => Box::new(MsSqlDialect {}),
"--snowflake" => Box::new(SnowflakeDialect {}),
"--hive" => Box::new(HiveDialect {}),
"--generic" | "" => Box::new(GenericDialect {}),
s => panic!("Unexpected parameter: {}", s),
};

View file

@ -61,6 +61,8 @@ pub enum DataType {
Regclass,
/// Text
Text,
/// String
String,
/// Bytea
Bytea,
/// Custom type such as enums
@ -101,6 +103,7 @@ impl fmt::Display for DataType {
DataType::Interval => write!(f, "INTERVAL"),
DataType::Regclass => write!(f, "REGCLASS"),
DataType::Text => write!(f, "TEXT"),
DataType::String => write!(f, "STRING"),
DataType::Bytea => write!(f, "BYTEA"),
DataType::Array(ty) => write!(f, "{}[]", ty),
DataType::Custom(ty) => write!(f, "{}", ty),

View file

@ -35,22 +35,54 @@ pub enum AlterTableOperation {
if_exists: bool,
cascade: bool,
},
/// `RENAME TO PARTITION (partition=val)`
RenamePartitions {
old_partitions: Vec<Expr>,
new_partitions: Vec<Expr>,
},
/// Add Partitions
AddPartitions {
if_not_exists: bool,
new_partitions: Vec<Expr>,
},
DropPartitions {
partitions: Vec<Expr>,
if_exists: bool,
},
/// `RENAME [ COLUMN ] <old_column_name> TO <new_column_name>`
RenameColumn {
old_column_name: Ident,
new_column_name: Ident,
},
/// `RENAME TO <table_name>`
RenameTable { table_name: Ident },
RenameTable { table_name: ObjectName },
}
impl fmt::Display for AlterTableOperation {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
AlterTableOperation::AddPartitions {
if_not_exists,
new_partitions,
} => write!(
f,
"ADD{ine} PARTITION ({})",
display_comma_separated(new_partitions),
ine = if *if_not_exists { " IF NOT EXISTS" } else { "" }
),
AlterTableOperation::AddConstraint(c) => write!(f, "ADD {}", c),
AlterTableOperation::AddColumn { column_def } => {
write!(f, "ADD COLUMN {}", column_def.to_string())
}
AlterTableOperation::DropPartitions {
partitions,
if_exists,
} => write!(
f,
"DROP{ie} PARTITION ({})",
display_comma_separated(partitions),
ie = if *if_exists { " IF EXISTS" } else { "" }
),
AlterTableOperation::DropConstraint { name } => write!(f, "DROP CONSTRAINT {}", name),
AlterTableOperation::DropColumn {
column_name,
@ -63,6 +95,15 @@ impl fmt::Display for AlterTableOperation {
column_name,
if *cascade { " CASCADE" } else { "" }
),
AlterTableOperation::RenamePartitions {
old_partitions,
new_partitions,
} => write!(
f,
"PARTITION ({}) RENAME TO PARTITION ({})",
display_comma_separated(old_partitions),
display_comma_separated(new_partitions)
),
AlterTableOperation::RenameColumn {
old_column_name,
new_column_name,

View file

@ -29,8 +29,9 @@ pub use self::ddl::{
};
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
Cte, Fetch, Join, JoinConstraint, JoinOperator, Offset, OffsetRows, OrderByExpr, Query, Select,
SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, Values, With,
Cte, Fetch, Join, JoinConstraint, JoinOperator, LateralView, Offset, OffsetRows, OrderByExpr,
Query, Select, SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top,
Values, With,
};
pub use self::value::{DateTimeField, Value};
@ -191,7 +192,10 @@ pub enum Expr {
right: Box<Expr>,
},
/// Unary operation e.g. `NOT foo`
UnaryOp { op: UnaryOperator, expr: Box<Expr> },
UnaryOp {
op: UnaryOperator,
expr: Box<Expr>,
},
/// CAST an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))`
Cast {
expr: Box<Expr>,
@ -213,7 +217,14 @@ pub enum Expr {
/// A constant of form `<data_type> 'value'`.
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
/// as well as constants of other types (a non-standard PostgreSQL extension).
TypedString { data_type: DataType, value: String },
TypedString {
data_type: DataType,
value: String,
},
MapAccess {
column: Box<Expr>,
key: String,
},
/// Scalar function call e.g. `LEFT(foo, 5)`
Function(Function),
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
@ -241,6 +252,7 @@ impl fmt::Display for Expr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Expr::Identifier(s) => write!(f, "{}", s),
Expr::MapAccess { column, key } => write!(f, "{}[\"{}\"]", column, key),
Expr::Wildcard => f.write_str("*"),
Expr::QualifiedWildcard(q) => write!(f, "{}.*", display_separated(q, ".")),
Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")),
@ -426,11 +438,50 @@ impl fmt::Display for WindowFrameBound {
}
}
/// The optional `{ADD | DROP | SYNC} PARTITIONS` action that may trail a
/// Hive `MSCK [REPAIR] TABLE` statement.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum AddDropSync {
    ADD,
    DROP,
    SYNC,
}

impl fmt::Display for AddDropSync {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // All three variants share the " PARTITIONS" suffix; pick the verb
        // and emit the clause in one write.
        let verb = match self {
            AddDropSync::ADD => "ADD",
            AddDropSync::DROP => "DROP",
            AddDropSync::SYNC => "SYNC",
        };
        write!(f, "{} PARTITIONS", verb)
    }
}
/// A top-level statement (SELECT, INSERT, CREATE, etc.)
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Statement {
/// Analyze (Hive)
Analyze {
table_name: ObjectName,
partitions: Option<Vec<Expr>>,
for_columns: bool,
columns: Vec<Ident>,
cache_metadata: bool,
noscan: bool,
compute_statistics: bool,
},
/// Truncate (Hive)
Truncate {
table_name: ObjectName,
partitions: Option<Vec<Expr>>,
},
/// Msck (Hive)
Msck {
table_name: ObjectName,
repair: bool,
partition_action: Option<AddDropSync>,
},
/// SELECT
Query(Box<Query>),
/// INSERT
@ -439,8 +490,24 @@ pub enum Statement {
table_name: ObjectName,
/// COLUMNS
columns: Vec<Ident>,
/// Overwrite (Hive)
overwrite: bool,
/// A SQL query that specifies what to insert
source: Box<Query>,
/// partitioned insert (Hive)
partitioned: Option<Vec<Expr>>,
/// Columns defined after PARTITION
after_columns: Vec<Ident>,
/// whether the insert has the table keyword (Hive)
table: bool,
},
// TODO: Support ROW FORMAT
Directory {
overwrite: bool,
local: bool,
path: String,
file_format: Option<FileFormat>,
source: Box<Query>,
},
Copy {
/// TABLE
@ -479,6 +546,7 @@ pub enum Statement {
/// CREATE TABLE
CreateTable {
or_replace: bool,
temporary: bool,
external: bool,
if_not_exists: bool,
/// Table name
@ -486,11 +554,15 @@ pub enum Statement {
/// Optional schema
columns: Vec<ColumnDef>,
constraints: Vec<TableConstraint>,
hive_distribution: HiveDistributionStyle,
hive_formats: Option<HiveFormat>,
table_properties: Vec<SqlOption>,
with_options: Vec<SqlOption>,
file_format: Option<FileFormat>,
location: Option<String>,
query: Option<Box<Query>>,
without_rowid: bool,
like: Option<ObjectName>,
},
/// SQLite's `CREATE VIRTUAL TABLE .. USING <module_name> (<module_args>)`
CreateVirtualTable {
@ -525,6 +597,9 @@ pub enum Statement {
/// Whether `CASCADE` was specified. This will be `false` when
/// `RESTRICT` or no drop behavior at all was specified.
cascade: bool,
/// Hive allows you specify whether the table's stored data will be
/// deleted along with the dropped table
purge: bool,
},
/// SET <variable>
///
@ -533,8 +608,9 @@ pub enum Statement {
/// supported yet.
SetVariable {
local: bool,
hivevar: bool,
variable: Ident,
value: SetVariableValue,
value: Vec<SetVariableValue>,
},
/// SHOW <variable>
///
@ -562,6 +638,13 @@ pub enum Statement {
schema_name: ObjectName,
if_not_exists: bool,
},
/// CREATE DATABASE
CreateDatabase {
db_name: ObjectName,
if_not_exists: bool,
location: Option<String>,
managed_location: Option<String>,
},
/// `ASSERT <condition> [AS <message>]`
Assert {
condition: Expr,
@ -592,11 +675,6 @@ pub enum Statement {
/// A SQL query that specifies what to explain
statement: Box<Statement>,
},
/// ANALYZE
Analyze {
/// Name of table
table_name: ObjectName,
},
}
impl fmt::Display for Statement {
@ -622,17 +700,114 @@ impl fmt::Display for Statement {
write!(f, "{}", statement)
}
Statement::Analyze { table_name } => write!(f, "ANALYZE TABLE {}", table_name),
Statement::Query(s) => write!(f, "{}", s),
Statement::Insert {
table_name,
columns,
Statement::Directory {
overwrite,
local,
path,
file_format,
source,
} => {
write!(f, "INSERT INTO {} ", table_name)?;
write!(
f,
"INSERT{overwrite}{local} DIRECTORY '{path}'",
overwrite = if *overwrite { " OVERWRITE" } else { "" },
local = if *local { " LOCAL" } else { "" },
path = path
)?;
if let Some(ref ff) = file_format {
write!(f, " STORED AS {}", ff)?
}
write!(f, " {}", source)
}
Statement::Msck {
table_name,
repair,
partition_action,
} => {
write!(
f,
"MSCK {repair}TABLE {table}",
repair = if *repair { "REPAIR " } else { "" },
table = table_name
)?;
if let Some(pa) = partition_action {
write!(f, " {}", pa)?;
}
Ok(())
}
Statement::Truncate {
table_name,
partitions,
} => {
write!(f, "TRUNCATE TABLE {}", table_name)?;
if let Some(ref parts) = partitions {
if !parts.is_empty() {
write!(f, " PARTITION ({})", display_comma_separated(parts))?;
}
}
Ok(())
}
Statement::Analyze {
table_name,
partitions,
for_columns,
columns,
cache_metadata,
noscan,
compute_statistics,
} => {
write!(f, "ANALYZE TABLE {}", table_name)?;
if let Some(ref parts) = partitions {
if !parts.is_empty() {
write!(f, " PARTITION ({})", display_comma_separated(parts))?;
}
}
if *compute_statistics {
write!(f, " COMPUTE STATISTICS")?;
}
if *noscan {
write!(f, " NOSCAN")?;
}
if *cache_metadata {
write!(f, " CACHE METADATA")?;
}
if *for_columns {
write!(f, " FOR COLUMNS")?;
if !columns.is_empty() {
write!(f, " {}", display_comma_separated(columns))?;
}
}
Ok(())
}
Statement::Insert {
table_name,
overwrite,
partitioned,
columns,
after_columns,
source,
table,
} => {
write!(
f,
"INSERT {act}{tbl} {table_name} ",
table_name = table_name,
act = if *overwrite { "OVERWRITE" } else { "INTO" },
tbl = if *table { " TABLE" } else { "" }
)?;
if !columns.is_empty() {
write!(f, "({}) ", display_comma_separated(columns))?;
}
if let Some(ref parts) = partitioned {
if !parts.is_empty() {
write!(f, "PARTITION ({}) ", display_comma_separated(parts))?;
}
}
if !after_columns.is_empty() {
write!(f, "({}) ", display_comma_separated(after_columns))?;
}
write!(f, "{}", source)
}
Statement::Copy {
@ -684,6 +859,25 @@ impl fmt::Display for Statement {
}
Ok(())
}
Statement::CreateDatabase {
db_name,
if_not_exists,
location,
managed_location,
} => {
write!(f, "CREATE")?;
if *if_not_exists {
write!(f, " IF NOT EXISTS")?;
}
write!(f, " {}", db_name)?;
if let Some(l) = location {
write!(f, " LOCATION '{}'", l)?;
}
if let Some(ml) = managed_location {
write!(f, " MANAGEDLOCATION '{}'", ml)?;
}
Ok(())
}
Statement::CreateView {
name,
or_replace,
@ -711,14 +905,19 @@ impl fmt::Display for Statement {
name,
columns,
constraints,
table_properties,
with_options,
or_replace,
if_not_exists,
hive_distribution,
hive_formats,
external,
temporary,
file_format,
location,
query,
without_rowid,
like,
} => {
// We want to allow the following options
// Empty column list, allowed by PostgreSQL:
@ -729,10 +928,11 @@ impl fmt::Display for Statement {
// `CREATE TABLE t (a INT) AS SELECT a from t2`
write!(
f,
"CREATE {or_replace}{external}TABLE {if_not_exists}{name}",
"CREATE {or_replace}{external}{temporary}TABLE {if_not_exists}{name}",
or_replace = if *or_replace { "OR REPLACE " } else { "" },
external = if *external { "EXTERNAL " } else { "" },
if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" },
temporary = if *temporary { "TEMPORARY " } else { "" },
name = name,
)?;
if !columns.is_empty() || !constraints.is_empty() {
@ -741,7 +941,7 @@ impl fmt::Display for Statement {
write!(f, ", ")?;
}
write!(f, "{})", display_comma_separated(constraints))?;
} else if query.is_none() {
} else if query.is_none() && like.is_none() {
// PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens
write!(f, " ()")?;
}
@ -749,6 +949,79 @@ impl fmt::Display for Statement {
if *without_rowid {
write!(f, " WITHOUT ROWID")?;
}
// Only for Hive
if let Some(l) = like {
write!(f, " LIKE {}", l)?;
}
match hive_distribution {
HiveDistributionStyle::PARTITIONED { columns } => {
write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))?;
}
HiveDistributionStyle::CLUSTERED {
columns,
sorted_by,
num_buckets,
} => {
write!(f, " CLUSTERED BY ({})", display_comma_separated(&columns))?;
if !sorted_by.is_empty() {
write!(f, " SORTED BY ({})", display_comma_separated(&sorted_by))?;
}
if *num_buckets > 0 {
write!(f, " INTO {} BUCKETS", num_buckets)?;
}
}
HiveDistributionStyle::SKEWED {
columns,
on,
stored_as_directories,
} => {
write!(
f,
" SKEWED BY ({})) ON ({})",
display_comma_separated(&columns),
display_comma_separated(&on)
)?;
if *stored_as_directories {
write!(f, " STORED AS DIRECTORIES")?;
}
}
_ => (),
}
if let Some(HiveFormat {
row_format,
storage,
location,
}) = hive_formats
{
match row_format {
Some(HiveRowFormat::SERDE { class }) => {
write!(f, " ROW FORMAT SERDE '{}'", class)?
}
Some(HiveRowFormat::DELIMITED) => write!(f, " ROW FORMAT DELIMITED")?,
None => (),
}
match storage {
Some(HiveIOFormat::IOF {
input_format,
output_format,
}) => write!(
f,
" STORED AS INPUTFORMAT {} OUTPUTFORMAT {}",
input_format, output_format
)?,
Some(HiveIOFormat::FileFormat { format }) if !*external => {
write!(f, " STORED AS {}", format)?
}
_ => (),
}
if !*external {
if let Some(loc) = location {
write!(f, " LOCATION '{}'", loc)?;
}
}
}
if *external {
write!(
f,
@ -757,6 +1030,13 @@ impl fmt::Display for Statement {
location.as_ref().unwrap()
)?;
}
if !table_properties.is_empty() {
write!(
f,
" TBLPROPERTIES ({})",
display_comma_separated(table_properties)
)?;
}
if !with_options.is_empty() {
write!(f, " WITH ({})", display_comma_separated(with_options))?;
}
@ -806,25 +1086,34 @@ impl fmt::Display for Statement {
if_exists,
names,
cascade,
purge,
} => write!(
f,
"DROP {}{} {}{}",
"DROP {}{} {}{}{}",
object_type,
if *if_exists { " IF EXISTS" } else { "" },
display_comma_separated(names),
if *cascade { " CASCADE" } else { "" },
if *purge { " PURGE" } else { "" }
),
Statement::SetVariable {
local,
variable,
hivevar,
value,
} => write!(
f,
"SET{local} {variable} = {value}",
local = if *local { " LOCAL" } else { "" },
variable = variable,
value = value
),
} => {
f.write_str("SET ")?;
if *local {
f.write_str("LOCAL ")?;
}
write!(
f,
"{hivevar}{name} = {value}",
hivevar = if *hivevar { "HIVEVAR:" } else { "" },
name = variable,
value = display_comma_separated(value)
)
}
Statement::ShowVariable { variable } => write!(f, "SHOW {}", variable),
Statement::ShowColumns {
extended,
@ -1086,6 +1375,62 @@ impl fmt::Display for ObjectType {
}
}
/// Hive-specific physical data layout clause of `CREATE TABLE`
/// (`PARTITIONED BY`, `CLUSTERED BY`, or `SKEWED BY`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HiveDistributionStyle {
    /// `PARTITIONED BY (<columns>)`
    PARTITIONED {
        columns: Vec<ColumnDef>,
    },
    /// `CLUSTERED BY (<columns>) [SORTED BY (<sorted_by>)] [INTO <num_buckets> BUCKETS]`
    CLUSTERED {
        columns: Vec<Ident>,
        sorted_by: Vec<ColumnDef>,
        // A value <= 0 suppresses the `INTO n BUCKETS` clause when the
        // statement is re-serialized (see the `num_buckets > 0` check in the
        // CreateTable Display code).
        num_buckets: i32,
    },
    /// `SKEWED BY (<columns>) ON (<on>) [STORED AS DIRECTORIES]`
    SKEWED {
        columns: Vec<ColumnDef>,
        on: Vec<ColumnDef>,
        stored_as_directories: bool,
    },
    /// No distribution clause was specified.
    NONE,
}
/// Hive `ROW FORMAT` clause of `CREATE TABLE`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HiveRowFormat {
    /// `ROW FORMAT SERDE '<class>'` — `class` is emitted single-quoted,
    /// presumably a SerDe class name (not validated here).
    SERDE { class: String },
    /// `ROW FORMAT DELIMITED`
    DELIMITED,
}
/// Hive storage clause of `CREATE TABLE`: either an explicit input/output
/// format pair or one of the predefined file formats.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HiveIOFormat {
    /// `STORED AS INPUTFORMAT <input_format> OUTPUTFORMAT <output_format>`
    IOF {
        input_format: Expr,
        output_format: Expr,
    },
    /// `STORED AS <format>` using a known [`FileFormat`] (e.g. TEXTFILE,
    /// PARQUET).
    FileFormat {
        format: FileFormat,
    },
}
/// Collects the optional Hive `ROW FORMAT`, `STORED AS ...`, and `LOCATION`
/// clauses of a `CREATE TABLE` statement; each part may be absent.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct HiveFormat {
    // `ROW FORMAT SERDE '...'` or `ROW FORMAT DELIMITED`
    pub row_format: Option<HiveRowFormat>,
    // `STORED AS ...` clause
    pub storage: Option<HiveIOFormat>,
    // `LOCATION '...'` clause
    pub location: Option<String>,
}
impl Default for HiveFormat {
fn default() -> Self {
HiveFormat {
row_format: None,
location: None,
storage: None,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SqlOption {

View file

@ -65,6 +65,7 @@ pub enum BinaryOperator {
Lt,
GtEq,
LtEq,
Spaceship,
Eq,
NotEq,
And,
@ -92,6 +93,7 @@ impl fmt::Display for BinaryOperator {
BinaryOperator::Lt => "<",
BinaryOperator::GtEq => ">=",
BinaryOperator::LtEq => "<=",
BinaryOperator::Spaceship => "<=>",
BinaryOperator::Eq => "=",
BinaryOperator::NotEq => "<>",
BinaryOperator::And => "AND",

View file

@ -57,6 +57,7 @@ impl fmt::Display for Query {
/// A node in a tree, representing a "query body" expression, roughly:
/// `SELECT ... [ {UNION|EXCEPT|INTERSECT} SELECT ...]`
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum SetExpr {
@ -73,6 +74,7 @@ pub enum SetExpr {
right: Box<SetExpr>,
},
Values(Values),
Insert(Statement),
// TODO: ANSI SQL supports `TABLE` here.
}
@ -82,6 +84,7 @@ impl fmt::Display for SetExpr {
SetExpr::Select(s) => write!(f, "{}", s),
SetExpr::Query(q) => write!(f, "({})", q),
SetExpr::Values(v) => write!(f, "{}", v),
SetExpr::Insert(v) => write!(f, "{}", v),
SetExpr::SetOperation {
left,
right,
@ -126,10 +129,18 @@ pub struct Select {
pub projection: Vec<SelectItem>,
/// FROM
pub from: Vec<TableWithJoins>,
/// LATERAL VIEWs
pub lateral_views: Vec<LateralView>,
/// WHERE
pub selection: Option<Expr>,
/// GROUP BY
pub group_by: Vec<Expr>,
/// CLUSTER BY (Hive)
pub cluster_by: Vec<Expr>,
/// DISTRIBUTE BY (Hive)
pub distribute_by: Vec<Expr>,
/// SORT BY (Hive)
pub sort_by: Vec<Expr>,
/// HAVING
pub having: Option<Expr>,
}
@ -144,12 +155,34 @@ impl fmt::Display for Select {
if !self.from.is_empty() {
write!(f, " FROM {}", display_comma_separated(&self.from))?;
}
if !self.lateral_views.is_empty() {
for lv in &self.lateral_views {
write!(f, "{}", lv)?;
}
}
if let Some(ref selection) = self.selection {
write!(f, " WHERE {}", selection)?;
}
if !self.group_by.is_empty() {
write!(f, " GROUP BY {}", display_comma_separated(&self.group_by))?;
}
if !self.cluster_by.is_empty() {
write!(
f,
" CLUSTER BY {}",
display_comma_separated(&self.cluster_by)
)?;
}
if !self.distribute_by.is_empty() {
write!(
f,
" DISTRIBUTE BY {}",
display_comma_separated(&self.distribute_by)
)?;
}
if !self.sort_by.is_empty() {
write!(f, " SORT BY {}", display_comma_separated(&self.sort_by))?;
}
if let Some(ref having) = self.having {
write!(f, " HAVING {}", having)?;
}
@ -157,6 +190,40 @@ impl fmt::Display for Select {
}
}
/// A hive LATERAL VIEW with potential column aliases
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LateralView {
    /// The expression rendered right after `LATERAL VIEW` — presumably a
    /// table-generating function call such as `explode(...)`; TODO confirm
    /// whether the parser restricts it.
    pub lateral_view: Expr,
    /// LATERAL VIEW table name (alias for the generated rows)
    pub lateral_view_name: ObjectName,
    /// LATERAL VIEW optional column aliases, rendered as `AS a, b, ...`
    pub lateral_col_alias: Vec<Ident>,
    /// LATERAL VIEW OUTER — emit an `OUTER` keyword after `LATERAL VIEW`
    pub outer: bool,
}
impl fmt::Display for LateralView {
    /// Renders ` LATERAL VIEW [OUTER] <expr> <name> [AS <aliases>]`.
    /// Note the leading space: the Select formatter concatenates lateral
    /// views directly after the FROM clause with no separator of its own.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(" LATERAL VIEW")?;
        if self.outer {
            f.write_str(" OUTER")?;
        }
        write!(f, " {} {}", self.lateral_view, self.lateral_view_name)?;
        if self.lateral_col_alias.is_empty() {
            Ok(())
        } else {
            write!(
                f,
                " AS {}",
                display_comma_separated(&self.lateral_col_alias)
            )
        }
    }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct With {
@ -184,11 +251,16 @@ impl fmt::Display for With {
pub struct Cte {
    pub alias: TableAlias,
    pub query: Query,
    // Optional trailing `FROM <ident>` rendered after the CTE body — a Hive
    // extension added alongside Hive QL support; see the Display impl below.
    pub from: Option<Ident>,
}
impl fmt::Display for Cte {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} AS ({})", self.alias, self.query)
write!(f, "{} AS ({})", self.alias, self.query)?;
if let Some(ref fr) = self.from {
write!(f, " FROM {}", fr)?;
}
Ok(())
}
}
@ -417,6 +489,7 @@ pub enum JoinConstraint {
On(Expr),
Using(Vec<Ident>),
Natural,
None,
}
/// An `ORDER BY` expression

View file

@ -22,15 +22,17 @@ use std::fmt;
pub enum Value {
/// Numeric literal
#[cfg(not(feature = "bigdecimal"))]
Number(String),
Number(String, bool),
#[cfg(feature = "bigdecimal")]
Number(BigDecimal),
Number(BigDecimal, bool),
/// 'string value'
SingleQuotedString(String),
/// N'string value'
NationalStringLiteral(String),
/// X'hex value'
HexStringLiteral(String),
DoubleQuotedString(String),
/// Boolean value true or false
Boolean(bool),
/// INTERVAL literals, roughly in the following format:
@ -59,7 +61,8 @@ pub enum Value {
impl fmt::Display for Value {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Value::Number(v) => write!(f, "{}", v),
Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v),
Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
Value::HexStringLiteral(v) => write!(f, "X'{}'", v),

39
src/dialect/hive.rs Normal file
View file

@ -0,0 +1,39 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::dialect::Dialect;
/// A [`Dialect`] for Apache Hive.
#[derive(Debug)]
pub struct HiveDialect {}

impl Dialect for HiveDialect {
    fn is_delimited_identifier_start(&self, ch: char) -> bool {
        // Hive quotes identifiers with double quotes or backticks.
        matches!(ch, '"' | '`')
    }

    fn is_identifier_start(&self, ch: char) -> bool {
        // NOTE(review): unlike most dialects, digits are accepted as the
        // *first* character of an identifier here — presumably deliberate
        // for Hive; confirm before tightening.
        ch.is_ascii_alphanumeric() || ch == '$'
    }

    fn is_identifier_part(&self, ch: char) -> bool {
        // `{` and `}` are included — presumably so substitution tokens like
        // `${hivevar:x}` survive tokenization; TODO confirm.
        ch.is_ascii_alphanumeric() || matches!(ch, '_' | '$' | '{' | '}')
    }
}

View file

@ -103,6 +103,7 @@ define_keywords!(
BOTH,
BY,
BYTEA,
CACHE,
CALL,
CALLED,
CARDINALITY,
@ -120,6 +121,7 @@ define_keywords!(
CHECK,
CLOB,
CLOSE,
CLUSTER,
COALESCE,
COLLATE,
COLLECT,
@ -127,6 +129,7 @@ define_keywords!(
COLUMNS,
COMMIT,
COMMITTED,
COMPUTE,
CONDITION,
CONNECT,
CONSTRAINT,
@ -157,6 +160,7 @@ define_keywords!(
CURRENT_USER,
CURSOR,
CYCLE,
DATABASE,
DATE,
DAY,
DEALLOCATE,
@ -165,13 +169,16 @@ define_keywords!(
DECLARE,
DEFAULT,
DELETE,
DELIMITED,
DENSE_RANK,
DEREF,
DESC,
DESCRIBE,
DETERMINISTIC,
DIRECTORY,
DISCONNECT,
DISTINCT,
DISTRIBUTE,
DOUBLE,
DROP,
DYNAMIC,
@ -206,6 +213,7 @@ define_keywords!(
FOLLOWING,
FOR,
FOREIGN,
FORMAT,
FRAME_ROW,
FREE,
FROM,
@ -220,6 +228,7 @@ define_keywords!(
GROUPS,
HAVING,
HEADER,
HIVEVAR,
HOLD,
HOUR,
IDENTITY,
@ -229,6 +238,7 @@ define_keywords!(
INDICATOR,
INNER,
INOUT,
INPUTFORMAT,
INSENSITIVE,
INSERT,
INT,
@ -262,11 +272,13 @@ define_keywords!(
LOCALTIMESTAMP,
LOCATION,
LOWER,
MANAGEDLOCATION,
MATCH,
MATERIALIZED,
MAX,
MEMBER,
MERGE,
METADATA,
METHOD,
MIN,
MINUTE,
@ -274,6 +286,7 @@ define_keywords!(
MODIFIES,
MODULE,
MONTH,
MSCK,
MULTISET,
NATIONAL,
NATURAL,
@ -284,6 +297,7 @@ define_keywords!(
NO,
NONE,
NORMALIZE,
NOSCAN,
NOT,
NTH_VALUE,
NTILE,
@ -305,13 +319,17 @@ define_keywords!(
ORDER,
OUT,
OUTER,
OUTPUTFORMAT,
OVER,
OVERFLOW,
OVERLAPS,
OVERLAY,
OVERWRITE,
PARAMETER,
PARQUET,
PARTITION,
PARTITIONED,
PARTITIONS,
PERCENT,
PERCENTILE_CONT,
PERCENTILE_DISC,
@ -327,6 +345,7 @@ define_keywords!(
PREPARE,
PRIMARY,
PROCEDURE,
PURGE,
RANGE,
RANK,
RCFILE,
@ -349,6 +368,7 @@ define_keywords!(
REGR_SYY,
RELEASE,
RENAME,
REPAIR,
REPEATABLE,
REPLACE,
RESTRICT,
@ -372,6 +392,7 @@ define_keywords!(
SELECT,
SENSITIVE,
SEQUENCEFILE,
SERDE,
SERIALIZABLE,
SESSION,
SESSION_USER,
@ -380,6 +401,7 @@ define_keywords!(
SIMILAR,
SMALLINT,
SOME,
SORT,
SPECIFIC,
SPECIFICTYPE,
SQL,
@ -389,21 +411,27 @@ define_keywords!(
SQRT,
START,
STATIC,
STATISTICS,
STDDEV_POP,
STDDEV_SAMP,
STDIN,
STORED,
STRING,
SUBMULTISET,
SUBSTRING,
SUBSTRING_REGEX,
SUCCEEDS,
SUM,
SYMMETRIC,
SYNC,
SYSTEM,
SYSTEM_TIME,
SYSTEM_USER,
TABLE,
TABLESAMPLE,
TBLPROPERTIES,
TEMP,
TEMPORARY,
TEXT,
TEXTFILE,
THEN,
@ -473,9 +501,12 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
Keyword::SELECT,
Keyword::WHERE,
Keyword::GROUP,
Keyword::SORT,
Keyword::HAVING,
Keyword::ORDER,
Keyword::TOP,
Keyword::LATERAL,
Keyword::VIEW,
Keyword::LIMIT,
Keyword::OFFSET,
Keyword::FETCH,
@ -492,6 +523,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
Keyword::RIGHT,
Keyword::NATURAL,
Keyword::USING,
Keyword::CLUSTER,
Keyword::DISTRIBUTE,
// for MSSQL-specific OUTER APPLY (seems reserved in most dialects)
Keyword::OUTER,
];
@ -506,15 +539,20 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
Keyword::SELECT,
Keyword::WHERE,
Keyword::GROUP,
Keyword::SORT,
Keyword::HAVING,
Keyword::ORDER,
Keyword::TOP,
Keyword::LATERAL,
Keyword::VIEW,
Keyword::LIMIT,
Keyword::OFFSET,
Keyword::FETCH,
Keyword::UNION,
Keyword::EXCEPT,
Keyword::INTERSECT,
Keyword::CLUSTER,
Keyword::DISTRIBUTE,
// Reserved only as a column alias in the `SELECT` clause
Keyword::FROM,
];

View file

@ -12,6 +12,7 @@
mod ansi;
mod generic;
mod hive;
pub mod keywords;
mod mssql;
mod mysql;
@ -24,6 +25,7 @@ use std::fmt::Debug;
pub use self::ansi::AnsiDialect;
pub use self::generic::GenericDialect;
pub use self::hive::HiveDialect;
pub use self::mssql::MsSqlDialect;
pub use self::mysql::MySqlDialect;
pub use self::postgresql::PostgreSqlDialect;

View file

@ -48,12 +48,14 @@ pub enum IsOptional {
Optional,
Mandatory,
}
use IsOptional::*;
pub enum IsLateral {
Lateral,
NotLateral,
}
use crate::ast::Statement::CreateVirtualTable;
use IsLateral::*;
@ -137,6 +139,8 @@ impl<'a> Parser<'a> {
self.prev_token();
Ok(Statement::Query(Box::new(self.parse_query()?)))
}
Keyword::TRUNCATE => Ok(self.parse_truncate()?),
Keyword::MSCK => Ok(self.parse_msck()?),
Keyword::CREATE => Ok(self.parse_create()?),
Keyword::DROP => Ok(self.parse_drop()?),
Keyword::DELETE => Ok(self.parse_delete()?),
@ -169,6 +173,104 @@ impl<'a> Parser<'a> {
}
}
/// Parse a Hive `MSCK [REPAIR] TABLE <name> [{ADD|DROP|SYNC} PARTITIONS]`
/// statement; the `MSCK` keyword has already been consumed by the caller.
pub fn parse_msck(&mut self) -> Result<Statement, ParserError> {
    let repair = self.parse_keyword(Keyword::REPAIR);
    self.expect_keyword(Keyword::TABLE)?;
    let table_name = self.parse_object_name()?;
    // The trailing partition action is optional as a whole: `maybe_parse`
    // backtracks (yielding None) when the mandatory `PARTITIONS` keyword
    // that must follow the verb is missing.
    let partition_action = self
        .maybe_parse(|parser| {
            let pa = parser
                .parse_one_of_keywords(&[Keyword::ADD, Keyword::DROP, Keyword::SYNC])
                .and_then(|verb| match verb {
                    Keyword::ADD => Some(AddDropSync::ADD),
                    Keyword::DROP => Some(AddDropSync::DROP),
                    Keyword::SYNC => Some(AddDropSync::SYNC),
                    _ => None,
                });
            parser.expect_keyword(Keyword::PARTITIONS)?;
            Ok(pa)
        })
        .unwrap_or_default();
    Ok(Statement::Msck {
        repair,
        table_name,
        partition_action,
    })
}
/// Parse a Hive `TRUNCATE TABLE <name> [PARTITION (<exprs>)]` statement;
/// the `TRUNCATE` keyword has already been consumed by the caller.
pub fn parse_truncate(&mut self) -> Result<Statement, ParserError> {
    self.expect_keyword(Keyword::TABLE)?;
    let table_name = self.parse_object_name()?;
    // The partition spec is optional; when present it must be parenthesized.
    let partitions = if self.parse_keyword(Keyword::PARTITION) {
        self.expect_token(&Token::LParen)?;
        let exprs = self.parse_comma_separated(Parser::parse_expr)?;
        self.expect_token(&Token::RParen)?;
        Some(exprs)
    } else {
        None
    };
    Ok(Statement::Truncate {
        table_name,
        partitions,
    })
}
/// Parse the tail of a Hive `ANALYZE TABLE` statement:
/// `ANALYZE TABLE <name> [PARTITION (..)] [NOSCAN] [CACHE METADATA]
/// [COMPUTE STATISTICS] [FOR COLUMNS [<cols>]]`.
/// The `ANALYZE` keyword itself has already been consumed by the caller.
pub fn parse_analyze(&mut self) -> Result<Statement, ParserError> {
    self.expect_keyword(Keyword::TABLE)?;
    let table_name = self.parse_object_name()?;
    let mut for_columns = false;
    let mut cache_metadata = false;
    let mut noscan = false;
    let mut partitions = None;
    let mut compute_statistics = false;
    let mut columns = vec![];
    // The optional clauses are accepted in any order; a repeated clause
    // simply re-sets its flag (and a repeated PARTITION overwrites the
    // earlier list). Loop until no clause keyword matches.
    loop {
        match self.parse_one_of_keywords(&[
            Keyword::PARTITION,
            Keyword::FOR,
            Keyword::CACHE,
            Keyword::NOSCAN,
            Keyword::COMPUTE,
        ]) {
            Some(Keyword::PARTITION) => {
                self.expect_token(&Token::LParen)?;
                partitions = Some(self.parse_comma_separated(Parser::parse_expr)?);
                self.expect_token(&Token::RParen)?;
            }
            Some(Keyword::NOSCAN) => noscan = true,
            Some(Keyword::FOR) => {
                self.expect_keyword(Keyword::COLUMNS)?;
                // The column list after FOR COLUMNS is itself optional:
                // `maybe_parse` backtracks and we fall back to an empty list.
                columns = self
                    .maybe_parse(|parser| {
                        parser.parse_comma_separated(Parser::parse_identifier)
                    })
                    .unwrap_or_default();
                for_columns = true
            }
            Some(Keyword::CACHE) => {
                self.expect_keyword(Keyword::METADATA)?;
                cache_metadata = true
            }
            Some(Keyword::COMPUTE) => {
                self.expect_keyword(Keyword::STATISTICS)?;
                compute_statistics = true
            }
            // No recognized clause keyword follows — end of the options.
            _ => break,
        }
    }
    Ok(Statement::Analyze {
        table_name,
        for_columns,
        columns,
        partitions,
        cache_metadata,
        noscan,
        compute_statistics,
    })
}
/// Parse a new expression
pub fn parse_expr(&mut self) -> Result<Expr, ParserError> {
self.parse_subexpr(0)
@ -182,6 +284,7 @@ impl<'a> Parser<'a> {
loop {
let next_precedence = self.get_next_precedence()?;
debug!("next precedence: {:?}", next_precedence);
if precedence >= next_precedence {
break;
}
@ -316,13 +419,14 @@ impl<'a> Parser<'a> {
expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
})
}
Token::Number(_)
Token::Number(_, _)
| Token::SingleQuotedString(_)
| Token::NationalStringLiteral(_)
| Token::HexStringLiteral(_) => {
self.prev_token();
Ok(Expr::Value(self.parse_value()?))
}
Token::LParen => {
let expr =
if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) {
@ -334,7 +438,7 @@ impl<'a> Parser<'a> {
self.expect_token(&Token::RParen)?;
Ok(expr)
}
unexpected => self.expected("an expression", unexpected),
unexpected => self.expected("an expression:", unexpected),
}?;
if self.parse_keyword(Keyword::COLLATE) {
@ -665,6 +769,8 @@ impl<'a> Parser<'a> {
pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result<Expr, ParserError> {
let tok = self.next_token();
let regular_binary_operator = match &tok {
Token::Spaceship => Some(BinaryOperator::Spaceship),
Token::DoubleEq => Some(BinaryOperator::Eq),
Token::Eq => Some(BinaryOperator::Eq),
Token::Neq => Some(BinaryOperator::NotEq),
Token::Gt => Some(BinaryOperator::Gt),
@ -744,12 +850,27 @@ impl<'a> Parser<'a> {
op: UnaryOperator::PGPostfixFactorial,
expr: Box::new(expr),
})
} else if Token::LBracket == tok {
self.parse_map_access(expr)
} else {
// Can only happen if `get_next_precedence` got out of sync with this function
panic!("No infix parser for token {:?}", tok)
}
}
/// Parse the remainder of a map access `expr["key"]`; invoked from
/// `parse_infix` after the opening `[` has already been consumed.
///
/// Fix: the original merely *tried* to consume the closing `]` and ignored
/// the result (logging the bool at debug level), so input like `foo["bar`
/// parsed without any error. The closing bracket is now mandatory and a
/// missing one is reported to the caller.
pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr, ParserError> {
    let key = self.parse_literal_string()?;
    self.expect_token(&Token::RBracket)?;
    match expr {
        // Map access is only built on top of (compound) identifiers.
        e @ Expr::Identifier(_) | e @ Expr::CompoundIdentifier(_) => Ok(Expr::MapAccess {
            column: Box::new(e),
            key,
        }),
        // NOTE(review): for any other base expression the parsed key is
        // silently discarded and the expression returned unchanged. That
        // behavior is preserved here, but it looks like it should be an
        // error — confirm intended semantics.
        _ => Ok(expr),
    }
}
/// Parses the parens following the `[ NOT ] IN` operator
pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?;
@ -820,7 +941,14 @@ impl<'a> Parser<'a> {
Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC),
Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => Ok(20),
Token::Eq
| Token::Lt
| Token::LtEq
| Token::Neq
| Token::Gt
| Token::GtEq
| Token::DoubleEq
| Token::Spaceship => Ok(20),
Token::Pipe => Ok(21),
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
Token::Ampersand => Ok(23),
@ -828,6 +956,7 @@ impl<'a> Parser<'a> {
Token::Mult | Token::Div | Token::Mod | Token::StringConcat => Ok(40),
Token::DoubleColon => Ok(50),
Token::ExclamationMark => Ok(50),
Token::LBracket | Token::RBracket => Ok(10),
_ => Ok(0),
}
}
@ -911,7 +1040,7 @@ impl<'a> Parser<'a> {
let index = self.index;
for &keyword in keywords {
if !self.parse_keyword(keyword) {
//println!("parse_keywords aborting .. did not find {}", keyword);
// println!("parse_keywords aborting .. did not find {:?}", keyword);
// reset index and return immediately
self.index = index;
return false;
@ -1034,8 +1163,11 @@ impl<'a> Parser<'a> {
/// Parse a SQL CREATE statement
pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]);
let temporary = self
.parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
.is_some();
if self.parse_keyword(Keyword::TABLE) {
self.parse_create_table(or_replace)
self.parse_create_table(or_replace, temporary)
} else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) {
self.prev_token();
self.parse_create_view(or_replace)
@ -1088,31 +1220,67 @@ impl<'a> Parser<'a> {
})
}
/// Parse a `CREATE DATABASE` statement after the leading keywords have been
/// consumed: an optional `IF NOT EXISTS`, the database name, then any mix of
/// `LOCATION '<path>'` and `MANAGEDLOCATION '<path>'` clauses.
pub fn parse_create_database(&mut self) -> Result<Statement, ParserError> {
    let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
    let db_name = self.parse_object_name()?;
    let mut location = None;
    let mut managed_location = None;
    // Both clauses are optional and may appear in any order; a repeated
    // clause overwrites the earlier value.
    while let Some(keyword) =
        self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION])
    {
        match keyword {
            Keyword::LOCATION => location = Some(self.parse_literal_string()?),
            Keyword::MANAGEDLOCATION => {
                managed_location = Some(self.parse_literal_string()?)
            }
            // `parse_one_of_keywords` only returns keywords from the list above.
            _ => break,
        }
    }
    Ok(Statement::CreateDatabase {
        db_name,
        if_not_exists,
        location,
        managed_location,
    })
}
pub fn parse_create_external_table(
&mut self,
or_replace: bool,
) -> Result<Statement, ParserError> {
self.expect_keyword(Keyword::TABLE)?;
let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
let table_name = self.parse_object_name()?;
let (columns, constraints) = self.parse_columns()?;
self.expect_keywords(&[Keyword::STORED, Keyword::AS])?;
let file_format = self.parse_file_format()?;
self.expect_keyword(Keyword::LOCATION)?;
let location = self.parse_literal_string()?;
let hive_distribution = self.parse_hive_distribution()?;
let hive_formats = self.parse_hive_formats()?;
let file_format = if let Some(ff) = &hive_formats.storage {
match ff {
HiveIOFormat::FileFormat { format } => Some(format.clone()),
_ => None,
}
} else {
None
};
let location = hive_formats.location.clone();
let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?;
Ok(Statement::CreateTable {
name: table_name,
columns,
constraints,
hive_distribution,
hive_formats: Some(hive_formats),
with_options: vec![],
table_properties,
or_replace,
if_not_exists: false,
if_not_exists,
external: true,
file_format: Some(file_format),
location: Some(location),
temporary: false,
file_format,
location,
query: None,
without_rowid: false,
like: None,
})
}
@ -1139,7 +1307,7 @@ impl<'a> Parser<'a> {
// ANSI SQL and Postgres support RECURSIVE here, but we don't support it either.
let name = self.parse_object_name()?;
let columns = self.parse_parenthesized_column_list(Optional)?;
let with_options = self.parse_with_options()?;
let with_options = self.parse_options(Keyword::WITH)?;
self.expect_keyword(Keyword::AS)?;
let query = Box::new(self.parse_query()?);
// Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here.
@ -1171,6 +1339,7 @@ impl<'a> Parser<'a> {
let names = self.parse_comma_separated(Parser::parse_object_name)?;
let cascade = self.parse_keyword(Keyword::CASCADE);
let restrict = self.parse_keyword(Keyword::RESTRICT);
let purge = self.parse_keyword(Keyword::PURGE);
if cascade && restrict {
return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP");
}
@ -1179,6 +1348,7 @@ impl<'a> Parser<'a> {
if_exists,
names,
cascade,
purge,
})
}
@ -1199,18 +1369,85 @@ impl<'a> Parser<'a> {
})
}
pub fn parse_create_table(&mut self, or_replace: bool) -> Result<Statement, ParserError> {
//TODO: Implement parsing for Skewed and Clustered
/// Parse an optional Hive `PARTITIONED BY (col type, ...)` clause,
/// returning `HiveDistributionStyle::NONE` when the clause is absent.
pub fn parse_hive_distribution(&mut self) -> Result<HiveDistributionStyle, ParserError> {
    // No PARTITIONED BY: the table has no declared distribution.
    if !self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) {
        return Ok(HiveDistributionStyle::NONE);
    }
    self.expect_token(&Token::LParen)?;
    let columns = self.parse_comma_separated(Parser::parse_column_def)?;
    self.expect_token(&Token::RParen)?;
    Ok(HiveDistributionStyle::PARTITIONED { columns })
}
/// Parse the optional Hive storage clauses of a `CREATE TABLE`:
/// `ROW FORMAT ...`, `STORED AS ...` and `LOCATION '<path>'`, in any order.
/// Stops at the first token that does not start one of these clauses.
///
/// # Errors
/// Returns a `ParserError` when a clause keyword is present but its body is
/// malformed (e.g. `STORED AS INPUTFORMAT` without a matching `OUTPUTFORMAT`).
pub fn parse_hive_formats(&mut self) -> Result<HiveFormat, ParserError> {
    let mut hive_format = HiveFormat::default();
    loop {
        match self.parse_one_of_keywords(&[Keyword::ROW, Keyword::STORED, Keyword::LOCATION]) {
            Some(Keyword::ROW) => {
                hive_format.row_format = Some(self.parse_row_format()?);
            }
            Some(Keyword::STORED) => {
                self.expect_keyword(Keyword::AS)?;
                if self.parse_keyword(Keyword::INPUTFORMAT) {
                    // Explicit reader/writer class pair.
                    let input_format = self.parse_expr()?;
                    self.expect_keyword(Keyword::OUTPUTFORMAT)?;
                    let output_format = self.parse_expr()?;
                    hive_format.storage = Some(HiveIOFormat::IOF {
                        input_format,
                        output_format,
                    });
                } else {
                    // Shorthand file format such as `STORED AS ORC`.
                    let format = self.parse_file_format()?;
                    hive_format.storage = Some(HiveIOFormat::FileFormat { format });
                }
            }
            Some(Keyword::LOCATION) => {
                hive_format.location = Some(self.parse_literal_string()?);
            }
            // No further storage clause. The original had separate `None` and
            // `_` arms that both broke; a single catch-all is equivalent.
            _ => break,
        }
    }
    Ok(hive_format)
}
/// Parse a Hive `ROW FORMAT` clause; `ROW` has already been consumed.
/// `ROW FORMAT SERDE '<class>'` yields `SERDE { class }`; anything else
/// (including an explicit `DELIMITED`) yields `DELIMITED`.
pub fn parse_row_format(&mut self) -> Result<HiveRowFormat, ParserError> {
    self.expect_keyword(Keyword::FORMAT)?;
    if let Some(Keyword::SERDE) =
        self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED])
    {
        let class = self.parse_literal_string()?;
        Ok(HiveRowFormat::SERDE { class })
    } else {
        Ok(HiveRowFormat::DELIMITED)
    }
}
pub fn parse_create_table(
&mut self,
or_replace: bool,
temporary: bool,
) -> Result<Statement, ParserError> {
let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
let table_name = self.parse_object_name()?;
let like = if self.parse_keyword(Keyword::LIKE) {
self.parse_object_name().ok()
} else {
None
};
// parse optional column list (schema)
let (columns, constraints) = self.parse_columns()?;
// SQLite supports `WITHOUT ROWID` at the end of `CREATE TABLE`
let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]);
let hive_distribution = self.parse_hive_distribution()?;
let hive_formats = self.parse_hive_formats()?;
// PostgreSQL supports `WITH ( options )`, before `AS`
let with_options = self.parse_with_options()?;
let with_options = self.parse_options(Keyword::WITH)?;
let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?;
// Parse optional `AS ( query )`
let query = if self.parse_keyword(Keyword::AS) {
Some(Box::new(self.parse_query()?))
@ -1220,16 +1457,21 @@ impl<'a> Parser<'a> {
Ok(Statement::CreateTable {
name: table_name,
temporary,
columns,
constraints,
with_options,
table_properties,
or_replace,
if_not_exists,
hive_distribution,
hive_formats: Some(hive_formats),
external: false,
file_format: None,
location: None,
query,
without_rowid,
like,
})
}
@ -1423,8 +1665,8 @@ impl<'a> Parser<'a> {
}
}
pub fn parse_with_options(&mut self) -> Result<Vec<SqlOption>, ParserError> {
if self.parse_keyword(Keyword::WITH) {
pub fn parse_options(&mut self, keyword: Keyword) -> Result<Vec<SqlOption>, ParserError> {
if self.parse_keyword(keyword) {
self.expect_token(&Token::LParen)?;
let options = self.parse_comma_separated(Parser::parse_sql_option)?;
self.expect_token(&Token::RParen)?;
@ -1449,13 +1691,25 @@ impl<'a> Parser<'a> {
if let Some(constraint) = self.parse_optional_table_constraint()? {
AlterTableOperation::AddConstraint(constraint)
} else {
let _ = self.parse_keyword(Keyword::COLUMN);
let column_def = self.parse_column_def()?;
AlterTableOperation::AddColumn { column_def }
let if_not_exists =
self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
if self.parse_keyword(Keyword::PARTITION) {
self.expect_token(&Token::LParen)?;
let partitions = self.parse_comma_separated(Parser::parse_expr)?;
self.expect_token(&Token::RParen)?;
AlterTableOperation::AddPartitions {
if_not_exists,
new_partitions: partitions,
}
} else {
let _ = self.parse_keyword(Keyword::COLUMN);
let column_def = self.parse_column_def()?;
AlterTableOperation::AddColumn { column_def }
}
}
} else if self.parse_keyword(Keyword::RENAME) {
if self.parse_keyword(Keyword::TO) {
let table_name = self.parse_identifier()?;
let table_name = self.parse_object_name()?;
AlterTableOperation::RenameTable { table_name }
} else {
let _ = self.parse_keyword(Keyword::COLUMN);
@ -1468,17 +1722,51 @@ impl<'a> Parser<'a> {
}
}
} else if self.parse_keyword(Keyword::DROP) {
let _ = self.parse_keyword(Keyword::COLUMN);
let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
let column_name = self.parse_identifier()?;
let cascade = self.parse_keyword(Keyword::CASCADE);
AlterTableOperation::DropColumn {
column_name,
if_exists,
cascade,
if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) {
self.expect_token(&Token::LParen)?;
let partitions = self.parse_comma_separated(Parser::parse_expr)?;
self.expect_token(&Token::RParen)?;
AlterTableOperation::DropPartitions {
partitions,
if_exists: true,
}
} else if self.parse_keyword(Keyword::PARTITION) {
self.expect_token(&Token::LParen)?;
let partitions = self.parse_comma_separated(Parser::parse_expr)?;
self.expect_token(&Token::RParen)?;
AlterTableOperation::DropPartitions {
partitions,
if_exists: false,
}
} else {
let _ = self.parse_keyword(Keyword::COLUMN);
let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
let column_name = self.parse_identifier()?;
let cascade = self.parse_keyword(Keyword::CASCADE);
AlterTableOperation::DropColumn {
column_name,
if_exists,
cascade,
}
}
} else if self.parse_keyword(Keyword::PARTITION) {
self.expect_token(&Token::LParen)?;
let before = self.parse_comma_separated(Parser::parse_expr)?;
self.expect_token(&Token::RParen)?;
self.expect_keyword(Keyword::RENAME)?;
self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?;
self.expect_token(&Token::LParen)?;
let renames = self.parse_comma_separated(Parser::parse_expr)?;
self.expect_token(&Token::RParen)?;
AlterTableOperation::RenamePartitions {
old_partitions: before,
new_partitions: renames,
}
} else {
return self.expected("ADD, RENAME, or DROP after ALTER TABLE", self.peek_token());
return self.expected(
"ADD, RENAME, PARTITION or DROP after ALTER TABLE",
self.peek_token(),
);
};
Ok(Statement::AlterTable {
name: table_name,
@ -1545,13 +1833,18 @@ impl<'a> Parser<'a> {
Keyword::TRUE => Ok(Value::Boolean(true)),
Keyword::FALSE => Ok(Value::Boolean(false)),
Keyword::NULL => Ok(Value::Null),
Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style {
Some('"') => Ok(Value::DoubleQuotedString(w.value)),
Some('\'') => Ok(Value::SingleQuotedString(w.value)),
_ => self.expected("A value?", Token::Word(w))?,
},
_ => self.expected("a concrete value", Token::Word(w)),
},
// The call to n.parse() returns a bigdecimal when the
// bigdecimal feature is enabled, and is otherwise a no-op
// (i.e., it returns the input string).
Token::Number(ref n) => match n.parse() {
Ok(n) => Ok(Value::Number(n)),
Token::Number(ref n, l) => match n.parse() {
Ok(n) => Ok(Value::Number(n, l)),
Err(e) => parser_err!(format!("Could not parse '{}' as number: {}", n, e)),
},
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
@ -1563,7 +1856,7 @@ impl<'a> Parser<'a> {
pub fn parse_number_value(&mut self) -> Result<Value, ParserError> {
match self.parse_value()? {
v @ Value::Number(_) => Ok(v),
v @ Value::Number(_, _) => Ok(v),
_ => {
self.prev_token();
self.expected("literal number", self.peek_token())
@ -1574,7 +1867,7 @@ impl<'a> Parser<'a> {
/// Parse an unsigned literal integer/long
pub fn parse_literal_uint(&mut self) -> Result<u64, ParserError> {
match self.next_token() {
Token::Number(s) => s.parse::<u64>().map_err(|e| {
Token::Number(s, _) => s.parse::<u64>().map_err(|e| {
ParserError::ParserError(format!("Could not parse '{}' as u64: {}", s, e))
}),
unexpected => self.expected("literal int", unexpected),
@ -1584,6 +1877,7 @@ impl<'a> Parser<'a> {
/// Parse a literal string
pub fn parse_literal_string(&mut self) -> Result<String, ParserError> {
match self.next_token() {
Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value),
Token::SingleQuotedString(s) => Ok(s),
unexpected => self.expected("literal string", unexpected),
}
@ -1632,6 +1926,7 @@ impl<'a> Parser<'a> {
// parse_interval_literal for a taste.
Keyword::INTERVAL => Ok(DataType::Interval),
Keyword::REGCLASS => Ok(DataType::Regclass),
Keyword::STRING => Ok(DataType::String),
Keyword::TEXT => {
if self.consume_token(&Token::LBracket) {
// Note: this is postgresql-specific
@ -1730,6 +2025,7 @@ impl<'a> Parser<'a> {
/// Parse a simple one-part identifier.
///
/// Accepts either a (possibly quoted) word, or a single-quoted string,
/// which is converted into a `'`-quoted identifier — presumably for Hive
/// compatibility; confirm against callers.
pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
    match self.next_token() {
        Token::Word(w) => Ok(w.to_ident()),
        Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)),
        unexpected => self.expected("identifier", unexpected),
    }
}
@ -1805,15 +2101,6 @@ impl<'a> Parser<'a> {
})
}
/// Parse an `ANALYZE TABLE <name>` statement; the `ANALYZE` keyword has
/// already been consumed by the caller.
pub fn parse_analyze(&mut self) -> Result<Statement, ParserError> {
    // ANALYZE TABLE table_name
    self.expect_keyword(Keyword::TABLE)?;
    let table_name = self.parse_object_name()?;
    Ok(Statement::Analyze { table_name })
}
/// Parse a query expression, i.e. a `SELECT` statement optionally
/// preceeded with some `WITH` CTE declarations and optionally followed
/// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't
@ -1828,53 +2115,88 @@ impl<'a> Parser<'a> {
None
};
let body = self.parse_query_body(0)?;
if !self.parse_keyword(Keyword::INSERT) {
let body = self.parse_query_body(0)?;
let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
self.parse_comma_separated(Parser::parse_order_by_expr)?
let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
self.parse_comma_separated(Parser::parse_order_by_expr)?
} else {
vec![]
};
let limit = if self.parse_keyword(Keyword::LIMIT) {
self.parse_limit()?
} else {
None
};
let offset = if self.parse_keyword(Keyword::OFFSET) {
Some(self.parse_offset()?)
} else {
None
};
let fetch = if self.parse_keyword(Keyword::FETCH) {
Some(self.parse_fetch()?)
} else {
None
};
Ok(Query {
with,
body,
limit,
order_by,
offset,
fetch,
})
} else {
vec![]
};
let limit = if self.parse_keyword(Keyword::LIMIT) {
self.parse_limit()?
} else {
None
};
let offset = if self.parse_keyword(Keyword::OFFSET) {
Some(self.parse_offset()?)
} else {
None
};
let fetch = if self.parse_keyword(Keyword::FETCH) {
Some(self.parse_fetch()?)
} else {
None
};
Ok(Query {
with,
body,
limit,
order_by,
offset,
fetch,
})
let insert = self.parse_insert()?;
Ok(Query {
with,
body: SetExpr::Insert(insert),
limit: None,
order_by: vec![],
offset: None,
fetch: None,
})
}
}
/// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`)
fn parse_cte(&mut self) -> Result<Cte, ParserError> {
let alias = TableAlias {
name: self.parse_identifier()?,
columns: self.parse_parenthesized_column_list(Optional)?,
let name = self.parse_identifier()?;
let mut cte = if self.parse_keyword(Keyword::AS) {
self.expect_token(&Token::LParen)?;
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
let alias = TableAlias {
name,
columns: vec![],
};
Cte {
alias,
query,
from: None,
}
} else {
let columns = self.parse_parenthesized_column_list(Optional)?;
self.expect_keyword(Keyword::AS)?;
self.expect_token(&Token::LParen)?;
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
let alias = TableAlias { name, columns };
Cte {
alias,
query,
from: None,
}
};
self.expect_keyword(Keyword::AS)?;
self.expect_token(&Token::LParen)?;
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
Ok(Cte { alias, query })
if self.parse_keyword(Keyword::FROM) {
cte.from = Some(self.parse_identifier()?);
}
Ok(cte)
}
/// Parse a "query body", which is an expression with roughly the
@ -1962,6 +2284,37 @@ impl<'a> Parser<'a> {
} else {
vec![]
};
let mut lateral_views = vec![];
loop {
if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) {
let outer = self.parse_keyword(Keyword::OUTER);
let lateral_view = self.parse_expr()?;
let lateral_view_name = self.parse_object_name()?;
let lateral_col_alias = self
.parse_comma_separated(|parser| {
parser.parse_optional_alias(&[
Keyword::WHERE,
Keyword::GROUP,
Keyword::CLUSTER,
Keyword::HAVING,
Keyword::LATERAL,
]) // This couldn't possibly be a bad idea
})?
.into_iter()
.filter(|i| i.is_some())
.map(|i| i.unwrap())
.collect();
lateral_views.push(LateralView {
lateral_view,
lateral_view_name,
lateral_col_alias,
outer,
});
} else {
break;
}
}
let selection = if self.parse_keyword(Keyword::WHERE) {
Some(self.parse_expr()?)
@ -1975,6 +2328,24 @@ impl<'a> Parser<'a> {
vec![]
};
let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) {
self.parse_comma_separated(Parser::parse_expr)?
} else {
vec![]
};
let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) {
self.parse_comma_separated(Parser::parse_expr)?
} else {
vec![]
};
let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) {
self.parse_comma_separated(Parser::parse_expr)?
} else {
vec![]
};
let having = if self.parse_keyword(Keyword::HAVING) {
Some(self.parse_expr()?)
} else {
@ -1987,26 +2358,42 @@ impl<'a> Parser<'a> {
projection,
from,
selection,
lateral_views,
group_by,
cluster_by,
distribute_by,
sort_by,
having,
})
}
pub fn parse_set(&mut self) -> Result<Statement, ParserError> {
let modifier = self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL]);
let modifier =
self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]);
if let Some(Keyword::HIVEVAR) = modifier {
self.expect_token(&Token::Colon)?;
}
let variable = self.parse_identifier()?;
if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) {
let token = self.peek_token();
let value = match (self.parse_value(), token) {
(Ok(value), _) => SetVariableValue::Literal(value),
(Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()),
(Err(_), unexpected) => self.expected("variable value", unexpected)?,
};
Ok(Statement::SetVariable {
local: modifier == Some(Keyword::LOCAL),
variable,
value,
})
let mut values = vec![];
loop {
let token = self.peek_token();
let value = match (self.parse_value(), token) {
(Ok(value), _) => SetVariableValue::Literal(value),
(Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()),
(Err(_), unexpected) => self.expected("variable value", unexpected)?,
};
values.push(value);
if self.consume_token(&Token::Comma) {
continue;
}
return Ok(Statement::SetVariable {
local: modifier == Some(Keyword::LOCAL),
hivevar: Some(Keyword::HIVEVAR) == modifier,
variable,
value: values,
});
}
} else if variable.value == "TRANSACTION" && modifier.is_none() {
Ok(Statement::SetTransaction {
modes: self.parse_transaction_modes()?,
@ -2119,7 +2506,7 @@ impl<'a> Parser<'a> {
}
}
Keyword::OUTER => {
return self.expected("LEFT, RIGHT, or FULL", self.peek_token())
return self.expected("LEFT, RIGHT, or FULL", self.peek_token());
}
_ if natural => {
return self.expected("a join type after NATURAL", self.peek_token());
@ -2290,21 +2677,61 @@ impl<'a> Parser<'a> {
let columns = self.parse_parenthesized_column_list(Mandatory)?;
Ok(JoinConstraint::Using(columns))
} else {
self.expected("ON, or USING after JOIN", self.peek_token())
Ok(JoinConstraint::None)
//self.expected("ON, or USING after JOIN", self.peek_token())
}
}
/// Parse an INSERT statement
pub fn parse_insert(&mut self) -> Result<Statement, ParserError> {
self.expect_keyword(Keyword::INTO)?;
let table_name = self.parse_object_name()?;
let columns = self.parse_parenthesized_column_list(Optional)?;
let source = Box::new(self.parse_query()?);
Ok(Statement::Insert {
table_name,
columns,
source,
})
let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?;
let overwrite = action == Keyword::OVERWRITE;
let local = self.parse_keyword(Keyword::LOCAL);
if self.parse_keyword(Keyword::DIRECTORY) {
let path = self.parse_literal_string()?;
let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) {
Some(self.parse_file_format()?)
} else {
None
};
let source = Box::new(self.parse_query()?);
Ok(Statement::Directory {
local,
path,
overwrite,
file_format,
source,
})
} else {
// Hive lets you put table here regardless
let table = self.parse_keyword(Keyword::TABLE);
let table_name = self.parse_object_name()?;
let columns = self.parse_parenthesized_column_list(Optional)?;
let partitioned = if self.parse_keyword(Keyword::PARTITION) {
self.expect_token(&Token::LParen)?;
let r = Some(self.parse_comma_separated(Parser::parse_expr)?);
self.expect_token(&Token::RParen)?;
r
} else {
None
};
// Hive allows you to specify columns after partitions as well if you want.
let after_columns = self.parse_parenthesized_column_list(Optional)?;
let source = Box::new(self.parse_query()?);
Ok(Statement::Insert {
table_name,
overwrite,
partitioned,
columns,
after_columns,
source,
table,
})
}
}
pub fn parse_update(&mut self) -> Result<Statement, ParserError> {

View file

@ -132,6 +132,7 @@ pub fn all_dialects() -> TestedDialects {
Box::new(MsSqlDialect {}),
Box::new(AnsiDialect {}),
Box::new(SnowflakeDialect {}),
Box::new(HiveDialect {}),
],
}
}
@ -153,7 +154,7 @@ pub fn expr_from_projection(item: &SelectItem) -> &Expr {
}
pub fn number(n: &'static str) -> Value {
Value::Number(n.parse().unwrap())
Value::Number(n.parse().unwrap(), false)
}
pub fn table_alias(name: impl Into<String>) -> Option<TableAlias> {

View file

@ -35,7 +35,7 @@ pub enum Token {
/// A keyword (like SELECT) or an optionally quoted SQL identifier
Word(Word),
/// An unsigned numeric literal
Number(String),
Number(String, bool),
/// A character that could not be tokenized
Char(char),
/// Single quoted string: i.e: 'string'
@ -48,6 +48,8 @@ pub enum Token {
Comma,
/// Whitespace (space, tab, etc)
Whitespace(Whitespace),
/// Double equals sign `==`
DoubleEq,
/// Equality operator `=`
Eq,
/// Not Equals operator `<>` (or `!=` in some dialects)
@ -60,6 +62,8 @@ pub enum Token {
LtEq,
/// Greater Than Or Equals operator `>=`
GtEq,
/// Spaceship operator <=>
Spaceship,
/// Plus operator `+`
Plus,
/// Minus operator `-`
@ -127,13 +131,15 @@ impl fmt::Display for Token {
match self {
Token::EOF => f.write_str("EOF"),
Token::Word(ref w) => write!(f, "{}", w),
Token::Number(ref n) => f.write_str(n),
Token::Number(ref n, l) => write!(f, "{}{long}", n, long = if *l { "L" } else { "" }),
Token::Char(ref c) => write!(f, "{}", c),
Token::SingleQuotedString(ref s) => write!(f, "'{}'", s),
Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s),
Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s),
Token::Comma => f.write_str(","),
Token::Whitespace(ws) => write!(f, "{}", ws),
Token::DoubleEq => f.write_str("=="),
Token::Spaceship => f.write_str("<=>"),
Token::Eq => f.write_str("="),
Token::Neq => f.write_str("<>"),
Token::Lt => f.write_str("<"),
@ -296,7 +302,7 @@ impl<'a> Tokenizer<'a> {
Token::Whitespace(Whitespace::Tab) => self.col += 4,
Token::Word(w) if w.quote_style == None => self.col += w.value.len() as u64,
Token::Word(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2,
Token::Number(s) => self.col += s.len() as u64,
Token::Number(s, _) => self.col += s.len() as u64,
Token::SingleQuotedString(s) => self.col += s.len() as u64,
_ => self.col += 1,
}
@ -358,6 +364,15 @@ impl<'a> Tokenizer<'a> {
ch if self.dialect.is_identifier_start(ch) => {
chars.next(); // consume the first char
let s = self.tokenize_word(ch, chars);
if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| {
matches!(ch, '0'..='9' | '.')
});
let s2 = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.'));
s += s2.as_str();
return Ok(Some(Token::Number(s, false)));
}
Ok(Some(Token::make_word(&s, None)))
}
// string
@ -383,7 +398,13 @@ impl<'a> Tokenizer<'a> {
'0'..='9' => {
// TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal
let s = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.'));
Ok(Some(Token::Number(s)))
let long = if chars.peek() == Some(&'L') {
chars.next();
true
} else {
false
};
Ok(Some(Token::Number(s, long)))
}
// punctuation
'(' => self.consume_and_return(chars, Token::LParen),
@ -461,7 +482,13 @@ impl<'a> Tokenizer<'a> {
'<' => {
chars.next(); // consume
match chars.peek() {
Some('=') => self.consume_and_return(chars, Token::LtEq),
Some('=') => {
chars.next();
match chars.peek() {
Some('>') => self.consume_and_return(chars, Token::Spaceship),
_ => Ok(Some(Token::LtEq)),
}
}
Some('>') => self.consume_and_return(chars, Token::Neq),
Some('<') => self.consume_and_return(chars, Token::ShiftLeft),
_ => Ok(Some(Token::Lt)),
@ -634,7 +661,7 @@ mod tests {
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1")),
Token::Number(String::from("1"), false),
];
compare(expected, tokens);
@ -652,7 +679,7 @@ mod tests {
Token::Whitespace(Whitespace::Space),
Token::make_word("sqrt", None),
Token::LParen,
Token::Number(String::from("1")),
Token::Number(String::from("1"), false),
Token::RParen,
];
@ -724,11 +751,11 @@ mod tests {
Token::Whitespace(Whitespace::Space),
Token::Eq,
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1")),
Token::Number(String::from("1"), false),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("LIMIT"),
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("5")),
Token::Number(String::from("5"), false),
];
compare(expected, tokens);
@ -758,7 +785,7 @@ mod tests {
Token::Whitespace(Whitespace::Space),
Token::Eq,
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1")),
Token::Number(String::from("1"), false),
];
compare(expected, tokens);
@ -790,7 +817,7 @@ mod tests {
Token::Whitespace(Whitespace::Space),
Token::Eq,
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1")),
Token::Number(String::from("1"), false),
];
compare(expected, tokens);
@ -943,12 +970,12 @@ mod tests {
let mut tokenizer = Tokenizer::new(&dialect, &sql);
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::Number("0".to_string()),
Token::Number("0".to_string(), false),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
comment: "this is a comment\n".to_string(),
}),
Token::Number("1".to_string()),
Token::Number("1".to_string(), false),
];
compare(expected, tokens);
}
@ -975,11 +1002,11 @@ mod tests {
let mut tokenizer = Tokenizer::new(&dialect, &sql);
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::Number("0".to_string()),
Token::Number("0".to_string(), false),
Token::Whitespace(Whitespace::MultiLineComment(
"multi-line\n* /comment".to_string(),
)),
Token::Number("1".to_string()),
Token::Number("1".to_string(), false),
];
compare(expected, tokens);
}
@ -1046,7 +1073,7 @@ mod tests {
Token::Whitespace(Whitespace::Space),
Token::make_keyword("TOP"),
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("5")),
Token::Number(String::from("5"), false),
Token::Whitespace(Whitespace::Space),
Token::make_word("bar", Some('[')),
Token::Whitespace(Whitespace::Space),

View file

@ -92,7 +92,7 @@ fn parse_insert_invalid() {
let sql = "INSERT public.customer (id, name, active) VALUES (1, 2, 3)";
let res = parse_sql_statements(sql);
assert_eq!(
ParserError::ParserError("Expected INTO, found: public".to_string()),
ParserError::ParserError("Expected one of INTO or OVERWRITE, found: public".to_string()),
res.unwrap_err()
);
}
@ -454,11 +454,11 @@ fn parse_number() {
#[cfg(feature = "bigdecimal")]
assert_eq!(
expr,
Expr::Value(Value::Number(bigdecimal::BigDecimal::from(1)))
Expr::Value(Value::Number(bigdecimal::BigDecimal::from(1), false))
);
#[cfg(not(feature = "bigdecimal"))]
assert_eq!(expr, Expr::Value(Value::Number("1.0".into())));
assert_eq!(expr, Expr::Value(Value::Number("1.0".into(), false)));
}
#[test]
@ -894,7 +894,7 @@ fn parse_select_having() {
name: ObjectName(vec![Ident::new("COUNT")]),
args: vec![FunctionArg::Unnamed(Expr::Wildcard)],
over: None,
distinct: false
distinct: false,
})),
op: BinaryOperator::Gt,
right: Box::new(Expr::Value(number("1")))
@ -1639,18 +1639,6 @@ fn parse_explain_analyze_with_simple_select() {
);
}
#[test]
fn parse_simple_analyze() {
let sql = "ANALYZE TABLE t";
let stmt = verified_stmt(sql);
assert_eq!(
stmt,
Statement::Analyze {
table_name: ObjectName(vec![Ident::new("t")])
}
);
}
#[test]
fn parse_named_argument_function() {
let sql = "SELECT FUN(a => '1', b => '2') FROM foo";
@ -2390,7 +2378,7 @@ fn parse_ctes() {
fn assert_ctes_in_select(expected: &[&str], sel: &Query) {
for (i, exp) in expected.iter().enumerate() {
let Cte { alias, query } = &sel.with.as_ref().unwrap().cte_tables[i];
let Cte { alias, query, .. } = &sel.with.as_ref().unwrap().cte_tables[i];
assert_eq!(*exp, query.to_string());
assert_eq!(
if i == 0 {
@ -2479,6 +2467,7 @@ fn parse_recursive_cte() {
}],
},
query: cte_query,
from: None,
};
assert_eq!(with.cte_tables.first().unwrap(), &expected);
}
@ -2799,6 +2788,7 @@ fn parse_drop_table() {
if_exists,
names,
cascade,
purge: _,
} => {
assert_eq!(false, if_exists);
assert_eq!(ObjectType::Table, object_type);
@ -2818,6 +2808,7 @@ fn parse_drop_table() {
if_exists,
names,
cascade,
purge: _,
} => {
assert_eq!(true, if_exists);
assert_eq!(ObjectType::Table, object_type);

212
tests/sqlparser_hive.rs Normal file
View file

@ -0,0 +1,212 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![warn(clippy::all)]
//! Test SQL syntax specific to Hive. The parser based on the generic dialect
//! is also tested (on the inputs it can handle).
use sqlparser::dialect::HiveDialect;
use sqlparser::test_utils::*;
#[test]
fn parse_table_create() {
    // CREATE TABLE with PARTITIONED BY, STORED AS <format>, LOCATION and TBLPROPERTIES.
    hive().verified_stmt(
        r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2", "asdf" = '1234', 'asdf' = "1234", "asdf" = 2)"#,
    );
    // CREATE TABLE with explicit INPUTFORMAT/OUTPUTFORMAT classes.
    hive().verified_stmt(
        r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#,
    );
}
#[test]
fn parse_insert_overwrite() {
    // INSERT OVERWRITE TABLE with a mixed static/dynamic partition spec.
    hive().verified_stmt(
        r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a = '1', b) SELECT a, b, c FROM db.table"#,
    );
}
#[test]
fn test_truncate() {
    // TRUNCATE TABLE round-trips unchanged.
    hive().verified_stmt(r#"TRUNCATE TABLE db.table"#);
}
#[test]
fn parse_analyze() {
    // ANALYZE ... COMPUTE STATISTICS with NOSCAN and CACHE METADATA flags.
    hive().verified_stmt(
        r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS NOSCAN CACHE METADATA"#,
    );
}
#[test]
fn parse_analyze_for_columns() {
    // ANALYZE ... COMPUTE STATISTICS FOR COLUMNS variant.
    hive().verified_stmt(
        r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS FOR COLUMNS"#,
    );
}
#[test]
fn parse_msck() {
    // MSCK REPAIR TABLE, with and without the ADD PARTITIONS suffix.
    hive().verified_stmt(r#"MSCK REPAIR TABLE db.table_name ADD PARTITIONS"#);
    hive().verified_stmt(r#"MSCK REPAIR TABLE db.table_name"#);
}
#[test]
fn parse_set() {
    // SET HIVEVAR:<name> with a comma-separated value list.
    hive().verified_stmt("SET HIVEVAR:name = a, b, c_d");
}
#[test]
fn test_spaceship() {
    // The `<=>` (spaceship) comparison operator.
    hive().verified_stmt("SELECT * FROM db.table WHERE a <=> b");
}
#[test]
fn parse_with_cte() {
    // A CTE whose body is followed by INSERT rather than SELECT.
    hive().verified_stmt(
        "WITH a AS (SELECT * FROM b) INSERT INTO TABLE db.table_table PARTITION (a) SELECT * FROM b",
    );
}
#[test]
fn drop_table_purge() {
    // DROP TABLE with the PURGE modifier.
    hive().verified_stmt("DROP TABLE db.table_name PURGE");
}
#[test]
fn create_table_like() {
    // CREATE TABLE ... LIKE another table.
    hive().verified_stmt("CREATE TABLE db.table_name LIKE db.other_table");
}
// Turning off this test until we can parse identifiers starting with numbers :(
#[test]
fn test_identifier() {
    // Alias that begins with a digit.
    hive().verified_stmt("SELECT a AS 3_barrr_asdf FROM db.table_name");
}
#[test]
fn test_alter_partition() {
    // ALTER TABLE ... PARTITION ... RENAME TO PARTITION round-trips unchanged.
    let sql = "ALTER TABLE db.table PARTITION (a = 2) RENAME TO PARTITION (a = 1)";
    hive().verified_stmt(sql);
}
#[test]
fn test_add_partition() {
    // ALTER TABLE ... ADD IF NOT EXISTS PARTITION with mixed literal types.
    let sql = "ALTER TABLE db.table ADD IF NOT EXISTS PARTITION (a = 'asdf', b = 2)";
    hive().verified_stmt(sql);
}
#[test]
fn test_drop_partition() {
    // ALTER TABLE ... DROP PARTITION without IF EXISTS.
    let sql = "ALTER TABLE db.table DROP PARTITION (a = 1)";
    hive().verified_stmt(sql);
}
#[test]
fn test_drop_if_exists() {
    // ALTER TABLE ... DROP IF EXISTS PARTITION with multiple key/value pairs.
    let sql = "ALTER TABLE db.table DROP IF EXISTS PARTITION (a = 'b', c = 'd')";
    hive().verified_stmt(sql);
}
#[test]
fn test_cluster_by() {
    // CLUSTER BY with multiple columns round-trips unchanged.
    let sql = "SELECT a FROM db.table CLUSTER BY a, b";
    hive().verified_stmt(sql);
}
#[test]
fn test_distribute_by() {
    // DISTRIBUTE BY with multiple columns round-trips unchanged.
    // (Local was previously misnamed `cluster`.)
    let distribute = "SELECT a FROM db.table DISTRIBUTE BY a, b";
    hive().verified_stmt(distribute);
}
#[test]
fn no_join_condition() {
    // A JOIN without an ON/USING clause must be accepted.
    let sql = "SELECT a, b FROM db.table_name JOIN a";
    hive().verified_stmt(sql);
}
#[test]
fn columns_after_partition() {
    // An explicit column list following the PARTITION spec in an INSERT.
    let sql = "INSERT INTO db.table_name PARTITION (a, b) (c, d) SELECT a, b, c, d FROM db.table";
    hive().verified_stmt(sql);
}
#[test]
fn long_numerics() {
    // A numeric literal with the `L` suffix (e.g. `1L`) must parse.
    let sql = r#"SELECT MIN(MIN(10, 5), 1L) AS a"#;
    hive().verified_stmt(sql);
}
#[test]
fn decimal_precision() {
    // DECIMAL(p,s) is emitted as NUMERIC(p,s) by the serializer.
    let input = "SELECT CAST(a AS DECIMAL(18,2)) FROM db.table";
    let canonical = "SELECT CAST(a AS NUMERIC(18,2)) FROM db.table";
    hive().one_statement_parses_to(input, canonical);
}
#[test]
fn create_temp_table() {
    // TEMPORARY is the canonical keyword; the TEMP shorthand normalizes to it.
    let canonical = "CREATE TEMPORARY TABLE db.table (a INT NOT NULL)";
    let shorthand = "CREATE TEMP TABLE db.table (a INT NOT NULL)";
    hive().verified_stmt(canonical);
    hive().one_statement_parses_to(shorthand, canonical);
}
#[test]
fn create_local_directory() {
    // INSERT OVERWRITE LOCAL DIRECTORY with a STORED AS clause.
    let sql =
        "INSERT OVERWRITE LOCAL DIRECTORY '/home/blah' STORED AS TEXTFILE SELECT * FROM db.table";
    hive().verified_stmt(sql);
}
#[test]
fn lateral_view() {
    // Chained LATERAL VIEW and LATERAL VIEW OUTER clauses with column aliases.
    let sql = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS j, P LATERAL VIEW OUTER explode(a) t AS a, b WHERE a = 1";
    hive().verified_stmt(sql);
}
#[test]
fn sort_by() {
    // SORT BY on a single column round-trips unchanged.
    let sql = "SELECT * FROM db.table SORT BY a";
    hive().verified_stmt(sql);
}
#[test]
fn rename_table() {
    // ALTER TABLE ... RENAME TO with a fully-qualified target name.
    let sql = "ALTER TABLE db.table_name RENAME TO db.table_2";
    hive().verified_stmt(sql);
}
#[test]
fn map_access() {
    // Bracketed map/array access on a qualified column.
    // (Local was previously misnamed `rename`.)
    let sql = "SELECT a.b[\"asdf\"] FROM db.table WHERE a = 2";
    hive().verified_stmt(sql);
}
#[test]
fn from_cte() {
    // Hive allows the FROM clause to precede INSERT when combined with a CTE.
    // Fixed: removed a leftover debug `println!` around `verified_stmt` and
    // renamed the misleading local `rename` — the assertion inside
    // `verified_stmt` is the actual check; printing the statement added noise.
    let sql =
        "WITH cte AS (SELECT * FROM a.b) FROM cte INSERT INTO TABLE a.b PARTITION (a) SELECT *";
    hive().verified_stmt(sql);
}
/// Builds the test harness used by every test above, restricted to the
/// Hive dialect only, so `verified_stmt` / `one_statement_parses_to`
/// exercise `HiveDialect` parsing exclusively.
fn hive() -> TestedDialects {
    TestedDialects {
        dialects: vec![Box::new(HiveDialect {})],
    }
}

View file

@ -364,8 +364,9 @@ fn parse_set() {
stmt,
Statement::SetVariable {
local: false,
hivevar: false,
variable: "a".into(),
value: SetVariableValue::Ident("b".into()),
value: vec![SetVariableValue::Ident("b".into())],
}
);
@ -374,8 +375,11 @@ fn parse_set() {
stmt,
Statement::SetVariable {
local: false,
hivevar: false,
variable: "a".into(),
value: SetVariableValue::Literal(Value::SingleQuotedString("b".into())),
value: vec![SetVariableValue::Literal(Value::SingleQuotedString(
"b".into()
))],
}
);
@ -384,8 +388,9 @@ fn parse_set() {
stmt,
Statement::SetVariable {
local: false,
hivevar: false,
variable: "a".into(),
value: SetVariableValue::Literal(number("0")),
value: vec![SetVariableValue::Literal(number("0"))],
}
);
@ -394,8 +399,9 @@ fn parse_set() {
stmt,
Statement::SetVariable {
local: false,
hivevar: false,
variable: "a".into(),
value: SetVariableValue::Ident("DEFAULT".into()),
value: vec![SetVariableValue::Ident("DEFAULT".into())],
}
);
@ -404,8 +410,9 @@ fn parse_set() {
stmt,
Statement::SetVariable {
local: true,
hivevar: false,
variable: "a".into(),
value: SetVariableValue::Ident("b".into()),
value: vec![SetVariableValue::Ident("b".into())],
}
);