PostgreSQL: GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY and GENERATED ALWAYS AS ( generation_expr ) support (#832)

* GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [ ( sequence_options ) ] basic implementation - tests are failing.

* PostgreSQL GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [ ( sequence_options ) ] and GENERATED ALWAYS AS ( generation_expr ) STORED implementation.
This commit is contained in:
sam 2023-03-16 15:24:00 +05:30 committed by GitHub
parent 4ff3aeb040
commit a8a8e65b7c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 350 additions and 57 deletions

View file

@ -24,7 +24,9 @@ use serde::{Deserialize, Serialize};
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::value::escape_single_quote_string;
use crate::ast::{display_comma_separated, display_separated, DataType, Expr, Ident, ObjectName};
use crate::ast::{
display_comma_separated, display_separated, DataType, Expr, Ident, ObjectName, SequenceOptions,
};
use crate::tokenizer::Token;
/// An `ALTER TABLE` (`Statement::AlterTable`) operation
@ -575,6 +577,13 @@ pub enum ColumnOption {
CharacterSet(ObjectName),
Comment(String),
OnUpdate(Expr),
/// `Generated`s are modifiers that follow a column definition in a `CREATE
/// TABLE` statement.
Generated {
generated_as: GeneratedAs,
sequence_options: Option<Vec<SequenceOptions>>,
generation_expr: Option<Expr>,
},
}
impl fmt::Display for ColumnOption {
@ -610,10 +619,63 @@ impl fmt::Display for ColumnOption {
CharacterSet(n) => write!(f, "CHARACTER SET {n}"),
Comment(v) => write!(f, "COMMENT '{}'", escape_single_quote_string(v)),
OnUpdate(expr) => write!(f, "ON UPDATE {expr}"),
Generated {
generated_as,
sequence_options,
generation_expr,
} => match generated_as {
GeneratedAs::Always => {
write!(f, "GENERATED ALWAYS AS IDENTITY")?;
if sequence_options.is_some() {
let so = sequence_options.as_ref().unwrap();
if !so.is_empty() {
write!(f, " (")?;
}
for sequence_option in so {
write!(f, "{sequence_option}")?;
}
if !so.is_empty() {
write!(f, " )")?;
}
}
Ok(())
}
GeneratedAs::ByDefault => {
write!(f, "GENERATED BY DEFAULT AS IDENTITY")?;
if sequence_options.is_some() {
let so = sequence_options.as_ref().unwrap();
if !so.is_empty() {
write!(f, " (")?;
}
for sequence_option in so {
write!(f, "{sequence_option}")?;
}
if !so.is_empty() {
write!(f, " )")?;
}
}
Ok(())
}
GeneratedAs::ExpStored => {
let expr = generation_expr.as_ref().unwrap();
write!(f, "GENERATED ALWAYS AS ({expr}) STORED")
}
},
}
}
}
/// Selects which form of `GENERATED ...` clause a `ColumnOption::Generated`
/// column option represents.
///
/// `ExpStored` is PostgreSQL-specific.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum GeneratedAs {
/// Displayed as `GENERATED ALWAYS AS IDENTITY`, optionally followed by
/// parenthesized sequence options.
Always,
/// Displayed as `GENERATED BY DEFAULT AS IDENTITY`, optionally followed by
/// parenthesized sequence options.
ByDefault,
/// Displayed as `GENERATED ALWAYS AS (<generation_expr>) STORED`.
ExpStored,
}
fn display_constraint_name(name: &'_ Option<Ident>) -> impl fmt::Display + '_ {
struct ConstraintName<'a>(&'a Option<Ident>);
impl<'a> fmt::Display for ConstraintName<'a> {

View file

@ -30,7 +30,7 @@ pub use self::data_type::{
};
pub use self::ddl::{
AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption,
ColumnOptionDef, IndexType, KeyOrIndexDisplay, ReferentialAction, TableConstraint,
ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, ReferentialAction, TableConstraint,
};
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{

View file

@ -17,13 +17,10 @@ pub struct AnsiDialect {}
impl Dialect for AnsiDialect {
fn is_identifier_start(&self, ch: char) -> bool {
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch)
ch.is_ascii_lowercase() || ch.is_ascii_uppercase()
}
fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
|| ch == '_'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_'
}
}

View file

@ -22,13 +22,13 @@ impl Dialect for BigQueryDialect {
}
fn is_identifier_start(&self, ch: char) -> bool {
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
}
fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '_'
|| ch == '-'
}

View file

@ -18,10 +18,10 @@ pub struct ClickHouseDialect {}
impl Dialect for ClickHouseDialect {
fn is_identifier_start(&self, ch: char) -> bool {
// See https://clickhouse.com/docs/en/sql-reference/syntax/#syntax-identifiers
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
}
fn is_identifier_part(&self, ch: char) -> bool {
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
self.is_identifier_start(ch) || ch.is_ascii_digit()
}
}

View file

@ -17,17 +17,13 @@ pub struct GenericDialect;
impl Dialect for GenericDialect {
fn is_identifier_start(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ch == '_'
|| ch == '#'
|| ch == '@'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' || ch == '#' || ch == '@'
}
fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '@'
|| ch == '$'
|| ch == '#'

View file

@ -21,16 +21,13 @@ impl Dialect for HiveDialect {
}
fn is_identifier_start(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
|| ch == '$'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '$'
}
fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '_'
|| ch == '$'
|| ch == '{'

View file

@ -23,17 +23,13 @@ impl Dialect for MsSqlDialect {
fn is_identifier_start(&self, ch: char) -> bool {
// See https://docs.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers?view=sql-server-2017#rules-for-regular-identifiers
// We don't support non-latin "letters" currently.
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ch == '_'
|| ch == '#'
|| ch == '@'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' || ch == '#' || ch == '@'
}
fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '@'
|| ch == '$'
|| ch == '#'

View file

@ -20,8 +20,8 @@ impl Dialect for MySqlDialect {
// See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
// We don't yet support identifiers beginning with numbers, as that
// makes it hard to distinguish numeric literals.
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch == '_'
|| ch == '$'
|| ch == '@'
@ -29,7 +29,7 @@ impl Dialect for MySqlDialect {
}
fn is_identifier_part(&self, ch: char) -> bool {
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
self.is_identifier_start(ch) || ch.is_ascii_digit()
}
fn is_delimited_identifier_start(&self, ch: char) -> bool {

View file

@ -24,13 +24,13 @@ impl Dialect for PostgreSqlDialect {
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
// We don't yet support identifiers beginning with "letters with
// diacritical marks and non-Latin letters"
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
}
fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '$'
|| ch == '_'
}

View file

@ -18,13 +18,13 @@ pub struct SnowflakeDialect;
impl Dialect for SnowflakeDialect {
// see https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html
fn is_identifier_start(&self, ch: char) -> bool {
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
}
fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '$'
|| ch == '_'
}

View file

@ -28,15 +28,15 @@ impl Dialect for SQLiteDialect {
fn is_identifier_start(&self, ch: char) -> bool {
// See https://www.sqlite.org/draft/tokenreq.html
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch == '_'
|| ch == '$'
|| ('\u{007f}'..='\u{ffff}').contains(&ch)
}
fn is_identifier_part(&self, ch: char) -> bool {
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
self.is_identifier_start(ch) || ch.is_ascii_digit()
}
fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {

View file

@ -77,6 +77,7 @@ define_keywords!(
ALL,
ALLOCATE,
ALTER,
ALWAYS,
ANALYZE,
AND,
ANTI,
@ -270,6 +271,7 @@ define_keywords!(
FUNCTION,
FUNCTIONS,
FUSION,
GENERATED,
GET,
GLOBAL,
GRANT,

View file

@ -3567,6 +3567,55 @@ impl<'a> Parser<'a> {
{
let expr = self.parse_expr()?;
Ok(Some(ColumnOption::OnUpdate(expr)))
} else if self.parse_keyword(Keyword::GENERATED) {
self.parse_optional_column_option_generated()
} else {
Ok(None)
}
}
fn parse_optional_column_option_generated(
&mut self,
) -> Result<Option<ColumnOption>, ParserError> {
if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS, Keyword::IDENTITY]) {
let mut sequence_options = vec![];
if self.expect_token(&Token::LParen).is_ok() {
sequence_options = self.parse_create_sequence_options()?;
self.expect_token(&Token::RParen)?;
}
Ok(Some(ColumnOption::Generated {
generated_as: GeneratedAs::Always,
sequence_options: Some(sequence_options),
generation_expr: None,
}))
} else if self.parse_keywords(&[
Keyword::BY,
Keyword::DEFAULT,
Keyword::AS,
Keyword::IDENTITY,
]) {
let mut sequence_options = vec![];
if self.expect_token(&Token::LParen).is_ok() {
sequence_options = self.parse_create_sequence_options()?;
self.expect_token(&Token::RParen)?;
}
Ok(Some(ColumnOption::Generated {
generated_as: GeneratedAs::ByDefault,
sequence_options: Some(sequence_options),
generation_expr: None,
}))
} else if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS]) {
if self.expect_token(&Token::LParen).is_ok() {
let expr = self.parse_expr()?;
self.expect_token(&Token::RParen)?;
let _ = self.parse_keywords(&[Keyword::STORED]);
Ok(Some(ColumnOption::Generated {
generated_as: GeneratedAs::ExpStored,
sequence_options: None,
generation_expr: Some(expr),
}))
} else {
Ok(None)
}
} else {
Ok(None)
}

View file

@ -596,7 +596,7 @@ impl<'a> Tokenizer<'a> {
let word = self.tokenize_word(ch, chars);
// TODO: implement parsing of exponent here
if word.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
if word.chars().all(|x| x.is_ascii_digit() || x == '.') {
let mut inner_state = State {
peekable: word.chars().peekable(),
line: 0,

View file

@ -126,13 +126,13 @@ fn custom_statement_parser() -> Result<(), ParserError> {
}
fn is_identifier_start(ch: char) -> bool {
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
}
fn is_identifier_part(ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '$'
|| ch == '_'
}

View file

@ -22,6 +22,202 @@ use sqlparser::ast::*;
use sqlparser::dialect::{GenericDialect, PostgreSqlDialect};
use sqlparser::parser::ParserError;
/// Round-trip checks for PostgreSQL `GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY`
/// column options (with and without sequence options) and for
/// `GENERATED ALWAYS AS (expr) STORED` columns in `CREATE TABLE`.
///
/// Each case passes lower-case, loosely formatted SQL plus the expected
/// canonical text to `one_statement_parses_to`.
// NOTE(review): `one_statement_parses_to` is defined outside this view; it
// presumably asserts that the input parses to one statement whose display
// form equals the second argument — confirm against the test utilities.
#[test]
fn parse_create_table_generated_always_as_identity() {
// With primary key
let sql = "CREATE TABLE table2 (
column21 bigint primary key generated always as identity ,
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column21 BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY, \
column30 TEXT)",
);
// Same shape, BY DEFAULT instead of ALWAYS.
let sql = "CREATE TABLE table2 (
column21 bigint primary key generated by default as identity ,
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column21 BIGINT PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, \
column30 TEXT)",
);
// Without primary key
let sql = "CREATE TABLE table2 (
column22 bigint generated always as identity ,
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column22 BIGINT GENERATED ALWAYS AS IDENTITY, \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column22 bigint generated by default as identity ,
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column22 BIGINT GENERATED BY DEFAULT AS IDENTITY, \
column30 TEXT)",
);
// BY DEFAULT with explicit sequence options. The first case carries the
// full list ending in NO CYCLE, the second ends in CYCLE, and the
// following cases drop the trailing option one at a time down to a lone
// INCREMENT.
let sql = "CREATE TABLE table2 (
column23 bigint generated by default as identity ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 CACHE 2 NO CYCLE ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column23 BIGINT GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 CACHE 2 NO CYCLE ), \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column24 bigint generated by default as identity ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 CACHE 2 CYCLE ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column24 BIGINT GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 CACHE 2 CYCLE ), \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column25 bigint generated by default as identity ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 CACHE 2 ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column25 BIGINT GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 CACHE 2 ), \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column26 bigint generated by default as identity ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column26 BIGINT GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 ), \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column27 bigint generated by default as identity ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column27 BIGINT GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 ), \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column28 bigint generated by default as identity ( INCREMENT 1 MINVALUE 1 ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column28 BIGINT GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 MINVALUE 1 ), \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column29 bigint generated by default as identity ( INCREMENT 1 ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column29 BIGINT GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 ), \
column30 TEXT)",
);
// ALWAYS variants: the bare form (a repeat of the column22 case above),
// then the same series of sequence-option lists used for BY DEFAULT.
let sql = "CREATE TABLE table2 (
column22 bigint generated always as identity ,
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column22 BIGINT GENERATED ALWAYS AS IDENTITY, \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column23 bigint generated always as identity ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 CACHE 2 NO CYCLE ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column23 BIGINT GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 CACHE 2 NO CYCLE ), \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column24 bigint generated always as identity ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 CACHE 2 CYCLE ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column24 BIGINT GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 CACHE 2 CYCLE ), \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column25 bigint generated always as identity ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 CACHE 2 ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column25 BIGINT GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 CACHE 2 ), \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column26 bigint generated always as identity ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column26 BIGINT GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 START WITH 10 ), \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column27 bigint generated always as identity ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column27 BIGINT GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 MINVALUE 1 MAXVALUE 20 ), \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column28 bigint generated always as identity ( INCREMENT 1 MINVALUE 1 ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column28 BIGINT GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 MINVALUE 1 ), \
column30 TEXT)",
);
let sql = "CREATE TABLE table2 (
column29 bigint generated always as identity ( INCREMENT 1 ),
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
column29 BIGINT GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 ), \
column30 TEXT)",
);
// Generated (computed) column: GENERATED ALWAYS AS (expr) STORED.
let sql = "CREATE TABLE table2 (
priceInDollar numeric,
princeInPound numeric GENERATED ALWAYS AS (priceInDollar * 0.22) STORED,
column30 text );";
pg().one_statement_parses_to(
sql,
"CREATE TABLE table2 (\
priceInDollar NUMERIC, \
princeInPound NUMERIC GENERATED ALWAYS AS (priceInDollar * 0.22) STORED, \
column30 TEXT)",
);
}
#[test]
fn parse_create_sequence() {
// SimpleLogger::new().init().unwrap();
@ -1408,12 +1604,10 @@ fn parse_pg_regex_match_ops() {
fn parse_array_index_expr() {
#[cfg(feature = "bigdecimal")]
let num: Vec<Expr> = (0..=10)
.into_iter()
.map(|s| Expr::Value(Value::Number(bigdecimal::BigDecimal::from(s), false)))
.collect();
#[cfg(not(feature = "bigdecimal"))]
let num: Vec<Expr> = (0..=10)
.into_iter()
.map(|s| Expr::Value(Value::Number(s.to_string(), false)))
.collect();